Compare commits
12 commits: 48b07c0b67...e7ccbe37ca

Commits (SHA1):
e7ccbe37ca
956df2043b
9fae048a5a
eb80a30c42
22847ec78a
3cb8e6111a
21cd44ec04
c9a3baab5d
380cfcaeed
44d658bbc5
a1c5b5c911
302509d84d

80 changed files with 1447 additions and 1207 deletions
@@ -1,11 +1,11 @@
 # Development

-* **Windows**: [Windows Building Guide](./docs/build/Windows.md)
-* **Linux**: [Linux Building Guide](./docs/build/Linux.md)
-* **Android**: [Android Building Guide](./docs/build/Android.md)
-* **Solaris**: [Solaris Building Guide](./docs/build/Solaris.md)
-* **FreeBSD**: [FreeBSD Building Guide](./docs/build/FreeBSD.md)
-* **macOS**: [macOS Building Guide](./docs/build/macOS.md)
+* **Windows**: [Windows Building Guide](./build/Windows.md)
+* **Linux**: [Linux Building Guide](./build/Linux.md)
+* **Android**: [Android Building Guide](./build/Android.md)
+* **Solaris**: [Solaris Building Guide](./build/Solaris.md)
+* **FreeBSD**: [FreeBSD Building Guide](./build/FreeBSD.md)
+* **macOS**: [macOS Building Guide](./build/macOS.md)

 # CPM
@@ -104,7 +104,7 @@ Then type `target remote localhost:1234` and type `c` (for continue) - and then

 ### gdb cheatsheet

-- `mo <cmd>`: Monitor commands, `get info`, `get fastmem` and `get mappings` are available.
+- `mo <cmd>`: Monitor commands, `get info`, `get fastmem` and `get mappings` are available. Type `mo help` for more info.
 - `detach`: Detach from remote (i.e. restarting the emulator).
 - `c`: Continue
 - `p <expr>`: Print variable, `p/x <expr>` for hexadecimal.
|
11
externals/CMakeLists.txt
vendored
11
externals/CMakeLists.txt
vendored
|
@ -151,6 +151,17 @@ if (ENABLE_WEB_SERVICE)
|
|||
)
|
||||
endif()
|
||||
|
||||
# unordered_dense
|
||||
AddPackage(
|
||||
NAME unordered_dense
|
||||
REPO "Lizzie841/unordered_dense"
|
||||
SHA e59d30b7b1
|
||||
HASH 71eff7bd9ba4b9226967bacd56a8ff000946f8813167cb5664bb01e96fb79e4e220684d824fe9c59c4d1cc98c606f13aff05b7940a1ed8ab3c95d6974ee34fa0
|
||||
FIND_PACKAGE_ARGUMENTS "CONFIG"
|
||||
OPTIONS
|
||||
"UNORDERED_DENSE_INSTALL OFF"
|
||||
)
|
||||
|
||||
# FFMpeg
|
||||
if (YUZU_USE_BUNDLED_FFMPEG)
|
||||
add_subdirectory(ffmpeg)
|
||||
|
|
|
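For orientation, a minimal sketch of how the vendored ankerl::unordered_dense map added above is consumed from C++ once a target links against it; the key/value types here are illustrative, not the project's:

// Usage sketch; assumes only the upstream ankerl::unordered_dense API.
#include <ankerl/unordered_dense.h>
#include <cstdint>
#include <cstdio>

int main() {
    // Open-addressing hash map with contiguous storage and cheap iteration.
    ankerl::unordered_dense::map<std::uint64_t, std::uint64_t> vaddr_to_paddr;
    vaddr_to_paddr.insert_or_assign(0x1000, 0x8000); // insert or overwrite
    if (auto it = vaddr_to_paddr.find(0x1000); it != vaddr_to_paddr.end()) {
        std::printf("paddr = %#llx\n", static_cast<unsigned long long>(it->second));
    }
}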
@@ -262,13 +262,13 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
 endif()

 if (BOOST_NO_HEADERS)
     target_link_libraries(common PUBLIC Boost::algorithm Boost::icl Boost::pool)
 else()
     target_link_libraries(common PUBLIC Boost::headers)
 endif()

 if (lz4_ADDED)
     target_include_directories(common PRIVATE ${lz4_SOURCE_DIR}/lib)
 endif()

 target_link_libraries(common PUBLIC fmt::fmt stb::headers Threads::Threads)
@@ -280,6 +280,11 @@ else()
     target_link_libraries(common PRIVATE zstd)
 endif()

+if (TARGET unordered_dense::unordered_dense)
+    # weird quirk of system installs
+    target_link_libraries(common PUBLIC unordered_dense::unordered_dense)
+endif()
+
 if(ANDROID)
     # For ASharedMemory_create
     target_link_libraries(common PRIVATE android)
@@ -1,3 +1,5 @@
+// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
 // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
@@ -34,68 +36,60 @@ HeapTracker::~HeapTracker() = default;

 void HeapTracker::Map(size_t virtual_offset, size_t host_offset, size_t length,
                       MemoryPermission perm, bool is_separate_heap) {
+    bool rebuild_required = false;
     // When mapping other memory, map pages immediately.
     if (!is_separate_heap) {
         m_buffer.Map(virtual_offset, host_offset, length, perm, false);
         return;
     }

     {
-        // We are mapping part of a separate heap.
+        // We are mapping part of a separate heap and insert into mappings.
         std::scoped_lock lk{m_lock};

-        auto* const map = new SeparateHeapMap{
-            .vaddr = virtual_offset,
-            .paddr = host_offset,
-            .size = length,
-            .tick = m_tick++,
-            .perm = perm,
-            .is_resident = false,
-        };
-
-        // Insert into mappings.
-        m_map_count++;
-        m_mappings.insert(*map);
+        m_map_count++;
+        const auto it = m_mappings.insert_or_assign(virtual_offset, SeparateHeapMap{
+            .paddr = host_offset,
+            .size = length,
+            .tick = m_tick++,
+            .perm = perm,
+            .is_resident = false,
+        });
+        // Update tick before possible rebuild.
+        it.first->second.tick = m_tick++;
+        // Check if we need to rebuild.
+        if (m_resident_map_count >= m_max_resident_map_count)
+            rebuild_required = true;
+        // Map the area.
+        m_buffer.Map(it.first->first, it.first->second.paddr, it.first->second.size, it.first->second.perm, false);
+        // This map is now resident.
+        it.first->second.is_resident = true;
+        m_resident_map_count++;
+        m_resident_mappings.insert(*it.first);
     }

-    // Finally, map.
-    this->DeferredMapSeparateHeap(virtual_offset);
+    // A rebuild was required, so perform it now.
+    if (rebuild_required)
+        this->RebuildSeparateHeapAddressSpace();
 }

 void HeapTracker::Unmap(size_t virtual_offset, size_t size, bool is_separate_heap) {
     // If this is a separate heap...
     if (is_separate_heap) {
         std::scoped_lock lk{m_lock};

-        const SeparateHeapMap key{
-            .vaddr = virtual_offset,
-        };
-
         // Split at the boundaries of the region we are removing.
         this->SplitHeapMapLocked(virtual_offset);
         this->SplitHeapMapLocked(virtual_offset + size);

         // Erase all mappings in range.
-        auto it = m_mappings.find(key);
-        while (it != m_mappings.end() && it->vaddr < virtual_offset + size) {
-            // Get underlying item.
-            auto* const item = std::addressof(*it);
-
+        auto it = m_mappings.find(virtual_offset);
+        while (it != m_mappings.end() && it->first < virtual_offset + size) {
             // If resident, erase from resident map.
-            if (item->is_resident) {
+            if (it->second.is_resident) {
                 ASSERT(--m_resident_map_count >= 0);
-                m_resident_mappings.erase(m_resident_mappings.iterator_to(*item));
+                m_resident_mappings.erase(m_resident_mappings.find(it->first));
             }

             // Erase from map.
             ASSERT(--m_map_count >= 0);
             it = m_mappings.erase(it);
-
-            // Free the item.
-            delete item;
         }
     }

     // Unmap pages.
     m_buffer.Unmap(virtual_offset, size, false);
 }
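One detail worth calling out in the rewritten Map above: insert_or_assign returns a std::pair<iterator, bool>, which is why the new code reaches through it.first->second. A standalone sketch of the same pattern using std::unordered_map, which shares this interface:

#include <cstdio>
#include <unordered_map>

int main() {
    std::unordered_map<unsigned long long, int> mappings;
    // Returns {iterator, inserted}; the iterator points at the {key, value} pair.
    auto it = mappings.insert_or_assign(0x1000ULL, 42);
    std::printf("key=%#llx value=%d inserted=%d\n",
                it.first->first, it.first->second, static_cast<int>(it.second));
}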
@@ -117,110 +111,51 @@ void HeapTracker::Protect(size_t virtual_offset, size_t size, MemoryPermission p

         {
             std::scoped_lock lk2{m_lock};

-            const SeparateHeapMap key{
-                .vaddr = next,
-            };
-
             // Try to get the next mapping corresponding to this address.
-            const auto it = m_mappings.nfind(key);
-
+            const auto it = m_mappings.find(next);
             if (it == m_mappings.end()) {
                 // There are no separate heap mappings remaining.
                 next = end;
                 should_protect = true;
-            } else if (it->vaddr == cur) {
+            } else if (it->first == cur) {
                 // We are in range.
                 // Update permission bits.
-                it->perm = perm;
+                it->second.perm = perm;

                 // Determine next address and whether we should protect.
-                next = cur + it->size;
-                should_protect = it->is_resident;
+                next = cur + it->second.size;
+                should_protect = it->second.is_resident;
             } else /* if (it->vaddr > cur) */ {
                 // We weren't in range, but there is a block coming up that will be.
-                next = it->vaddr;
+                next = it->first;
                 should_protect = true;
             }
         }

         // Clamp to end.
         next = std::min(next, end);

         // Reprotect, if we need to.
-        if (should_protect) {
+        if (should_protect)
             m_buffer.Protect(cur, next - cur, perm);
-        }

         // Advance.
         cur = next;
     }
 }

-bool HeapTracker::DeferredMapSeparateHeap(u8* fault_address) {
-    if (m_buffer.IsInVirtualRange(fault_address)) {
-        return this->DeferredMapSeparateHeap(fault_address - m_buffer.VirtualBasePointer());
-    }
-
-    return false;
-}
-
-bool HeapTracker::DeferredMapSeparateHeap(size_t virtual_offset) {
-    bool rebuild_required = false;
-
-    {
-        std::scoped_lock lk{m_lock};
-
-        // Check to ensure this was a non-resident separate heap mapping.
-        const auto it = this->GetNearestHeapMapLocked(virtual_offset);
-        if (it == m_mappings.end() || it->is_resident) {
-            return false;
-        }
-
-        // Update tick before possible rebuild.
-        it->tick = m_tick++;
-
-        // Check if we need to rebuild.
-        if (m_resident_map_count > m_max_resident_map_count) {
-            rebuild_required = true;
-        }
-
-        // Map the area.
-        m_buffer.Map(it->vaddr, it->paddr, it->size, it->perm, false);
-
-        // This map is now resident.
-        it->is_resident = true;
-        m_resident_map_count++;
-        m_resident_mappings.insert(*it);
-    }
-
-    if (rebuild_required) {
-        // A rebuild was required, so perform it now.
-        this->RebuildSeparateHeapAddressSpace();
-    }
-
-    return true;
-}
-
 void HeapTracker::RebuildSeparateHeapAddressSpace() {
     std::scoped_lock lk{m_rebuild_lock, m_lock};

     ASSERT(!m_resident_mappings.empty());

     // Dump half of the mappings.
     //
     // Despite being worse in theory, this has proven to be better in practice than more
     // regularly dumping a smaller amount, because it significantly reduces average case
     // lock contention.
-    const size_t desired_count = std::min(m_resident_map_count, m_max_resident_map_count) / 2;
-    const size_t evict_count = m_resident_map_count - desired_count;
+    std::size_t const desired_count = std::min(m_resident_map_count, m_max_resident_map_count) / 2;
+    std::size_t const evict_count = m_resident_map_count - desired_count;
     auto it = m_resident_mappings.begin();

-    for (size_t i = 0; i < evict_count && it != m_resident_mappings.end(); i++) {
+    for (std::size_t i = 0; i < evict_count && it != m_resident_mappings.end(); i++) {
         // Unmark and unmap.
-        it->is_resident = false;
-        m_buffer.Unmap(it->vaddr, it->size, false);
-
+        it->second.is_resident = false;
+        m_buffer.Unmap(it->first, it->second.size, false);
         // Advance.
         ASSERT(--m_resident_map_count >= 0);
         it = m_resident_mappings.erase(it);
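A self-contained sketch of the eviction policy described in the "dump half of the mappings" comment above: evict the oldest-ticked entries in one batch until only half the cap remains, so the lock is taken rarely. Types and names here are illustrative, not the project's:

#include <algorithm>
#include <cstdint>
#include <map>

struct Mapping { std::uint64_t vaddr; std::uint64_t size; };

// Resident mappings keyed by tick: begin() is the least recently touched.
using ByTick = std::multimap<std::uint64_t, Mapping>;

void EvictHalf(ByTick& resident, std::size_t max_resident) {
    const std::size_t desired = std::min(resident.size(), max_resident) / 2;
    while (resident.size() > desired) {
        // Unmap resident.begin()->second here, then drop the bookkeeping entry.
        resident.erase(resident.begin());
    }
}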
@@ -229,53 +164,32 @@ void HeapTracker::RebuildSeparateHeapAddressSpace() {

 void HeapTracker::SplitHeapMap(VAddr offset, size_t size) {
     std::scoped_lock lk{m_lock};

     this->SplitHeapMapLocked(offset);
     this->SplitHeapMapLocked(offset + size);
 }

 void HeapTracker::SplitHeapMapLocked(VAddr offset) {
-    const auto it = this->GetNearestHeapMapLocked(offset);
-    if (it == m_mappings.end() || it->vaddr == offset) {
-        // Not contained or no split required.
-        return;
-    }
-
-    // Cache the original values.
-    auto* const left = std::addressof(*it);
-    const size_t orig_size = left->size;
-
-    // Adjust the left map.
-    const size_t left_size = offset - left->vaddr;
-    left->size = left_size;
-
-    // Create the new right map.
-    auto* const right = new SeparateHeapMap{
-        .vaddr = left->vaddr + left_size,
-        .paddr = left->paddr + left_size,
-        .size = orig_size - left_size,
-        .tick = left->tick,
-        .perm = left->perm,
-        .is_resident = left->is_resident,
-    };
-
-    // Insert the new right map.
-    m_map_count++;
-    m_mappings.insert(*right);
-
-    // If resident, also insert into resident map.
-    if (right->is_resident) {
-        m_resident_map_count++;
-        m_resident_mappings.insert(*right);
-    }
-}
-
-HeapTracker::AddrTree::iterator HeapTracker::GetNearestHeapMapLocked(VAddr offset) {
-    const SeparateHeapMap key{
-        .vaddr = offset,
-    };
-
-    return m_mappings.find(key);
-}
+    auto it = this->GetNearestHeapMapLocked(offset);
+    if (it != m_mappings.end() && it->first != offset) {
+        // Adjust left iterator
+        auto const orig_size = it->second.size;
+        auto const left_size = offset - it->first;
+        it->second.size = left_size;
+        // Insert the new right map.
+        auto const right = SeparateHeapMap{
+            .paddr = it->second.paddr + left_size,
+            .size = orig_size - left_size,
+            .tick = it->second.tick,
+            .perm = it->second.perm,
+            .is_resident = it->second.is_resident,
+        };
+        m_map_count++;
+        auto rit = m_mappings.insert_or_assign(it->first + left_size, right);
+        if (rit.first->second.is_resident) {
+            m_resident_map_count++;
+            m_resident_mappings.insert(*rit.first);
+        }
+    }
+}

 } // namespace Common
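A self-contained sketch of the split operation implemented above: cut one mapping into left and right halves at offset, keeping physical addresses in sync. The container and types are illustrative, not the project's:

#include <cstdint>
#include <map>

struct Region { std::uint64_t paddr; std::uint64_t size; };
using RegionMap = std::map<std::uint64_t, Region>;  // keyed by vaddr

// Split the region containing `offset` so that `offset` becomes a boundary.
void SplitAt(RegionMap& regions, std::uint64_t offset) {
    auto it = regions.upper_bound(offset);
    if (it == regions.begin()) return;
    --it;  // region starting at or before `offset`
    const std::uint64_t vaddr = it->first;
    Region& region = it->second;
    if (vaddr == offset || offset >= vaddr + region.size) return;  // already a boundary, or outside
    const std::uint64_t left_size = offset - vaddr;
    const Region right{region.paddr + left_size, region.size - left_size};
    region.size = left_size;         // shrink the left half in place
    regions.emplace(offset, right);  // insert the right half at the cut
}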
@@ -1,93 +1,55 @@
+// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
 // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later

 #pragma once

 #include <atomic>
 #include <mutex>
 #include <set>
 #include <shared_mutex>

+#include <ankerl/unordered_dense.h>
 #include "common/host_memory.h"
-#include "common/intrusive_red_black_tree.h"

 namespace Common {

 struct SeparateHeapMap {
-    Common::IntrusiveRedBlackTreeNode addr_node{};
-    Common::IntrusiveRedBlackTreeNode tick_node{};
-    VAddr vaddr{};
-    PAddr paddr{};
-    size_t size{};
-    size_t tick{};
-    MemoryPermission perm{};
-    bool is_resident{};
-};
-
-struct SeparateHeapMapAddrComparator {
-    static constexpr int Compare(const SeparateHeapMap& lhs, const SeparateHeapMap& rhs) {
-        if (lhs.vaddr < rhs.vaddr) {
-            return -1;
-        } else if (lhs.vaddr <= (rhs.vaddr + rhs.size - 1)) {
-            return 0;
-        } else {
-            return 1;
-        }
-    }
-};
-
-struct SeparateHeapMapTickComparator {
-    static constexpr int Compare(const SeparateHeapMap& lhs, const SeparateHeapMap& rhs) {
-        if (lhs.tick < rhs.tick) {
-            return -1;
-        } else if (lhs.tick > rhs.tick) {
-            return 1;
-        } else {
-            return SeparateHeapMapAddrComparator::Compare(lhs, rhs);
-        }
-    }
-};
+    PAddr paddr{}; //8
+    std::size_t size{}; //8 (16)
+    std::size_t tick{}; //8 (24)
+    // 4 bits needed, sync with host_memory.h if needed
+    MemoryPermission perm : 4 = MemoryPermission::Read;
+    bool is_resident : 1 = false;
+};
+static_assert(sizeof(SeparateHeapMap) == 32); //half a cache line! good for coherency

 class HeapTracker {
 public:
     explicit HeapTracker(Common::HostMemory& buffer);
     ~HeapTracker();

-    void Map(size_t virtual_offset, size_t host_offset, size_t length, MemoryPermission perm,
-             bool is_separate_heap);
+    void Map(size_t virtual_offset, size_t host_offset, size_t length, MemoryPermission perm, bool is_separate_heap);
     void Unmap(size_t virtual_offset, size_t size, bool is_separate_heap);
     void Protect(size_t virtual_offset, size_t length, MemoryPermission perm);
-    u8* VirtualBasePointer() {
+    inline u8* VirtualBasePointer() noexcept {
         return m_buffer.VirtualBasePointer();
     }

-    bool DeferredMapSeparateHeap(u8* fault_address);
-    bool DeferredMapSeparateHeap(size_t virtual_offset);
-
 private:
-    using AddrTreeTraits =
-        Common::IntrusiveRedBlackTreeMemberTraitsDeferredAssert<&SeparateHeapMap::addr_node>;
-    using AddrTree = AddrTreeTraits::TreeType<SeparateHeapMapAddrComparator>;
-
-    using TickTreeTraits =
-        Common::IntrusiveRedBlackTreeMemberTraitsDeferredAssert<&SeparateHeapMap::tick_node>;
-    using TickTree = TickTreeTraits::TreeType<SeparateHeapMapTickComparator>;
-
-    AddrTree m_mappings{};
-    TickTree m_resident_mappings{};
+    // TODO: You may want to "fake-map" the first 2GB of 64-bit address space
+    // and dedicate it entirely to a recursive PTE mapping :)
+    // However Ankerl is way better than using an RB tree, in all senses
+    using AddrTree = ankerl::unordered_dense::map<VAddr, SeparateHeapMap>;
+    AddrTree m_mappings;
+    using TicksTree = ankerl::unordered_dense::map<VAddr, SeparateHeapMap>;
+    TicksTree m_resident_mappings;

 private:
     void SplitHeapMap(VAddr offset, size_t size);
     void SplitHeapMapLocked(VAddr offset);

-    AddrTree::iterator GetNearestHeapMapLocked(VAddr offset);
-
     void RebuildSeparateHeapAddressSpace();
+    inline HeapTracker::AddrTree::iterator GetNearestHeapMapLocked(VAddr offset) noexcept {
+        return m_mappings.find(offset);
+    }

 private:
     Common::HostMemory& m_buffer;
     const s64 m_max_resident_map_count;

     std::shared_mutex m_rebuild_lock{};
     std::mutex m_lock{};
     s64 m_map_count{};
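A small illustration of the packing used by the new SeparateHeapMap above: bitfields squeeze the permission and residency flags into one byte after three 8-byte fields, and a static_assert pins the total at 32 bytes. Field and enum names here are illustrative:

#include <cstdint>

enum class Perm : std::uint8_t { None = 0, Read = 1, Write = 2 };

struct PackedMap {
    std::uint64_t paddr{};       // 8 bytes
    std::uint64_t size{};        // 8 bytes (16 total)
    std::uint64_t tick{};        // 8 bytes (24 total)
    Perm perm : 4 = Perm::Read;  // flags share one byte...
    bool resident : 1 = false;   // ...keeping the struct at 32 bytes after padding
};

// Two entries per 64-byte cache line; one more 8-byte field would halve that.
static_assert(sizeof(PackedMap) == 32);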
@@ -47,6 +47,7 @@ constexpr std::bitset<32> BuildRegSet(std::initializer_list<Xbyak::Reg> regs) {
 constexpr inline std::bitset<32> ABI_ALL_GPRS(0x0000FFFF);
 constexpr inline std::bitset<32> ABI_ALL_XMMS(0xFFFF0000);

+constexpr inline Xbyak::Reg ABI_JIT_REG = Xbyak::util::rbx;
 #ifdef _WIN32

 // Microsoft x64 ABI
@@ -3,47 +3,9 @@
 #ifdef __linux__

-#include "common/signal_chain.h"
-
+//#include "common/signal_chain.h"
 #include "core/arm/dynarmic/arm_dynarmic.h"
-#include "core/hle/kernel/k_process.h"
-#include "core/memory.h"
-
-namespace Core {
-
-namespace {
-
-thread_local Core::Memory::Memory* g_current_memory{};
-std::once_flag g_registered{};
-struct sigaction g_old_segv {};
-
-void HandleSigSegv(int sig, siginfo_t* info, void* ctx) {
-    if (g_current_memory && g_current_memory->InvalidateSeparateHeap(info->si_addr)) {
-        return;
-    }
-
-    return g_old_segv.sa_sigaction(sig, info, ctx);
-}
-
-} // namespace
-
-ScopedJitExecution::ScopedJitExecution(Kernel::KProcess* process) {
-    g_current_memory = std::addressof(process->GetMemory());
-}
-
-ScopedJitExecution::~ScopedJitExecution() {
-    g_current_memory = nullptr;
-}
-
-void ScopedJitExecution::RegisterHandler() {
-    std::call_once(g_registered, [] {
-        struct sigaction sa {};
-        sa.sa_sigaction = &HandleSigSegv;
-        sa.sa_flags = SA_SIGINFO | SA_ONSTACK;
-        Common::SigAction(SIGSEGV, std::addressof(sa), std::addressof(g_old_segv));
-    });
-}
-
-} // namespace Core
+//#include "core/hle/kernel/k_process.h"
+//#include "core/memory.h"

 #endif
@@ -26,24 +26,4 @@ constexpr HaltReason TranslateHaltReason(Dynarmic::HaltReason hr) {
     return static_cast<HaltReason>(hr);
 }

-#ifdef __linux__
-
-class ScopedJitExecution {
-public:
-    explicit ScopedJitExecution(Kernel::KProcess* process);
-    ~ScopedJitExecution();
-    static void RegisterHandler();
-};
-
-#else
-
-class ScopedJitExecution {
-public:
-    explicit ScopedJitExecution(Kernel::KProcess* process) {}
-    ~ScopedJitExecution() {}
-    static void RegisterHandler() {}
-};
-
-#endif
-
 } // namespace Core
@@ -336,15 +336,11 @@ bool ArmDynarmic32::IsInThumbMode() const {
 }

 HaltReason ArmDynarmic32::RunThread(Kernel::KThread* thread) {
-    ScopedJitExecution sj(thread->GetOwnerProcess());
-
     m_jit->ClearExclusiveState();
     return TranslateHaltReason(m_jit->Run());
 }

 HaltReason ArmDynarmic32::StepThread(Kernel::KThread* thread) {
-    ScopedJitExecution sj(thread->GetOwnerProcess());
-
     m_jit->ClearExclusiveState();
     return TranslateHaltReason(m_jit->Step());
 }

@@ -386,7 +382,6 @@ ArmDynarmic32::ArmDynarmic32(System& system, bool uses_wall_clock, Kernel::KProc
     m_cp15(std::make_shared<DynarmicCP15>(*this)), m_core_index{core_index} {
     auto& page_table_impl = process->GetPageTable().GetBasePageTable().GetImpl();
     m_jit = MakeJit(&page_table_impl);
-    ScopedJitExecution::RegisterHandler();
 }

 ArmDynarmic32::~ArmDynarmic32() = default;
@@ -136,6 +136,7 @@ public:
         case Dynarmic::A64::Exception::SendEvent:
         case Dynarmic::A64::Exception::SendEventLocal:
         case Dynarmic::A64::Exception::Yield:
+            LOG_TRACE(Core_ARM, "ExceptionRaised(exception = {}, pc = {:08X}, code = {:08X})", static_cast<std::size_t>(exception), pc, m_memory.Read32(pc));
             return;
         case Dynarmic::A64::Exception::NoExecuteFault:
             LOG_CRITICAL(Core_ARM, "Cannot execute instruction at unmapped address {:#016x}", pc);

@@ -144,12 +145,10 @@ public:
         default:
             if (m_debugger_enabled) {
                 ReturnException(pc, InstructionBreakpoint);
                 return;
+            } else {
+                m_parent.LogBacktrace(m_process);
+                LOG_CRITICAL(Core_ARM, "ExceptionRaised(exception = {}, pc = {:08X}, code = {:08X})", static_cast<std::size_t>(exception), pc, m_memory.Read32(pc));
             }
-
-            m_parent.LogBacktrace(m_process);
-            LOG_CRITICAL(Core_ARM, "ExceptionRaised(exception = {}, pc = {:08X}, code = {:08X})",
-                         static_cast<std::size_t>(exception), pc, m_memory.Read32(pc));
         }
     }

@@ -367,15 +366,11 @@ std::shared_ptr<Dynarmic::A64::Jit> ArmDynarmic64::MakeJit(Common::PageTable* pa
 }

 HaltReason ArmDynarmic64::RunThread(Kernel::KThread* thread) {
-    ScopedJitExecution sj(thread->GetOwnerProcess());
-
     m_jit->ClearExclusiveState();
     return TranslateHaltReason(m_jit->Run());
 }

 HaltReason ArmDynarmic64::StepThread(Kernel::KThread* thread) {
-    ScopedJitExecution sj(thread->GetOwnerProcess());
-
     m_jit->ClearExclusiveState();
     return TranslateHaltReason(m_jit->Step());
 }

@@ -415,7 +410,6 @@ ArmDynarmic64::ArmDynarmic64(System& system, bool uses_wall_clock, Kernel::KProc
     auto& page_table = process->GetPageTable().GetBasePageTable();
     auto& page_table_impl = page_table.GetImpl();
     m_jit = MakeJit(&page_table_impl, page_table.GetAddressSpaceWidth());
-    ScopedJitExecution::RegisterHandler();
 }

 ArmDynarmic64::~ArmDynarmic64() = default;
@@ -554,32 +554,31 @@ void GDBStub::HandleVCont(std::string_view command, std::vector<DebuggerAction>&
     }
 }

-constexpr std::array<std::pair<const char*, Kernel::Svc::MemoryState>, 22> MemoryStateNames{{
-    {"----- Free ------", Kernel::Svc::MemoryState::Free},
-    {"Io               ", Kernel::Svc::MemoryState::Io},
-    {"Static           ", Kernel::Svc::MemoryState::Static},
-    {"Code             ", Kernel::Svc::MemoryState::Code},
-    {"CodeData         ", Kernel::Svc::MemoryState::CodeData},
-    {"Normal           ", Kernel::Svc::MemoryState::Normal},
-    {"Shared           ", Kernel::Svc::MemoryState::Shared},
-    {"AliasCode        ", Kernel::Svc::MemoryState::AliasCode},
-    {"AliasCodeData    ", Kernel::Svc::MemoryState::AliasCodeData},
-    {"Ipc              ", Kernel::Svc::MemoryState::Ipc},
-    {"Stack            ", Kernel::Svc::MemoryState::Stack},
-    {"ThreadLocal      ", Kernel::Svc::MemoryState::ThreadLocal},
-    {"Transferred      ", Kernel::Svc::MemoryState::Transferred},
-    {"SharedTransferred", Kernel::Svc::MemoryState::SharedTransferred},
-    {"SharedCode       ", Kernel::Svc::MemoryState::SharedCode},
-    {"Inaccessible     ", Kernel::Svc::MemoryState::Inaccessible},
-    {"NonSecureIpc     ", Kernel::Svc::MemoryState::NonSecureIpc},
-    {"NonDeviceIpc     ", Kernel::Svc::MemoryState::NonDeviceIpc},
-    {"Kernel           ", Kernel::Svc::MemoryState::Kernel},
-    {"GeneratedCode    ", Kernel::Svc::MemoryState::GeneratedCode},
-    {"CodeOut          ", Kernel::Svc::MemoryState::CodeOut},
-    {"Coverage         ", Kernel::Svc::MemoryState::Coverage},
-}};
-
 static constexpr const char* GetMemoryStateName(Kernel::Svc::MemoryState state) {
+    constexpr std::array<std::pair<const char*, Kernel::Svc::MemoryState>, 22> MemoryStateNames{{
+        {"----- Free ------", Kernel::Svc::MemoryState::Free},
+        {"Io               ", Kernel::Svc::MemoryState::Io},
+        {"Static           ", Kernel::Svc::MemoryState::Static},
+        {"Code             ", Kernel::Svc::MemoryState::Code},
+        {"CodeData         ", Kernel::Svc::MemoryState::CodeData},
+        {"Normal           ", Kernel::Svc::MemoryState::Normal},
+        {"Shared           ", Kernel::Svc::MemoryState::Shared},
+        {"AliasCode        ", Kernel::Svc::MemoryState::AliasCode},
+        {"AliasCodeData    ", Kernel::Svc::MemoryState::AliasCodeData},
+        {"Ipc              ", Kernel::Svc::MemoryState::Ipc},
+        {"Stack            ", Kernel::Svc::MemoryState::Stack},
+        {"ThreadLocal      ", Kernel::Svc::MemoryState::ThreadLocal},
+        {"Transferred      ", Kernel::Svc::MemoryState::Transferred},
+        {"SharedTransferred", Kernel::Svc::MemoryState::SharedTransferred},
+        {"SharedCode       ", Kernel::Svc::MemoryState::SharedCode},
+        {"Inaccessible     ", Kernel::Svc::MemoryState::Inaccessible},
+        {"NonSecureIpc     ", Kernel::Svc::MemoryState::NonSecureIpc},
+        {"NonDeviceIpc     ", Kernel::Svc::MemoryState::NonDeviceIpc},
+        {"Kernel           ", Kernel::Svc::MemoryState::Kernel},
+        {"GeneratedCode    ", Kernel::Svc::MemoryState::GeneratedCode},
+        {"CodeOut          ", Kernel::Svc::MemoryState::CodeOut},
+        {"Coverage         ", Kernel::Svc::MemoryState::Coverage},
+    }};
     for (size_t i = 0; i < MemoryStateNames.size(); i++) {
         if (std::get<1>(MemoryStateNames[i]) == state) {
             return std::get<0>(MemoryStateNames[i]);

@@ -611,13 +610,7 @@ void GDBStub::HandleRcmd(const std::vector<u8>& command) {

     auto* process = GetProcess();
     auto& page_table = process->GetPageTable();

-    const char* commands = "Commands:\n"
-                           " get fastmem\n"
-                           " get info\n"
-                           " get mappings\n";
-
-    if (command_str == "get fastmem") {
+    if (command_str == "fastmem" || command_str == "get fastmem") {
         if (Settings::IsFastmemEnabled()) {
             const auto& impl = page_table.GetImpl();
             const auto region = reinterpret_cast<uintptr_t>(impl.fastmem_arena);

@@ -630,7 +623,7 @@ void GDBStub::HandleRcmd(const std::vector<u8>& command) {
         } else {
             reply = "Fastmem is not enabled.\n";
         }
-    } else if (command_str == "get info") {
+    } else if (command_str == "info" || command_str == "get info") {
         auto modules = Core::FindModules(process);

         reply = fmt::format("Process: {:#x} ({})\n"

@@ -648,8 +641,7 @@ void GDBStub::HandleRcmd(const std::vector<u8>& command) {
             GetInteger(page_table.GetHeapRegionStart()),
             GetInteger(page_table.GetHeapRegionStart()) + page_table.GetHeapRegionSize() - 1,
             GetInteger(page_table.GetAliasCodeRegionStart()),
-            GetInteger(page_table.GetAliasCodeRegionStart()) + page_table.GetAliasCodeRegionSize() -
-                1,
+            GetInteger(page_table.GetAliasCodeRegionStart()) + page_table.GetAliasCodeRegionSize() - 1,
             GetInteger(page_table.GetStackRegionStart()),
             GetInteger(page_table.GetStackRegionStart()) + page_table.GetStackRegionSize() - 1);

@@ -657,7 +649,7 @@ void GDBStub::HandleRcmd(const std::vector<u8>& command) {
             reply += fmt::format(" {:#012x} - {:#012x} {}\n", vaddr,
                                  GetInteger(Core::GetModuleEnd(process, vaddr)), name);
         }
-    } else if (command_str == "get mappings") {
+    } else if (command_str == "mappings" || command_str == "get mappings") {
         reply = "Mappings:\n";
         VAddr cur_addr = 0;

@@ -675,15 +667,11 @@ void GDBStub::HandleRcmd(const std::vector<u8>& command) {
             std::numeric_limits<u64>::max()) {
             const char* state = GetMemoryStateName(svc_mem_info.state);
             const char* perm = GetMemoryPermissionString(svc_mem_info);

             const char l = True(svc_mem_info.attribute & MemoryAttribute::Locked) ? 'L' : '-';
-            const char i =
-                True(svc_mem_info.attribute & MemoryAttribute::IpcLocked) ? 'I' : '-';
-            const char d =
-                True(svc_mem_info.attribute & MemoryAttribute::DeviceShared) ? 'D' : '-';
+            const char i = True(svc_mem_info.attribute & MemoryAttribute::IpcLocked) ? 'I' : '-';
+            const char d = True(svc_mem_info.attribute & MemoryAttribute::DeviceShared) ? 'D' : '-';
             const char u = True(svc_mem_info.attribute & MemoryAttribute::Uncached) ? 'U' : '-';
-            const char p =
-                True(svc_mem_info.attribute & MemoryAttribute::PermissionLocked) ? 'P' : '-';
+            const char p = True(svc_mem_info.attribute & MemoryAttribute::PermissionLocked) ? 'P' : '-';

             reply += fmt::format(
                 " {:#012x} - {:#012x} {} {} {}{}{}{}{} [{}, {}]\n", svc_mem_info.base_address,

@@ -698,11 +686,8 @@ void GDBStub::HandleRcmd(const std::vector<u8>& command) {

             cur_addr = next_address;
         }
-    } else if (command_str == "help") {
-        reply = commands;
     } else {
         reply = "Unknown command.\n";
-        reply += commands;
+        reply += "Commands: fastmem, info, mappings\n";
     }

     std::span<const u8> reply_span{reinterpret_cast<u8*>(&reply.front()), reply.size()};
@@ -15,7 +15,7 @@ namespace Hardware {

 constexpr u64 BASE_CLOCK_RATE = 1'020'000'000; // Default CPU Frequency = 1020 MHz
 constexpr u64 CNTFREQ = 19'200'000;            // CNTPCT_EL0 Frequency = 19.2 MHz
-constexpr u32 NUM_CPU_CORES = 4;               // Number of CPU Cores
+constexpr u32 NUM_CPU_CORES = 4;               // Number of CPU Cores - sync with dynarmic exclusive_monitor.h

 // Virtual to Physical core map.
 constexpr std::array<s32, Common::BitSize<u64>()> VirtualToPhysicalCoreMap{
@@ -1266,10 +1266,6 @@ void KProcess::InitializeInterfaces() {

 #ifdef HAS_NCE
     if (this->IsApplication() && Settings::IsNceEnabled()) {
-        // Register the scoped JIT handler before creating any NCE instances
-        // so that its signal handler will appear first in the signal chain.
-        Core::ScopedJitExecution::RegisterHandler();
-
         for (size_t i = 0; i < Core::Hardware::NUM_CPU_CORES; i++) {
             m_arm_interfaces[i] = std::make_unique<Core::ArmNce>(m_kernel.System(), true, i);
         }
@@ -1,3 +1,6 @@
+// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
 // SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later

@@ -101,12 +104,12 @@ Result ILibraryAppletAccessor::PushInData(SharedPointer<IStorage> storage) {

 Result ILibraryAppletAccessor::PopOutData(Out<SharedPointer<IStorage>> out_storage) {
     LOG_DEBUG(Service_AM, "called");
-    if (auto caller_applet = m_applet->caller_applet.lock(); caller_applet) {
-        Event m_system_event = caller_applet->lifecycle_manager.GetSystemEvent();
-        m_system_event.Signal();
-        caller_applet->lifecycle_manager.RequestResumeNotification();
-        m_system_event.Clear();
-    }
+    if (auto caller_applet = m_applet->caller_applet.lock(); caller_applet) {
+        caller_applet->lifecycle_manager.GetSystemEvent().Signal();
+        caller_applet->lifecycle_manager.RequestResumeNotification();
+        caller_applet->lifecycle_manager.GetSystemEvent().Clear();
+        caller_applet->lifecycle_manager.UpdateRequestedFocusState();
+    }
     R_RETURN(m_broker->GetOutData().Pop(out_storage.Get()));
 }
@@ -61,8 +61,7 @@ struct Memory::Impl {
     }

 #ifdef __linux__
-        heap_tracker.emplace(system.DeviceMemory().buffer);
-        buffer = std::addressof(*heap_tracker);
+        buffer.emplace(system.DeviceMemory().buffer);
 #else
         buffer = std::addressof(system.DeviceMemory().buffer);
 #endif

@@ -1024,9 +1023,8 @@ struct Memory::Impl {
     std::span<Core::GPUDirtyMemoryManager> gpu_dirty_managers;
     std::mutex sys_core_guard;

-    std::optional<Common::HeapTracker> heap_tracker;
 #ifdef __linux__
-    Common::HeapTracker* buffer{};
+    std::optional<Common::HeapTracker> buffer;
 #else
     Common::HostMemory* buffer{};
 #endif

@@ -1230,22 +1228,7 @@ bool Memory::InvalidateNCE(Common::ProcessAddress vaddr, size_t size) {
     if (rasterizer) {
         impl->InvalidateGPUMemory(ptr, size);
     }

-#ifdef __linux__
-    if (!rasterizer && mapped) {
-        impl->buffer->DeferredMapSeparateHeap(GetInteger(vaddr));
-    }
-#endif
-
     return mapped && ptr != nullptr;
 }

-bool Memory::InvalidateSeparateHeap(void* fault_address) {
-#ifdef __linux__
-    return impl->buffer->DeferredMapSeparateHeap(static_cast<u8*>(fault_address));
-#else
-    return false;
-#endif
-}
-
 } // namespace Core::Memory
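For context on the Impl change above: the old code paired a std::optional holding the tracker with a separate raw pointer to it; the new code folds both roles into one optional and constructs in place with emplace. A minimal sketch of the pattern, with illustrative types:

#include <cstdio>
#include <optional>

struct HostBuffer { int id; };

struct Tracker {
    explicit Tracker(HostBuffer& b) : buffer{b} {}
    HostBuffer& buffer;
};

int main() {
    HostBuffer host{7};
    std::optional<Tracker> tracker;  // empty until the platform branch decides
    tracker.emplace(host);           // constructs in place; no separate pointer needed
    std::printf("%d\n", tracker->buffer.id);
}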
@@ -487,13 +487,8 @@ public:
      * marked as debug or non-debug.
      */
     void MarkRegionDebug(Common::ProcessAddress vaddr, u64 size, bool debug);

     void SetGPUDirtyManagers(std::span<Core::GPUDirtyMemoryManager> managers);

     bool InvalidateNCE(Common::ProcessAddress vaddr, size_t size);

-    bool InvalidateSeparateHeap(void* fault_address);
-
 private:
     Core::System& system;
@@ -79,7 +79,7 @@ contain a prediction with the same `UniqueHash`.
         ? u64(unique_hash_to_code_ptr[imm64])
         : u64(code->GetReturnFromRunCodeAddress());

-    code->mov(index_reg, dword[r15 + offsetof(JitState, rsb_ptr)]);
+    code->mov(index_reg, dword[code.ABI_JIT_PTR + offsetof(JitState, rsb_ptr)]);
     code->add(index_reg, 1);
     code->and_(index_reg, u32(JitState::RSBSize - 1));

@@ -91,13 +91,13 @@ contain a prediction with the same `UniqueHash`.

     Xbyak::Label label;
     for (size_t i = 0; i < JitState::RSBSize; ++i) {
-        code->cmp(loc_desc_reg, qword[r15 + offsetof(JitState, rsb_location_descriptors) + i * sizeof(u64)]);
+        code->cmp(loc_desc_reg, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_location_descriptors) + i * sizeof(u64)]);
         code->je(label, code->T_SHORT);
     }

-    code->mov(dword[r15 + offsetof(JitState, rsb_ptr)], index_reg);
-    code->mov(qword[r15 + index_reg.cvt64() * 8 + offsetof(JitState, rsb_location_descriptors)], loc_desc_reg);
-    code->mov(qword[r15 + index_reg.cvt64() * 8 + offsetof(JitState, rsb_codeptrs)], code_ptr_reg);
+    code->mov(dword[code.ABI_JIT_PTR + offsetof(JitState, rsb_ptr)], index_reg);
+    code->mov(qword[code.ABI_JIT_PTR + index_reg.cvt64() * 8 + offsetof(JitState, rsb_location_descriptors)], loc_desc_reg);
+    code->mov(qword[code.ABI_JIT_PTR + index_reg.cvt64() * 8 + offsetof(JitState, rsb_codeptrs)], code_ptr_reg);
     code->L(label);
 }

@@ -122,14 +122,14 @@ To check if a prediction is in the RSB, we linearly scan the RSB.
     // This calculation has to match up with IREmitter::PushRSB
     code->mov(ecx, MJitStateReg(Arm::Reg::PC));
     code->shl(rcx, 32);
-    code->mov(ebx, dword[r15 + offsetof(JitState, FPSCR_mode)]);
-    code->or_(ebx, dword[r15 + offsetof(JitState, CPSR_et)]);
+    code->mov(ebx, dword[code.ABI_JIT_PTR + offsetof(JitState, FPSCR_mode)]);
+    code->or_(ebx, dword[code.ABI_JIT_PTR + offsetof(JitState, CPSR_et)]);
     code->or_(rbx, rcx);

     code->mov(rax, u64(code->GetReturnFromRunCodeAddress()));
     for (size_t i = 0; i < JitState::RSBSize; ++i) {
-        code->cmp(rbx, qword[r15 + offsetof(JitState, rsb_location_descriptors) + i * sizeof(u64)]);
-        code->cmove(rax, qword[r15 + offsetof(JitState, rsb_codeptrs) + i * sizeof(u64)]);
+        code->cmp(rbx, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_location_descriptors) + i * sizeof(u64)]);
+        code->cmove(rax, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_codeptrs) + i * sizeof(u64)]);
     }

     code->jmp(rax);
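A rough C++ model of the RSB lookup that the emitted cmp/cmove sequence above performs; the structure names are illustrative, not dynarmic's actual JitState layout:

#include <array>
#include <cstdint>

struct Rsb {
    static constexpr std::size_t Size = 8;  // must be a power of two
    std::array<std::uint64_t, Size> location_descriptors{};
    std::array<const void*, Size> code_ptrs{};
};

// Linear scan, mirroring the emitted code: every slot is compared, and a
// match (if any) replaces the fallback pointer without branching.
const void* Lookup(const Rsb& rsb, std::uint64_t descriptor, const void* fallback) {
    const void* result = fallback;
    for (std::size_t i = 0; i < Rsb::Size; ++i) {
        if (rsb.location_descriptors[i] == descriptor) {
            result = rsb.code_ptrs[i];
        }
    }
    return result;
}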
@@ -14,7 +14,7 @@

 namespace Dynarmic {

-ExclusiveMonitor::ExclusiveMonitor(size_t processor_count)
+ExclusiveMonitor::ExclusiveMonitor(std::size_t processor_count)
     : exclusive_addresses(processor_count, INVALID_EXCLUSIVE_ADDRESS), exclusive_values(processor_count) {}

 size_t ExclusiveMonitor::GetProcessorCount() const {
@@ -20,7 +20,7 @@ struct Label;
 } // namespace oaknut

 namespace Dynarmic::IR {
-enum class Type;
+enum class Type : u16;
 } // namespace Dynarmic::IR

 namespace Dynarmic::Backend::Arm64 {
@@ -44,21 +44,21 @@ namespace Dynarmic::Backend::X64 {
 using namespace Xbyak::util;

 static Xbyak::Address MJitStateReg(A32::Reg reg) {
-    return dword[r15 + offsetof(A32JitState, Reg) + sizeof(u32) * static_cast<size_t>(reg)];
+    return dword[BlockOfCode::ABI_JIT_PTR + offsetof(A32JitState, Reg) + sizeof(u32) * static_cast<size_t>(reg)];
 }

 static Xbyak::Address MJitStateExtReg(A32::ExtReg reg) {
     if (A32::IsSingleExtReg(reg)) {
         const size_t index = static_cast<size_t>(reg) - static_cast<size_t>(A32::ExtReg::S0);
-        return dword[r15 + offsetof(A32JitState, ExtReg) + sizeof(u32) * index];
+        return dword[BlockOfCode::ABI_JIT_PTR + offsetof(A32JitState, ExtReg) + sizeof(u32) * index];
     }
     if (A32::IsDoubleExtReg(reg)) {
         const size_t index = static_cast<size_t>(reg) - static_cast<size_t>(A32::ExtReg::D0);
-        return qword[r15 + offsetof(A32JitState, ExtReg) + sizeof(u64) * index];
+        return qword[BlockOfCode::ABI_JIT_PTR + offsetof(A32JitState, ExtReg) + sizeof(u64) * index];
     }
     if (A32::IsQuadExtReg(reg)) {
         const size_t index = static_cast<size_t>(reg) - static_cast<size_t>(A32::ExtReg::Q0);
-        return xword[r15 + offsetof(A32JitState, ExtReg) + 2 * sizeof(u64) * index];
+        return xword[BlockOfCode::ABI_JIT_PTR + offsetof(A32JitState, ExtReg) + 2 * sizeof(u64) * index];
     }
     ASSERT_FALSE("Should never happen.");
 }

@@ -109,12 +109,12 @@ A32EmitX64::BlockDescriptor A32EmitX64::Emit(IR::Block& block) {

     const boost::container::static_vector<HostLoc, 28> gpr_order = [this] {
         boost::container::static_vector<HostLoc, 28> gprs{any_gpr};
-        if (conf.page_table) {
-            gprs.erase(std::find(gprs.begin(), gprs.end(), HostLoc::R14));
-        }
         if (conf.fastmem_pointer) {
             gprs.erase(std::find(gprs.begin(), gprs.end(), HostLoc::R13));
         }
+        if (conf.page_table) {
+            gprs.erase(std::find(gprs.begin(), gprs.end(), HostLoc::R14));
+        }
         return gprs;
     }();
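The mechanical r15 to code.ABI_JIT_PTR substitution running through the hunks below follows from the ABI_JIT_REG alias added earlier in this diff: the pointer to the JIT state block is named once instead of hard-coding r15 at every use site. A tiny sketch of the idea, assuming xbyak's CodeGenerator members; the JitState type here is illustrative:

#include <cstddef>
#include <cstdint>
#include <xbyak/xbyak.h>

struct JitState { std::uint32_t rsb_ptr{}; };

// Name the pinned JIT-state register once; repinning it later is a one-line change.
static const Xbyak::Reg64 ABI_JIT_PTR = Xbyak::util::rbx;  // was hard-coded r15

struct Emitter : Xbyak::CodeGenerator {
    void EmitLoadRsbPtr() {
        // Every state access goes through the alias rather than a raw register.
        mov(eax, dword[ABI_JIT_PTR + offsetof(JitState, rsb_ptr)]);
    }
};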
@@ -220,7 +220,7 @@ void A32EmitX64::GenTerminalHandlers() {
     // PC ends up in ebp, location_descriptor ends up in rbx
     const auto calculate_location_descriptor = [this] {
         // This calculation has to match up with IREmitter::PushRSB
-        code.mov(ebx, dword[r15 + offsetof(A32JitState, upper_location_descriptor)]);
+        code.mov(ebx, dword[code.ABI_JIT_PTR + offsetof(A32JitState, upper_location_descriptor)]);
         code.shl(rbx, 32);
         code.mov(ecx, MJitStateReg(A32::Reg::PC));
         code.mov(ebp, ecx);

@@ -232,17 +232,17 @@ void A32EmitX64::GenTerminalHandlers() {
     code.align();
     terminal_handler_pop_rsb_hint = code.getCurr<const void*>();
     calculate_location_descriptor();
-    code.mov(eax, dword[r15 + offsetof(A32JitState, rsb_ptr)]);
-    code.dec(eax);
+    code.mov(eax, dword[code.ABI_JIT_PTR + offsetof(A32JitState, rsb_ptr)]);
+    code.sub(eax, 1);
     code.and_(eax, u32(A32JitState::RSBPtrMask));
-    code.mov(dword[r15 + offsetof(A32JitState, rsb_ptr)], eax);
-    code.cmp(rbx, qword[r15 + offsetof(A32JitState, rsb_location_descriptors) + rax * sizeof(u64)]);
+    code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, rsb_ptr)], eax);
+    code.cmp(rbx, qword[code.ABI_JIT_PTR + offsetof(A32JitState, rsb_location_descriptors) + rax * sizeof(u64)]);
     if (conf.HasOptimization(OptimizationFlag::FastDispatch)) {
         code.jne(rsb_cache_miss);
     } else {
         code.jne(code.GetReturnFromRunCodeAddress());
     }
-    code.mov(rax, qword[r15 + offsetof(A32JitState, rsb_codeptrs) + rax * sizeof(u64)]);
+    code.mov(rax, qword[code.ABI_JIT_PTR + offsetof(A32JitState, rsb_codeptrs) + rax * sizeof(u64)]);
     code.jmp(rax);
     PerfMapRegister(terminal_handler_pop_rsb_hint, code.getCurr(), "a32_terminal_handler_pop_rsb_hint");
@@ -392,17 +392,17 @@ void A32EmitX64::EmitA32GetCpsr(A32EmitContext& ctx, IR::Inst* inst) {
         // so we load them both at the same time with one 64-bit read. This allows us to
         // extract all of their bits together at once with one pext.
         static_assert(offsetof(A32JitState, upper_location_descriptor) + 4 == offsetof(A32JitState, cpsr_ge));
-        code.mov(result.cvt64(), qword[r15 + offsetof(A32JitState, upper_location_descriptor)]);
+        code.mov(result.cvt64(), qword[code.ABI_JIT_PTR + offsetof(A32JitState, upper_location_descriptor)]);
         code.mov(tmp.cvt64(), 0x80808080'00000003ull);
         code.pext(result.cvt64(), result.cvt64(), tmp.cvt64());
         code.mov(tmp, 0x000f0220);
         code.pdep(result, result, tmp);
     } else {
-        code.mov(result, dword[r15 + offsetof(A32JitState, upper_location_descriptor)]);
+        code.mov(result, dword[code.ABI_JIT_PTR + offsetof(A32JitState, upper_location_descriptor)]);
         code.imul(result, result, 0x120);
         code.and_(result, 0x00000220);

-        code.mov(tmp, dword[r15 + offsetof(A32JitState, cpsr_ge)]);
+        code.mov(tmp, dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_ge)]);
         code.and_(tmp, 0x80808080);
         code.imul(tmp, tmp, 0x00204081);
         code.shr(tmp, 12);

@@ -410,11 +410,11 @@ void A32EmitX64::EmitA32GetCpsr(A32EmitContext& ctx, IR::Inst* inst) {
         code.or_(result, tmp);
     }

-    code.mov(tmp, dword[r15 + offsetof(A32JitState, cpsr_q)]);
+    code.mov(tmp, dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_q)]);
     code.shl(tmp, 27);
     code.or_(result, tmp);

-    code.mov(tmp2, dword[r15 + offsetof(A32JitState, cpsr_nzcv)]);
+    code.mov(tmp2, dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv)]);
     if (code.HasHostFeature(HostFeature::FastBMI2)) {
         code.mov(tmp, NZCV::x64_mask);
         code.pext(tmp2, tmp2, tmp);

@@ -426,7 +426,7 @@ void A32EmitX64::EmitA32GetCpsr(A32EmitContext& ctx, IR::Inst* inst) {
     }
     code.or_(result, tmp2);

-    code.or_(result, dword[r15 + offsetof(A32JitState, cpsr_jaifm)]);
+    code.or_(result, dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_jaifm)]);

     ctx.reg_alloc.DefineValue(inst, result);
 }
@@ -444,7 +444,7 @@ void A32EmitX64::EmitA32SetCpsr(A32EmitContext& ctx, IR::Inst* inst) {

     // cpsr_q
     code.bt(cpsr, 27);
-    code.setc(code.byte[r15 + offsetof(A32JitState, cpsr_q)]);
+    code.setc(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_q)]);

     // cpsr_nzcv
     code.mov(tmp, cpsr);

@@ -456,12 +456,12 @@ void A32EmitX64::EmitA32SetCpsr(A32EmitContext& ctx, IR::Inst* inst) {
         code.imul(tmp, tmp, NZCV::to_x64_multiplier);
         code.and_(tmp, NZCV::x64_mask);
     }
-    code.mov(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], tmp);
+    code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv)], tmp);

     // cpsr_jaifm
     code.mov(tmp, cpsr);
     code.and_(tmp, 0x010001DF);
-    code.mov(dword[r15 + offsetof(A32JitState, cpsr_jaifm)], tmp);
+    code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_jaifm)], tmp);

     if (code.HasHostFeature(HostFeature::FastBMI2)) {
         // cpsr_et and cpsr_ge

@@ -469,7 +469,7 @@ void A32EmitX64::EmitA32SetCpsr(A32EmitContext& ctx, IR::Inst* inst) {
         // This mask is 0x7FFF0000, because we do not want the MSB to be sign extended to the upper dword.
         static_assert((A32::LocationDescriptor::FPSCR_MODE_MASK & ~0x7FFF0000) == 0);

-        code.and_(qword[r15 + offsetof(A32JitState, upper_location_descriptor)], u32(0x7FFF0000));
+        code.and_(qword[code.ABI_JIT_PTR + offsetof(A32JitState, upper_location_descriptor)], u32(0x7FFF0000));
         code.mov(tmp, 0x000f0220);
         code.pext(cpsr, cpsr, tmp);
         code.mov(tmp.cvt64(), 0x01010101'00000003ull);

@@ -479,14 +479,14 @@ void A32EmitX64::EmitA32SetCpsr(A32EmitContext& ctx, IR::Inst* inst) {
         code.mov(tmp2.cvt64(), tmp.cvt64());
         code.sub(tmp.cvt64(), cpsr.cvt64());
         code.xor_(tmp.cvt64(), tmp2.cvt64());
-        code.or_(qword[r15 + offsetof(A32JitState, upper_location_descriptor)], tmp.cvt64());
+        code.or_(qword[code.ABI_JIT_PTR + offsetof(A32JitState, upper_location_descriptor)], tmp.cvt64());
     } else {
-        code.and_(dword[r15 + offsetof(A32JitState, upper_location_descriptor)], u32(0xFFFF0000));
+        code.and_(dword[code.ABI_JIT_PTR + offsetof(A32JitState, upper_location_descriptor)], u32(0xFFFF0000));
         code.mov(tmp, cpsr);
         code.and_(tmp, 0x00000220);
         code.imul(tmp, tmp, 0x00900000);
         code.shr(tmp, 28);
-        code.or_(dword[r15 + offsetof(A32JitState, upper_location_descriptor)], tmp);
+        code.or_(dword[code.ABI_JIT_PTR + offsetof(A32JitState, upper_location_descriptor)], tmp);

         code.and_(cpsr, 0x000f0000);
         code.shr(cpsr, 16);
@@ -495,14 +495,14 @@ void A32EmitX64::EmitA32SetCpsr(A32EmitContext& ctx, IR::Inst* inst) {
         code.mov(tmp, 0x80808080);
         code.sub(tmp, cpsr);
         code.xor_(tmp, 0x80808080);
-        code.mov(dword[r15 + offsetof(A32JitState, cpsr_ge)], tmp);
+        code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_ge)], tmp);
     }
 }

 void A32EmitX64::EmitA32SetCpsrNZCV(A32EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     const Xbyak::Reg32 to_store = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
-    code.mov(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], to_store);
+    code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv)], to_store);
 }

 void A32EmitX64::EmitA32SetCpsrNZCVRaw(A32EmitContext& ctx, IR::Inst* inst) {

@@ -510,7 +510,7 @@ void A32EmitX64::EmitA32SetCpsrNZCVRaw(A32EmitContext& ctx, IR::Inst* inst) {
     if (args[0].IsImmediate()) {
         const u32 imm = args[0].GetImmediateU32();

-        code.mov(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], NZCV::ToX64(imm));
+        code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv)], NZCV::ToX64(imm));
     } else if (code.HasHostFeature(HostFeature::FastBMI2)) {
         const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
         const Xbyak::Reg32 b = ctx.reg_alloc.ScratchGpr().cvt32();

@@ -518,14 +518,14 @@ void A32EmitX64::EmitA32SetCpsrNZCVRaw(A32EmitContext& ctx, IR::Inst* inst) {
         code.shr(a, 28);
         code.mov(b, NZCV::x64_mask);
         code.pdep(a, a, b);
-        code.mov(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], a);
+        code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv)], a);
     } else {
         const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();

         code.shr(a, 28);
         code.imul(a, a, NZCV::to_x64_multiplier);
         code.and_(a, NZCV::x64_mask);
-        code.mov(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], a);
+        code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv)], a);
     }
 }
@@ -534,25 +534,25 @@ void A32EmitX64::EmitA32SetCpsrNZCVQ(A32EmitContext& ctx, IR::Inst* inst) {
     if (args[0].IsImmediate()) {
         const u32 imm = args[0].GetImmediateU32();

-        code.mov(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], NZCV::ToX64(imm));
-        code.mov(code.byte[r15 + offsetof(A32JitState, cpsr_q)], u8((imm & 0x08000000) != 0 ? 1 : 0));
+        code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv)], NZCV::ToX64(imm));
+        code.mov(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_q)], u8((imm & 0x08000000) != 0 ? 1 : 0));
     } else if (code.HasHostFeature(HostFeature::FastBMI2)) {
         const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
         const Xbyak::Reg32 b = ctx.reg_alloc.ScratchGpr().cvt32();

         code.shr(a, 28);
-        code.setc(code.byte[r15 + offsetof(A32JitState, cpsr_q)]);
+        code.setc(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_q)]);
         code.mov(b, NZCV::x64_mask);
         code.pdep(a, a, b);
-        code.mov(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], a);
+        code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv)], a);
     } else {
         const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();

         code.shr(a, 28);
-        code.setc(code.byte[r15 + offsetof(A32JitState, cpsr_q)]);
+        code.setc(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_q)]);
         code.imul(a, a, NZCV::to_x64_multiplier);
         code.and_(a, NZCV::x64_mask);
-        code.mov(dword[r15 + offsetof(A32JitState, cpsr_nzcv)], a);
+        code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv)], a);
     }
 }
@@ -562,10 +562,10 @@ void A32EmitX64::EmitA32SetCpsrNZ(A32EmitContext& ctx, IR::Inst* inst) {
     const Xbyak::Reg32 nz = ctx.reg_alloc.UseGpr(args[0]).cvt32();
     const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32();

-    code.movzx(tmp, code.byte[r15 + offsetof(A32JitState, cpsr_nzcv) + 1]);
+    code.movzx(tmp, code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv) + 1]);
     code.and_(tmp, 1);
     code.or_(tmp, nz);
-    code.mov(code.byte[r15 + offsetof(A32JitState, cpsr_nzcv) + 1], tmp.cvt8());
+    code.mov(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv) + 1], tmp.cvt8());
 }

 void A32EmitX64::EmitA32SetCpsrNZC(A32EmitContext& ctx, IR::Inst* inst) {

@@ -575,11 +575,11 @@ void A32EmitX64::EmitA32SetCpsrNZC(A32EmitContext& ctx, IR::Inst* inst) {
         if (args[1].IsImmediate()) {
             const bool c = args[1].GetImmediateU1();

-            code.mov(code.byte[r15 + offsetof(A32JitState, cpsr_nzcv) + 1], c);
+            code.mov(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv) + 1], c);
         } else {
             const Xbyak::Reg8 c = ctx.reg_alloc.UseGpr(args[1]).cvt8();

-            code.mov(code.byte[r15 + offsetof(A32JitState, cpsr_nzcv) + 1], c);
+            code.mov(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv) + 1], c);
         }
     } else {
         const Xbyak::Reg32 nz = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();

@@ -588,19 +588,19 @@ void A32EmitX64::EmitA32SetCpsrNZC(A32EmitContext& ctx, IR::Inst* inst) {
             const bool c = args[1].GetImmediateU1();

             code.or_(nz, c);
-            code.mov(code.byte[r15 + offsetof(A32JitState, cpsr_nzcv) + 1], nz.cvt8());
+            code.mov(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv) + 1], nz.cvt8());
         } else {
             const Xbyak::Reg32 c = ctx.reg_alloc.UseGpr(args[1]).cvt32();

             code.or_(nz, c);
-            code.mov(code.byte[r15 + offsetof(A32JitState, cpsr_nzcv) + 1], nz.cvt8());
+            code.mov(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv) + 1], nz.cvt8());
         }
     }
 }

 static void EmitGetFlag(BlockOfCode& code, A32EmitContext& ctx, IR::Inst* inst, size_t flag_bit) {
     const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
-    code.mov(result, dword[r15 + offsetof(A32JitState, cpsr_nzcv)]);
+    code.mov(result, dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_nzcv)]);
     if (flag_bit != 0) {
         code.shr(result, static_cast<int>(flag_bit));
     }
@@ -616,18 +616,18 @@ void A32EmitX64::EmitA32OrQFlag(A32EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     if (args[0].IsImmediate()) {
         if (args[0].GetImmediateU1()) {
-            code.mov(dword[r15 + offsetof(A32JitState, cpsr_q)], 1);
+            code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_q)], 1);
         }
     } else {
         const Xbyak::Reg8 to_store = ctx.reg_alloc.UseGpr(args[0]).cvt8();

-        code.or_(code.byte[r15 + offsetof(A32JitState, cpsr_q)], to_store);
+        code.or_(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_q)], to_store);
     }
 }

 void A32EmitX64::EmitA32GetGEFlags(A32EmitContext& ctx, IR::Inst* inst) {
     const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
-    code.movd(result, dword[r15 + offsetof(A32JitState, cpsr_ge)]);
+    code.movd(result, dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_ge)]);
     ctx.reg_alloc.DefineValue(inst, result);
 }

@@ -637,10 +637,10 @@ void A32EmitX64::EmitA32SetGEFlags(A32EmitContext& ctx, IR::Inst* inst) {

     if (args[0].IsInXmm()) {
         const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(args[0]);
-        code.movd(dword[r15 + offsetof(A32JitState, cpsr_ge)], to_store);
+        code.movd(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_ge)], to_store);
     } else {
         const Xbyak::Reg32 to_store = ctx.reg_alloc.UseGpr(args[0]).cvt32();
-        code.mov(dword[r15 + offsetof(A32JitState, cpsr_ge)], to_store);
+        code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_ge)], to_store);
     }
 }

@@ -654,7 +654,7 @@ void A32EmitX64::EmitA32SetGEFlagsCompressed(A32EmitContext& ctx, IR::Inst* inst
         ge |= mcl::bit::get_bit<17>(imm) ? 0x0000FF00 : 0;
         ge |= mcl::bit::get_bit<16>(imm) ? 0x000000FF : 0;

-        code.mov(dword[r15 + offsetof(A32JitState, cpsr_ge)], ge);
+        code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_ge)], ge);
     } else if (code.HasHostFeature(HostFeature::FastBMI2)) {
         const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
         const Xbyak::Reg32 b = ctx.reg_alloc.ScratchGpr().cvt32();

@@ -663,7 +663,7 @@ void A32EmitX64::EmitA32SetGEFlagsCompressed(A32EmitContext& ctx, IR::Inst* inst
         code.shr(a, 16);
         code.pdep(a, a, b);
         code.imul(a, a, 0xFF);
-        code.mov(dword[r15 + offsetof(A32JitState, cpsr_ge)], a);
+        code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_ge)], a);
     } else {
         const Xbyak::Reg32 a = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();

@@ -672,7 +672,7 @@ void A32EmitX64::EmitA32SetGEFlagsCompressed(A32EmitContext& ctx, IR::Inst* inst
         code.imul(a, a, 0x00204081);
         code.and_(a, 0x01010101);
         code.imul(a, a, 0xFF);
-        code.mov(dword[r15 + offsetof(A32JitState, cpsr_ge)], a);
+        code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, cpsr_ge)], a);
     }
 }
@@ -716,7 +716,7 @@ void A32EmitX64::EmitA32BXWritePC(A32EmitContext& ctx, IR::Inst* inst) {
         const u32 new_upper = upper_without_t | (mcl::bit::get_bit<0>(new_pc) ? 1 : 0);

         code.mov(MJitStateReg(A32::Reg::PC), new_pc & mask);
-        code.mov(dword[r15 + offsetof(A32JitState, upper_location_descriptor)], new_upper);
+        code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, upper_location_descriptor)], new_upper);
     } else {
         const Xbyak::Reg32 new_pc = ctx.reg_alloc.UseScratchGpr(arg).cvt32();
         const Xbyak::Reg32 mask = ctx.reg_alloc.ScratchGpr().cvt32();

@@ -728,7 +728,7 @@ void A32EmitX64::EmitA32BXWritePC(A32EmitContext& ctx, IR::Inst* inst) {
         code.lea(mask, ptr[mask.cvt64() + mask.cvt64() * 1 - 4]); // mask = pc & 1 ? 0xFFFFFFFE : 0xFFFFFFFC
         code.and_(new_pc, mask);
         code.mov(MJitStateReg(A32::Reg::PC), new_pc);
-        code.mov(dword[r15 + offsetof(A32JitState, upper_location_descriptor)], new_upper);
+        code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, upper_location_descriptor)], new_upper);
     }
 }

@@ -798,9 +798,9 @@ static u32 GetFpscrImpl(A32JitState* jit_state) {

 void A32EmitX64::EmitA32GetFpscr(A32EmitContext& ctx, IR::Inst* inst) {
     ctx.reg_alloc.HostCall(inst);
-    code.mov(code.ABI_PARAM1, code.r15);
+    code.mov(code.ABI_PARAM1, code.ABI_JIT_PTR);

-    code.stmxcsr(code.dword[code.r15 + offsetof(A32JitState, guest_MXCSR)]);
+    code.stmxcsr(code.dword[code.ABI_JIT_PTR + offsetof(A32JitState, guest_MXCSR)]);
     code.CallFunction(&GetFpscrImpl);
 }

@@ -811,15 +811,15 @@ static void SetFpscrImpl(u32 value, A32JitState* jit_state) {
 void A32EmitX64::EmitA32SetFpscr(A32EmitContext& ctx, IR::Inst* inst) {
     auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     ctx.reg_alloc.HostCall(nullptr, args[0]);
-    code.mov(code.ABI_PARAM2, code.r15);
+    code.mov(code.ABI_PARAM2, code.ABI_JIT_PTR);

     code.CallFunction(&SetFpscrImpl);
-    code.ldmxcsr(code.dword[code.r15 + offsetof(A32JitState, guest_MXCSR)]);
+    code.ldmxcsr(code.dword[code.ABI_JIT_PTR + offsetof(A32JitState, guest_MXCSR)]);
 }

 void A32EmitX64::EmitA32GetFpscrNZCV(A32EmitContext& ctx, IR::Inst* inst) {
     const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
-    code.mov(result, dword[r15 + offsetof(A32JitState, fpsr_nzcv)]);
+    code.mov(result, dword[code.ABI_JIT_PTR + offsetof(A32JitState, fpsr_nzcv)]);
     ctx.reg_alloc.DefineValue(inst, result);
 }
@ -833,7 +833,7 @@ void A32EmitX64::EmitA32SetFpscrNZCV(A32EmitContext& ctx, IR::Inst* inst) {
|
|||
code.mov(tmp, NZCV::x64_mask);
|
||||
code.pext(tmp, value, tmp);
|
||||
code.shl(tmp, 28);
|
||||
code.mov(dword[r15 + offsetof(A32JitState, fpsr_nzcv)], tmp);
|
||||
code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, fpsr_nzcv)], tmp);
|
||||
|
||||
return;
|
||||
}
|
||||
|
@ -843,7 +843,7 @@ void A32EmitX64::EmitA32SetFpscrNZCV(A32EmitContext& ctx, IR::Inst* inst) {
|
|||
code.and_(value, NZCV::x64_mask);
|
||||
code.imul(value, value, NZCV::from_x64_multiplier);
|
||||
code.and_(value, NZCV::arm_mask);
|
||||
code.mov(dword[r15 + offsetof(A32JitState, fpsr_nzcv)], value);
|
||||
code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, fpsr_nzcv)], value);
|
||||
}
|
||||
|
||||
static void EmitCoprocessorException() {
|
||||
|
@ -1155,7 +1155,7 @@ void A32EmitX64::EmitSetUpperLocationDescriptor(IR::LocationDescriptor new_locat
|
|||
}();
|
||||
|
||||
if (old_upper != new_upper) {
|
||||
code.mov(dword[r15 + offsetof(A32JitState, upper_location_descriptor)], new_upper);
|
||||
code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, upper_location_descriptor)], new_upper);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1165,32 +1165,28 @@ void A32EmitX64::EmitTerminalImpl(IR::Term::LinkBlock terminal, IR::LocationDesc
|
|||
if (!conf.HasOptimization(OptimizationFlag::BlockLinking) || is_single_step) {
|
||||
code.mov(MJitStateReg(A32::Reg::PC), A32::LocationDescriptor{terminal.next}.PC());
|
||||
code.ReturnFromRunCode();
|
||||
return;
|
||||
}
|
||||
|
||||
if (conf.enable_cycle_counting) {
|
||||
code.cmp(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], 0);
|
||||
|
||||
patch_information[terminal.next].jg.push_back(code.getCurr());
|
||||
if (const auto next_bb = GetBasicBlock(terminal.next)) {
|
||||
EmitPatchJg(terminal.next, next_bb->entrypoint);
|
||||
} else {
|
||||
EmitPatchJg(terminal.next);
|
||||
}
|
||||
} else {
|
||||
code.cmp(dword[r15 + offsetof(A32JitState, halt_reason)], 0);
|
||||
|
||||
patch_information[terminal.next].jz.push_back(code.getCurr());
|
||||
if (const auto next_bb = GetBasicBlock(terminal.next)) {
|
||||
EmitPatchJz(terminal.next, next_bb->entrypoint);
|
||||
if (conf.enable_cycle_counting) {
|
||||
code.cmp(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], 0);
|
||||
patch_information[terminal.next].jg.push_back(code.getCurr());
|
||||
if (const auto next_bb = GetBasicBlock(terminal.next)) {
|
||||
EmitPatchJg(terminal.next, next_bb->entrypoint);
|
||||
} else {
|
||||
EmitPatchJg(terminal.next);
|
||||
}
|
||||
} else {
|
||||
EmitPatchJz(terminal.next);
|
||||
code.cmp(dword[code.ABI_JIT_PTR + offsetof(A32JitState, halt_reason)], 0);
|
||||
patch_information[terminal.next].jz.push_back(code.getCurr());
|
||||
if (const auto next_bb = GetBasicBlock(terminal.next)) {
|
||||
EmitPatchJz(terminal.next, next_bb->entrypoint);
|
||||
} else {
|
||||
EmitPatchJz(terminal.next);
|
||||
}
|
||||
}
|
||||
code.mov(MJitStateReg(A32::Reg::PC), A32::LocationDescriptor{terminal.next}.PC());
|
||||
PushRSBHelper(rax, rbx, terminal.next);
|
||||
code.ForceReturnFromRunCode();
|
||||
}
|
||||
|
||||
code.mov(MJitStateReg(A32::Reg::PC), A32::LocationDescriptor{terminal.next}.PC());
|
||||
PushRSBHelper(rax, rbx, terminal.next);
|
||||
code.ForceReturnFromRunCode();
|
||||
}
|
||||
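The hunk above (and the A64 counterpart later in this diff) is block linking: when the successor block is already compiled, a conditional jump straight to its entrypoint is emitted; otherwise the jump site is recorded and patched later. A sketch of the bookkeeping idea only (illustration, not project code; the real key is an IR::LocationDescriptor, modeled here as a u64 hash):

    #include <cstdint>
    #include <unordered_map>
    #include <vector>

    using CodePtr = const void*;

    struct PatchList {
        std::vector<CodePtr> jg, jz, jmp;  // emitted jump sites awaiting a target
    };

    // Keyed by the successor block's location-descriptor hash.
    std::unordered_map<std::uint64_t, PatchList> patch_information;

    // Once the block at `hash` is compiled, every recorded site would be
    // rewritten in place to jump to `entrypoint`. The real backend patches
    // machine code; this stub only shows the bookkeeping.
    void Link(std::uint64_t hash, CodePtr entrypoint) {
        (void)entrypoint;
        patch_information.erase(hash);
    }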
void A32EmitX64::EmitTerminalImpl(IR::Term::LinkBlockFast terminal, IR::LocationDescriptor initial_location, bool is_single_step) {

@@ -1199,14 +1195,13 @@ void A32EmitX64::EmitTerminalImpl(IR::Term::LinkBlockFast terminal, IR::Location
    if (!conf.HasOptimization(OptimizationFlag::BlockLinking) || is_single_step) {
        code.mov(MJitStateReg(A32::Reg::PC), A32::LocationDescriptor{terminal.next}.PC());
        code.ReturnFromRunCode();
        return;
    }

    patch_information[terminal.next].jmp.push_back(code.getCurr());
    if (const auto next_bb = GetBasicBlock(terminal.next)) {
        EmitPatchJmp(terminal.next, next_bb->entrypoint);
    } else {
        EmitPatchJmp(terminal.next);
        patch_information[terminal.next].jmp.push_back(code.getCurr());
        if (const auto next_bb = GetBasicBlock(terminal.next)) {
            EmitPatchJmp(terminal.next, next_bb->entrypoint);
        } else {
            EmitPatchJmp(terminal.next);
        }
    }
}

@@ -1245,7 +1240,7 @@ void A32EmitX64::EmitTerminalImpl(IR::Term::CheckBit terminal, IR::LocationDescr
}

void A32EmitX64::EmitTerminalImpl(IR::Term::CheckHalt terminal, IR::LocationDescriptor initial_location, bool is_single_step) {
    code.cmp(dword[r15 + offsetof(A32JitState, halt_reason)], 0);
    code.cmp(dword[code.ABI_JIT_PTR + offsetof(A32JitState, halt_reason)], 0);
    code.jne(code.GetForceReturnFromRunCodeAddress());
    EmitTerminal(terminal.else_, initial_location, is_single_step);
}

@@ -168,7 +168,7 @@ void A32EmitX64::EmitA32WriteMemory64(A32EmitContext& ctx, IR::Inst* inst) {
}

void A32EmitX64::EmitA32ClearExclusive(A32EmitContext&, IR::Inst*) {
    code.mov(code.byte[r15 + offsetof(A32JitState, exclusive_state)], u8(0));
    code.mov(code.byte[code.ABI_JIT_PTR + offsetof(A32JitState, exclusive_state)], u8(0));
}

void A32EmitX64::EmitA32ExclusiveReadMemory8(A32EmitContext& ctx, IR::Inst* inst) {

@@ -244,14 +244,14 @@ void A32EmitX64::EmitCheckMemoryAbort(A32EmitContext& ctx, IR::Inst* inst, Xbyak

    const A32::LocationDescriptor current_location{IR::LocationDescriptor{inst->GetArg(0).GetU64()}};

    code.test(dword[r15 + offsetof(A32JitState, halt_reason)], static_cast<u32>(HaltReason::MemoryAbort));
    code.test(dword[code.ABI_JIT_PTR + offsetof(A32JitState, halt_reason)], static_cast<u32>(HaltReason::MemoryAbort));
    if (end) {
        code.jz(*end, code.T_NEAR);
    } else {
        code.jz(skip, code.T_NEAR);
    }
    EmitSetUpperLocationDescriptor(current_location, ctx.Location());
    code.mov(dword[r15 + offsetof(A32JitState, Reg) + sizeof(u32) * 15], current_location.PC());
    code.mov(dword[code.ABI_JIT_PTR + offsetof(A32JitState, Reg) + sizeof(u32) * 15], current_location.PC());
    code.ForceReturnFromRunCode();
    code.L(skip);
}

@@ -80,12 +80,12 @@ A64EmitX64::BlockDescriptor A64EmitX64::Emit(IR::Block& block) noexcept {

    const boost::container::static_vector<HostLoc, 28> gpr_order = [this] {
        boost::container::static_vector<HostLoc, 28> gprs{any_gpr};
        if (conf.page_table) {
            gprs.erase(std::find(gprs.begin(), gprs.end(), HostLoc::R14));
        }
        if (conf.fastmem_pointer) {
            gprs.erase(std::find(gprs.begin(), gprs.end(), HostLoc::R13));
        }
        if (conf.page_table) {
            gprs.erase(std::find(gprs.begin(), gprs.end(), HostLoc::R14));
        }
        return gprs;
    }();

@@ -192,10 +192,10 @@ void A64EmitX64::GenTerminalHandlers() {
    const auto calculate_location_descriptor = [this] {
        // This calculation has to match up with A64::LocationDescriptor::UniqueHash
        // TODO: Optimization is available here based on known state of fpcr.
        code.mov(rbp, qword[r15 + offsetof(A64JitState, pc)]);
        code.mov(rbp, qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)]);
        code.mov(rcx, A64::LocationDescriptor::pc_mask);
        code.and_(rcx, rbp);
        code.mov(ebx, dword[r15 + offsetof(A64JitState, fpcr)]);
        code.mov(ebx, dword[code.ABI_JIT_PTR + offsetof(A64JitState, fpcr)]);
        code.and_(ebx, A64::LocationDescriptor::fpcr_mask);
        code.shl(rbx, A64::LocationDescriptor::fpcr_shift);
        code.or_(rbx, rcx);

@@ -207,17 +207,17 @@ void A64EmitX64::GenTerminalHandlers() {
    code.align();
    terminal_handler_pop_rsb_hint = code.getCurr<const void*>();
    calculate_location_descriptor();
    code.mov(eax, dword[r15 + offsetof(A64JitState, rsb_ptr)]);
    code.dec(eax);
    code.mov(eax, dword[code.ABI_JIT_PTR + offsetof(A64JitState, rsb_ptr)]);
    code.sub(eax, 1);
    code.and_(eax, u32(A64JitState::RSBPtrMask));
    code.mov(dword[r15 + offsetof(A64JitState, rsb_ptr)], eax);
    code.cmp(rbx, qword[r15 + offsetof(A64JitState, rsb_location_descriptors) + rax * sizeof(u64)]);
    code.mov(dword[code.ABI_JIT_PTR + offsetof(A64JitState, rsb_ptr)], eax);
    code.cmp(rbx, qword[code.ABI_JIT_PTR + offsetof(A64JitState, rsb_location_descriptors) + rax * sizeof(u64)]);
    if (conf.HasOptimization(OptimizationFlag::FastDispatch)) {
        code.jne(rsb_cache_miss, code.T_NEAR);
    } else {
        code.jne(code.GetReturnFromRunCodeAddress());
    }
    code.mov(rax, qword[r15 + offsetof(A64JitState, rsb_codeptrs) + rax * sizeof(u64)]);
    code.mov(rax, qword[code.ABI_JIT_PTR + offsetof(A64JitState, rsb_codeptrs) + rax * sizeof(u64)]);
    code.jmp(rax);
    PerfMapRegister(terminal_handler_pop_rsb_hint, code.getCurr(), "a64_terminal_handler_pop_rsb_hint");
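The register sequence above recomputes the location-descriptor hash that the comment says must match A64::LocationDescriptor::UniqueHash. The same combine, written as plain C++ (illustration only; the mask/shift constants are the ones referenced in the emitted code, their values live in the A64 LocationDescriptor definition):

    #include <cstdint>

    std::uint64_t unique_hash(std::uint64_t pc, std::uint32_t fpcr,
                              std::uint64_t pc_mask, std::uint32_t fpcr_mask,
                              int fpcr_shift) {
        // rcx = pc & pc_mask; rbx = (fpcr & fpcr_mask) << fpcr_shift; rbx |= rcx
        return (pc & pc_mask) | (std::uint64_t{fpcr & fpcr_mask} << fpcr_shift);
    }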
@@ -272,7 +272,7 @@ void A64EmitX64::EmitA64SetCheckBit(A64EmitContext& ctx, IR::Inst* inst) {

void A64EmitX64::EmitA64GetCFlag(A64EmitContext& ctx, IR::Inst* inst) {
    const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
    code.mov(result, dword[r15 + offsetof(A64JitState, cpsr_nzcv)]);
    code.mov(result, dword[code.ABI_JIT_PTR + offsetof(A64JitState, cpsr_nzcv)]);
    code.shr(result, NZCV::x64_c_flag_bit);
    code.and_(result, 1);
    ctx.reg_alloc.DefineValue(inst, result);

@@ -281,7 +281,7 @@ void A64EmitX64::EmitA64GetCFlag(A64EmitContext& ctx, IR::Inst* inst) {
void A64EmitX64::EmitA64GetNZCVRaw(A64EmitContext& ctx, IR::Inst* inst) {
    const Xbyak::Reg32 nzcv_raw = ctx.reg_alloc.ScratchGpr().cvt32();

    code.mov(nzcv_raw, dword[r15 + offsetof(A64JitState, cpsr_nzcv)]);
    code.mov(nzcv_raw, dword[code.ABI_JIT_PTR + offsetof(A64JitState, cpsr_nzcv)]);

    if (code.HasHostFeature(HostFeature::FastBMI2)) {
        const Xbyak::Reg32 tmp = ctx.reg_alloc.ScratchGpr().cvt32();

@@ -310,20 +310,20 @@ void A64EmitX64::EmitA64SetNZCVRaw(A64EmitContext& ctx, IR::Inst* inst) {
        code.imul(nzcv_raw, nzcv_raw, NZCV::to_x64_multiplier);
        code.and_(nzcv_raw, NZCV::x64_mask);
    }
    code.mov(dword[r15 + offsetof(A64JitState, cpsr_nzcv)], nzcv_raw);
    code.mov(dword[code.ABI_JIT_PTR + offsetof(A64JitState, cpsr_nzcv)], nzcv_raw);
}

void A64EmitX64::EmitA64SetNZCV(A64EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const Xbyak::Reg32 to_store = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
    code.mov(dword[r15 + offsetof(A64JitState, cpsr_nzcv)], to_store);
    code.mov(dword[code.ABI_JIT_PTR + offsetof(A64JitState, cpsr_nzcv)], to_store);
}

void A64EmitX64::EmitA64GetW(A64EmitContext& ctx, IR::Inst* inst) {
    const A64::Reg reg = inst->GetArg(0).GetA64RegRef();
    const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();

    code.mov(result, dword[r15 + offsetof(A64JitState, reg) + sizeof(u64) * static_cast<size_t>(reg)]);
    code.mov(result, dword[code.ABI_JIT_PTR + offsetof(A64JitState, reg) + sizeof(u64) * static_cast<size_t>(reg)]);
    ctx.reg_alloc.DefineValue(inst, result);
}

@@ -331,13 +331,13 @@ void A64EmitX64::EmitA64GetX(A64EmitContext& ctx, IR::Inst* inst) {
    const A64::Reg reg = inst->GetArg(0).GetA64RegRef();
    const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr();

    code.mov(result, qword[r15 + offsetof(A64JitState, reg) + sizeof(u64) * static_cast<size_t>(reg)]);
    code.mov(result, qword[code.ABI_JIT_PTR + offsetof(A64JitState, reg) + sizeof(u64) * static_cast<size_t>(reg)]);
    ctx.reg_alloc.DefineValue(inst, result);
}

void A64EmitX64::EmitA64GetS(A64EmitContext& ctx, IR::Inst* inst) {
    const A64::Vec vec = inst->GetArg(0).GetA64VecRef();
    const auto addr = qword[r15 + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)];
    const auto addr = qword[code.ABI_JIT_PTR + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)];

    const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
    code.movd(result, addr);

@@ -346,7 +346,7 @@ void A64EmitX64::EmitA64GetS(A64EmitContext& ctx, IR::Inst* inst) {

void A64EmitX64::EmitA64GetD(A64EmitContext& ctx, IR::Inst* inst) {
    const A64::Vec vec = inst->GetArg(0).GetA64VecRef();
    const auto addr = qword[r15 + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)];
    const auto addr = qword[code.ABI_JIT_PTR + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)];

    const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
    code.movq(result, addr);

@@ -355,7 +355,7 @@ void A64EmitX64::EmitA64GetD(A64EmitContext& ctx, IR::Inst* inst) {

void A64EmitX64::EmitA64GetQ(A64EmitContext& ctx, IR::Inst* inst) {
    const A64::Vec vec = inst->GetArg(0).GetA64VecRef();
    const auto addr = xword[r15 + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)];
    const auto addr = xword[code.ABI_JIT_PTR + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)];

    const Xbyak::Xmm result = ctx.reg_alloc.ScratchXmm();
    code.movaps(result, addr);

@@ -364,13 +364,13 @@ void A64EmitX64::EmitA64GetQ(A64EmitContext& ctx, IR::Inst* inst) {

void A64EmitX64::EmitA64GetSP(A64EmitContext& ctx, IR::Inst* inst) {
    const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr();
    code.mov(result, qword[r15 + offsetof(A64JitState, sp)]);
    code.mov(result, qword[code.ABI_JIT_PTR + offsetof(A64JitState, sp)]);
    ctx.reg_alloc.DefineValue(inst, result);
}

void A64EmitX64::EmitA64GetFPCR(A64EmitContext& ctx, IR::Inst* inst) {
    const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
    code.mov(result, dword[r15 + offsetof(A64JitState, fpcr)]);
    code.mov(result, dword[code.ABI_JIT_PTR + offsetof(A64JitState, fpcr)]);
    ctx.reg_alloc.DefineValue(inst, result);
}

@@ -380,15 +380,15 @@ static u32 GetFPSRImpl(A64JitState* jit_state) {

void A64EmitX64::EmitA64GetFPSR(A64EmitContext& ctx, IR::Inst* inst) {
    ctx.reg_alloc.HostCall(inst);
    code.mov(code.ABI_PARAM1, code.r15);
    code.stmxcsr(code.dword[code.r15 + offsetof(A64JitState, guest_MXCSR)]);
    code.mov(code.ABI_PARAM1, code.ABI_JIT_PTR);
    code.stmxcsr(code.dword[code.ABI_JIT_PTR + offsetof(A64JitState, guest_MXCSR)]);
    code.CallFunction(GetFPSRImpl);
}

void A64EmitX64::EmitA64SetW(A64EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const A64::Reg reg = inst->GetArg(0).GetA64RegRef();
    const auto addr = qword[r15 + offsetof(A64JitState, reg) + sizeof(u64) * static_cast<size_t>(reg)];
    const auto addr = qword[code.ABI_JIT_PTR + offsetof(A64JitState, reg) + sizeof(u64) * static_cast<size_t>(reg)];
    if (args[1].FitsInImmediateS32()) {
        code.mov(addr, args[1].GetImmediateS32());
    } else {

@@ -402,7 +402,7 @@ void A64EmitX64::EmitA64SetW(A64EmitContext& ctx, IR::Inst* inst) {
void A64EmitX64::EmitA64SetX(A64EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const A64::Reg reg = inst->GetArg(0).GetA64RegRef();
    const auto addr = qword[r15 + offsetof(A64JitState, reg) + sizeof(u64) * static_cast<size_t>(reg)];
    const auto addr = qword[code.ABI_JIT_PTR + offsetof(A64JitState, reg) + sizeof(u64) * static_cast<size_t>(reg)];
    if (args[1].FitsInImmediateS32()) {
        code.mov(addr, args[1].GetImmediateS32());
    } else if (args[1].IsInXmm()) {

@@ -417,7 +417,7 @@ void A64EmitX64::EmitA64SetX(A64EmitContext& ctx, IR::Inst* inst) {
void A64EmitX64::EmitA64SetS(A64EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const A64::Vec vec = inst->GetArg(0).GetA64VecRef();
    const auto addr = xword[r15 + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)];
    const auto addr = xword[code.ABI_JIT_PTR + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)];

    const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(args[1]);
    const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();

@@ -430,7 +430,7 @@ void A64EmitX64::EmitA64SetS(A64EmitContext& ctx, IR::Inst* inst) {
void A64EmitX64::EmitA64SetD(A64EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const A64::Vec vec = inst->GetArg(0).GetA64VecRef();
    const auto addr = xword[r15 + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)];
    const auto addr = xword[code.ABI_JIT_PTR + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)];

    const Xbyak::Xmm to_store = ctx.reg_alloc.UseScratchXmm(args[1]);
    code.movq(to_store, to_store); // TODO: Remove when able

@@ -440,7 +440,7 @@ void A64EmitX64::EmitA64SetD(A64EmitContext& ctx, IR::Inst* inst) {
void A64EmitX64::EmitA64SetQ(A64EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const A64::Vec vec = inst->GetArg(0).GetA64VecRef();
    const auto addr = xword[r15 + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)];
    const auto addr = xword[code.ABI_JIT_PTR + offsetof(A64JitState, vec) + sizeof(u64) * 2 * static_cast<size_t>(vec)];

    const Xbyak::Xmm to_store = ctx.reg_alloc.UseXmm(args[1]);
    code.movaps(addr, to_store);

@@ -448,7 +448,7 @@ void A64EmitX64::EmitA64SetQ(A64EmitContext& ctx, IR::Inst* inst) {

void A64EmitX64::EmitA64SetSP(A64EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const auto addr = qword[r15 + offsetof(A64JitState, sp)];
    const auto addr = qword[code.ABI_JIT_PTR + offsetof(A64JitState, sp)];
    if (args[0].FitsInImmediateS32()) {
        code.mov(addr, args[0].GetImmediateS32());
    } else if (args[0].IsInXmm()) {

@@ -467,9 +467,9 @@ static void SetFPCRImpl(A64JitState* jit_state, u32 value) {
void A64EmitX64::EmitA64SetFPCR(A64EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    ctx.reg_alloc.HostCall(nullptr, {}, args[0]);
    code.mov(code.ABI_PARAM1, code.r15);
    code.mov(code.ABI_PARAM1, code.ABI_JIT_PTR);
    code.CallFunction(SetFPCRImpl);
    code.ldmxcsr(code.dword[code.r15 + offsetof(A64JitState, guest_MXCSR)]);
    code.ldmxcsr(code.dword[code.ABI_JIT_PTR + offsetof(A64JitState, guest_MXCSR)]);
}

static void SetFPSRImpl(A64JitState* jit_state, u32 value) {

@@ -479,14 +479,14 @@ static void SetFPSRImpl(A64JitState* jit_state, u32 value) {
void A64EmitX64::EmitA64SetFPSR(A64EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    ctx.reg_alloc.HostCall(nullptr, {}, args[0]);
    code.mov(code.ABI_PARAM1, code.r15);
    code.mov(code.ABI_PARAM1, code.ABI_JIT_PTR);
    code.CallFunction(SetFPSRImpl);
    code.ldmxcsr(code.dword[code.r15 + offsetof(A64JitState, guest_MXCSR)]);
    code.ldmxcsr(code.dword[code.ABI_JIT_PTR + offsetof(A64JitState, guest_MXCSR)]);
}

void A64EmitX64::EmitA64SetPC(A64EmitContext& ctx, IR::Inst* inst) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    const auto addr = qword[r15 + offsetof(A64JitState, pc)];
    const auto addr = qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)];
    if (args[0].FitsInImmediateS32()) {
        code.mov(addr, args[0].GetImmediateS32());
    } else if (args[0].IsInXmm()) {

@@ -507,7 +507,7 @@ void A64EmitX64::EmitA64CallSupervisor(A64EmitContext& ctx, IR::Inst* inst) {
        code.mov(param[0], imm);
    });
    // The kernel would have to execute ERET to get here, which would clear exclusive state.
    code.mov(code.byte[r15 + offsetof(A64JitState, exclusive_state)], u8(0));
    code.mov(code.byte[code.ABI_JIT_PTR + offsetof(A64JitState, exclusive_state)], u8(0));
}

void A64EmitX64::EmitA64ExceptionRaised(A64EmitContext& ctx, IR::Inst* inst) {

@@ -621,7 +621,7 @@ void A64EmitX64::EmitTerminalImpl(IR::Term::Interpret terminal, IR::LocationDesc
    code.SwitchMxcsrOnExit();
    Devirtualize<&A64::UserCallbacks::InterpreterFallback>(conf.callbacks).EmitCall(code, [&](RegList param) {
        code.mov(param[0], A64::LocationDescriptor{terminal.next}.PC());
        code.mov(qword[r15 + offsetof(A64JitState, pc)], param[0]);
        code.mov(qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)], param[0]);
        code.mov(param[1].cvt32(), terminal.num_instructions);
    });
    code.ReturnFromRunCode(true); // TODO: Check cycles

@@ -632,61 +632,56 @@ void A64EmitX64::EmitTerminalImpl(IR::Term::ReturnToDispatch, IR::LocationDescri
}

void A64EmitX64::EmitTerminalImpl(IR::Term::LinkBlock terminal, IR::LocationDescriptor, bool is_single_step) {
    if (!conf.HasOptimization(OptimizationFlag::BlockLinking) || is_single_step) {
    // Used for patches and linking
    if (conf.HasOptimization(OptimizationFlag::BlockLinking) && !is_single_step) {
        if (conf.enable_cycle_counting) {
            code.cmp(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], 0);
            patch_information[terminal.next].jg.push_back(code.getCurr());
            if (const auto next_bb = GetBasicBlock(terminal.next)) {
                EmitPatchJg(terminal.next, next_bb->entrypoint);
            } else {
                EmitPatchJg(terminal.next);
            }
        } else {
            code.cmp(dword[code.ABI_JIT_PTR + offsetof(A64JitState, halt_reason)], 0);
            patch_information[terminal.next].jz.push_back(code.getCurr());
            if (const auto next_bb = GetBasicBlock(terminal.next)) {
                EmitPatchJz(terminal.next, next_bb->entrypoint);
            } else {
                EmitPatchJz(terminal.next);
            }
        }
        code.mov(rax, A64::LocationDescriptor{terminal.next}.PC());
        code.mov(qword[r15 + offsetof(A64JitState, pc)], rax);
        code.ReturnFromRunCode();
        return;
    }

    if (conf.enable_cycle_counting) {
        code.cmp(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], 0);

        patch_information[terminal.next].jg.push_back(code.getCurr());
        if (const auto next_bb = GetBasicBlock(terminal.next)) {
            EmitPatchJg(terminal.next, next_bb->entrypoint);
        } else {
            EmitPatchJg(terminal.next);
        }
        code.mov(qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)], rax);
        code.ForceReturnFromRunCode();
    } else {
        code.cmp(dword[r15 + offsetof(A64JitState, halt_reason)], 0);

        patch_information[terminal.next].jz.push_back(code.getCurr());
        if (const auto next_bb = GetBasicBlock(terminal.next)) {
            EmitPatchJz(terminal.next, next_bb->entrypoint);
        } else {
            EmitPatchJz(terminal.next);
        }
        code.mov(rax, A64::LocationDescriptor{terminal.next}.PC());
        code.mov(qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)], rax);
        code.ReturnFromRunCode();
    }

    code.mov(rax, A64::LocationDescriptor{terminal.next}.PC());
    code.mov(qword[r15 + offsetof(A64JitState, pc)], rax);
    code.ForceReturnFromRunCode();
}

void A64EmitX64::EmitTerminalImpl(IR::Term::LinkBlockFast terminal, IR::LocationDescriptor, bool is_single_step) {
    if (!conf.HasOptimization(OptimizationFlag::BlockLinking) || is_single_step) {
        code.mov(rax, A64::LocationDescriptor{terminal.next}.PC());
        code.mov(qword[r15 + offsetof(A64JitState, pc)], rax);
        code.ReturnFromRunCode();
        return;
    }

    patch_information[terminal.next].jmp.push_back(code.getCurr());
    if (auto next_bb = GetBasicBlock(terminal.next)) {
        EmitPatchJmp(terminal.next, next_bb->entrypoint);
    if (conf.HasOptimization(OptimizationFlag::BlockLinking) && !is_single_step) {
        patch_information[terminal.next].jmp.push_back(code.getCurr());
        if (auto next_bb = GetBasicBlock(terminal.next)) {
            EmitPatchJmp(terminal.next, next_bb->entrypoint);
        } else {
            EmitPatchJmp(terminal.next);
        }
    } else {
        EmitPatchJmp(terminal.next);
        code.mov(rax, A64::LocationDescriptor{terminal.next}.PC());
        code.mov(qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)], rax);
        code.ReturnFromRunCode();
    }
}

void A64EmitX64::EmitTerminalImpl(IR::Term::PopRSBHint, IR::LocationDescriptor, bool is_single_step) {
    if (!conf.HasOptimization(OptimizationFlag::ReturnStackBuffer) || is_single_step) {
    if (conf.HasOptimization(OptimizationFlag::ReturnStackBuffer) && !is_single_step) {
        code.jmp(terminal_handler_pop_rsb_hint);
    } else {
        code.ReturnFromRunCode();
        return;
    }

    code.jmp(terminal_handler_pop_rsb_hint);
}

void A64EmitX64::EmitTerminalImpl(IR::Term::FastDispatchHint, IR::LocationDescriptor, bool is_single_step) {

@@ -723,7 +718,7 @@ void A64EmitX64::EmitTerminalImpl(IR::Term::CheckBit terminal, IR::LocationDescr
}

void A64EmitX64::EmitTerminalImpl(IR::Term::CheckHalt terminal, IR::LocationDescriptor initial_location, bool is_single_step) {
    code.cmp(dword[r15 + offsetof(A64JitState, halt_reason)], 0);
    code.cmp(dword[code.ABI_JIT_PTR + offsetof(A64JitState, halt_reason)], 0);
    code.jne(code.GetForceReturnFromRunCodeAddress());
    EmitTerminal(terminal.else_, initial_location, is_single_step);
}

@@ -734,7 +729,7 @@ void A64EmitX64::EmitPatchJg(const IR::LocationDescriptor& target_desc, CodePtr
        code.jg(target_code_ptr);
    } else {
        code.mov(rax, A64::LocationDescriptor{target_desc}.PC());
        code.mov(qword[r15 + offsetof(A64JitState, pc)], rax);
        code.mov(qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)], rax);
        code.jg(code.GetReturnFromRunCodeAddress());
    }
    code.EnsurePatchLocationSize(patch_location, 23);

@@ -746,7 +741,7 @@ void A64EmitX64::EmitPatchJz(const IR::LocationDescriptor& target_desc, CodePtr
        code.jz(target_code_ptr);
    } else {
        code.mov(rax, A64::LocationDescriptor{target_desc}.PC());
        code.mov(qword[r15 + offsetof(A64JitState, pc)], rax);
        code.mov(qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)], rax);
        code.jz(code.GetReturnFromRunCodeAddress());
    }
    code.EnsurePatchLocationSize(patch_location, 23);

@@ -758,7 +753,7 @@ void A64EmitX64::EmitPatchJmp(const IR::LocationDescriptor& target_desc, CodePtr
        code.jmp(target_code_ptr);
    } else {
        code.mov(rax, A64::LocationDescriptor{target_desc}.PC());
        code.mov(qword[r15 + offsetof(A64JitState, pc)], rax);
        code.mov(qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)], rax);
        code.jmp(code.GetReturnFromRunCodeAddress());
    }
    code.EnsurePatchLocationSize(patch_location, 22);
@@ -127,10 +127,10 @@ protected:
    BlockRangeInformation<u64> block_ranges;
    std::array<FastDispatchEntry, fast_dispatch_table_size> fast_dispatch_table;
    ankerl::unordered_dense::map<u64, FastmemPatchInfo> fastmem_patch_info;
    std::map<std::tuple<bool, size_t, int, int>, void (*)()> read_fallbacks;
    std::map<std::tuple<bool, size_t, int, int>, void (*)()> write_fallbacks;
    std::map<std::tuple<bool, size_t, int, int>, void (*)()> exclusive_write_fallbacks;
    std::set<DoNotFastmemMarker> do_not_fastmem;
    ankerl::unordered_dense::map<std::tuple<bool, size_t, int, int>, void (*)()> read_fallbacks;
    ankerl::unordered_dense::map<std::tuple<bool, size_t, int, int>, void (*)()> write_fallbacks;
    ankerl::unordered_dense::map<std::tuple<bool, size_t, int, int>, void (*)()> exclusive_write_fallbacks;
    ankerl::unordered_dense::set<DoNotFastmemMarker> do_not_fastmem;
    const void* terminal_handler_pop_rsb_hint = nullptr;
    const void* terminal_handler_fast_dispatch_hint = nullptr;
    FastDispatchEntry& (*fast_dispatch_table_lookup)(u64) = nullptr;
@@ -324,7 +324,7 @@ void A64EmitX64::EmitA64WriteMemory128(A64EmitContext& ctx, IR::Inst* inst) {
}

void A64EmitX64::EmitA64ClearExclusive(A64EmitContext&, IR::Inst*) {
    code.mov(code.byte[r15 + offsetof(A64JitState, exclusive_state)], u8(0));
    code.mov(code.byte[code.ABI_JIT_PTR + offsetof(A64JitState, exclusive_state)], u8(0));
}

void A64EmitX64::EmitA64ExclusiveReadMemory8(A64EmitContext& ctx, IR::Inst* inst) {

@@ -416,14 +416,14 @@ void A64EmitX64::EmitCheckMemoryAbort(A64EmitContext&, IR::Inst* inst, Xbyak::La

    const A64::LocationDescriptor current_location{IR::LocationDescriptor{inst->GetArg(0).GetU64()}};

    code.test(dword[r15 + offsetof(A64JitState, halt_reason)], static_cast<u32>(HaltReason::MemoryAbort));
    code.test(dword[code.ABI_JIT_PTR + offsetof(A64JitState, halt_reason)], static_cast<u32>(HaltReason::MemoryAbort));
    if (end) {
        code.jz(*end, code.T_NEAR);
    } else {
        code.jz(skip, code.T_NEAR);
    }
    code.mov(rax, current_location.PC());
    code.mov(qword[r15 + offsetof(A64JitState, pc)], rax);
    code.mov(qword[code.ABI_JIT_PTR + offsetof(A64JitState, pc)], rax);
    code.ForceReturnFromRunCode();
    code.L(skip);
}
@@ -49,16 +49,11 @@ void ABI_PushRegistersAndAdjustStack(BlockOfCode& code, const size_t frame_size,
    const size_t num_xmms = std::count_if(regs.begin(), regs.end(), HostLocIsXMM);
    const FrameInfo frame_info = CalculateFrameInfo(num_gprs, num_xmms, frame_size);

    for (auto const gpr : regs) {
        if (HostLocIsGPR(gpr)) {
    for (auto const gpr : regs)
        if (HostLocIsGPR(gpr))
            code.push(HostLocToReg64(gpr));
        }
    }

    if (frame_info.stack_subtraction != 0) {
    if (frame_info.stack_subtraction != 0)
        code.sub(rsp, u32(frame_info.stack_subtraction));
    }

    size_t xmm_offset = frame_info.xmm_offset;
    for (auto const xmm : regs) {
        if (HostLocIsXMM(xmm)) {

@@ -80,27 +75,22 @@ void ABI_PopRegistersAndAdjustStack(BlockOfCode& code, const size_t frame_size,
    const size_t num_xmms = std::count_if(regs.begin(), regs.end(), HostLocIsXMM);
    const FrameInfo frame_info = CalculateFrameInfo(num_gprs, num_xmms, frame_size);

    size_t xmm_offset = frame_info.xmm_offset;
    for (auto const xmm : regs) {
    size_t xmm_offset = frame_info.xmm_offset + (num_xmms * XMM_SIZE);
    for (auto const xmm : mcl::iterator::reverse(regs)) {
        if (HostLocIsXMM(xmm)) {
            xmm_offset -= XMM_SIZE;
            if (code.HasHostFeature(HostFeature::AVX)) {
                code.vmovaps(HostLocToXmm(xmm), code.xword[rsp + xmm_offset]);
            } else {
                code.movaps(HostLocToXmm(xmm), code.xword[rsp + xmm_offset]);
            }
            xmm_offset += XMM_SIZE;
        }
    }

    if (frame_info.stack_subtraction != 0) {
    if (frame_info.stack_subtraction != 0)
        code.add(rsp, u32(frame_info.stack_subtraction));
    }

    for (auto const gpr : mcl::iterator::reverse(regs)) {
        if (HostLocIsGPR(gpr)) {
    for (auto const gpr : mcl::iterator::reverse(regs))
        if (HostLocIsGPR(gpr))
            code.pop(HostLocToReg64(gpr));
        }
    }
}

void ABI_PushCalleeSaveRegistersAndAdjustStack(BlockOfCode& code, const std::size_t frame_size) {

@@ -119,6 +109,20 @@ void ABI_PopCallerSaveRegistersAndAdjustStack(BlockOfCode& code, const std::size
    ABI_PopRegistersAndAdjustStack(code, frame_size, ABI_ALL_CALLER_SAVE);
}

// Windows ABI registers are not in the same allocation algorithm as unix's
#ifdef _MSC_VER
void ABI_PushCallerSaveRegistersAndAdjustStackExcept(BlockOfCode& code, const HostLoc exception) {
    std::vector<HostLoc> regs;
    std::remove_copy(ABI_ALL_CALLER_SAVE.begin(), ABI_ALL_CALLER_SAVE.end(), std::back_inserter(regs), exception);
    ABI_PushRegistersAndAdjustStack(code, 0, regs);
}

void ABI_PopCallerSaveRegistersAndAdjustStackExcept(BlockOfCode& code, const HostLoc exception) {
    std::vector<HostLoc> regs;
    std::remove_copy(ABI_ALL_CALLER_SAVE.begin(), ABI_ALL_CALLER_SAVE.end(), std::back_inserter(regs), exception);
    ABI_PopRegistersAndAdjustStack(code, 0, regs);
}
#else
static consteval size_t ABI_AllCallerSaveSize() noexcept {
    return ABI_ALL_CALLER_SAVE.max_size();
}

@@ -166,24 +170,14 @@ alignas(64) static constinit std::array<HostLoc, ABI_AllCallerSaveSize() - 1> AB
};

void ABI_PushCallerSaveRegistersAndAdjustStackExcept(BlockOfCode& code, const HostLoc exception) {
#ifdef _MSC_VER
    std::vector<HostLoc> regs;
    std::remove_copy(ABI_ALL_CALLER_SAVE.begin(), ABI_ALL_CALLER_SAVE.end(), std::back_inserter(regs), exception);
    ABI_PushRegistersAndAdjustStack(code, 0, regs);
#else
    ASSUME(size_t(exception) < 32);
    ABI_PushRegistersAndAdjustStack(code, 0, ABI_CALLER_SAVED_EXCEPT_TABLE[size_t(exception)]);
#endif
}

void ABI_PopCallerSaveRegistersAndAdjustStackExcept(BlockOfCode& code, const HostLoc exception) {
#ifdef _MSC_VER
    std::vector<HostLoc> regs;
    std::remove_copy(ABI_ALL_CALLER_SAVE.begin(), ABI_ALL_CALLER_SAVE.end(), std::back_inserter(regs), exception);
    ABI_PopRegistersAndAdjustStack(code, 0, regs);
#else
    ASSUME(size_t(exception) < 32);
    ABI_PopRegistersAndAdjustStack(code, 0, ABI_CALLER_SAVED_EXCEPT_TABLE[size_t(exception)]);
#endif
}
#endif

} // namespace Dynarmic::Backend::X64
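The pop path now walks the register list in reverse and starts its XMM cursor one slot past the end of the save area, pre-decrementing before each load, so restores land on exactly the slots the forward-order push wrote. A small sketch of that symmetry on plain arrays (illustration only, not project code):

    #include <cstddef>
    #include <vector>

    constexpr std::size_t XMM_SIZE = 16;

    // Saves sit at xmm_offset, xmm_offset + 16, ... in forward order.
    // Restoring in reverse starts at xmm_offset + num_xmms * XMM_SIZE and
    // pre-decrements, so the i-th restore reads the i-th save slot.
    void restore_offsets(std::size_t xmm_offset, std::size_t num_xmms,
                         std::vector<std::size_t>& out) {
        std::size_t cursor = xmm_offset + num_xmms * XMM_SIZE;
        for (std::size_t i = num_xmms; i-- > 0;) {
            cursor -= XMM_SIZE;     // mirrors xmm_offset -= XMM_SIZE above
            out.push_back(cursor);  // slot of the i-th saved register
        }
    }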
@@ -17,6 +17,7 @@ namespace Dynarmic::Backend::X64 {

class BlockOfCode;

constexpr HostLoc ABI_JIT_PTR = HostLoc::R15;
#ifdef _WIN32

constexpr HostLoc ABI_RETURN = HostLoc::RAX;
@@ -36,6 +36,7 @@

namespace Dynarmic::Backend::X64 {

const Xbyak::Reg64 BlockOfCode::ABI_JIT_PTR = HostLocToReg64(Dynarmic::Backend::X64::ABI_JIT_PTR);
#ifdef _WIN32
const Xbyak::Reg64 BlockOfCode::ABI_RETURN = HostLocToReg64(Dynarmic::Backend::X64::ABI_RETURN);
const Xbyak::Reg64 BlockOfCode::ABI_PARAM1 = HostLocToReg64(Dynarmic::Backend::X64::ABI_PARAM1);

@@ -322,8 +323,8 @@ void BlockOfCode::GenRunCode(std::function<void(BlockOfCode&)> rcp) {
    // that the stack is appropriately aligned for CALLs.
    ABI_PushCalleeSaveRegistersAndAdjustStack(*this, sizeof(StackLayout));

    mov(r15, ABI_PARAM1);
    mov(rbx, ABI_PARAM2); // save temporarily in non-volatile register
    mov(ABI_JIT_PTR, ABI_PARAM1);
    mov(rbx, ABI_PARAM2); // save temporarily in non-volatile register

    if (cb.enable_cycle_counting) {
        cb.GetTicksRemaining->EmitCall(*this);

@@ -331,9 +332,11 @@ void BlockOfCode::GenRunCode(std::function<void(BlockOfCode&)> rcp) {
        mov(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], ABI_RETURN);
    }

    // r14 = page table
    // r13 = fastmem pointer
    rcp(*this);

    cmp(dword[r15 + jsi.offsetof_halt_reason], 0);
    cmp(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], 0);
    jne(return_to_caller_mxcsr_already_exited, T_NEAR);

    SwitchMxcsrOnEntry();

@@ -344,7 +347,7 @@ void BlockOfCode::GenRunCode(std::function<void(BlockOfCode&)> rcp) {

    ABI_PushCalleeSaveRegistersAndAdjustStack(*this, sizeof(StackLayout));

    mov(r15, ABI_PARAM1);
    mov(ABI_JIT_PTR, ABI_PARAM1);

    if (cb.enable_cycle_counting) {
        mov(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_to_run)], 1);

@@ -353,10 +356,10 @@ void BlockOfCode::GenRunCode(std::function<void(BlockOfCode&)> rcp) {

    rcp(*this);

    cmp(dword[r15 + jsi.offsetof_halt_reason], 0);
    cmp(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], 0);
    jne(return_to_caller_mxcsr_already_exited, T_NEAR);
    lock();
    or_(dword[r15 + jsi.offsetof_halt_reason], static_cast<u32>(HaltReason::Step));
    or_(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], static_cast<u32>(HaltReason::Step));

    SwitchMxcsrOnEntry();
    jmp(ABI_PARAM2);

@@ -366,7 +369,7 @@ void BlockOfCode::GenRunCode(std::function<void(BlockOfCode&)> rcp) {
    align();
    return_from_run_code[0] = getCurr<const void*>();

    cmp(dword[r15 + jsi.offsetof_halt_reason], 0);
    cmp(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], 0);
    jne(return_to_caller);
    if (cb.enable_cycle_counting) {
        cmp(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], 0);

@@ -378,7 +381,7 @@ void BlockOfCode::GenRunCode(std::function<void(BlockOfCode&)> rcp) {
    align();
    return_from_run_code[MXCSR_ALREADY_EXITED] = getCurr<const void*>();

    cmp(dword[r15 + jsi.offsetof_halt_reason], 0);
    cmp(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], 0);
    jne(return_to_caller_mxcsr_already_exited);
    if (cb.enable_cycle_counting) {
        cmp(qword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, cycles_remaining)], 0);

@@ -407,7 +410,7 @@ void BlockOfCode::GenRunCode(std::function<void(BlockOfCode&)> rcp) {

    xor_(eax, eax);
    lock();
    xchg(dword[r15 + jsi.offsetof_halt_reason], eax);
    xchg(dword[ABI_JIT_PTR + jsi.offsetof_halt_reason], eax);

    ABI_PopCalleeSaveRegistersAndAdjustStack(*this, sizeof(StackLayout));
    ret();

@@ -417,22 +420,22 @@ void BlockOfCode::GenRunCode(std::function<void(BlockOfCode&)> rcp) {

void BlockOfCode::SwitchMxcsrOnEntry() {
    stmxcsr(dword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, save_host_MXCSR)]);
    ldmxcsr(dword[r15 + jsi.offsetof_guest_MXCSR]);
    ldmxcsr(dword[ABI_JIT_PTR + jsi.offsetof_guest_MXCSR]);
}

void BlockOfCode::SwitchMxcsrOnExit() {
    stmxcsr(dword[r15 + jsi.offsetof_guest_MXCSR]);
    stmxcsr(dword[ABI_JIT_PTR + jsi.offsetof_guest_MXCSR]);
    ldmxcsr(dword[rsp + ABI_SHADOW_SPACE + offsetof(StackLayout, save_host_MXCSR)]);
}

void BlockOfCode::EnterStandardASIMD() {
    stmxcsr(dword[r15 + jsi.offsetof_guest_MXCSR]);
    ldmxcsr(dword[r15 + jsi.offsetof_asimd_MXCSR]);
    stmxcsr(dword[ABI_JIT_PTR + jsi.offsetof_guest_MXCSR]);
    ldmxcsr(dword[ABI_JIT_PTR + jsi.offsetof_asimd_MXCSR]);
}

void BlockOfCode::LeaveStandardASIMD() {
    stmxcsr(dword[r15 + jsi.offsetof_asimd_MXCSR]);
    ldmxcsr(dword[r15 + jsi.offsetof_guest_MXCSR]);
    stmxcsr(dword[ABI_JIT_PTR + jsi.offsetof_asimd_MXCSR]);
    ldmxcsr(dword[ABI_JIT_PTR + jsi.offsetof_guest_MXCSR]);
}

void BlockOfCode::UpdateTicks() {
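GenRunCode's prologue pins the JIT-state pointer in the callee-saved register now named ABI_JIT_PTR (rather than hard-coding r15), checks halt_reason before switching MXCSR, and clears halt_reason with a lock xchg on the way out. The control-flow shape, written as ordinary C++ (a rough sketch under those assumptions; SwitchMxcsrOnEntry/DispatchAt are hypothetical stand-ins for emitted paths):

    #include <cstdint>

    struct JitState { std::uint32_t halt_reason = 0; /* guest registers, MXCSR, ... */ };

    void SwitchMxcsrOnEntry();       // swap host MXCSR for the guest's
    void DispatchAt(const void* entry);

    std::uint32_t ClearHalt(JitState* jit) {
        std::uint32_t r = jit->halt_reason;  // lock xchg(halt_reason, 0) in the emitted code
        jit->halt_reason = 0;
        return r;
    }

    std::uint32_t Run(JitState* jit, const void* entry) {
        if (jit->halt_reason != 0)           // cmp + jne before the MXCSR switch
            return ClearHalt(jit);
        SwitchMxcsrOnEntry();
        DispatchAt(entry);                   // enter translated code
        return ClearHalt(jit);               // reached via return_from_run_code
    }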
@@ -155,6 +155,7 @@ public:
    void SetCodePtr(CodePtr code_ptr);
    void EnsurePatchLocationSize(CodePtr begin, size_t size);

    static const Xbyak::Reg64 ABI_JIT_PTR;
    // ABI registers
#ifdef _WIN32
    static const Xbyak::Reg64 ABI_RETURN;
@@ -91,19 +91,18 @@ void EmitX64::PushRSBHelper(Xbyak::Reg64 loc_desc_reg, Xbyak::Reg64 index_reg, I
        ? iter->second.entrypoint
        : code.GetReturnFromRunCodeAddress();

    code.mov(index_reg.cvt32(), dword[r15 + code.GetJitStateInfo().offsetof_rsb_ptr]);

    code.mov(index_reg.cvt32(), dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_rsb_ptr]);
    code.mov(loc_desc_reg, target.Value());

    patch_information[target].mov_rcx.push_back(code.getCurr());
    EmitPatchMovRcx(target_code_ptr);

    code.mov(qword[r15 + index_reg * 8 + code.GetJitStateInfo().offsetof_rsb_location_descriptors], loc_desc_reg);
    code.mov(qword[r15 + index_reg * 8 + code.GetJitStateInfo().offsetof_rsb_codeptrs], rcx);

    code.add(index_reg.cvt32(), 1);
    code.and_(index_reg.cvt32(), u32(code.GetJitStateInfo().rsb_ptr_mask));
    code.mov(dword[r15 + code.GetJitStateInfo().offsetof_rsb_ptr], index_reg.cvt32());
    code.mov(qword[code.ABI_JIT_PTR + index_reg * 8 + code.GetJitStateInfo().offsetof_rsb_location_descriptors], loc_desc_reg);
    code.mov(qword[code.ABI_JIT_PTR + index_reg * 8 + code.GetJitStateInfo().offsetof_rsb_codeptrs], rcx);
    // Byte size hack
    DEBUG_ASSERT(code.GetJitStateInfo().rsb_ptr_mask <= 0xFF);
    code.add(index_reg.cvt32(), 1); //flags trashed, 1 single byte, haswell doesn't care
    code.and_(index_reg.cvt32(), u32(code.GetJitStateInfo().rsb_ptr_mask)); //trashes flags
    // Results ready and sort by least needed: give OOO some break
    code.mov(dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_rsb_ptr], index_reg.cvt32());
}
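PushRSBHelper records (location descriptor, code pointer) pairs in a small power-of-two ring indexed by rsb_ptr; the A64 pop handler seen earlier in this diff decrements the index and compares descriptors before jumping. The structure as a plain ring buffer (illustration only, not project code):

    #include <array>
    #include <cstdint>

    template <std::size_t N>  // N must be a power of two so `& mask` wraps
    struct ReturnStackBuffer {
        static_assert((N & (N - 1)) == 0);
        std::array<std::uint64_t, N> location_descriptors{};
        std::array<const void*, N> codeptrs{};
        std::uint32_t ptr = 0;

        void push(std::uint64_t loc, const void* code_ptr) {
            location_descriptors[ptr] = loc;  // mirrors the two movs above
            codeptrs[ptr] = code_ptr;
            ptr = (ptr + 1) & (N - 1);        // add + and_ with rsb_ptr_mask
        }

        const void* pop(std::uint64_t expected_loc) {
            ptr = (ptr - 1) & (N - 1);        // sub + and_ in the pop handler
            return location_descriptors[ptr] == expected_loc ? codeptrs[ptr] : nullptr;
        }
    };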
void EmitX64::EmitVerboseDebuggingOutput(RegAlloc& reg_alloc) {

@@ -119,7 +118,7 @@ void EmitX64::EmitVerboseDebuggingOutput(RegAlloc& reg_alloc) {
        code.movaps(xword[rsp + offsetof(RegisterData, xmms) + 2 * sizeof(u64) * i], Xbyak::Xmm{i});
    }
    code.lea(rax, ptr[rsp + sizeof(RegisterData) + offsetof(StackLayout, spill)]);
    code.mov(xword[rsp + offsetof(RegisterData, spill)], rax);
    code.mov(qword[rsp + offsetof(RegisterData, spill)], rax);

    reg_alloc.EmitVerboseDebuggingOutput();

@@ -285,7 +284,7 @@ void EmitX64::EmitAddCycles(size_t cycles) {
Xbyak::Label EmitX64::EmitCond(IR::Cond cond) {
    Xbyak::Label pass;

    code.mov(eax, dword[r15 + code.GetJitStateInfo().offsetof_cpsr_nzcv]);
    code.mov(eax, dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_cpsr_nzcv]);

    code.LoadRequiredFlagsForCondFromRax(cond);
@@ -18,24 +18,20 @@ namespace CRC32 = Common::Crypto::CRC32;

static void EmitCRC32Castagnoli(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, const int data_size) {
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);

    if (code.HasHostFeature(HostFeature::SSE42)) {
        const Xbyak::Reg32 crc = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
        const Xbyak::Reg value = ctx.reg_alloc.UseGpr(args[1]).changeBit(data_size);

        if (data_size != 64) {
            code.crc32(crc, value);
        } else {
            code.crc32(crc.cvt64(), value);
        }

        ctx.reg_alloc.DefineValue(inst, crc);
        return;
    } else {
        ctx.reg_alloc.HostCall(inst, args[0], args[1], {});
        code.mov(code.ABI_PARAM3.cvt32(), data_size / CHAR_BIT); //zext
        code.CallFunction(&CRC32::ComputeCRC32Castagnoli);
    }

    ctx.reg_alloc.HostCall(inst, args[0], args[1], {});
    code.mov(code.ABI_PARAM3, data_size / CHAR_BIT);
    code.CallFunction(&CRC32::ComputeCRC32Castagnoli);
}
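The SSE4.2 path leans on the hardware `crc32` instruction, which computes CRC-32C (Castagnoli) with the reflected polynomial 0x82F63B78. What one byte step of it computes, as a bitwise C++ sketch (illustration only, not the project's ComputeCRC32Castagnoli):

    #include <cstdint>

    // One byte of reflected CRC-32C, matching what `crc32 r32, r8` does
    // to the accumulator (no init/final xor applied here).
    std::uint32_t crc32c_byte(std::uint32_t crc, std::uint8_t data) {
        crc ^= data;
        for (int i = 0; i < 8; ++i)
            crc = (crc >> 1) ^ (0x82F63B78u & -(crc & 1u));
        return crc;
    }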
static void EmitCRC32ISO(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, const int data_size) {

@@ -69,10 +65,7 @@ static void EmitCRC32ISO(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, co
        code.pextrd(crc, xmm_value, 2);

        ctx.reg_alloc.DefineValue(inst, crc);
        return;
    }

    if (code.HasHostFeature(HostFeature::PCLMULQDQ) && data_size == 32) {
    } else if (code.HasHostFeature(HostFeature::PCLMULQDQ) && data_size == 32) {
        const Xbyak::Reg32 crc = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
        const Xbyak::Reg32 value = ctx.reg_alloc.UseGpr(args[1]).cvt32();
        const Xbyak::Xmm xmm_value = ctx.reg_alloc.ScratchXmm();

@@ -90,10 +83,7 @@ static void EmitCRC32ISO(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, co
        code.pextrd(crc, xmm_value, 2);

        ctx.reg_alloc.DefineValue(inst, crc);
        return;
    }

    if (code.HasHostFeature(HostFeature::PCLMULQDQ) && data_size == 64) {
    } else if (code.HasHostFeature(HostFeature::PCLMULQDQ) && data_size == 64) {
        const Xbyak::Reg32 crc = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
        const Xbyak::Reg64 value = ctx.reg_alloc.UseGpr(args[1]);
        const Xbyak::Xmm xmm_value = ctx.reg_alloc.ScratchXmm();

@@ -111,12 +101,11 @@ static void EmitCRC32ISO(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, co
        code.pextrd(crc, xmm_value, 2);

        ctx.reg_alloc.DefineValue(inst, crc);
        return;
    } else {
        ctx.reg_alloc.HostCall(inst, args[0], args[1], {});
        code.mov(code.ABI_PARAM3, data_size / CHAR_BIT);
        code.CallFunction(&CRC32::ComputeCRC32ISO);
    }

    ctx.reg_alloc.HostCall(inst, args[0], args[1], {});
    code.mov(code.ABI_PARAM3, data_size / CHAR_BIT);
    code.CallFunction(&CRC32::ComputeCRC32ISO);
}

void EmitX64::EmitCRC32Castagnoli8(EmitContext& ctx, IR::Inst* inst) {
@@ -143,7 +143,7 @@ static void EmitConditionalSelect(BlockOfCode& code, EmitContext& ctx, IR::Inst*
    const Xbyak::Reg then_ = ctx.reg_alloc.UseGpr(args[1]).changeBit(bitsize);
    const Xbyak::Reg else_ = ctx.reg_alloc.UseScratchGpr(args[2]).changeBit(bitsize);

    code.mov(nzcv, dword[r15 + code.GetJitStateInfo().offsetof_cpsr_nzcv]);
    code.mov(nzcv, dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_cpsr_nzcv]);

    code.LoadRequiredFlagsForCondFromRax(args[0].GetImmediateCond());

@@ -909,11 +909,11 @@ static Xbyak::Reg8 DoCarry(RegAlloc& reg_alloc, Argument& carry_in, IR::Inst* ca
    }
}

// AL contains flags (after LAHF + SETO sequence)
static Xbyak::Reg64 DoNZCV(BlockOfCode& code, RegAlloc& reg_alloc, IR::Inst* nzcv_out) {
    if (!nzcv_out) {
        return Xbyak::Reg64{-1};
    }

    const Xbyak::Reg64 nzcv = reg_alloc.ScratchGpr(HostLoc::RAX);
    code.xor_(nzcv.cvt32(), nzcv.cvt32());
    return nzcv;

@@ -1168,7 +1168,7 @@ void EmitX64::EmitUnsignedDiv32(EmitContext& ctx, IR::Inst* inst) {

    code.xor_(eax, eax);
    code.test(divisor, divisor);
    code.jz(end);
    code.jz(end, code.T_NEAR);
    code.mov(eax, dividend);
    code.xor_(edx, edx);
    code.div(divisor);

@@ -1189,7 +1189,7 @@ void EmitX64::EmitUnsignedDiv64(EmitContext& ctx, IR::Inst* inst) {

    code.xor_(eax, eax);
    code.test(divisor, divisor);
    code.jz(end);
    code.jz(end, code.T_NEAR);
    code.mov(rax, dividend);
    code.xor_(edx, edx);
    code.div(divisor);

@@ -1568,14 +1568,14 @@ void EmitX64::EmitCountLeadingZeros32(EmitContext& ctx, IR::Inst* inst) {
    } else {
        const Xbyak::Reg32 source = ctx.reg_alloc.UseScratchGpr(args[0]).cvt32();
        const Xbyak::Reg32 result = ctx.reg_alloc.ScratchGpr().cvt32();
        const Xbyak::Reg32 temp = ctx.reg_alloc.ScratchGpr().cvt32();

        // The result of a bsr of zero is undefined, but zf is set after it.
        code.bsr(result, source);
        code.mov(source, 0xFFFFFFFF);
        code.cmovz(result, source);
        code.neg(result);
        code.add(result, 31);

        code.mov(temp, 32);
        code.xor_(result, 31);
        code.test(source, source);
        code.cmove(result, temp);
        ctx.reg_alloc.DefineValue(inst, result);
    }
}

@@ -1592,14 +1592,14 @@ void EmitX64::EmitCountLeadingZeros64(EmitContext& ctx, IR::Inst* inst) {
    } else {
        const Xbyak::Reg64 source = ctx.reg_alloc.UseScratchGpr(args[0]).cvt64();
        const Xbyak::Reg64 result = ctx.reg_alloc.ScratchGpr().cvt64();
        const Xbyak::Reg64 temp = ctx.reg_alloc.ScratchGpr().cvt64();

        // The result of a bsr of zero is undefined, but zf is set after it.
        code.bsr(result, source);
        code.mov(source.cvt32(), 0xFFFFFFFF);
        code.cmovz(result.cvt32(), source.cvt32());
        code.neg(result.cvt32());
        code.add(result.cvt32(), 63);

        code.mov(temp.cvt32(), 64);
        code.xor_(result.cvt32(), 63);
        code.test(source, source);
        code.cmove(result.cvt32(), temp.cvt32());
        ctx.reg_alloc.DefineValue(inst, result);
    }
}
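The new count-leading-zeros fallback works because, for a nonzero 32-bit value, `bsr` yields the index of the highest set bit, and since that index is at most 31, `31 - bsr` equals `bsr ^ 31`; one xor replaces the old neg/add pair, and the zero input is patched in with `cmove`. A standalone check of the identity (illustration only):

    #include <cassert>
    #include <cstdint>

    int clz32(std::uint32_t x) {
        if (x == 0) return 32;  // the cmove(result, temp) path
        int bsr = 31;
        while (!(x & 0x80000000u)) { x <<= 1; --bsr; }  // index of highest set bit
        // For 0 <= bsr <= 31, 31 - bsr == bsr ^ 31, which is what xor_(result, 31) exploits.
        assert((31 - bsr) == (bsr ^ 31));
        return bsr ^ 31;
    }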
@@ -712,12 +712,12 @@ static void EmitFPMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
        code.mov(code.ABI_PARAM4.cvt32(), ctx.FPCR().Value());
#ifdef _WIN32
        code.lea(rsp, ptr[rsp - (16 + ABI_SHADOW_SPACE)]);
        code.lea(rax, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
        code.lea(rax, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
        code.mov(qword[rsp + ABI_SHADOW_SPACE], rax);
        code.CallFunction(fallback_fn);
        code.add(rsp, 16 + ABI_SHADOW_SPACE);
#else
        code.lea(code.ABI_PARAM5, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
        code.lea(code.ABI_PARAM5, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
        code.CallFunction(fallback_fn);
#endif
        code.movq(result, code.ABI_RETURN);

@@ -821,12 +821,12 @@ static void EmitFPMulAdd(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
        code.mov(code.ABI_PARAM4.cvt32(), ctx.FPCR().Value());
#ifdef _WIN32
        ctx.reg_alloc.AllocStackSpace(16 + ABI_SHADOW_SPACE);
        code.lea(rax, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
        code.lea(rax, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
        code.mov(qword[rsp + ABI_SHADOW_SPACE], rax);
        code.CallFunction(fallback_fn);
        ctx.reg_alloc.ReleaseStackSpace(16 + ABI_SHADOW_SPACE);
#else
        code.lea(code.ABI_PARAM5, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
        code.lea(code.ABI_PARAM5, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
        code.CallFunction(fallback_fn);
#endif
    }

@@ -945,7 +945,7 @@ static void EmitFPRecipEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    ctx.reg_alloc.HostCall(inst, args[0]);
    code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value());
    code.lea(code.ABI_PARAM3, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
    code.lea(code.ABI_PARAM3, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
    code.CallFunction(&FP::FPRecipEstimate<FPT>);
}

@@ -968,7 +968,7 @@ static void EmitFPRecipExponent(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    ctx.reg_alloc.HostCall(inst, args[0]);
    code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value());
    code.lea(code.ABI_PARAM3, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
    code.lea(code.ABI_PARAM3, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
    code.CallFunction(&FP::FPRecipExponent<FPT>);
}

@@ -1026,7 +1026,7 @@ static void EmitFPRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst*
        code.movq(code.ABI_PARAM1, operand1);
        code.movq(code.ABI_PARAM2, operand2);
        code.mov(code.ABI_PARAM3.cvt32(), ctx.FPCR().Value());
        code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
        code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
        code.CallFunction(&FP::FPRecipStepFused<FPT>);
        code.movq(result, code.ABI_RETURN);
        ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));

@@ -1055,7 +1055,7 @@ static void EmitFPRecipStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst*

    ctx.reg_alloc.HostCall(inst, args[0], args[1]);
    code.mov(code.ABI_PARAM3.cvt32(), ctx.FPCR().Value());
    code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
    code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
    code.CallFunction(&FP::FPRecipStepFused<FPT>);
}

@@ -1119,7 +1119,7 @@ static void EmitFPRound(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst, siz

    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
    ctx.reg_alloc.HostCall(inst, args[0]);
    code.lea(code.ABI_PARAM2, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
    code.lea(code.ABI_PARAM2, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
    code.mov(code.ABI_PARAM3.cvt32(), ctx.FPCR().Value());
    code.CallFunction(lut.at(std::make_tuple(fsize, rounding_mode, exact)));
}

@@ -1206,7 +1206,7 @@ static void EmitFPRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
    }

    // a > 0 && a < 0x00800000;
    code.dec(tmp);
    code.sub(tmp, 1);
    code.cmp(tmp, 0x007FFFFF);
    code.jb(fallback, code.T_NEAR); //within -127,128
    needs_fallback = true;
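The sub/cmp/jb triple above implements `a > 0 && a < 0x00800000` as a single unsigned comparison (the `dec` to `sub` change keeps the arithmetic identical; `sub` just writes all flags instead of preserving CF). The identity, verified standalone (illustration only):

    #include <cassert>
    #include <cstdint>

    bool in_range(std::uint32_t a) {
        // a > 0 && a < 0x00800000  <=>  (a - 1) < 0x007FFFFF unsigned:
        // a == 0 wraps to 0xFFFFFFFF and fails; a == 0x00800000 yields
        // exactly 0x007FFFFF, which fails the strict compare too.
        return (a - 1u) < 0x007FFFFFu;
    }

    int main() {
        assert(!in_range(0));
        assert(in_range(1));
        assert(in_range(0x007FFFFF));
        assert(!in_range(0x00800000));
    }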
@@ -1284,7 +1284,7 @@ static void EmitFPRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
        ABI_PushCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));
        code.movq(code.ABI_PARAM1, operand);
        code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value());
        code.lea(code.ABI_PARAM3, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
        code.lea(code.ABI_PARAM3, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
        code.CallFunction(&FP::FPRSqrtEstimate<FPT>);
        code.movq(result, rax);
        ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));

@@ -1298,7 +1298,7 @@ static void EmitFPRSqrtEstimate(BlockOfCode& code, EmitContext& ctx, IR::Inst* i
        auto args = ctx.reg_alloc.GetArgumentInfo(inst);
        ctx.reg_alloc.HostCall(inst, args[0]);
        code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value());
        code.lea(code.ABI_PARAM3, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
        code.lea(code.ABI_PARAM3, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
        code.CallFunction(&FP::FPRSqrtEstimate<FPT>);
    }
}

@@ -1368,7 +1368,7 @@ static void EmitFPRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst*
        code.movq(code.ABI_PARAM1, operand1);
        code.movq(code.ABI_PARAM2, operand2);
        code.mov(code.ABI_PARAM3.cvt32(), ctx.FPCR().Value());
        code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
        code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
        code.CallFunction(&FP::FPRSqrtStepFused<FPT>);
        code.movq(result, code.ABI_RETURN);
        ABI_PopCallerSaveRegistersAndAdjustStackExcept(code, HostLocXmmIdx(result.getIdx()));

@@ -1398,7 +1398,7 @@ static void EmitFPRSqrtStepFused(BlockOfCode& code, EmitContext& ctx, IR::Inst*

    ctx.reg_alloc.HostCall(inst, args[0], args[1]);
    code.mov(code.ABI_PARAM3.cvt32(), ctx.FPCR().Value());
    code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
    code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
    code.CallFunction(&FP::FPRSqrtStepFused<FPT>);
}

@@ -1511,7 +1511,7 @@ void EmitX64::EmitFPHalfToDouble(EmitContext& ctx, IR::Inst* inst) {
        ctx.reg_alloc.HostCall(inst, args[0]);
        code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value());
        code.mov(code.ABI_PARAM3.cvt32(), static_cast<u32>(rounding_mode));
        code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
        code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
        code.CallFunction(&FP::FPConvert<u64, u16>);
    }

@@ -1535,7 +1535,7 @@ void EmitX64::EmitFPHalfToSingle(EmitContext& ctx, IR::Inst* inst) {
    ctx.reg_alloc.HostCall(inst, args[0]);
    code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value());
    code.mov(code.ABI_PARAM3.cvt32(), static_cast<u32>(rounding_mode));
    code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
    code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
    code.CallFunction(&FP::FPConvert<u32, u16>);
}

@@ -1556,7 +1556,7 @@ void EmitX64::EmitFPSingleToDouble(EmitContext& ctx, IR::Inst* inst) {
        ctx.reg_alloc.HostCall(inst, args[0]);
        code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value());
        code.mov(code.ABI_PARAM3.cvt32(), static_cast<u32>(rounding_mode));
        code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
        code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
        code.CallFunction(&FP::FPConvert<u64, u32>);
    }
}

@@ -1581,7 +1581,7 @@ void EmitX64::EmitFPSingleToHalf(EmitContext& ctx, IR::Inst* inst) {
|||
ctx.reg_alloc.HostCall(inst, args[0]);
|
||||
code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value());
|
||||
code.mov(code.ABI_PARAM3.cvt32(), static_cast<u32>(rounding_mode));
|
||||
code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
|
||||
code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
|
||||
code.CallFunction(&FP::FPConvert<u16, u32>);
|
||||
}
|
||||
|
||||
|
@ -1595,7 +1595,7 @@ void EmitX64::EmitFPDoubleToHalf(EmitContext& ctx, IR::Inst* inst) {
|
|||
ctx.reg_alloc.HostCall(inst, args[0]);
|
||||
code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value());
|
||||
code.mov(code.ABI_PARAM3.cvt32(), static_cast<u32>(rounding_mode));
|
||||
code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
|
||||
code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
|
||||
code.CallFunction(&FP::FPConvert<u16, u64>);
|
||||
}
|
||||
|
||||
|
@ -1616,7 +1616,7 @@ void EmitX64::EmitFPDoubleToSingle(EmitContext& ctx, IR::Inst* inst) {
|
|||
ctx.reg_alloc.HostCall(inst, args[0]);
|
||||
code.mov(code.ABI_PARAM2.cvt32(), ctx.FPCR().Value());
|
||||
code.mov(code.ABI_PARAM3.cvt32(), static_cast<u32>(rounding_mode));
|
||||
code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
|
||||
code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
|
||||
code.CallFunction(&FP::FPConvert<u32, u64>);
|
||||
}
|
||||
}
|
||||
|
@ -1757,7 +1757,7 @@ static void EmitFPToFixed(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst) {
|
|||
mp::cartesian_product<fbits_list, rounding_list>{});
|
||||
|
||||
ctx.reg_alloc.HostCall(inst, args[0]);
|
||||
code.lea(code.ABI_PARAM2, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
|
||||
code.lea(code.ABI_PARAM2, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
|
||||
code.mov(code.ABI_PARAM3.cvt32(), ctx.FPCR().Value());
|
||||
code.CallFunction(lut.at(std::make_tuple(fbits, rounding_mode)));
|
||||
}
|
||||
|
|
|
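Every hunk in this file is the same mechanical substitution: the hardcoded `r15` JIT-state pointer becomes the named `ABI_JIT_PTR`. A minimal sketch of what such an alias can look like, assuming xbyak; the name comes from the diff, but this exact definition is illustrative rather than the project's actual ABI header:

```cpp
#include <xbyak/xbyak.h>

namespace Dynarmic::Backend::X64 {
// Single definition point for the register holding the JitState pointer.
// Emitters reference the alias, so reassigning the state register later
// means touching one line instead of every lea/mov that reads guest state.
inline const Xbyak::Reg64 ABI_JIT_PTR = Xbyak::util::r15;
}  // namespace Dynarmic::Backend::X64
```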
@@ -28,27 +28,24 @@ std::optional<AxxEmitX64::DoNotFastmemMarker> AxxEmitX64::ShouldFastmem(AxxEmitC
 
 FakeCall AxxEmitX64::FastmemCallback(u64 rip_) {
     const auto iter = fastmem_patch_info.find(rip_);
 
-    if (iter == fastmem_patch_info.end()) {
+    if (iter != fastmem_patch_info.end()) {
+        FakeCall result{
+            .call_rip = iter->second.callback,
+            .ret_rip = iter->second.resume_rip,
+        };
+        if (iter->second.recompile) {
+            const auto marker = iter->second.marker;
+            do_not_fastmem.insert(marker);
+            InvalidateBasicBlocks({std::get<0>(marker)});
+        }
+        return result;
+    } else {
         fmt::print("dynarmic: Segfault happened within JITted code at rip = {:016x}\n", rip_);
         fmt::print("Segfault wasn't at a fastmem patch location!\n");
         fmt::print("Now dumping code.......\n\n");
         Common::DumpDisassembledX64((void*)(rip_ & ~u64(0xFFF)), 0x1000);
         ASSERT_FALSE("iter != fastmem_patch_info.end()");
     }
-
-    FakeCall result{
-        .call_rip = iter->second.callback,
-        .ret_rip = iter->second.resume_rip,
-    };
-
-    if (iter->second.recompile) {
-        const auto marker = iter->second.marker;
-        do_not_fastmem.insert(marker);
-        InvalidateBasicBlocks({std::get<0>(marker)});
-    }
-
-    return result;
 }
 
 template<std::size_t bitsize, auto callback>
 
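The rewrite puts the happy path first: hit the patch-info map, build the `FakeCall`, optionally mark the block for recompilation, and only fall into the diagnostic dump when the faulting RIP is unknown. A sketch of the same control flow with standard containers; the types are simplified, and `FakeCall`, the patch-info fields and the invalidation call are the backend's own:

```cpp
#include <cstdint>
#include <optional>
#include <unordered_map>

struct FakeCall { std::uint64_t call_rip, ret_rip; };
struct PatchInfo { std::uint64_t callback, resume_rip; bool recompile; };

// Known faulting RIP -> redirect to the slow-path callback; anything else is a
// real bug and the actual backend dumps disassembly and asserts.
std::optional<FakeCall> Lookup(const std::unordered_map<std::uint64_t, PatchInfo>& info,
                               std::uint64_t rip) {
    if (const auto iter = info.find(rip); iter != info.end()) {
        if (iter->second.recompile) {
            // real code: do_not_fastmem.insert(marker); InvalidateBasicBlocks(...);
        }
        return FakeCall{iter->second.callback, iter->second.resume_rip};
    }
    return std::nullopt;  // real code: DumpDisassembledX64 + ASSERT_FALSE
}
```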
@@ -95,7 +92,7 @@ void AxxEmitX64::EmitMemoryRead(AxxEmitContext& ctx, IR::Inst* inst) {
 
     if (fastmem_marker) {
         // Use fastmem
-        bool require_abort_handling;
+        bool require_abort_handling = false;
         const auto src_ptr = EmitFastmemVAddr(code, ctx, *abort, vaddr, require_abort_handling);
 
         const auto location = EmitReadMemoryMov<bitsize>(code, value_idx, src_ptr, ordered);
 
@@ -182,7 +179,7 @@ void AxxEmitX64::EmitMemoryWrite(AxxEmitContext& ctx, IR::Inst* inst) {
 
     if (fastmem_marker) {
         // Use fastmem
-        bool require_abort_handling;
+        bool require_abort_handling = false;
         const auto dest_ptr = EmitFastmemVAddr(code, ctx, *abort, vaddr, require_abort_handling);
 
         const auto location = EmitWriteMemoryMov<bitsize>(code, dest_ptr, value_idx, ordered);
 
@@ -230,7 +227,7 @@ void AxxEmitX64::EmitExclusiveReadMemory(AxxEmitContext& ctx, IR::Inst* inst) {
 
     ctx.reg_alloc.HostCall(inst, {}, args[1]);
 
-    code.mov(code.byte[r15 + offsetof(AxxJitState, exclusive_state)], u8(1));
+    code.mov(code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)], u8(1));
     code.mov(code.ABI_PARAM1, reinterpret_cast<u64>(&conf));
     if (ordered) {
         code.mfence();
 
@@ -248,7 +245,7 @@ void AxxEmitX64::EmitExclusiveReadMemory(AxxEmitContext& ctx, IR::Inst* inst) {
     ctx.reg_alloc.EndOfAllocScope();
     ctx.reg_alloc.HostCall(nullptr);
 
-    code.mov(code.byte[r15 + offsetof(AxxJitState, exclusive_state)], u8(1));
+    code.mov(code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)], u8(1));
     code.mov(code.ABI_PARAM1, reinterpret_cast<u64>(&conf));
     ctx.reg_alloc.AllocStackSpace(16 + ABI_SHADOW_SPACE);
     code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE]);
 
@@ -288,9 +285,9 @@ void AxxEmitX64::EmitExclusiveWriteMemory(AxxEmitContext& ctx, IR::Inst* inst) {
     Xbyak::Label end;
 
     code.mov(code.ABI_RETURN, u32(1));
-    code.cmp(code.byte[r15 + offsetof(AxxJitState, exclusive_state)], u8(0));
+    code.cmp(code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)], u8(0));
     code.je(end);
-    code.mov(code.byte[r15 + offsetof(AxxJitState, exclusive_state)], u8(0));
+    code.mov(code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)], u8(0));
     code.mov(code.ABI_PARAM1, reinterpret_cast<u64>(&conf));
     if constexpr (bitsize != 128) {
         using T = mcl::unsigned_integer_of_size<bitsize>;
 
@@ -358,7 +355,7 @@ void AxxEmitX64::EmitExclusiveReadMemoryInline(AxxEmitContext& ctx, IR::Inst* in
 
     EmitExclusiveLock(code, conf, tmp, tmp2.cvt32());
 
-    code.mov(code.byte[r15 + offsetof(AxxJitState, exclusive_state)], u8(1));
+    code.mov(code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)], u8(1));
     code.mov(tmp, mcl::bit_cast<u64>(GetExclusiveMonitorAddressPointer(conf.global_monitor, conf.processor_id)));
     code.mov(qword[tmp], vaddr);
 
@@ -442,14 +439,14 @@ void AxxEmitX64::EmitExclusiveWriteMemoryInline(AxxEmitContext& ctx, IR::Inst* i
 
     code.mov(tmp, mcl::bit_cast<u64>(GetExclusiveMonitorAddressPointer(conf.global_monitor, conf.processor_id)));
     code.mov(status, u32(1));
-    code.cmp(code.byte[r15 + offsetof(AxxJitState, exclusive_state)], u8(0));
+    code.cmp(code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)], u8(0));
     code.je(*end, code.T_NEAR);
     code.cmp(qword[tmp], vaddr);
     code.jne(*end, code.T_NEAR);
 
     EmitExclusiveTestAndClear(code, conf, vaddr, tmp, rax);
 
-    code.mov(code.byte[r15 + offsetof(AxxJitState, exclusive_state)], u8(0));
+    code.mov(code.byte[code.ABI_JIT_PTR + offsetof(AxxJitState, exclusive_state)], u8(0));
     code.mov(tmp, mcl::bit_cast<u64>(GetExclusiveMonitorValuePointer(conf.global_monitor, conf.processor_id)));
 
     if constexpr (bitsize == 128) {
 
@@ -504,7 +501,6 @@ void AxxEmitX64::EmitExclusiveWriteMemoryInline(AxxEmitContext& ctx, IR::Inst* i
     }
 
     code.setnz(status.cvt8());
-
     ctx.deferred_emits.emplace_back([=, this] {
         code.L(*abort);
         code.call(wrapped_fn);
 
@@ -518,24 +514,21 @@ void AxxEmitX64::EmitExclusiveWriteMemoryInline(AxxEmitContext& ctx, IR::Inst* i
                 conf.recompile_on_exclusive_fastmem_failure,
             });
 
-            code.cmp(al, 0);
+            code.xor_(status.cvt32(), status.cvt32());  //dep-break
+            code.test(code.al, code.al);
             code.setz(status.cvt8());
-            code.movzx(status.cvt32(), status.cvt8());
             code.jmp(*end, code.T_NEAR);
         });
     } else {
         code.call(wrapped_fn);
-        code.cmp(al, 0);
+        code.xor_(status.cvt32(), status.cvt32());  //dep-break
+        code.test(code.al, code.al);
         code.setz(status.cvt8());
-        code.movzx(status.cvt32(), status.cvt8());
     }
 
     code.L(*end);
 
     EmitExclusiveUnlock(code, conf, tmp, eax);
 
     ctx.reg_alloc.DefineValue(inst, status);
 
     EmitCheckMemoryAbort(ctx, inst);
 }
 
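The `exclusive_state` byte these hunks touch implements load-/store-exclusive pairing: a store-exclusive may only proceed while the flag is set and the reserved address still matches, and it consumes the reservation when it runs. A tiny model of that protocol with simplified types; the real check also consults the global monitor and runs under its lock:

```cpp
#include <cstdint>

struct JitStateModel {
    std::uint8_t exclusive_state = 0;   // the byte at offsetof(AxxJitState, exclusive_state)
    std::uint64_t exclusive_vaddr = 0;  // the monitor's reserved address
};

// Mirrors the emitted cmp/je/mov sequence: fail fast when there is no
// reservation or a different address is reserved; otherwise consume it.
bool TryStoreExclusive(JitStateModel& st, std::uint64_t vaddr) {
    if (st.exclusive_state == 0 || st.exclusive_vaddr != vaddr)
        return false;            // status stays 1 (failure) in the JITted code
    st.exclusive_state = 0;      // reservation is consumed
    return true;                 // the actual locked store happens after this
}
```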
@@ -46,26 +46,25 @@ void EmitDetectMisalignedVAddr(BlockOfCode& code, EmitContext& ctx, size_t bitsi
 
     code.test(vaddr, align_mask);
 
-    if (!ctx.conf.only_detect_misalignment_via_page_table_on_page_boundary) {
+    if (ctx.conf.only_detect_misalignment_via_page_table_on_page_boundary) {
+        const u32 page_align_mask = static_cast<u32>(page_size - 1) & ~align_mask;
+
+        SharedLabel detect_boundary = GenSharedLabel(), resume = GenSharedLabel();
+
+        code.jnz(*detect_boundary, code.T_NEAR);
+        code.L(*resume);
+
+        ctx.deferred_emits.emplace_back([=, &code] {
+            code.L(*detect_boundary);
+            code.mov(tmp, vaddr);
+            code.and_(tmp, page_align_mask);
+            code.cmp(tmp, page_align_mask);
+            code.jne(*resume, code.T_NEAR);
+            // NOTE: We expect to fallthrough into abort code here.
+        });
+    } else {
         code.jnz(abort, code.T_NEAR);
         return;
     }
-
-    const u32 page_align_mask = static_cast<u32>(page_size - 1) & ~align_mask;
-
-    SharedLabel detect_boundary = GenSharedLabel(), resume = GenSharedLabel();
-
-    code.jnz(*detect_boundary, code.T_NEAR);
-    code.L(*resume);
-
-    ctx.deferred_emits.emplace_back([=, &code] {
-        code.L(*detect_boundary);
-        code.mov(tmp, vaddr);
-        code.and_(tmp, page_align_mask);
-        code.cmp(tmp, page_align_mask);
-        code.jne(*resume, code.T_NEAR);
-        // NOTE: We expect to fallthrough into abort code here.
-    });
 }
 
 template<typename EmitContext>
 
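The inverted condition keeps the page-boundary machinery inside the branch that actually needs it. What the deferred check computes: with `align_mask = size - 1`, a misaligned access can only straddle a page when every offset bit between the alignment and the page size is set. A scalar model of the emitted `and_`/`cmp` pair; the names come from the hunk, the function itself is illustrative:

```cpp
#include <cstdint>

bool misalignment_may_cross_page(std::uint64_t vaddr, std::uint64_t size,
                                 std::uint64_t page_size = 4096) {
    const std::uint64_t align_mask = size - 1;  // low bits covered by the access alignment
    const std::uint64_t page_align_mask = (page_size - 1) & ~align_mask;
    // Emitted as: mov tmp, vaddr ; and tmp, page_align_mask ; cmp tmp, page_align_mask
    return (vaddr & page_align_mask) == page_align_mask;
}
```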
@@ -202,7 +201,7 @@ template<std::size_t bitsize>
 const void* EmitReadMemoryMov(BlockOfCode& code, int value_idx, const Xbyak::RegExp& addr, bool ordered) {
     if (ordered) {
         if constexpr (bitsize != 128) {
-            code.xor_(Xbyak::Reg32{value_idx}, Xbyak::Reg32{value_idx});
+            code.xor_(Xbyak::Reg32(value_idx), Xbyak::Reg32(value_idx));
         } else {
             code.xor_(eax, eax);
             code.xor_(ebx, ebx);
 
@@ -214,59 +213,59 @@ const void* EmitReadMemoryMov(BlockOfCode& code, int value_idx, const Xbyak::Reg
         switch (bitsize) {
         case 8:
             code.lock();
-            code.xadd(code.byte[addr], Xbyak::Reg32{value_idx}.cvt8());
+            code.xadd(code.byte[addr], Xbyak::Reg32(value_idx).cvt8());
             break;
         case 16:
             code.lock();
-            code.xadd(word[addr], Xbyak::Reg16{value_idx});
+            code.xadd(word[addr], Xbyak::Reg64(value_idx).cvt16());
             break;
         case 32:
             code.lock();
-            code.xadd(dword[addr], Xbyak::Reg32{value_idx});
+            code.xadd(dword[addr], Xbyak::Reg64(value_idx).cvt32());
             break;
         case 64:
             code.lock();
-            code.xadd(qword[addr], Xbyak::Reg64{value_idx});
+            code.xadd(qword[addr], Xbyak::Reg64(value_idx));
             break;
         case 128:
             code.lock();
             code.cmpxchg16b(xword[addr]);
             if (code.HasHostFeature(HostFeature::SSE41)) {
-                code.movq(Xbyak::Xmm{value_idx}, rax);
-                code.pinsrq(Xbyak::Xmm{value_idx}, rdx, 1);
+                code.movq(Xbyak::Xmm(value_idx), rax);
+                code.pinsrq(Xbyak::Xmm(value_idx), rdx, 1);
             } else {
-                code.movq(Xbyak::Xmm{value_idx}, rax);
+                code.movq(Xbyak::Xmm(value_idx), rax);
                 code.movq(xmm0, rdx);
-                code.punpcklqdq(Xbyak::Xmm{value_idx}, xmm0);
+                code.punpcklqdq(Xbyak::Xmm(value_idx), xmm0);
             }
             break;
         default:
             ASSERT_FALSE("Invalid bitsize");
         }
         return fastmem_location;
-    }
-
-    const void* fastmem_location = code.getCurr();
-    switch (bitsize) {
-    case 8:
-        code.movzx(Xbyak::Reg32{value_idx}, code.byte[addr]);
-        break;
-    case 16:
-        code.movzx(Xbyak::Reg32{value_idx}, word[addr]);
-        break;
-    case 32:
-        code.mov(Xbyak::Reg32{value_idx}, dword[addr]);
-        break;
-    case 64:
-        code.mov(Xbyak::Reg64{value_idx}, qword[addr]);
-        break;
-    case 128:
-        code.movups(Xbyak::Xmm{value_idx}, xword[addr]);
-        break;
-    default:
-        ASSERT_FALSE("Invalid bitsize");
-    }
-    return fastmem_location;
+    } else {
+        const void* fastmem_location = code.getCurr();
+        switch (bitsize) {
+        case 8:
+            code.movzx(Xbyak::Reg64(value_idx).cvt32(), code.byte[addr]);
+            break;
+        case 16:
+            code.movzx(Xbyak::Reg64(value_idx).cvt32(), word[addr]);
+            break;
+        case 32:
+            code.mov(Xbyak::Reg64(value_idx).cvt32(), dword[addr]);
+            break;
+        case 64:
+            code.mov(Xbyak::Reg64(value_idx), qword[addr]);
+            break;
+        case 128:
+            code.movups(Xbyak::Xmm(value_idx), xword[addr]);
+            break;
+        default:
+            ASSERT_FALSE("Invalid bitsize");
+        }
+        return fastmem_location;
+    }
 }
 
 template<std::size_t bitsize>
 
@@ -276,10 +275,10 @@ const void* EmitWriteMemoryMov(BlockOfCode& code, const Xbyak::RegExp& addr, int
             code.xor_(eax, eax);
             code.xor_(edx, edx);
             if (code.HasHostFeature(HostFeature::SSE41)) {
-                code.movq(rbx, Xbyak::Xmm{value_idx});
-                code.pextrq(rcx, Xbyak::Xmm{value_idx}, 1);
+                code.movq(rbx, Xbyak::Xmm(value_idx));
+                code.pextrq(rcx, Xbyak::Xmm(value_idx), 1);
             } else {
-                code.movaps(xmm0, Xbyak::Xmm{value_idx});
+                code.movaps(xmm0, Xbyak::Xmm(value_idx));
                 code.movq(rbx, xmm0);
                 code.punpckhqdq(xmm0, xmm0);
                 code.movq(rcx, xmm0);
 
@@ -289,16 +288,16 @@ const void* EmitWriteMemoryMov(BlockOfCode& code, const Xbyak::RegExp& addr, int
         const void* fastmem_location = code.getCurr();
         switch (bitsize) {
         case 8:
-            code.xchg(code.byte[addr], Xbyak::Reg64{value_idx}.cvt8());
+            code.xchg(code.byte[addr], Xbyak::Reg64(value_idx).cvt8());
             break;
         case 16:
-            code.xchg(word[addr], Xbyak::Reg16{value_idx});
+            code.xchg(word[addr], Xbyak::Reg64(value_idx).cvt16());
             break;
         case 32:
-            code.xchg(dword[addr], Xbyak::Reg32{value_idx});
+            code.xchg(dword[addr], Xbyak::Reg64(value_idx).cvt32());
             break;
         case 64:
-            code.xchg(qword[addr], Xbyak::Reg64{value_idx});
+            code.xchg(qword[addr], Xbyak::Reg64(value_idx));
             break;
         case 128: {
             Xbyak::Label loop;
 
@@ -312,29 +311,29 @@ const void* EmitWriteMemoryMov(BlockOfCode& code, const Xbyak::RegExp& addr, int
             ASSERT_FALSE("Invalid bitsize");
         }
         return fastmem_location;
-    }
-
-    const void* fastmem_location = code.getCurr();
-    switch (bitsize) {
-    case 8:
-        code.mov(code.byte[addr], Xbyak::Reg64{value_idx}.cvt8());
-        break;
-    case 16:
-        code.mov(word[addr], Xbyak::Reg16{value_idx});
-        break;
-    case 32:
-        code.mov(dword[addr], Xbyak::Reg32{value_idx});
-        break;
-    case 64:
-        code.mov(qword[addr], Xbyak::Reg64{value_idx});
-        break;
-    case 128:
-        code.movups(xword[addr], Xbyak::Xmm{value_idx});
-        break;
-    default:
-        ASSERT_FALSE("Invalid bitsize");
-    }
-    return fastmem_location;
+    } else {
+        const void* fastmem_location = code.getCurr();
+        switch (bitsize) {
+        case 8:
+            code.mov(code.byte[addr], Xbyak::Reg64(value_idx).cvt8());
+            break;
+        case 16:
+            code.mov(word[addr], Xbyak::Reg64(value_idx).cvt16());
+            break;
+        case 32:
+            code.mov(dword[addr], Xbyak::Reg64(value_idx).cvt32());
+            break;
+        case 64:
+            code.mov(qword[addr], Xbyak::Reg64(value_idx));
+            break;
+        case 128:
+            code.movups(xword[addr], Xbyak::Xmm(value_idx));
+            break;
+        default:
+            ASSERT_FALSE("Invalid bitsize");
+        }
+        return fastmem_location;
+    }
 }
 
 template<typename UserConfig>
 
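The ordered paths above lean on two x86 guarantees: a locked `xadd` with a zeroed source register is a read that changes nothing but carries full ordering (hence the `xor_` of the destination first), and `xchg` with a memory operand is implicitly locked, making it an ordered store. The same intent written against `std::atomic`, as an analogy rather than what the emitter literally produces:

```cpp
#include <atomic>
#include <cstdint>

// Ordered load: lock xadd [addr], reg  with  reg == 0 leaves memory unchanged.
std::uint64_t ordered_load(std::atomic<std::uint64_t>& mem) {
    return mem.fetch_add(0);
}

// Ordered store: xchg [addr], reg (the lock prefix is implicit for xchg).
void ordered_store(std::atomic<std::uint64_t>& mem, std::uint64_t value) {
    mem.exchange(value);
}
```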
@@ -69,7 +69,7 @@ void EmitSignedSaturatedOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst)
             ctx.reg_alloc.DefineValue(overflow_inst, overflow);
         }
     } else {
-        code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], overflow.cvt8());
+        code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], overflow.cvt8());
     }
 
     ctx.reg_alloc.DefineValue(inst, result);
 
@@ -98,7 +98,7 @@ void EmitUnsignedSaturatedOp(BlockOfCode& code, EmitContext& ctx, IR::Inst* inst
 
     const Xbyak::Reg overflow = ctx.reg_alloc.ScratchGpr();
     code.setb(overflow.cvt8());
-    code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], overflow.cvt8());
+    code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], overflow.cvt8());
 
     ctx.reg_alloc.DefineValue(inst, addend);
 }
 
@@ -226,7 +226,7 @@ void EmitX64::EmitSignedSaturatedDoublingMultiplyReturnHigh16(EmitContext& ctx,
     code.cmovns(y, tmp);
 
     code.sets(tmp.cvt8());
-    code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], tmp.cvt8());
+    code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], tmp.cvt8());
 
     ctx.reg_alloc.DefineValue(inst, y);
 }
 
@@ -250,7 +250,7 @@ void EmitX64::EmitSignedSaturatedDoublingMultiplyReturnHigh32(EmitContext& ctx,
     code.cmovns(y.cvt32(), tmp.cvt32());
 
     code.sets(tmp.cvt8());
-    code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], tmp.cvt8());
+    code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], tmp.cvt8());
 
     ctx.reg_alloc.DefineValue(inst, y);
 }
 
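All of these hunks funnel saturation results into the same place: a sticky QC byte in the guest FPSR, updated with an `or_` so that once any operation saturates, the flag stays set until the guest clears it. In plain C++ terms, with fields and names simplified:

```cpp
#include <cstdint>

struct FpsrModel {
    std::uint8_t qc = 0;  // the byte at offsetof_fpsr_qc
};

// Equivalent of: setb/sets overflow ; or_(byte[state + offsetof_fpsr_qc], overflow)
void AccumulateQC(FpsrModel& fpsr, bool saturated_this_op) {
    fpsr.qc |= saturated_this_op ? 1 : 0;  // sticky: never cleared here
}
```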
@@ -25,6 +25,7 @@
 #include "dynarmic/backend/x64/constants.h"
 #include "dynarmic/backend/x64/emit_x64.h"
 #include "dynarmic/common/math_util.h"
+#include "dynarmic/interface/optimization_flags.h"
 #include "dynarmic/ir/basic_block.h"
 #include "dynarmic/ir/microinstruction.h"
 #include "dynarmic/ir/opcodes.h"
 
@@ -109,7 +110,7 @@ static void EmitOneArgumentFallbackWithSaturation(BlockOfCode& code, EmitContext
 
     ctx.reg_alloc.ReleaseStackSpace(stack_space + ABI_SHADOW_SPACE);
 
-    code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], code.ABI_RETURN.cvt8());
+    code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], code.ABI_RETURN.cvt8());
 
     ctx.reg_alloc.DefineValue(inst, result);
 }
 
@@ -137,7 +138,7 @@ static void EmitTwoArgumentFallbackWithSaturation(BlockOfCode& code, EmitContext
 
     ctx.reg_alloc.ReleaseStackSpace(stack_space + ABI_SHADOW_SPACE);
 
-    code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], code.ABI_RETURN.cvt8());
+    code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], code.ABI_RETURN.cvt8());
 
     ctx.reg_alloc.DefineValue(inst, result);
 }
 
@@ -164,7 +165,7 @@ static void EmitTwoArgumentFallbackWithSaturationAndImmediate(BlockOfCode& code,
 
     ctx.reg_alloc.ReleaseStackSpace(stack_space + ABI_SHADOW_SPACE);
 
-    code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], code.ABI_RETURN.cvt8());
+    code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], code.ABI_RETURN.cvt8());
 
     ctx.reg_alloc.DefineValue(inst, result);
 }
 
@@ -1009,10 +1010,7 @@ void EmitX64::EmitVectorCountLeadingZeros8(EmitContext& ctx, IR::Inst* inst) {
         code.gf2p8affineqb(result, code.BConst<64>(xword, 0xaaccf0ff'00000000), 8);
 
         ctx.reg_alloc.DefineValue(inst, result);
-        return;
-    }
-
-    if (code.HasHostFeature(HostFeature::SSSE3)) {
+    } else if (code.HasHostFeature(HostFeature::SSSE3)) {
         auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
         const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(args[0]);
 
@@ -1034,10 +1032,9 @@ void EmitX64::EmitVectorCountLeadingZeros8(EmitContext& ctx, IR::Inst* inst) {
         code.paddb(data, tmp1);
 
         ctx.reg_alloc.DefineValue(inst, data);
-        return;
+    } else {
+        EmitOneArgumentFallback(code, ctx, inst, EmitVectorCountLeadingZeros<u8>);
     }
-
-    EmitOneArgumentFallback(code, ctx, inst, EmitVectorCountLeadingZeros<u8>);
 }
 
 void EmitX64::EmitVectorCountLeadingZeros16(EmitContext& ctx, IR::Inst* inst) {
 
@@ -1070,10 +1067,7 @@ void EmitX64::EmitVectorCountLeadingZeros16(EmitContext& ctx, IR::Inst* inst) {
         code.vpshufb(result, result, data);
 
         ctx.reg_alloc.DefineValue(inst, result);
-        return;
-    }
-
-    if (code.HasHostFeature(HostFeature::SSSE3)) {
+    } else if (code.HasHostFeature(HostFeature::SSSE3)) {
         auto args = ctx.reg_alloc.GetArgumentInfo(inst);
 
         const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(args[0]);
 
@@ -1106,24 +1100,33 @@ void EmitX64::EmitVectorCountLeadingZeros16(EmitContext& ctx, IR::Inst* inst) {
         code.pshufb(result, data);
 
         ctx.reg_alloc.DefineValue(inst, result);
-        return;
+    } else {
+        EmitOneArgumentFallback(code, ctx, inst, EmitVectorCountLeadingZeros<u16>);
     }
-
-    EmitOneArgumentFallback(code, ctx, inst, EmitVectorCountLeadingZeros<u16>);
 }
 
 void EmitX64::EmitVectorCountLeadingZeros32(EmitContext& ctx, IR::Inst* inst) {
+    auto args = ctx.reg_alloc.GetArgumentInfo(inst);
     if (code.HasHostFeature(HostFeature::AVX512_Ortho | HostFeature::AVX512CD)) {
-        auto args = ctx.reg_alloc.GetArgumentInfo(inst);
-
         const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(args[0]);
         code.vplzcntd(data, data);
 
         ctx.reg_alloc.DefineValue(inst, data);
-        return;
+    // See https://stackoverflow.com/questions/58823140/count-leading-zero-bits-for-each-element-in-avx2-vector-emulate-mm256-lzcnt-ep/58827596#58827596
+    } else if (code.HasHostFeature(HostFeature::AVX2)) {
+        const Xbyak::Xmm data = ctx.reg_alloc.UseScratchXmm(args[0]);
+        const Xbyak::Xmm temp = ctx.reg_alloc.ScratchXmm();
+        code.vmovdqa(temp, data);
+        code.vpsrld(data, data, 8);
+        code.vpandn(data, data, temp);
+        code.vmovdqa(temp, code.Const(xword, 0x0000009E0000009E, 0x0000009E0000009E));
+        code.vcvtdq2ps(data, data);
+        code.vpsrld(data, data, 23);
+        code.vpsubusw(data, temp, data);
+        code.vpminsw(data, data, code.Const(xword, 0x0000002000000020, 0x0000002000000020));
+        ctx.reg_alloc.DefineValue(inst, data);
+    } else {
+        EmitOneArgumentFallback(code, ctx, inst, EmitVectorCountLeadingZeros<u32>);
     }
-
-    EmitOneArgumentFallback(code, ctx, inst, EmitVectorCountLeadingZeros<u32>);
 }
 
 void EmitX64::EmitVectorDeinterleaveEven8(EmitContext& ctx, IR::Inst* inst) {
 
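The new AVX2 branch has no vector `lzcnt`, so it borrows the IEEE-754 exponent: convert each lane to float, shift the exponent field down, and subtract it from 158 (0x9E, the exponent of 2^31), clamping to 32 for the zero lane. A scalar model of one lane, following the Stack Overflow answer linked in the hunk; this is a sketch of the math, not the emitter:

```cpp
#include <cassert>
#include <cstdint>
#include <cstring>

std::uint32_t clz32_model(std::uint32_t x) {
    x &= ~(x >> 8);                      // keep cvtdq2ps rounding from bumping the exponent
    const float f = static_cast<float>(static_cast<std::int32_t>(x));
    std::uint32_t bits;
    std::memcpy(&bits, &f, sizeof(bits));
    const std::uint32_t e = bits >> 23;  // sign|exponent, like vpsrld by 23
    const std::uint32_t d = 0x9E > e ? 0x9E - e : 0;  // vpsubusw: saturating 158 - e
    return d > 32 ? 32 : d;              // vpminsw clamp handles the x == 0 lane
}

int main() {
    assert(clz32_model(0) == 32);
    assert(clz32_model(1) == 31);
    assert(clz32_model(0x80000000u) == 0);
    return 0;
}
```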
@@ -3323,7 +3326,7 @@ void EmitX64::EmitVectorPolynomialMultiply8(EmitContext& ctx, IR::Inst* inst) {
     code.paddb(mask, mask);
     code.paddb(xmm_a, xmm_a);
     code.pblendvb(result, alternate);
-    code.dec(counter);
+    code.sub(counter, 1);
     code.jnz(loop);
 
     ctx.reg_alloc.DefineValue(inst, result);
 
@@ -3367,7 +3370,7 @@ void EmitX64::EmitVectorPolynomialMultiplyLong8(EmitContext& ctx, IR::Inst* inst
     code.paddw(mask, mask);
     code.paddw(xmm_a, xmm_a);
     code.pblendvb(result, alternate);
-    code.dec(counter);
+    code.sub(counter, 1);
     code.jnz(loop);
 
     ctx.reg_alloc.DefineValue(inst, result);
 
@@ -4258,7 +4261,7 @@ static void EmitVectorSignedSaturatedAbs(size_t esize, BlockOfCode& code, EmitCo
         UNREACHABLE();
     }
 
-    code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit);
+    code.or_(code.dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], bit);
     ctx.reg_alloc.DefineValue(inst, data);
 }
 
@@ -4393,7 +4396,7 @@ static void EmitVectorSignedSaturatedAccumulateUnsigned(BlockOfCode& code, EmitC
 
     const Xbyak::Reg32 mask = ctx.reg_alloc.ScratchGpr().cvt32();
     code.pmovmskb(mask, xmm0);
-    code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], mask);
+    code.or_(code.dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], mask);
 
     if (code.HasHostFeature(HostFeature::SSE41)) {
         code.pblendvb(result, tmp);
 
@@ -4479,7 +4482,7 @@ static void EmitVectorSignedSaturatedDoublingMultiply16(BlockOfCode& code, EmitC
 
     const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr().cvt32();
     code.pmovmskb(bit, upper_tmp);
-    code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit);
+    code.or_(code.dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], bit);
 
     ctx.reg_alloc.DefineValue(inst, result);
 }
 
@@ -4530,7 +4533,7 @@ void EmitVectorSignedSaturatedDoublingMultiply32(BlockOfCode& code, EmitContext&
     code.vpcmpeqd(mask, result, code.Const(xword, 0x8000000080000000, 0x8000000080000000));
     code.vpxor(result, result, mask);
     code.pmovmskb(bit, mask);
-    code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit);
+    code.or_(code.dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], bit);
 
     ctx.reg_alloc.Release(mask);
     ctx.reg_alloc.Release(bit);
 
@@ -4586,7 +4589,7 @@ void EmitVectorSignedSaturatedDoublingMultiply32(BlockOfCode& code, EmitContext&
     code.pcmpeqd(tmp, result);
     code.pxor(result, tmp);
     code.pmovmskb(bit, tmp);
-    code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit);
+    code.or_(code.dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], bit);
 
     ctx.reg_alloc.DefineValue(inst, result);
 }
 
@@ -4620,7 +4623,7 @@ void EmitX64::EmitVectorSignedSaturatedDoublingMultiplyLong16(EmitContext& ctx,
 
     const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr().cvt32();
     code.pmovmskb(bit, y);
-    code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit);
+    code.or_(code.dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], bit);
 
     ctx.reg_alloc.DefineValue(inst, x);
 }
 
@@ -4673,7 +4676,7 @@ void EmitX64::EmitVectorSignedSaturatedDoublingMultiplyLong32(EmitContext& ctx,
         code.pxor(x, y);
         code.pmovmskb(bit, y);
     }
-    code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit);
+    code.or_(code.dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], bit);
 
     ctx.reg_alloc.DefineValue(inst, x);
 }
 
@@ -4712,7 +4715,7 @@ static void EmitVectorSignedSaturatedNarrowToSigned(size_t original_esize, Block
     code.pcmpeqd(reconstructed, src);
     code.movmskps(bit, reconstructed);
     code.xor_(bit, 0b1111);
-    code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit);
+    code.or_(code.dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], bit);
 
     ctx.reg_alloc.DefineValue(inst, dest);
 }
 
@@ -4767,7 +4770,7 @@ static void EmitVectorSignedSaturatedNarrowToUnsigned(size_t original_esize, Blo
     code.pcmpeqd(reconstructed, src);
     code.movmskps(bit, reconstructed);
     code.xor_(bit, 0b1111);
-    code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit);
+    code.or_(code.dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], bit);
 
     ctx.reg_alloc.DefineValue(inst, dest);
 }
 
@@ -4870,7 +4873,7 @@ static void EmitVectorSignedSaturatedNeg(size_t esize, BlockOfCode& code, EmitCo
     // Check if any elements matched the mask prior to performing saturation. If so, set the Q bit.
     const Xbyak::Reg32 bit = ctx.reg_alloc.ScratchGpr().cvt32();
     code.pmovmskb(bit, tmp);
-    code.or_(code.dword[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], bit);
+    code.or_(code.dword[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], bit);
 
     ctx.reg_alloc.DefineValue(inst, zero);
 }
 
@@ -5641,6 +5644,7 @@ static void EmitVectorUnsignedAbsoluteDifference(size_t esize, EmitContext& ctx,
         break;
     }
     case 32:
+        // See https://stackoverflow.com/questions/3380785/compute-the-absolute-difference-between-unsigned-integers-using-sse/3527267#3527267
         if (code.HasHostFeature(HostFeature::SSE41)) {
             const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(args[0]);
             const Xbyak::Xmm y = ctx.reg_alloc.UseXmm(args[1]);
 
@@ -5652,16 +5656,33 @@ static void EmitVectorUnsignedAbsoluteDifference(size_t esize, EmitContext& ctx,
         } else {
             const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(args[0]);
             const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(args[1]);
 
-            code.movdqa(temp, code.Const(xword, 0x8000000080000000, 0x8000000080000000));
-            code.pxor(x, temp);
-            code.pxor(y, temp);
-            code.movdqa(temp, x);
-            code.psubd(temp, y);
-            code.pcmpgtd(y, x);
-            code.psrld(y, 1);
-            code.pxor(temp, y);
-            code.psubd(temp, y);
+            if (ctx.HasOptimization(OptimizationFlag::CodeSpeed)) {
+                // About 45 bytes
+                const Xbyak::Xmm temp_x = ctx.reg_alloc.ScratchXmm();
+                const Xbyak::Xmm temp_y = ctx.reg_alloc.ScratchXmm();
+                code.pcmpeqd(temp, temp);
+                code.pslld(temp, 31);
+                code.movdqa(temp_x, x);
+                code.movdqa(temp_y, y);
+                code.paddd(temp_x, x);
+                code.paddd(temp_y, y);
+                code.pcmpgtd(temp_y, temp_x);
+                code.psubd(x, y);
+                code.pandn(temp, temp_y);
+                code.pxor(x, y);
+                code.psubd(x, y);
+            } else {
+                // Smaller code size - about 36 bytes
+                code.movdqa(temp, code.Const(xword, 0x8000000080000000, 0x8000000080000000));
+                code.pxor(x, temp);
+                code.pxor(y, temp);
+                code.movdqa(temp, x);
+                code.psubd(temp, y);
+                code.pcmpgtd(y, x);
+                code.psrld(y, 1);
+                code.pxor(temp, y);
+                code.psubd(temp, y);
+            }
         }
         break;
     }
 
@@ -5727,10 +5748,7 @@ void EmitX64::EmitVectorUnsignedMultiply32(EmitContext& ctx, IR::Inst* inst) {
         code.vpmulld(result, x, y);
 
         ctx.reg_alloc.DefineValue(lower_inst, result);
-        return;
-    }
-
-    if (code.HasHostFeature(HostFeature::AVX)) {
+    } else if (code.HasHostFeature(HostFeature::AVX)) {
         const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(args[0]);
         const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(args[1]);
 
@@ -5749,39 +5767,33 @@ void EmitX64::EmitVectorUnsignedMultiply32(EmitContext& ctx, IR::Inst* inst) {
         code.shufps(result, x, 0b11011101);
 
         ctx.reg_alloc.DefineValue(upper_inst, result);
-        return;
-    }
-
-    const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(args[0]);
-    const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(args[1]);
-    const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
-    const Xbyak::Xmm upper_result = ctx.reg_alloc.ScratchXmm();
-    const Xbyak::Xmm lower_result = ctx.reg_alloc.ScratchXmm();
-
-    // calculate unsigned multiply
-    code.movdqa(tmp, x);
-    code.pmuludq(tmp, y);
-    code.psrlq(x, 32);
-    code.psrlq(y, 32);
-    code.pmuludq(x, y);
-
-    // put everything into place
-    code.pcmpeqw(upper_result, upper_result);
-    code.pcmpeqw(lower_result, lower_result);
-    code.psllq(upper_result, 32);
-    code.psrlq(lower_result, 32);
-    code.pand(upper_result, x);
-    code.pand(lower_result, tmp);
-    code.psrlq(tmp, 32);
-    code.psllq(x, 32);
-    code.por(upper_result, tmp);
-    code.por(lower_result, x);
-
-    if (upper_inst) {
-        ctx.reg_alloc.DefineValue(upper_inst, upper_result);
-    }
-    if (lower_inst) {
-        ctx.reg_alloc.DefineValue(lower_inst, lower_result);
-    }
+    } else {
+        const Xbyak::Xmm x = ctx.reg_alloc.UseScratchXmm(args[0]);
+        const Xbyak::Xmm y = ctx.reg_alloc.UseScratchXmm(args[1]);
+        const Xbyak::Xmm tmp = ctx.reg_alloc.ScratchXmm();
+        const Xbyak::Xmm upper_result = upper_inst ? ctx.reg_alloc.ScratchXmm() : Xbyak::Xmm{-1};
+        const Xbyak::Xmm lower_result = lower_inst ? ctx.reg_alloc.ScratchXmm() : Xbyak::Xmm{-1};
+
+        // calculate unsigned multiply
+        code.movdqa(tmp, x);
+        code.pmuludq(tmp, y);
+        code.psrlq(x, 32);
+        code.psrlq(y, 32);
+        code.pmuludq(x, y);
+
+        // put everything into place - only if needed
+        if (upper_inst) code.pcmpeqw(upper_result, upper_result);
+        if (lower_inst) code.pcmpeqw(lower_result, lower_result);
+        if (upper_inst) code.psllq(upper_result, 32);
+        if (lower_inst) code.psrlq(lower_result, 32);
+        if (upper_inst) code.pand(upper_result, x);
+        if (lower_inst) code.pand(lower_result, tmp);
+        if (upper_inst) code.psrlq(tmp, 32);
+        if (lower_inst) code.psllq(x, 32);
+        if (upper_inst) code.por(upper_result, tmp);
+        if (lower_inst) code.por(lower_result, x);
+        if (upper_inst) ctx.reg_alloc.DefineValue(upper_inst, upper_result);
+        if (lower_inst) ctx.reg_alloc.DefineValue(lower_inst, lower_result);
+    }
 }
 
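The new `CodeSpeed` branch trades roughly nine extra bytes for a shorter dependency chain, while the compact branch keeps the classic bias-and-conditionally-negate sequence. A scalar model of that compact path, one 32-bit lane at a time; the vector code performs exactly these steps per element:

```cpp
#include <cassert>
#include <cstdint>

// One lane of: pxor(bias) x2, movdqa+psubd, pcmpgtd, psrld 1, pxor, psubd.
std::uint32_t absdiff_u32_model(std::uint32_t x, std::uint32_t y) {
    const std::uint32_t bias = 0x80000000u;
    const std::uint32_t bx = x ^ bias, by = y ^ bias;  // map unsigned order to signed order
    std::uint32_t t = bx - by;                         // wraps, exactly like psubd
    std::uint32_t m = static_cast<std::int32_t>(by) > static_cast<std::int32_t>(bx)
                          ? 0xFFFFFFFFu : 0u;          // pcmpgtd lane mask
    m >>= 1;                                           // psrld by 1 -> 0x7FFFFFFF or 0
    t ^= m;                                            // pxor
    t -= m;                                            // psubd: negates t when the mask is set
    return t;
}

int main() {
    assert(absdiff_u32_model(5, 9) == 4);
    assert(absdiff_u32_model(9, 5) == 4);
    assert(absdiff_u32_model(0xFFFFFFFFu, 0) == 0xFFFFFFFFu);
    return 0;
}
```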
@@ -450,7 +450,7 @@ void EmitTwoOpFallbackWithoutRegAlloc(BlockOfCode& code, EmitContext& ctx, Xbyak
     code.lea(code.ABI_PARAM1, ptr[rsp + ABI_SHADOW_SPACE + 0 * 16]);
     code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + 1 * 16]);
     code.mov(code.ABI_PARAM3.cvt32(), fpcr);
-    code.lea(code.ABI_PARAM4, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
+    code.lea(code.ABI_PARAM4, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
 
     code.movaps(xword[code.ABI_PARAM2], arg1);
     code.CallFunction(fn);
 
@@ -487,7 +487,7 @@ void EmitThreeOpFallbackWithoutRegAlloc(BlockOfCode& code, EmitContext& ctx, Xby
     code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + 2 * 16]);
     code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE + 3 * 16]);
     code.mov(code.ABI_PARAM4.cvt32(), fpcr);
-    code.lea(rax, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
+    code.lea(rax, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
     code.mov(qword[rsp + ABI_SHADOW_SPACE + 0], rax);
 #else
     constexpr u32 stack_space = 3 * 16;
 
@@ -496,7 +496,7 @@ void EmitThreeOpFallbackWithoutRegAlloc(BlockOfCode& code, EmitContext& ctx, Xby
     code.lea(code.ABI_PARAM2, ptr[rsp + ABI_SHADOW_SPACE + 1 * 16]);
     code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE + 2 * 16]);
     code.mov(code.ABI_PARAM4.cvt32(), fpcr);
-    code.lea(code.ABI_PARAM5, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
+    code.lea(code.ABI_PARAM5, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
 #endif
 
     code.movaps(xword[code.ABI_PARAM2], arg1);
 
@@ -545,7 +545,7 @@ void EmitFourOpFallbackWithoutRegAlloc(BlockOfCode& code, EmitContext& ctx, Xbya
     code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE + 3 * 16]);
     code.lea(code.ABI_PARAM4, ptr[rsp + ABI_SHADOW_SPACE + 4 * 16]);
     code.mov(qword[rsp + ABI_SHADOW_SPACE + 0], ctx.FPCR(fpcr_controlled).Value());
-    code.lea(rax, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
+    code.lea(rax, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
     code.mov(qword[rsp + ABI_SHADOW_SPACE + 8], rax);
 #else
     constexpr u32 stack_space = 4 * 16;
 
@@ -555,7 +555,7 @@ void EmitFourOpFallbackWithoutRegAlloc(BlockOfCode& code, EmitContext& ctx, Xbya
     code.lea(code.ABI_PARAM3, ptr[rsp + ABI_SHADOW_SPACE + 2 * 16]);
     code.lea(code.ABI_PARAM4, ptr[rsp + ABI_SHADOW_SPACE + 3 * 16]);
     code.mov(code.ABI_PARAM5.cvt32(), ctx.FPCR(fpcr_controlled).Value());
-    code.lea(code.ABI_PARAM6, code.ptr[code.r15 + code.GetJitStateInfo().offsetof_fpsr_exc]);
+    code.lea(code.ABI_PARAM6, code.ptr[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_exc]);
 #endif
 
     if constexpr (load_previous_result == LoadPreviousResult::Yes) {
 
@@ -62,7 +62,7 @@ void EmitVectorSaturatedNative(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
         code.test(overflow.cvt32(), overflow.cvt32());
     }
     code.setnz(overflow);
-    code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], overflow);
+    code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], overflow);
 
     ctx.reg_alloc.DefineValue(inst, result);
 }
 
@@ -104,7 +104,7 @@ void EmitVectorSignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
 
     code.ktestb(k1, k1);
     code.setnz(overflow);
-    code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], overflow);
+    code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], overflow);
 
     ctx.reg_alloc.DefineValue(inst, result);
     return;
 
@@ -160,7 +160,7 @@ void EmitVectorSignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst* in
         code.test(overflow.cvt32(), overflow.cvt32());
     }
     code.setnz(overflow);
-    code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], overflow);
+    code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], overflow);
 
     if (code.HasHostFeature(HostFeature::SSE41)) {
         FCODE(blendvp)(result, tmp);
 
@@ -204,7 +204,7 @@ void EmitVectorUnsignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst*
 
     code.ktestb(k1, k1);
     code.setnz(overflow);
-    code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], overflow);
+    code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], overflow);
 
     ctx.reg_alloc.DefineValue(inst, result);
     return;
 
@@ -263,7 +263,7 @@ void EmitVectorUnsignedSaturated(BlockOfCode& code, EmitContext& ctx, IR::Inst*
     }
 
     code.setnz(overflow);
-    code.or_(code.byte[code.r15 + code.GetJitStateInfo().offsetof_fpsr_qc], overflow);
+    code.or_(code.byte[code.ABI_JIT_PTR + code.GetJitStateInfo().offsetof_fpsr_qc], overflow);
 
     if constexpr (op == Op::Add) {
         code.por(result, tmp);
 
@@ -14,7 +14,7 @@
 
 namespace Dynarmic {
 
-ExclusiveMonitor::ExclusiveMonitor(size_t processor_count)
+ExclusiveMonitor::ExclusiveMonitor(std::size_t processor_count)
         : exclusive_addresses(processor_count, INVALID_EXCLUSIVE_ADDRESS), exclusive_values(processor_count) {}
 
 size_t ExclusiveMonitor::GetProcessorCount() const {
 
@@ -29,20 +29,16 @@ void ExclusiveMonitor::Unlock() {
     lock.Unlock();
 }
 
-bool ExclusiveMonitor::CheckAndClear(size_t processor_id, VAddr address) {
+bool ExclusiveMonitor::CheckAndClear(std::size_t processor_id, VAddr address) {
     const VAddr masked_address = address & RESERVATION_GRANULE_MASK;
 
     Lock();
     if (exclusive_addresses[processor_id] != masked_address) {
         Unlock();
         return false;
     }
-
-    for (VAddr& other_address : exclusive_addresses) {
-        if (other_address == masked_address) {
+    for (VAddr& other_address : exclusive_addresses)
+        if (other_address == masked_address)
             other_address = INVALID_EXCLUSIVE_ADDRESS;
-        }
-    }
     return true;
 }
 
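For reference, the monitor logic after this hunk, modeled with standard types: the early-out covers the common failure, and a successful check clears every processor's reservation on the granule, not just the caller's. The spinlock and the granule mask are simplified here; the real values come from the monitor's header:

```cpp
#include <cstdint>
#include <mutex>
#include <vector>

using VAddr = std::uint64_t;
constexpr VAddr INVALID_ADDRESS = ~VAddr(0);
constexpr VAddr GRANULE_MASK = ~VAddr(0xF);   // illustrative granule size

struct MonitorModel {
    std::mutex lock;                          // the real class uses a spinlock
    std::vector<VAddr> exclusive_addresses;

    bool CheckAndClear(std::size_t processor_id, VAddr address) {
        const VAddr masked = address & GRANULE_MASK;
        std::scoped_lock guard{lock};
        if (exclusive_addresses[processor_id] != masked)
            return false;                     // common failure path, checked first
        for (VAddr& other : exclusive_addresses)
            if (other == masked)
                other = INVALID_ADDRESS;      // knock out every matching reservation
        return true;
    }
};
```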
@@ -13,9 +13,9 @@
 
 namespace Dynarmic::Backend::X64 {
 
-// Our static vector will contain 32 elements, stt. an uint16_t will fill up 64 bytes
+// Our static vector will contain 32 elements, stt. an uint8_t will fill up 64 bytes
 // (an entire cache line). Thanks.
-enum class HostLoc : uint16_t {
+enum class HostLoc : std::uint8_t {
     // Ordering of the registers is intentional. See also: HostLocToX64.
     RAX,
     RCX,
 
@@ -60,48 +60,48 @@ enum class HostLoc : uint16_t {
 
 constexpr size_t NonSpillHostLocCount = static_cast<size_t>(HostLoc::FirstSpill);
 
-inline bool HostLocIsGPR(HostLoc reg) {
+constexpr bool HostLocIsGPR(HostLoc reg) {
     return reg >= HostLoc::RAX && reg <= HostLoc::R15;
 }
 
-inline bool HostLocIsXMM(HostLoc reg) {
+constexpr bool HostLocIsXMM(HostLoc reg) {
     return reg >= HostLoc::XMM0 && reg <= HostLoc::XMM15;
 }
 
-inline bool HostLocIsRegister(HostLoc reg) {
+constexpr bool HostLocIsRegister(HostLoc reg) {
     return HostLocIsGPR(reg) || HostLocIsXMM(reg);
 }
 
-inline bool HostLocIsFlag(HostLoc reg) {
+constexpr bool HostLocIsFlag(HostLoc reg) {
     return reg >= HostLoc::CF && reg <= HostLoc::OF;
 }
 
-inline HostLoc HostLocRegIdx(int idx) {
+constexpr HostLoc HostLocRegIdx(int idx) {
     ASSERT(idx >= 0 && idx <= 15);
-    return static_cast<HostLoc>(idx);
+    return HostLoc(idx);
 }
 
-inline HostLoc HostLocXmmIdx(int idx) {
+constexpr HostLoc HostLocXmmIdx(int idx) {
     ASSERT(idx >= 0 && idx <= 15);
-    return static_cast<HostLoc>(static_cast<size_t>(HostLoc::XMM0) + idx);
+    return HostLoc(size_t(HostLoc::XMM0) + idx);
 }
 
-inline HostLoc HostLocSpill(size_t i) {
-    return static_cast<HostLoc>(static_cast<size_t>(HostLoc::FirstSpill) + i);
+constexpr HostLoc HostLocSpill(size_t i) {
+    return HostLoc(size_t(HostLoc::FirstSpill) + i);
 }
 
-inline bool HostLocIsSpill(HostLoc reg) {
+constexpr bool HostLocIsSpill(HostLoc reg) {
     return reg >= HostLoc::FirstSpill;
 }
 
-inline size_t HostLocBitWidth(HostLoc loc) {
+constexpr size_t HostLocBitWidth(HostLoc loc) {
     if (HostLocIsGPR(loc))
         return 64;
-    if (HostLocIsXMM(loc))
+    else if (HostLocIsXMM(loc))
         return 128;
-    if (HostLocIsSpill(loc))
+    else if (HostLocIsSpill(loc))
         return 128;
-    if (HostLocIsFlag(loc))
+    else if (HostLocIsFlag(loc))
         return 1;
     UNREACHABLE();
 }
 
@@ -109,6 +109,8 @@ inline size_t HostLocBitWidth(HostLoc loc) {
 using HostLocList = std::initializer_list<HostLoc>;
 
 // RSP is preserved for function calls
+// R13 contains fastmem pointer if any
+// R14 contains the pagetable pointer
 // R15 contains the JitState pointer
 const HostLocList any_gpr = {
     HostLoc::RAX,
 
@@ -125,12 +127,16 @@ const HostLocList any_gpr = {
     HostLoc::R12,
     HostLoc::R13,
     HostLoc::R14,
+    //HostLoc::R15,
 };
 
 // XMM0 is reserved for use by instructions that implicitly use it as an argument
+// XMM1 is used by 128 mem accessors
+// XMM2 is also used by that (and other stuff)
+// Basically dont use either XMM0, XMM1 or XMM2 ever; they're left for the regsel
 const HostLocList any_xmm = {
-    HostLoc::XMM1,
-    HostLoc::XMM2,
+    //HostLoc::XMM1,
+    //HostLoc::XMM2,
     HostLoc::XMM3,
     HostLoc::XMM4,
     HostLoc::XMM5,
 
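Two things happen in this header: `HostLoc` shrinks to `std::uint8_t` so the 32-element static vectors stay within a cache line, and the classification helpers become `constexpr`, which makes them usable in constant expressions. A self-contained miniature of the pattern; the real enum and helpers are the ones shown in the diff:

```cpp
#include <cstdint>

enum class Loc : std::uint8_t { RAX, RCX, XMM0 = 16, XMM15 = 31, FirstSpill = 36 };

constexpr bool IsGPR(Loc r) { return r >= Loc::RAX && r < Loc::XMM0; }
constexpr bool IsXMM(Loc r) { return r >= Loc::XMM0 && r <= Loc::XMM15; }

// constexpr helpers can now guard invariants at compile time:
static_assert(IsGPR(Loc::RAX));
static_assert(IsXMM(Loc::XMM15));
static_assert(sizeof(Loc) == 1);  // one byte per element, 32 fit in half a cache line

int main() { return IsGPR(Loc::RCX) ? 0 : 1; }
```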
@@ -357,9 +357,8 @@ void RegAlloc::HostCall(IR::Inst* result_def,
     static const boost::container::static_vector<HostLoc, 28> other_caller_save = [args_hostloc]() noexcept {
         boost::container::static_vector<HostLoc, 28> ret(ABI_ALL_CALLER_SAVE.begin(), ABI_ALL_CALLER_SAVE.end());
         ret.erase(std::find(ret.begin(), ret.end(), ABI_RETURN));
-        for (auto const hostloc : args_hostloc) {
+        for (auto const hostloc : args_hostloc)
             ret.erase(std::find(ret.begin(), ret.end(), hostloc));
-        }
         return ret;
     }();
 
@@ -368,7 +367,7 @@ void RegAlloc::HostCall(IR::Inst* result_def,
         DefineValueImpl(result_def, ABI_RETURN);
     }
 
-    for (size_t i = 0; i < args_count; i++) {
+    for (size_t i = 0; i < args.size(); i++) {
         if (args[i] && !args[i]->get().IsVoid()) {
             UseScratch(*args[i], args_hostloc[i]);
             // LLVM puts the burden of zero-extension of 8 and 16 bit values on the caller instead of the callee
 
@@ -383,36 +382,35 @@ void RegAlloc::HostCall(IR::Inst* result_def,
             case IR::Type::U32:
                 code->mov(reg.cvt32(), reg.cvt32());
                 break;
+            case IR::Type::U64:
+                break;  //no op
             default:
-                break;  // Nothing needs to be done
+                UNREACHABLE();
             }
         }
     }
 
-    for (size_t i = 0; i < args_count; i++) {
+    for (size_t i = 0; i < args.size(); i++)
         if (!args[i]) {
             // TODO: Force spill
             ScratchGpr(args_hostloc[i]);
         }
-    }
 
-    for (HostLoc caller_saved : other_caller_save) {
+    for (auto const caller_saved : other_caller_save)
         ScratchImpl({caller_saved});
-    }
 }
 
 void RegAlloc::AllocStackSpace(const size_t stack_space) noexcept {
-    ASSERT(stack_space < static_cast<size_t>(std::numeric_limits<s32>::max()));
+    ASSERT(stack_space < size_t(std::numeric_limits<s32>::max()));
     ASSERT(reserved_stack_space == 0);
     reserved_stack_space = stack_space;
-    code->sub(code->rsp, static_cast<u32>(stack_space));
+    code->sub(code->rsp, u32(stack_space));
 }
 
 void RegAlloc::ReleaseStackSpace(const size_t stack_space) noexcept {
-    ASSERT(stack_space < static_cast<size_t>(std::numeric_limits<s32>::max()));
+    ASSERT(stack_space < size_t(std::numeric_limits<s32>::max()));
     ASSERT(reserved_stack_space == stack_space);
     reserved_stack_space = 0;
-    code->add(code->rsp, static_cast<u32>(stack_space));
+    code->add(code->rsp, u32(stack_space));
 }
 
 HostLoc RegAlloc::SelectARegister(const boost::container::static_vector<HostLoc, 28>& desired_locations) const noexcept {
 
@@ -429,13 +427,22 @@ HostLoc RegAlloc::SelectARegister(const boost::container::static_vector<HostLoc,
     auto it_empty_candidate = desired_locations.cend();
     for (auto it = desired_locations.cbegin(); it != desired_locations.cend(); it++) {
         auto const& loc_info = LocInfo(*it);
+        DEBUG_ASSERT(*it != ABI_JIT_PTR);
         // Abstain from using upper registers unless absolutely nescesary
         if (loc_info.IsLocked()) {
             // skip, not suitable for allocation
+        // While R13 and R14 are technically available, we avoid allocating for them
+        // at all costs, because theoretically skipping them is better than spilling
+        // all over the place - it also fixes bugs with high reg pressure
+        } else if (*it >= HostLoc::R13 && *it <= HostLoc::R15) {
+            // skip, do not touch
+        // Intel recommends to reuse registers as soon as they're overwritable (DO NOT SPILL)
+        } else if (loc_info.IsEmpty()) {
+            it_empty_candidate = it;
+            break;
+        // No empty registers for some reason (very evil) - just do normal LRU
         } else {
             if (loc_info.lru_counter < min_lru_counter) {
-                if (loc_info.IsEmpty())
-                    it_empty_candidate = it;
                 // Otherwise a "quasi"-LRU
                 min_lru_counter = loc_info.lru_counter;
                 if (*it >= HostLoc::R8 && *it <= HostLoc::R15) {
 
@@ -446,9 +453,6 @@ HostLoc RegAlloc::SelectARegister(const boost::container::static_vector<HostLoc,
                 if (min_lru_counter == 0)
                     break;  //early exit
             }
-            // only if not assigned (i.e for failcase of all LRU=0)
-            if (it_empty_candidate == desired_locations.cend() && loc_info.IsEmpty())
-                it_empty_candidate = it;
         }
     }
     // Final resolution goes as follows:
 
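The reworked loop prefers, in order: skip locked registers, never touch R13 through R15, take the first empty register immediately since it is cheapest to overwrite, and only fall back to quasi-LRU eviction when nothing is empty. A sketch of that policy over a simplified register table; the types here are illustrative, not the backend's:

```cpp
#include <cstdint>
#include <optional>
#include <vector>

struct RegInfo {
    bool locked = false;
    bool empty = true;
    std::uint32_t lru_counter = 0;
};

std::optional<std::size_t> SelectRegister(const std::vector<RegInfo>& regs) {
    std::optional<std::size_t> lru;
    std::uint32_t min_lru = UINT32_MAX;
    for (std::size_t i = 0; i < regs.size(); ++i) {
        if (regs[i].locked)
            continue;              // unavailable for this instruction
        if (regs[i].empty)
            return i;              // reuse dead registers first, never spill for fun
        if (regs[i].lru_counter < min_lru) {
            min_lru = regs[i].lru_counter;
            lru = i;               // otherwise remember the quasi-LRU eviction victim
        }
    }
    return lru;
}
```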
@ -488,7 +492,6 @@ void RegAlloc::DefineValueImpl(IR::Inst* def_inst, const IR::Value& use_inst) no
|
|||
|
||||
HostLoc RegAlloc::LoadImmediate(IR::Value imm, HostLoc host_loc) noexcept {
|
||||
ASSERT_MSG(imm.IsImmediate(), "imm is not an immediate");
|
||||
|
||||
if (HostLocIsGPR(host_loc)) {
|
||||
const Xbyak::Reg64 reg = HostLocToReg64(host_loc);
|
||||
const u64 imm_value = imm.GetImmediateAsU64();
|
||||
|
@ -497,10 +500,7 @@ HostLoc RegAlloc::LoadImmediate(IR::Value imm, HostLoc host_loc) noexcept {
|
|||
} else {
|
||||
code->mov(reg, imm_value);
|
||||
}
|
||||
return host_loc;
|
||||
}
|
||||
|
||||
if (HostLocIsXMM(host_loc)) {
|
||||
} else if (HostLocIsXMM(host_loc)) {
|
||||
const Xbyak::Xmm reg = HostLocToXmm(host_loc);
|
||||
const u64 imm_value = imm.GetImmediateAsU64();
|
||||
if (imm_value == 0) {
|
||||
|
@ -508,22 +508,19 @@ HostLoc RegAlloc::LoadImmediate(IR::Value imm, HostLoc host_loc) noexcept {
|
|||
} else {
|
||||
MAYBE_AVX(movaps, reg, code->Const(code->xword, imm_value));
|
||||
}
|
||||
return host_loc;
|
||||
} else {
|
||||
UNREACHABLE();
|
||||
}
|
||||
|
||||
UNREACHABLE();
|
||||
return host_loc;
|
||||
}
|
||||
|
||||
void RegAlloc::Move(HostLoc to, HostLoc from) noexcept {
|
||||
const size_t bit_width = LocInfo(from).GetMaxBitWidth();
|
||||
|
||||
ASSERT(LocInfo(to).IsEmpty() && !LocInfo(from).IsLocked());
|
||||
ASSERT(bit_width <= HostLocBitWidth(to));
|
||||
|
||||
if (!LocInfo(from).IsEmpty()) {
|
||||
EmitMove(bit_width, to, from);
|
||||
LocInfo(to) = std::exchange(LocInfo(from), {});
|
||||
}
|
||||
ASSERT_MSG(!LocInfo(from).IsEmpty(), "Mov eliminated");
|
||||
EmitMove(bit_width, to, from);
|
||||
LocInfo(to) = std::exchange(LocInfo(from), {});
|
||||
}
|
||||
|
||||
void RegAlloc::CopyToScratch(size_t bit_width, HostLoc to, HostLoc from) noexcept {
|
||||
|
@ -557,30 +554,44 @@ void RegAlloc::SpillRegister(HostLoc loc) noexcept {
|
|||
ASSERT_MSG(HostLocIsRegister(loc), "Only registers can be spilled");
|
||||
ASSERT_MSG(!LocInfo(loc).IsEmpty(), "There is no need to spill unoccupied registers");
|
||||
ASSERT_MSG(!LocInfo(loc).IsLocked(), "Registers that have been allocated must not be spilt");
|
||||
|
||||
const HostLoc new_loc = FindFreeSpill();
|
||||
auto const new_loc = FindFreeSpill(HostLocIsXMM(loc));
|
||||
Move(new_loc, loc);
|
||||
}
|
||||
|
||||
HostLoc RegAlloc::FindFreeSpill() const noexcept {
|
||||
for (size_t i = static_cast<size_t>(HostLoc::FirstSpill); i < hostloc_info.size(); i++) {
|
||||
const auto loc = static_cast<HostLoc>(i);
|
||||
if (LocInfo(loc).IsEmpty()) {
|
||||
return loc;
|
||||
}
|
||||
HostLoc RegAlloc::FindFreeSpill(bool is_xmm) const noexcept {
|
||||
#if 0
|
||||
// TODO(lizzie): Ok, Windows hates XMM spills, this means less perf for windows
|
||||
// but it's fine anyways. We can find other ways to cheat it later - but which?!?!
|
||||
// we should NOT save xmm each block entering... MAYBE xbyak has a bug on start/end?
|
||||
// TODO(lizzie): This needs to be investigated further later.
|
||||
// Do not spill XMM into other XMM silly
|
||||
if (!is_xmm) {
|
||||
// TODO(lizzie): Using lower (xmm0 and such) registers results in issues/crashes - INVESTIGATE WHY
|
||||
// Intel recommends to spill GPR onto XMM registers IF POSSIBLE
|
||||
// TODO(lizzie): Issues on DBZ, theory: Scratch XMM not properly restored after a function call?
|
||||
// Must sync with ABI registers (except XMM0, XMM1 and XMM2)
|
||||
for (size_t i = size_t(HostLoc::XMM15); i >= size_t(HostLoc::XMM3); --i)
|
||||
if (const auto loc = HostLoc(i); LocInfo(loc).IsEmpty())
|
||||
return loc;
|
||||
}
|
||||
|
||||
#endif
|
||||
// Otherwise go to stack spilling
|
||||
for (size_t i = size_t(HostLoc::FirstSpill); i < hostloc_info.size(); ++i)
|
||||
if (const auto loc = HostLoc(i); LocInfo(loc).IsEmpty())
|
||||
return loc;
|
||||
ASSERT_FALSE("All spill locations are full");
|
||||
}
|
||||
|
||||
inline static Xbyak::RegExp SpillToOpArg_Helper1(HostLoc loc, size_t reserved_stack_space) noexcept {
ASSERT(HostLocIsSpill(loc));
size_t i = static_cast<size_t>(loc) - static_cast<size_t>(HostLoc::FirstSpill);
ASSERT_MSG(i < SpillCount, "Spill index greater than number of available spill locations");
return Xbyak::util::rsp + reserved_stack_space + ABI_SHADOW_SPACE + offsetof(StackLayout, spill) + i * sizeof(StackLayout::spill[0]);
}
};

void RegAlloc::EmitMove(const size_t bit_width, const HostLoc to, const HostLoc from) noexcept {
auto const spill_to_op_arg_helper = [&](HostLoc loc, size_t reserved_stack_space) {
ASSERT(HostLocIsSpill(loc));
size_t i = size_t(loc) - size_t(HostLoc::FirstSpill);
ASSERT_MSG(i < SpillCount, "Spill index greater than number of available spill locations");
return Xbyak::util::rsp + reserved_stack_space + ABI_SHADOW_SPACE + offsetof(StackLayout, spill) + i * sizeof(StackLayout::spill[0]);
};
auto const spill_xmm_to_op = [&](const HostLoc loc) {
return Xbyak::util::xword[spill_to_op_arg_helper(loc, reserved_stack_space)];
};
if (HostLocIsXMM(to) && HostLocIsXMM(from)) {
MAYBE_AVX(movaps, HostLocToXmm(to), HostLocToXmm(from));
} else if (HostLocIsGPR(to) && HostLocIsGPR(from)) {

@ -605,7 +616,7 @@ void RegAlloc::EmitMove(const size_t bit_width, const HostLoc to, const HostLoc
MAYBE_AVX(movd, HostLocToReg64(to).cvt32(), HostLocToXmm(from));
}
} else if (HostLocIsXMM(to) && HostLocIsSpill(from)) {
const Xbyak::Address spill_addr = SpillToOpArg(from);
const Xbyak::Address spill_addr = spill_xmm_to_op(from);
ASSERT(spill_addr.getBit() >= bit_width);
switch (bit_width) {
case 128:

@ -623,7 +634,7 @@ void RegAlloc::EmitMove(const size_t bit_width, const HostLoc to, const HostLoc
UNREACHABLE();
}
} else if (HostLocIsSpill(to) && HostLocIsXMM(from)) {
const Xbyak::Address spill_addr = SpillToOpArg(to);
const Xbyak::Address spill_addr = spill_xmm_to_op(to);
ASSERT(spill_addr.getBit() >= bit_width);
switch (bit_width) {
case 128:

@ -643,16 +654,16 @@ void RegAlloc::EmitMove(const size_t bit_width, const HostLoc to, const HostLoc
} else if (HostLocIsGPR(to) && HostLocIsSpill(from)) {
ASSERT(bit_width != 128);
if (bit_width == 64) {
code->mov(HostLocToReg64(to), Xbyak::util::qword[SpillToOpArg_Helper1(from, reserved_stack_space)]);
code->mov(HostLocToReg64(to), Xbyak::util::qword[spill_to_op_arg_helper(from, reserved_stack_space)]);
} else {
code->mov(HostLocToReg64(to).cvt32(), Xbyak::util::dword[SpillToOpArg_Helper1(from, reserved_stack_space)]);
code->mov(HostLocToReg64(to).cvt32(), Xbyak::util::dword[spill_to_op_arg_helper(from, reserved_stack_space)]);
}
} else if (HostLocIsSpill(to) && HostLocIsGPR(from)) {
ASSERT(bit_width != 128);
if (bit_width == 64) {
code->mov(Xbyak::util::qword[SpillToOpArg_Helper1(to, reserved_stack_space)], HostLocToReg64(from));
code->mov(Xbyak::util::qword[spill_to_op_arg_helper(to, reserved_stack_space)], HostLocToReg64(from));
} else {
code->mov(Xbyak::util::dword[SpillToOpArg_Helper1(to, reserved_stack_space)], HostLocToReg64(from).cvt32());
code->mov(Xbyak::util::dword[spill_to_op_arg_helper(to, reserved_stack_space)], HostLocToReg64(from).cvt32());
}
} else {
ASSERT_FALSE("Invalid RegAlloc::EmitMove");

@ -669,8 +680,4 @@ void RegAlloc::EmitExchange(const HostLoc a, const HostLoc b) noexcept {
}
}

Xbyak::Address RegAlloc::SpillToOpArg(const HostLoc loc) noexcept {
return Xbyak::util::xword[SpillToOpArg_Helper1(loc, reserved_stack_space)];
}

} // namespace Dynarmic::Backend::X64
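The lambda computes the same operand as the removed free function: a spill slot's address is rsp plus the reserved stack space, the ABI shadow space, and the slot's offset within StackLayout::spill. A sketch of that arithmetic with made-up constants (the real values come from stack_layout.h and abi.h):

```cpp
#include <cstddef>

// Hypothetical stand-ins for the real constants in stack_layout.h / abi.h.
constexpr std::size_t ABI_SHADOW_SPACE = 32;    // e.g. Win64 shadow space
constexpr std::size_t SPILL_AREA_OFFSET = 64;   // offsetof(StackLayout, spill), illustrative
constexpr std::size_t SPILL_SLOT_SIZE = 16;     // sizeof(StackLayout::spill[0]): one 128-bit slot

// Byte offset from RSP for spill slot `i`, mirroring spill_to_op_arg_helper.
constexpr std::size_t SpillSlotOffset(std::size_t i, std::size_t reserved_stack_space) {
    return reserved_stack_space + ABI_SHADOW_SPACE + SPILL_AREA_OFFSET + i * SPILL_SLOT_SIZE;
}

static_assert(SpillSlotOffset(0, 0) == 96);
static_assert(SpillSlotOffset(2, 8) == 8 + 32 + 64 + 32);
```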
@ -22,6 +22,7 @@
#include "dynarmic/backend/x64/hostloc.h"
#include "dynarmic/backend/x64/stack_layout.h"
#include "dynarmic/backend/x64/oparg.h"
#include "dynarmic/backend/x64/abi.h"
#include "dynarmic/ir/cond.h"
#include "dynarmic/ir/microinstruction.h"
#include "dynarmic/ir/value.h"

@ -242,20 +243,19 @@ private:
void MoveOutOfTheWay(HostLoc reg) noexcept;

void SpillRegister(HostLoc loc) noexcept;
HostLoc FindFreeSpill() const noexcept;
HostLoc FindFreeSpill(bool is_xmm) const noexcept;

inline HostLocInfo& LocInfo(const HostLoc loc) noexcept {
ASSERT(loc != HostLoc::RSP && loc != HostLoc::R15);
ASSERT(loc != HostLoc::RSP && loc != ABI_JIT_PTR);
return hostloc_info[static_cast<size_t>(loc)];
}
inline const HostLocInfo& LocInfo(const HostLoc loc) const noexcept {
ASSERT(loc != HostLoc::RSP && loc != HostLoc::R15);
ASSERT(loc != HostLoc::RSP && loc != ABI_JIT_PTR);
return hostloc_info[static_cast<size_t>(loc)];
}

void EmitMove(const size_t bit_width, const HostLoc to, const HostLoc from) noexcept;
void EmitExchange(const HostLoc a, const HostLoc b) noexcept;
Xbyak::Address SpillToOpArg(const HostLoc loc) noexcept;

//data
alignas(64) boost::container::static_vector<HostLoc, 28> gpr_order;

@ -264,7 +264,7 @@ private:
BlockOfCode* code = nullptr;
size_t reserved_stack_space = 0;
};
// Ensure a cache line is used, this is primordial
static_assert(sizeof(boost::container::static_vector<HostLoc, 28>) == 64);
// Ensure a cache line (or less) is used, this is primordial
static_assert(sizeof(boost::container::static_vector<HostLoc, 28>) == 40);

} // namespace Dynarmic::Backend::X64

@ -22,7 +22,7 @@ void PrintVerboseDebuggingOutputLine(RegisterData& reg_data, HostLoc hostloc, si
} else if (HostLocIsXMM(hostloc)) {
return reg_data.xmms[HostLocToXmm(hostloc).getIdx()];
} else if (HostLocIsSpill(hostloc)) {
return (*reg_data.spill)[static_cast<size_t>(hostloc) - static_cast<size_t>(HostLoc::FirstSpill)];
return (*reg_data.spill)[size_t(hostloc) - size_t(HostLoc::FirstSpill)];
} else {
fmt::print("invalid hostloc! ");
return {0, 0};

@ -16,7 +16,7 @@

namespace Dynarmic::Backend::X64 {

enum class HostLoc : uint16_t;
enum class HostLoc : std::uint8_t;
using Vector = std::array<u64, 2>;

#ifdef _MSC_VER

@ -4,6 +4,8 @@
// SPDX-License-Identifier: GPL-2.0-or-later

#include <fmt/format.h>
#include <cstdio>
#include <exception>

[[noreturn]] void assert_terminate_impl(const char* expr_str, fmt::string_view msg, fmt::format_args args) {
fmt::print(stderr, "assertion failed: {}\n", expr_str);
@ -152,11 +152,9 @@ constexpr CRC32Table iso_table{

static u32 ComputeCRC32(const CRC32Table& table, u32 crc, const u64 value, int length) {
const auto* data = reinterpret_cast<const unsigned char*>(&value);

while (length-- > 0) {
crc = (crc >> 8) ^ table[(crc ^ (*data++)) & 0xFF];
}

return crc;
}
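ComputeCRC32 is a standard byte-wise table-driven CRC: each iteration folds the next byte of value into the running remainder. A hedged sketch of how the ARMv8 CRC32B/W/X family would dispatch onto it by byte count, assuming the function and iso_table above are visible in the same translation unit (the wrapper names here are illustrative, not dynarmic's real entry points):

```cpp
// Illustrative wrappers only: fold 1, 4, or 8 bytes of the operand.
u32 crc32b(u32 accumulator, u8 value) {
    return ComputeCRC32(iso_table, accumulator, value, 1);
}
u32 crc32w(u32 accumulator, u32 value) {
    return ComputeCRC32(iso_table, accumulator, value, 4);
}
u32 crc32x(u32 accumulator, u64 value) {
    return ComputeCRC32(iso_table, accumulator, value, 8);
}
```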
@ -8,9 +8,8 @@
namespace Dynarmic {

struct SpinLock {
void Lock();
void Unlock();

void Lock() noexcept;
void Unlock() noexcept;
volatile int storage = 0;
};

@ -73,12 +73,12 @@ void SpinLockImpl::Initialize() {

} // namespace

void SpinLock::Lock() {
void SpinLock::Lock() noexcept {
std::call_once(flag, &SpinLockImpl::Initialize, impl);
impl.lock(&storage);
}

void SpinLock::Unlock() {
void SpinLock::Unlock() noexcept {
std::call_once(flag, &SpinLockImpl::Initialize, impl);
impl.unlock(&storage);
}

@ -16,15 +16,14 @@ namespace Dynarmic {
void EmitSpinLockLock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32 tmp) {
Xbyak::Label start, loop;

code.jmp(start);
code.jmp(start, code.T_NEAR);
code.L(loop);
code.pause();
code.L(start);
code.mov(tmp, 1);
code.lock();
code.xchg(code.dword[ptr], tmp);
/*code.lock();*/ code.xchg(code.dword[ptr], tmp);
code.test(tmp, tmp);
code.jnz(loop);
code.jnz(loop, code.T_NEAR);
}

void EmitSpinLockUnlock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32 tmp) {

@ -63,12 +62,12 @@ void SpinLockImpl::Initialize() {

} // namespace

void SpinLock::Lock() {
void SpinLock::Lock() noexcept {
std::call_once(flag, &SpinLockImpl::Initialize, impl);
impl.lock(&storage);
}

void SpinLock::Unlock() {
void SpinLock::Unlock() noexcept {
std::call_once(flag, &SpinLockImpl::Initialize, impl);
impl.unlock(&storage);
}
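The emitted sequence is a plain test-and-set spin loop: jump into the exchange, and on contention execute pause before retrying. Note that xchg with a memory operand is implicitly locked on x86, which is why the explicit lock prefix can be commented out without changing semantics. A behaviorally equivalent sketch in portable C++, illustrative only and not the JIT's actual code path:

```cpp
#include <atomic>

// Portable equivalent of the emitted lock loop.
void SpinLockLock(std::atomic<int>& storage) {
    for (;;) {
        // mov tmp, 1 ; xchg [ptr], tmp  (xchg is implicitly locked on x86)
        if (storage.exchange(1, std::memory_order_acquire) == 0) {
            return;  // test tmp, tmp ; fall through: we took the lock
        }
        // pause: be polite to the sibling hyperthread while spinning
#if defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__))
        __builtin_ia32_pause();
#endif
    }
}

void SpinLockUnlock(std::atomic<int>& storage) {
    storage.store(0, std::memory_order_release);
}
```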
@ -109,13 +109,11 @@ bool TranslatorVisitor::arm_LDR_imm(Cond cond, bool P, bool U, bool W, Reg n, Re

if (t == Reg::PC) {
ir.LoadWritePC(data);

if (!P && W && n == Reg::R13) {
ir.SetTerm(IR::Term::PopRSBHint{});
} else {
ir.SetTerm(IR::Term::FastDispatchHint{});
}

return false;
}

@ -145,7 +143,11 @@ bool TranslatorVisitor::arm_LDR_reg(Cond cond, bool P, bool U, bool W, Reg n, Re

if (t == Reg::PC) {
ir.LoadWritePC(data);
ir.SetTerm(IR::Term::FastDispatchHint{});
if (!P && W && n == Reg::R13) {
ir.SetTerm(IR::Term::PopRSBHint{});
} else {
ir.SetTerm(IR::Term::FastDispatchHint{});
}
return false;
}

@ -21,6 +21,7 @@ bool TranslatorVisitor::B_uncond(Imm<26> imm26) {
const s64 offset = concatenate(imm26, Imm<2>{0}).SignExtend<s64>();
const u64 target = ir.PC() + offset;

//ir.SetTerm(IR::Term::LinkBlockFast{ir.current_location->SetPC(target)});
ir.SetTerm(IR::Term::LinkBlock{ir.current_location->SetPC(target)});
return false;
}

@ -6,11 +6,10 @@
#pragma once

#include <array>
#include <atomic>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <vector>
#include <boost/container/static_vector.hpp>

#include <dynarmic/common/spin_lock.h>

@ -80,9 +79,10 @@ private:

static constexpr VAddr RESERVATION_GRANULE_MASK = 0xFFFF'FFFF'FFFF'FFFFull;
static constexpr VAddr INVALID_EXCLUSIVE_ADDRESS = 0xDEAD'DEAD'DEAD'DEADull;
static constexpr size_t MAX_NUM_CPU_CORES = 4; // Sync with src/core/hardware_properties
boost::container::static_vector<VAddr, MAX_NUM_CPU_CORES> exclusive_addresses;
boost::container::static_vector<Vector, MAX_NUM_CPU_CORES> exclusive_values;
SpinLock lock;
std::vector<VAddr> exclusive_addresses;
std::vector<Vector> exclusive_values;
};

} // namespace Dynarmic
@ -32,6 +32,8 @@ enum class OptimizationFlag : std::uint32_t {
ConstProp = 0x00000010,
/// This enables miscellaneous safe IR optimizations.
MiscIROpt = 0x00000020,
/// Optimize for code speed rather than for code size (this serves well for tight loops)
CodeSpeed = 0x00000040,

/// This is an UNSAFE optimization that reduces accuracy of fused multiply-add operations.
/// This unfuses fused instructions to improve performance on host CPUs without FMA support.
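Since OptimizationFlag is a bitmask, the new CodeSpeed bit composes with the existing safe set via bitwise OR. A self-contained sketch with a local stand-in for the enum (the real type and its operator| live in dynarmic's optimization_flags header):

```cpp
#include <cstdint>

// Local stand-in mirroring the enum's layout, for illustration only.
enum class OptimizationFlag : std::uint32_t {
    ConstProp = 0x00000010,
    MiscIROpt = 0x00000020,
    CodeSpeed = 0x00000040,
};

constexpr OptimizationFlag operator|(OptimizationFlag a, OptimizationFlag b) {
    return OptimizationFlag(std::uint32_t(a) | std::uint32_t(b));
}

// e.g. config.optimizations = ConstProp | MiscIROpt | CodeSpeed;
constexpr auto flags = OptimizationFlag::ConstProp | OptimizationFlag::MiscIROpt | OptimizationFlag::CodeSpeed;
static_assert(std::uint32_t(flags) == 0x70);
```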
@ -86,11 +86,9 @@ static std::string TerminalToString(const Terminal& terminal_variant) noexcept {
}

std::string DumpBlock(const IR::Block& block) noexcept {
std::string ret;

ret += fmt::format("Block: location={}\n", block.Location());
ret += fmt::format("cycles={}", block.CycleCount());
ret += fmt::format(", entry_cond={}", A64::CondToString(block.GetCondition()));
std::string ret = fmt::format("Block: location={}-{}\n", block.Location(), block.EndLocation())
+ fmt::format("cycles={}", block.CycleCount())
+ fmt::format(", entry_cond={}", A64::CondToString(block.GetCondition()));
if (block.GetCondition() != Cond::AL) {
ret += fmt::format(", cond_fail={}", block.ConditionFailedLocation());
}

@ -116,6 +114,8 @@ std::string DumpBlock(const IR::Block& block) noexcept {
return fmt::format("#{:#x}", arg.GetU32());
case Type::U64:
return fmt::format("#{:#x}", arg.GetU64());
case Type::U128:
return fmt::format("#<u128 imm>");
case Type::A32Reg:
return A32::RegToString(arg.GetA32RegRef());
case Type::A32ExtReg:

@ -124,8 +124,18 @@ std::string DumpBlock(const IR::Block& block) noexcept {
return A64::RegToString(arg.GetA64RegRef());
case Type::A64Vec:
return A64::VecToString(arg.GetA64VecRef());
case Type::CoprocInfo:
return fmt::format("#<coproc>");
case Type::NZCVFlags:
return fmt::format("#<NZCV flags>");
case Type::Cond:
return fmt::format("#<cond={}>", A32::CondToString(arg.GetCond()));
case Type::Table:
return fmt::format("#<table>");
case Type::AccType:
return fmt::format("#<acc-type={}>", u32(arg.GetAccType()));
default:
return "<unknown immediate type>";
return fmt::format("<unknown immediate type {}>", arg.GetType());
}
};

@ -19,7 +19,7 @@
namespace Dynarmic::IR {

enum class Opcode;
enum class Type;
enum class Type : u16;

constexpr size_t max_arg_count = 4;

@ -16,12 +16,6 @@ namespace Dynarmic::IR {

namespace OpcodeInfo {

struct Meta {
std::vector<Type> arg_types;
const char* name;
Type type;
};

constexpr Type Void = Type::Void;
constexpr Type A32Reg = Type::A32Reg;
constexpr Type A32ExtReg = Type::A32ExtReg;

@ -40,36 +34,62 @@ constexpr Type Cond = Type::Cond;
constexpr Type Table = Type::Table;
constexpr Type AccType = Type::AccType;

alignas(64) static const std::array opcode_info{
#define OPCODE(name, type, ...) Meta{{__VA_ARGS__}, #name, type},
#define A32OPC(name, type, ...) Meta{{__VA_ARGS__}, #name, type},
#define A64OPC(name, type, ...) Meta{{__VA_ARGS__}, #name, type},
struct Meta {
std::vector<Type> arg_types;
Type type;
};

// Evil macro magic for Intel C++ compiler
// Helper macro to force expanding __VA_ARGS__ to satisfy MSVC compiler.
#define PP_EXPAND(x) x
#define PP_NARGS(...) PP_EXPAND(PP_ARG_N(__VA_ARGS__, 5, 4, 3, 2, 1, 0))
#define PP_ARG_N(_1, _2, _3, _4, _5, N, ...) N

alignas(64) static const Meta opcode_info[] = {
#define OPCODE(name, type, ...) Meta{{__VA_ARGS__}, type},
#define A32OPC(name, type, ...) Meta{{__VA_ARGS__}, type},
#define A64OPC(name, type, ...) Meta{{__VA_ARGS__}, type},
#include "./opcodes.inc"
#undef OPCODE
#undef A32OPC
#undef A64OPC
};

// Be aware of trailing commas, they can cause PP_NARGS to return 2!
static_assert(PP_EXPAND(PP_NARGS(u8,)) == 2);
static_assert(PP_EXPAND(PP_NARGS(u8)) == 1);
static_assert(PP_EXPAND(PP_NARGS(u8, u16)) == 2);
static_assert(PP_EXPAND(PP_NARGS(u8, u16, u32)) == 3);

} // namespace OpcodeInfo

/// @brief Get return type of an opcode
Type GetTypeOf(Opcode op) noexcept {
return OpcodeInfo::opcode_info.at(size_t(op)).type;
return OpcodeInfo::opcode_info[size_t(op)].type;
}

/// @brief Get the number of arguments an opcode accepts
size_t GetNumArgsOf(Opcode op) noexcept {
return OpcodeInfo::opcode_info.at(size_t(op)).arg_types.size();
return OpcodeInfo::opcode_info[size_t(op)].arg_types.size();
}

/// @brief Get the required type of an argument of an opcode
Type GetArgTypeOf(Opcode op, size_t arg_index) noexcept {
return OpcodeInfo::opcode_info.at(size_t(op)).arg_types.at(arg_index);
return OpcodeInfo::opcode_info[size_t(op)].arg_types[arg_index];
}

/// @brief Get the name of an opcode.
std::string GetNameOf(Opcode op) noexcept {
return OpcodeInfo::opcode_info.at(size_t(op)).name;
std::string_view GetNameOf(Opcode op) noexcept {
static const std::string_view opcode_names[] = {
#define OPCODE(name, type, ...) #name,
#define A32OPC(name, type, ...) #name,
#define A64OPC(name, type, ...) #name,
#include "./opcodes.inc"
#undef OPCODE
#undef A32OPC
#undef A64OPC
};
return opcode_names[size_t(op)];
}

} // namespace Dynarmic::IR
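To make the X-macro pattern concrete: each OPCODE(...) row expands once into a Meta initializer for opcode_info and, inside GetNameOf, once into a string literal for the parallel name table. A toy, self-contained reduction of the same idea (the real code gets its rows by #include-ing opcodes.inc between the #define/#undef pairs):

```cpp
#include <string_view>
#include <vector>

enum class Ty { Void, U32, U64 };

struct Meta {
    std::vector<Ty> arg_types;
    Ty type;
};

// Stand-in for opcodes.inc: one row per opcode.
#define MY_OPCODES(X) \
    X(Add32, Ty::U32, Ty::U32, Ty::U32) \
    X(Add64, Ty::U64, Ty::U64, Ty::U64)

// Expansion 1: the metadata table (argument types + return type, names dropped).
#define ROW(name, type, ...) Meta{{__VA_ARGS__}, type},
static const Meta meta[] = { MY_OPCODES(ROW) };
#undef ROW

// Expansion 2: a parallel name table rebuilt from the same rows.
#define ROW(name, type, ...) #name,
static const std::string_view names[] = { MY_OPCODES(ROW) };
#undef ROW

static_assert(sizeof(names) / sizeof(names[0]) == 2);
```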
@ -15,7 +15,7 @@

namespace Dynarmic::IR {

enum class Type;
enum class Type : u16;

/// @brief The Opcodes of our intermediate representation.
/// Type signatures for each opcode can be found in opcodes.inc

@ -35,7 +35,7 @@ constexpr size_t OpcodeCount = static_cast<size_t>(Opcode::NUM_OPCODE);
Type GetTypeOf(Opcode op) noexcept;
size_t GetNumArgsOf(Opcode op) noexcept;
Type GetArgTypeOf(Opcode op, size_t arg_index) noexcept;
std::string GetNameOf(Opcode op) noexcept;
std::string_view GetNameOf(Opcode op) noexcept;

/// @brief Determines whether or not this instruction performs an arithmetic shift.
constexpr bool IsArithmeticShift(const Opcode op) noexcept {

@ -18,7 +18,7 @@ namespace Dynarmic::IR {
/**
 * The intermediate representation is typed. These are the types used by our IR.
 */
enum class Type {
enum class Type : u16 {
Void = 0,
A32Reg = 1 << 0,
A32ExtReg = 1 << 1,
@ -357,7 +357,7 @@ static void RunTestInstance(Dynarmic::A32::Jit& jit,
uni.ClearPageCache();

jit_env.ticks_left = ticks_left;
jit.Run();
CheckedRun([&]() { jit.Run(); });

uni_env.ticks_left = instructions.size(); // Unicorn counts thumb instructions weirdly.
uni.Run();

@ -445,6 +445,9 @@ static void RunTestInstance(Dynarmic::A32::Jit& jit,
}
}

// TODO: Why the difference? QEMU what are you doing???
jit.Regs()[15] = uni.GetRegisters()[15];

REQUIRE(uni.GetRegisters() == jit.Regs());
REQUIRE(uni.GetExtRegs() == jit.ExtRegs());
REQUIRE((uni.GetCpsr() & 0xFFFFFDDF) == (jit.Cpsr() & 0xFFFFFDDF));

@ -130,7 +130,7 @@ static void RunInstance(size_t run_number, ThumbTestEnv& test_env, A32Unicorn<Th
test_env.code_mem_modified_by_guest = false;
test_env.modified_memory.clear();
test_env.ticks_left = instructions_to_execute_count;
jit.Run();
CheckedRun([&]() { jit.Run(); });
const bool jit_code_memory_modified = test_env.code_mem_modified_by_guest;
const auto jit_write_records = test_env.modified_memory;
test_env.code_mem_modified_by_guest = false;
@ -38,7 +38,7 @@ TEST_CASE("arm: Opt Failure: Const folding in MostSignificantWord", "[arm][A32]"
jit.SetCpsr(0x000001d0); // User-mode

test_env.ticks_left = 6;
jit.Run();
CheckedRun([&]() { jit.Run(); });

// If we don't trigger the GetCarryFromOp ASSERT, we're fine.
}

@ -83,7 +83,7 @@ TEST_CASE("arm: Unintended modification in SetCFlag", "[arm][A32]") {
jit.SetCpsr(0x000001d0); // User-mode

test_env.ticks_left = 6;
jit.Run();
CheckedRun([&]() { jit.Run(); });

REQUIRE(jit.Regs()[0] == 0x00000af1);
REQUIRE(jit.Regs()[1] == 0x267ea626);

@ -123,7 +123,7 @@ TEST_CASE("arm: shsax (Edge-case)", "[arm][A32]") {
jit.SetCpsr(0x000001d0); // User-mode

test_env.ticks_left = 2;
jit.Run();
CheckedRun([&]() { jit.Run(); });

REQUIRE(jit.Regs()[0] == 0x3a3b8b18);
REQUIRE(jit.Regs()[1] == 0x96156555);

@ -162,7 +162,7 @@ TEST_CASE("arm: uasx (Edge-case)", "[arm][A32]") {
jit.SetCpsr(0x000001d0); // User-mode

test_env.ticks_left = 2;
jit.Run();
CheckedRun([&]() { jit.Run(); });

REQUIRE(jit.Regs()[4] == 0x8ed38f4c);
REQUIRE(jit.Regs()[5] == 0x0000261d);

@ -200,7 +200,7 @@ TEST_CASE("arm: smuad (Edge-case)", "[arm][A32]") {
jit.SetCpsr(0x000001d0); // User-mode

test_env.ticks_left = 2;
jit.Run();
CheckedRun([&]() { jit.Run(); });

REQUIRE(jit.Regs()[0] == 0x80000000);
REQUIRE(jit.Regs()[1] == 0x80008000);

@ -222,7 +222,7 @@ TEST_CASE("arm: Test InvalidateCacheRange", "[arm][A32]") {
jit.SetCpsr(0x000001d0); // User-mode

test_env.ticks_left = 4;
jit.Run();
CheckedRun([&]() { jit.Run(); });

REQUIRE(jit.Regs()[0] == 5);
REQUIRE(jit.Regs()[1] == 13);

@ -238,8 +238,8 @@ TEST_CASE("arm: Test InvalidateCacheRange", "[arm][A32]") {
jit.Regs()[15] = 0;

test_env.ticks_left = 4;
jit.Run();
jit.Run();
CheckedRun([&]() { jit.Run(); });
CheckedRun([&]() { jit.Run(); });

REQUIRE(jit.Regs()[0] == 5);
REQUIRE(jit.Regs()[1] == 7);

@ -347,7 +347,7 @@ TEST_CASE("arm: Test stepping", "[arm]") {
}

test_env.ticks_left = 20;
jit.Run();
CheckedRun([&]() { jit.Run(); });

REQUIRE(jit.Regs()[15] == 80);
REQUIRE(jit.Cpsr() == 0x000001d0);

@ -397,7 +397,7 @@ TEST_CASE("arm: Test stepping 2", "[arm]") {
}

test_env.ticks_left = 20;
jit.Run();
CheckedRun([&]() { jit.Run(); });

REQUIRE(jit.Regs()[15] == 80);
REQUIRE(jit.Cpsr() == 0x000001d0);

@ -427,7 +427,7 @@ TEST_CASE("arm: Test stepping 3", "[arm]") {
REQUIRE(jit.Cpsr() == 0x000001d0);

test_env.ticks_left = 20;
jit.Run();
CheckedRun([&]() { jit.Run(); });

REQUIRE(jit.Regs()[15] == 20);
REQUIRE(jit.Cpsr() == 0x000001d0);

@ -466,7 +466,7 @@ TEST_CASE("arm: PackedAbsDiffSumS8", "[arm][A32]") {
jit.SetCpsr(0xb0000010);

test_env.ticks_left = 3;
jit.Run();
CheckedRun([&]() { jit.Run(); });

REQUIRE(jit.Regs()[0] == 0xea85297c);
REQUIRE(jit.Regs()[1] == 0x417ad918);

@ -501,7 +501,7 @@ TEST_CASE("arm: vclt.f32 with zero", "[arm][A32]") {
jit.SetCpsr(0x000001d0); // User-mode

test_env.ticks_left = 2;
jit.Run();
CheckedRun([&]() { jit.Run(); });

REQUIRE(jit.ExtRegs()[6] == 0x00000000);
REQUIRE(jit.ExtRegs()[7] == 0x00000000);

@ -521,7 +521,7 @@ TEST_CASE("arm: vcvt.s16.f64", "[arm][A32]") {
jit.SetCpsr(0x000001d0); // User-mode

test_env.ticks_left = 2;
jit.Run();
CheckedRun([&]() { jit.Run(); });

REQUIRE(jit.ExtRegs()[16] == 0xffff8000);
REQUIRE(jit.ExtRegs()[17] == 0xffffffff);

@ -558,7 +558,7 @@ TEST_CASE("arm: Memory access (fastmem)", "[arm][A32]") {
jit.SetCpsr(0x000001d0); // User-mode
env.ticks_left = 3;

jit.Run();
CheckedRun([&]() { jit.Run(); });
REQUIRE(strncmp(backing_memory + 0x100, backing_memory + 0x1F0, 4) == 0);
}

@ -581,7 +581,7 @@ TEST_CASE("arm: vmsr, vcmp, vmrs", "[arm][A32]") {
jit.SetCpsr(0x60000000); // User-mode

test_env.ticks_left = 4;
jit.Run();
CheckedRun([&]() { jit.Run(); });
}

TEST_CASE("arm: sdiv maximally", "[arm][A32]") {

@ -598,7 +598,7 @@ TEST_CASE("arm: sdiv maximally", "[arm][A32]") {
jit.SetCpsr(0x000001d0); // User-mode

test_env.ticks_left = 2;
jit.Run();
CheckedRun([&]() { jit.Run(); });

REQUIRE(jit.Regs()[2] == 0x80000000);
}

@ -637,7 +637,7 @@ TEST_CASE("arm: tbl", "[arm][A32]") {
jit.ExtRegs()[23 * 2 + 1] = 0x1F'1E'1D'1C;

test_env.ticks_left = 5;
jit.Run();
CheckedRun([&]() { jit.Run(); });

REQUIRE(jit.ExtRegs()[16 * 2 + 0] == 0x05'02'01'00);
REQUIRE(jit.ExtRegs()[16 * 2 + 1] == 0x00'00'00'00);

@ -689,7 +689,7 @@ TEST_CASE("arm: tbx", "[arm][A32]") {
jit.ExtRegs()[23 * 2 + 1] = 0x1F'1E'1D'1C;

test_env.ticks_left = 5;
jit.Run();
CheckedRun([&]() { jit.Run(); });

REQUIRE(jit.ExtRegs()[16 * 2 + 0] == 0x05'02'01'00);
REQUIRE(jit.ExtRegs()[16 * 2 + 1] == 0x20'1F'10'0F);
@ -156,7 +156,7 @@ TEST_CASE("arm: Test coprocessor (Read TPIDRURO)", "[arm][A32]") {
jit.SetCpsr(0x000001d0); // User-mode

test_env.ticks_left = 2;
jit.Run();
CheckedRun([&]() { jit.Run(); });

REQUIRE(jit.Regs()[1] == 0xf00d);
}

@ -178,7 +178,7 @@ TEST_CASE("arm: Test coprocessor (Read TPIDRURW)", "[arm][A32]") {
jit.SetCpsr(0x000001d0); // User-mode

test_env.ticks_left = 2;
jit.Run();
CheckedRun([&]() { jit.Run(); });

REQUIRE(jit.Regs()[1] == 0xcafe);
}

@ -200,7 +200,7 @@ TEST_CASE("arm: Test coprocessor (Write TPIDRURW)", "[arm][A32]") {
jit.SetCpsr(0x000001d0); // User-mode

test_env.ticks_left = 2;
jit.Run();
CheckedRun([&]() { jit.Run(); });

REQUIRE(cp15_state.cp15_thread_uprw == 0xaaaa);
}

@ -222,7 +222,7 @@ TEST_CASE("arm: Test coprocessor (DMB)", "[arm][A32]") {
jit.SetCpsr(0x000001d0); // User-mode

test_env.ticks_left = 2;
jit.Run();
CheckedRun([&]() { jit.Run(); });

REQUIRE(cp15_state.cp15_data_memory_barrier == 1);
}

@ -31,7 +31,7 @@ TEST_CASE("arm: svc", "[arm][A32]") {
jit.SetCpsr(0x000001d0); // User-mode

test_env.ticks_left = 3;
jit.Run();
CheckedRun([&]() { jit.Run(); });

REQUIRE(test_env.svc_called == 0x1ee);
REQUIRE(jit.Regs()[15] == 0x00000008);
@ -32,7 +32,7 @@ TEST_CASE("thumb: lsls r0, r1, #2", "[thumb]") {
jit.SetCpsr(0x00000030); // Thumb, User-mode

test_env.ticks_left = 1;
jit.Run();
CheckedRun([&]() { jit.Run(); });

REQUIRE(jit.Regs()[0] == 8);
REQUIRE(jit.Regs()[1] == 2);

@ -54,7 +54,7 @@ TEST_CASE("thumb: lsls r0, r1, #31", "[thumb]") {
jit.SetCpsr(0x00000030); // Thumb, User-mode

test_env.ticks_left = 1;
jit.Run();
CheckedRun([&]() { jit.Run(); });

REQUIRE(jit.Regs()[0] == 0x80000000);
REQUIRE(jit.Regs()[1] == 0xffffffff);

@ -75,7 +75,7 @@ TEST_CASE("thumb: revsh r4, r3", "[thumb]") {
jit.SetCpsr(0x00000030); // Thumb, User-mode

test_env.ticks_left = 1;
jit.Run();
CheckedRun([&]() { jit.Run(); });

REQUIRE(jit.Regs()[3] == 0x12345678);
REQUIRE(jit.Regs()[4] == 0x00007856);

@ -96,7 +96,7 @@ TEST_CASE("thumb: ldr r3, [r3, #28]", "[thumb]") {
jit.SetCpsr(0x00000030); // Thumb, User-mode

test_env.ticks_left = 1;
jit.Run();
CheckedRun([&]() { jit.Run(); });

REQUIRE(jit.Regs()[3] == 0x97969594); // Memory location 0x12345694
REQUIRE(jit.Regs()[15] == 2);

@ -115,7 +115,7 @@ TEST_CASE("thumb: blx +#67712", "[thumb]") {
jit.SetCpsr(0x00000030); // Thumb, User-mode

test_env.ticks_left = 1;
jit.Run();
CheckedRun([&]() { jit.Run(); });

REQUIRE(jit.Regs()[14] == (0x4 | 1));
REQUIRE(jit.Regs()[15] == 0x10880);

@ -134,7 +134,7 @@ TEST_CASE("thumb: bl +#234584", "[thumb]") {
jit.SetCpsr(0x00000030); // Thumb, User-mode

test_env.ticks_left = 1;
jit.Run();
CheckedRun([&]() { jit.Run(); });

REQUIRE(jit.Regs()[14] == (0x4 | 1));
REQUIRE(jit.Regs()[15] == 0x39458);

@ -153,7 +153,7 @@ TEST_CASE("thumb: bl -#42", "[thumb]") {
jit.SetCpsr(0x00000030); // Thumb, User-mode

test_env.ticks_left = 1;
jit.Run();
CheckedRun([&]() { jit.Run(); });

REQUIRE(jit.Regs()[14] == (0x4 | 1));
REQUIRE(jit.Regs()[15] == 0xFFFFFFD6);

@ -208,7 +208,7 @@ TEST_CASE("thumb: Opt Failure: Get/Set Elimination for Flags", "[thumb]") {
jit.SetCpsr(0x000001f0); // Thumb, User-mode

test_env.ticks_left = 7;
jit.Run();
CheckedRun([&]() { jit.Run(); });

REQUIRE(jit.Regs()[0] == 0x2154abb5);
REQUIRE(jit.Regs()[1] == 0xdbaa6333);

@ -248,7 +248,7 @@ TEST_CASE("thumb: Opt Failure: Get/Set Elimination for Flags 2", "[thumb]") {
jit.SetCpsr(0x000001f0); // Thumb, User-mode

test_env.ticks_left = 7;
jit.Run();
CheckedRun([&]() { jit.Run(); });

const std::array<u32, 16> expected = {0x954d53b0, 0x4caaad40, 0xb0afaead, 0x0da0cdb6, 0x0f43507e, 0xb4b3b2b1, 0x00000066, 0x892a6888,
0x3b9ffb23, 0x0a92ef93, 0x38dee619, 0xc0e95e81, 0x6a448690, 0xc2d4d6b9, 0xe93600b9, 0x0000000a};

@ -16,8 +16,8 @@

#include "dynarmic/common/assert.h"
#include "dynarmic/common/common_types.h"

#include "dynarmic/interface/A32/a32.h"
#include "../native/testenv.h"

template<typename InstructionType_, u32 infinite_loop_u32>
class A32TestEnv : public Dynarmic::A32::UserCallbacks {

File diff suppressed because one or more lines are too long
@ -8,7 +8,7 @@

#include <array>
#include <exception>
#include <map>
#include <unordered_map>

#include <catch2/catch_test_macros.hpp>
#include "dynarmic/common/common_types.h"

@ -23,7 +23,7 @@ namespace {
class MyEnvironment final : public A64::UserCallbacks {
public:
u64 ticks_left = 0;
std::map<u64, u8> memory{};
std::unordered_map<u64, u8> memory{};

u8 MemoryRead8(u64 vaddr) override {
return memory[vaddr];

@ -87,7 +87,7 @@ void run_test(u32 instruction, Fn fn) {
jit.SetPC(0);

env.ticks_left = 2;
jit.Run();
CheckedRun([&]() { jit.Run(); });

REQUIRE(jit.GetVector(0)[0] == fn(test_case));

@ -97,7 +97,7 @@ void run_test(u32 instruction, Fn fn) {
jit.SetPC(0);

env.ticks_left = 2;
jit.Run();
CheckedRun([&]() { jit.Run(); });

REQUIRE(jit.GetVector(0)[0] == fn(test_case));

@ -109,7 +109,7 @@ void run_test(u32 instruction, Fn fn) {
jit.SetPC(0);

env.ticks_left = 2;
jit.Run();
CheckedRun([&]() { jit.Run(); });

REQUIRE(jit.GetVector(0)[0] == force_default_nan(fn(test_case)));

@ -119,7 +119,7 @@ void run_test(u32 instruction, Fn fn) {
jit.SetPC(0);

env.ticks_left = 2;
jit.Run();
CheckedRun([&]() { jit.Run(); });

REQUIRE(jit.GetVector(0)[0] == force_default_nan(fn(test_case)));
}

@ -136,7 +136,7 @@ void run_test(u32 instruction, Fn fn) {
jit.SetPC(0);

env.ticks_left = 2;
jit.Run();
CheckedRun([&]() { jit.Run(); });

REQUIRE(jit.GetVector(0)[0] == fn(test_case));

@ -148,7 +148,7 @@ void run_test(u32 instruction, Fn fn) {
jit.SetPC(0);

env.ticks_left = 2;
jit.Run();
CheckedRun([&]() { jit.Run(); });

REQUIRE(jit.GetVector(0)[0] == force_default_nan(fn(test_case)));
}
@ -91,6 +91,9 @@ static u32 GenRandomInst(u64 pc, bool is_last_inst) {
"MSR_reg",
"MSR_imm",
"MRS",
// Does not need test
"SVC",
"BRK"
};

for (const auto& [fn, bitstring] : list) {

@ -198,9 +201,9 @@ static void RunTestInstance(Dynarmic::A64::Jit& jit, A64Unicorn& uni, A64TestEnv
uni.ClearPageCache();

jit_env.ticks_left = instructions.size();
jit.Run();
CheckedRun([&]() { jit.Run(); });

uni_env.ticks_left = instructions.size();
uni_env.ticks_left = instructions.size() * 4;
uni.Run();

SCOPE_FAIL {

@ -296,7 +299,7 @@ static void RunTestInstance(Dynarmic::A64::Jit& jit, A64Unicorn& uni, A64TestEnv
return;
}

REQUIRE(uni.GetPC() == jit.GetPC());
REQUIRE(uni.GetPC() + 4 == jit.GetPC());
REQUIRE(uni.GetRegisters() == jit.GetRegisters());
REQUIRE(uni.GetVectors() == jit.GetVectors());
REQUIRE(uni.GetSP() == jit.GetSP());

@ -306,7 +309,7 @@ static void RunTestInstance(Dynarmic::A64::Jit& jit, A64Unicorn& uni, A64TestEnv
REQUIRE(FP::FPSR{uni.GetFpsr()}.QC() == FP::FPSR{jit.GetFpsr()}.QC());
}

TEST_CASE("A64: Single random instruction", "[a64]") {
TEST_CASE("A64: Single random instruction", "[a64][unicorn]") {
A64TestEnv jit_env{};
A64TestEnv uni_env{};

@ -333,7 +336,7 @@ TEST_CASE("A64: Single random instruction", "[a64]") {
}
}

TEST_CASE("A64: Floating point instructions", "[a64]") {
TEST_CASE("A64: Floating point instructions", "[a64][unicorn]") {
A64TestEnv jit_env{};
A64TestEnv uni_env{};

@ -458,7 +461,7 @@ TEST_CASE("A64: Floating point instructions", "[a64]") {
}
}

TEST_CASE("A64: Small random block", "[a64]") {
TEST_CASE("A64: Small random block", "[a64][unicorn]") {
A64TestEnv jit_env{};
A64TestEnv uni_env{};

@ -493,7 +496,7 @@ TEST_CASE("A64: Small random block", "[a64]") {
}
}

TEST_CASE("A64: Large random block", "[a64]") {
TEST_CASE("A64: Large random block", "[a64][unicorn]") {
A64TestEnv jit_env{};
A64TestEnv uni_env{};
@ -24,7 +24,7 @@ TEST_CASE("misaligned load/store do not use page_table when detect_misaligned_ac
jit.SetRegister(0, 0x000000000b0afff8);

env.ticks_left = 2;
jit.Run();
CheckedRun([&]() { jit.Run(); });

// If we don't crash we're fine.
}

102 src/dynarmic/tests/A64/real_world.cpp (new file)
File diff suppressed because one or more lines are too long
@ -27,38 +27,38 @@ TEST_CASE("ensure fast dispatch entry is cleared even when a block does not have

jit.SetPC(100);
env.ticks_left = 4;
jit.Run();
CheckedRun([&]() { jit.Run(); });
REQUIRE(jit.GetRegister(0) == 42);

jit.SetPC(100);
env.ticks_left = 4;
jit.Run();
CheckedRun([&]() { jit.Run(); });
REQUIRE(jit.GetRegister(0) == 42);

jit.InvalidateCacheRange(108, 4);

jit.SetPC(100);
env.ticks_left = 4;
jit.Run();
CheckedRun([&]() { jit.Run(); });
REQUIRE(jit.GetRegister(0) == 42);

env.code_mem[2] = 0xd28008a0; // MOV X0, 69

jit.SetPC(100);
env.ticks_left = 4;
jit.Run();
CheckedRun([&]() { jit.Run(); });
REQUIRE(jit.GetRegister(0) == 42);

jit.InvalidateCacheRange(108, 4);

jit.SetPC(100);
env.ticks_left = 4;
jit.Run();
CheckedRun([&]() { jit.Run(); });
REQUIRE(jit.GetRegister(0) == 69);

jit.SetPC(100);
env.ticks_left = 4;
jit.Run();
CheckedRun([&]() { jit.Run(); });
REQUIRE(jit.GetRegister(0) == 69);
}

@ -77,37 +77,37 @@ TEST_CASE("ensure fast dispatch entry is cleared even when a block does not have

jit.SetPC(0);
env.ticks_left = 4;
jit.Run();
CheckedRun([&]() { jit.Run(); });
REQUIRE(jit.GetRegister(0) == 42);

jit.SetPC(0);
env.ticks_left = 4;
jit.Run();
CheckedRun([&]() { jit.Run(); });
REQUIRE(jit.GetRegister(0) == 42);

jit.InvalidateCacheRange(8, 4);

jit.SetPC(0);
env.ticks_left = 4;
jit.Run();
CheckedRun([&]() { jit.Run(); });
REQUIRE(jit.GetRegister(0) == 42);

env.code_mem[2] = 0xd28008a0; // MOV X0, 69

jit.SetPC(0);
env.ticks_left = 4;
jit.Run();
CheckedRun([&]() { jit.Run(); });
REQUIRE(jit.GetRegister(0) == 42);

jit.InvalidateCacheRange(8, 4);

jit.SetPC(0);
env.ticks_left = 4;
jit.Run();
CheckedRun([&]() { jit.Run(); });
REQUIRE(jit.GetRegister(0) == 69);

jit.SetPC(0);
env.ticks_left = 4;
jit.Run();
CheckedRun([&]() { jit.Run(); });
REQUIRE(jit.GetRegister(0) == 69);
}
@ -8,13 +8,11 @@

#pragma once

#include <array>
#include <map>

#include <unordered_map>
#include "dynarmic/common/assert.h"
#include "dynarmic/common/common_types.h"

#include "dynarmic/interface/A64/a64.h"
#include "../native/testenv.h"

using Vector = Dynarmic::A64::Vector;

@ -26,7 +24,7 @@ public:
u64 code_mem_start_address = 0;
std::vector<u32> code_mem;

std::map<u64, u8> modified_memory;
std::unordered_map<u64, u8> modified_memory;
std::vector<std::string> interrupts;

bool IsInCodeMem(u64 vaddr) const {

@ -133,9 +131,9 @@ class A64FastmemTestEnv final : public Dynarmic::A64::UserCallbacks {
public:
u64 ticks_left = 0;
char* backing_memory = nullptr;
bool ignore_invalid_insn = false;

explicit A64FastmemTestEnv(char* addr)
: backing_memory(addr) {}
explicit A64FastmemTestEnv(char* addr) : backing_memory(addr) {}

template<typename T>
T read(u64 vaddr) {

@ -205,7 +203,7 @@ public:
return true;
}

void InterpreterFallback(u64 pc, size_t num_instructions) override { ASSERT_MSG(false, "InterpreterFallback({:016x}, {})", pc, num_instructions); }
void InterpreterFallback(u64 pc, size_t num_instructions) override { ASSERT_MSG(ignore_invalid_insn, "InterpreterFallback({:016x}, {})", pc, num_instructions); }

void CallSVC(std::uint32_t swi) override { ASSERT_MSG(false, "CallSVC({})", swi); }
|
@ -13,7 +13,7 @@
|
|||
|
||||
using namespace Dynarmic;
|
||||
|
||||
TEST_CASE("Unicorn: Sanity test", "[a64]") {
|
||||
TEST_CASE("Unicorn: Sanity test", "[a64][unicorn]") {
|
||||
A64TestEnv env;
|
||||
|
||||
env.code_mem.emplace_back(0x8b020020); // ADD X0, X1, X2
|
||||
|
@ -39,7 +39,7 @@ TEST_CASE("Unicorn: Sanity test", "[a64]") {
|
|||
REQUIRE(unicorn.GetPC() == 4);
|
||||
}
|
||||
|
||||
TEST_CASE("Unicorn: Ensure 0xFFFF'FFFF'FFFF'FFFF is readable", "[a64]") {
|
||||
TEST_CASE("Unicorn: Ensure 0xFFFF'FFFF'FFFF'FFFF is readable", "[a64][unicorn]") {
|
||||
A64TestEnv env;
|
||||
|
||||
env.code_mem.emplace_back(0x385fed99); // LDRB W25, [X12, #0xfffffffffffffffe]!
|
||||
|
@ -59,7 +59,7 @@ TEST_CASE("Unicorn: Ensure 0xFFFF'FFFF'FFFF'FFFF is readable", "[a64]") {
|
|||
REQUIRE(unicorn.GetPC() == 4);
|
||||
}
|
||||
|
||||
TEST_CASE("Unicorn: Ensure is able to read across page boundaries", "[a64]") {
|
||||
TEST_CASE("Unicorn: Ensure is able to read across page boundaries", "[a64][unicorn]") {
|
||||
A64TestEnv env;
|
||||
|
||||
env.code_mem.emplace_back(0xb85f93d9); // LDUR W25, [X30, #0xfffffffffffffff9]
|
||||
|
|
|
@ -29,6 +29,7 @@ if ("A64" IN_LIST DYNARMIC_FRONTENDS)
A64/fp_min_max.cpp
A64/misaligned_page_table.cpp
A64/test_invalidation.cpp
A64/real_world.cpp
A64/testenv.h
)
endif()

@ -66,11 +67,14 @@ endif()

if ("x86_64" IN_LIST ARCHITECTURE)
target_link_libraries(dynarmic_tests PRIVATE xbyak::xbyak)

target_architecture_specific_sources(dynarmic_tests "x86_64"
x64_cpu_info.cpp
)

target_architecture_specific_sources(dynarmic_tests "x86_64"
native/preserve_xmm.cpp
)

if (NOT MSVC AND NOT DYNARMIC_MULTIARCH_BUILD)
target_sources(dynarmic_tests PRIVATE
rsqrt_test.cpp

@ -129,4 +133,6 @@ target_include_directories(dynarmic_tests PRIVATE . ../src)
target_compile_options(dynarmic_tests PRIVATE ${DYNARMIC_CXX_FLAGS})
target_compile_definitions(dynarmic_tests PRIVATE FMT_USE_USER_DEFINED_LITERALS=1)

target_compile_options(dynarmic_tests PRIVATE -mavx2)

add_test(dynarmic_tests dynarmic_tests --durations yes)

64 src/dynarmic/tests/native/preserve_xmm.cpp (new file)
@ -0,0 +1,64 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later

#include <catch2/catch_test_macros.hpp>
#include <oaknut/oaknut.hpp>
#include <immintrin.h>

#include "../A64/testenv.h"
#include "dynarmic/common/fp/fpsr.h"
#include "dynarmic/interface/exclusive_monitor.h"

using namespace Dynarmic;
using namespace oaknut::util;

TEST_CASE("X86: Preserve XMM regs", "[x86]") {
A64TestEnv env;
A64::UserConfig jit_user_config{};
jit_user_config.callbacks = &env;
A64::Jit jit{jit_user_config};

oaknut::VectorCodeGenerator code{env.code_mem, nullptr};
code.SMINP(V2.S2(), V0.S2(), V1.S2());
code.UMINP(V3.S2(), V0.S2(), V1.S2());
code.SMINP(V4.S4(), V0.S4(), V1.S4());
code.UMINP(V5.S4(), V0.S4(), V1.S4());
code.SMAXP(V6.S2(), V0.S2(), V1.S2());
code.UMAXP(V7.S2(), V0.S2(), V1.S2());
code.SMAXP(V8.S4(), V0.S4(), V1.S4());
code.UMAXP(V9.S4(), V0.S4(), V1.S4());

constexpr std::array<Vector, 12> vectors = {
// initial input vectors [0-1]
Vector{0x00000003'00000002, 0xF1234567'01234567},
Vector{0x80000000'7FFFFFFF, 0x76543210'76543209},
// expected output vectors [2-9]
Vector{0x80000000'00000002, 0},
Vector{0x7FFFFFFF'00000002, 0},
Vector{0xF1234567'00000002, 0x76543209'80000000},
Vector{0x01234567'00000002, 0x76543209'7FFFFFFF},
Vector{0x7FFFFFFF'00000003, 0},
Vector{0x80000000'00000003, 0},
Vector{0x01234567'00000003, 0x76543210'7FFFFFFF},
Vector{0xF1234567'00000003, 0x76543210'80000000},
// input vectors with elements swapped pairwise [10-11]
Vector{0x00000002'00000003, 0x01234567'F1234567},
Vector{0x7FFFFFFF'80000000, 0x76543209'76543210},
};

jit.SetPC(0);
jit.SetVector(0, vectors[0]);
jit.SetVector(1, vectors[1]);

env.ticks_left = env.code_mem.size();
CheckedRun([&]() { jit.Run(); });

CHECK(jit.GetVector(2) == vectors[2]);
CHECK(jit.GetVector(3) == vectors[3]);
CHECK(jit.GetVector(4) == vectors[4]);
CHECK(jit.GetVector(5) == vectors[5]);
CHECK(jit.GetVector(6) == vectors[6]);
CHECK(jit.GetVector(7) == vectors[7]);
CHECK(jit.GetVector(8) == vectors[8]);
CHECK(jit.GetVector(9) == vectors[9]);
}
50 src/dynarmic/tests/native/testenv.h (new file)
@ -0,0 +1,50 @@
#pragma once

#include <bit>      // std::bit_cast
#include <cstdint>  // std::uint64_t
#include <catch2/catch_test_macros.hpp>
#ifdef __AVX__
#include <immintrin.h>
#endif
template<typename F>
void CheckedRun(F&& fn) {
#ifdef __AVX__
__m256i xmm0 = _mm256_set_epi32(0, 0, 0, 0, 0, 0, 0, 0);
__m256i xmm1 = _mm256_set_epi32(1, 1, 0, 0, 0, 0, 0, 1);
__m256i xmm2 = _mm256_set_epi32(2, 2, 0, 0, 0, 0, 0, 2);
__m256i xmm3 = _mm256_set_epi32(3, 3, 0, 0, 0, 0, 0, 3);
__m256i xmm4 = _mm256_set_epi32(4, 4, 0, 0, 0, 0, 0, 4);
__m256i xmm5 = _mm256_set_epi32(4, 4, 0, 0, 0, 0, 0, 5);
__m256i xmm6 = _mm256_set_epi32(4, 4, 0, 0, 0, 0, 0, 6);
__m256i xmm7 = _mm256_set_epi32(4, 4, 0, 0, 0, 0, 0, 7);
__m256i xmm8 = _mm256_set_epi32(4, 4, 0, 0, 0, 0, 0, 8);
__m256i xmm9 = _mm256_set_epi32(4, 4, 0, 0, 0, 0, 0, 9);
__m256i xmm10 = _mm256_set_epi32(4, 4, 0, 0, 0, 0, 0, 10);
__m256i xmm11 = _mm256_set_epi32(4, 4, 0, 0, 0, 0, 0, 11);
asm volatile(""
: "+x"(xmm0), "+x"(xmm1), "+x"(xmm2), "+x"(xmm3)
, "+x"(xmm4), "+x"(xmm5), "+x"(xmm6), "+x"(xmm7)
, "+x"(xmm8), "+x"(xmm9), "+x"(xmm10), "+x"(xmm11)
:
);
fn();
asm volatile(""
: "+x"(xmm0), "+x"(xmm1), "+x"(xmm2), "+x"(xmm3)
, "+x"(xmm4), "+x"(xmm5), "+x"(xmm6), "+x"(xmm7)
, "+x"(xmm8), "+x"(xmm9), "+x"(xmm10), "+x"(xmm11)
:
);
CHECK(std::bit_cast<std::uint64_t>(xmm0[0]) == 0);
CHECK(std::bit_cast<std::uint64_t>(xmm1[0]) == 1);
CHECK(std::bit_cast<std::uint64_t>(xmm2[0]) == 2);
CHECK(std::bit_cast<std::uint64_t>(xmm3[0]) == 3);
CHECK(std::bit_cast<std::uint64_t>(xmm4[0]) == 4);
CHECK(std::bit_cast<std::uint64_t>(xmm5[0]) == 5);
CHECK(std::bit_cast<std::uint64_t>(xmm6[0]) == 6);
CHECK(std::bit_cast<std::uint64_t>(xmm7[0]) == 7);
CHECK(std::bit_cast<std::uint64_t>(xmm8[0]) == 8);
CHECK(std::bit_cast<std::uint64_t>(xmm9[0]) == 9);
CHECK(std::bit_cast<std::uint64_t>(xmm10[0]) == 10);
CHECK(std::bit_cast<std::uint64_t>(xmm11[0]) == 11);
#else
fn();
#endif
}
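The "+x" constraints pin each __m256i into a register across fn(), so if the JIT clobbers a callee-saved XMM/YMM register without restoring it, the CHECKs after the call fail. Any callable can be wrapped, as in this minimal sketch:

```cpp
#include <cstdint>

// Uses CheckedRun from native/testenv.h above; the lambda body stands in for jit.Run().
void Example() {
    std::uint64_t counter = 0;
    CheckedRun([&] {
        counter += 1;  // any guest-code invocation goes here
    });
}
```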
@ -173,7 +173,7 @@ void A64Unicorn::InterruptHook(uc_engine* uc, u32 int_number, void* user_data) {
auto* this_ = static_cast<A64Unicorn*>(user_data);

u32 esr;
CHECKED(uc_reg_read(uc, UC_ARM64_REG_ESR, &esr));
//CHECKED(uc_reg_read(uc, UC_ARM64_REG_ESR_EL0, &esr));

auto ec = esr >> 26;
auto iss = esr & 0xFFFFFF;

@ -28,11 +28,10 @@ public:
for (u64 page = page_start; page < page_end; ++page) {
int& value = page_table[page];
value += delta;
if (value < 0) {
throw std::logic_error{"negative page"};
}
if (value == 0) {
page_table.erase(page);
} else if (value < 0) {
throw std::logic_error{"negative page"};
}
}
}

@ -143,6 +143,10 @@ public:
return (flags & property_flags) == flags && (type_mask & shifted_memory_type) != 0;
}

[[nodiscard]] bool IsEmpty() const noexcept {
return commits.empty();
}

private:
[[nodiscard]] static constexpr u32 ShiftType(u32 type) {
return 1U << type;
@ -290,36 +294,117 @@ MemoryCommit MemoryAllocator::Commit(const VkMemoryRequirements& requirements, M
if (std::optional<MemoryCommit> commit = TryCommit(requirements, flags)) {
return std::move(*commit);
}
// Commit has failed, allocate more memory.
const u64 chunk_size = AllocationChunkSize(requirements.size);
if (!TryAllocMemory(flags, type_mask, chunk_size)) {
// TODO(Rodrigo): Handle out of memory situations in some way like flushing to guest memory.
throw vk::Exception(VK_ERROR_OUT_OF_DEVICE_MEMORY);

// Commit has failed, try progressive fallback strategy
u64 chunk_size = AllocationChunkSize(requirements.size);
const u64 minimum_size = std::max<u64>(requirements.size, 4ULL << 20); // 4MB minimum

// try 1: Try allocating with original chunk size
if (TryAllocMemory(flags, type_mask, chunk_size)) {
return TryCommit(requirements, flags).value();
}
// Commit again, this time it won't fail since there's a fresh allocation above.
// If it does, there's a bug.
return TryCommit(requirements, flags).value();

// try 2: Clean up empty allocations and try again
bool cleaned_up = false;
for (auto it = allocations.begin(); it != allocations.end();) {
if ((*it)->IsEmpty()) {
it = allocations.erase(it);
cleaned_up = true;
} else {
++it;
}
}

if (cleaned_up && TryAllocMemory(flags, type_mask, chunk_size)) {
LOG_INFO(Render_Vulkan, "Memory allocation succeeded after cleanup");
return TryCommit(requirements, flags).value();
}

// try 3: Progressive size reduction with cleanup between attempts
while (chunk_size > minimum_size) {
chunk_size >>= 1; // Halve the chunk size
chunk_size = std::max(chunk_size, minimum_size);

if (TryAllocMemory(flags, type_mask, chunk_size)) {
LOG_WARNING(Render_Vulkan, "Memory allocation succeeded with reduced chunk size: {} MB",
chunk_size >> 20);
return TryCommit(requirements, flags).value();
}

// Clean up again between size reduction attempts
for (auto it = allocations.begin(); it != allocations.end();) {
if ((*it)->IsEmpty()) {
it = allocations.erase(it);
} else {
++it;
}
}
}

// try 4: Try minimum size allocation
if (chunk_size <= minimum_size && TryAllocMemory(flags, type_mask, minimum_size)) {
LOG_WARNING(Render_Vulkan, "Memory allocation succeeded with minimum size: {} MB",
minimum_size >> 20);
return TryCommit(requirements, flags).value();
}
// try 5: Fallback to non-device-local memory if original was device-local
if (flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) {
const VkMemoryPropertyFlags fallback_flags = flags & ~VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;

// Try with original chunk size first
u64 fallback_chunk_size = AllocationChunkSize(requirements.size);
if (TryAllocMemory(fallback_flags, type_mask, fallback_chunk_size)) {
if (auto commit = TryCommit(requirements, fallback_flags)) {
LOG_WARNING(Render_Vulkan, "Falling back to non-device-local memory due to OOM");
return std::move(*commit);
}
}

// Progressive size reduction for non-device-local memory
while (fallback_chunk_size > minimum_size) {
fallback_chunk_size >>= 1;
fallback_chunk_size = std::max(fallback_chunk_size, minimum_size);

if (TryAllocMemory(fallback_flags, type_mask, fallback_chunk_size)) {
if (auto commit = TryCommit(requirements, fallback_flags)) {
LOG_WARNING(Render_Vulkan,
"Falling back to non-device-local memory with reduced size: {} MB",
fallback_chunk_size >> 20);
return std::move(*commit);
}
}
}
}

LOG_CRITICAL(Render_Vulkan, "Vulkan memory allocation failed - exhausted all strategies");
throw vk::Exception(VK_ERROR_OUT_OF_DEVICE_MEMORY);
}
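The empty-allocation sweep above appears twice inside Commit; a hedged sketch of hoisting it into a helper, with a hypothetical name and assuming the vector-of-unique_ptr allocations member shown in this diff:

```cpp
#include <memory>
#include <vector>

// Hypothetical helper: erase allocations whose commits are all gone.
// Returns true when at least one allocation was released.
template <typename Allocation>
bool ReleaseEmptyAllocations(std::vector<std::unique_ptr<Allocation>>& allocations) {
    bool cleaned_up = false;
    for (auto it = allocations.begin(); it != allocations.end();) {
        if ((*it)->IsEmpty()) {
            it = allocations.erase(it);
            cleaned_up = true;
        } else {
            ++it;
        }
    }
    return cleaned_up;
}
```

With C++20, std::erase_if(allocations, predicate) would express the same sweep more tersely.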
bool MemoryAllocator::TryAllocMemory(VkMemoryPropertyFlags flags, u32 type_mask, u64 size) {
const u32 type = FindType(flags, type_mask).value();
const auto type_opt = FindType(flags, type_mask);
if (!type_opt) {
return false;
}

// Adreno requires 4KB alignment (subject to review)
const u64 aligned_size = (device.GetDriverID() == VK_DRIVER_ID_QUALCOMM_PROPRIETARY) ?
Common::AlignUp(size, 4096) :
size;

vk::DeviceMemory memory = device.GetLogical().TryAllocateMemory({
.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
.pNext = nullptr,
.allocationSize = size,
.memoryTypeIndex = type,
.allocationSize = aligned_size,
.memoryTypeIndex = *type_opt,
});

if (!memory) {
if ((flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) != 0) {
// Try to allocate non device local memory
return TryAllocMemory(flags & ~VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, type_mask, size);
} else {
// RIP
return false;
}
return false;
}

allocations.push_back(
std::make_unique<MemoryAllocation>(this, std::move(memory), flags, size, type));
std::make_unique<MemoryAllocation>(this, std::move(memory), flags, aligned_size, *type_opt));
return true;
}

@ -331,11 +416,25 @@ void MemoryAllocator::ReleaseMemory(MemoryAllocation* alloc) {

std::optional<MemoryCommit> MemoryAllocator::TryCommit(const VkMemoryRequirements& requirements,
VkMemoryPropertyFlags flags) {
// Conservative, spec-compliant alignment for suballocation
VkDeviceSize eff_align = requirements.alignment;
const auto& limits = device.GetPhysical().GetProperties().limits;
if ((flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) &&
!(flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
// Non-coherent memory must be invalidated on atom boundary
if (limits.nonCoherentAtomSize > eff_align) eff_align = limits.nonCoherentAtomSize;
}
// Separate buffers to avoid stalls on tilers
if (buffer_image_granularity > eff_align) {
eff_align = buffer_image_granularity;
}
eff_align = std::bit_ceil(eff_align);

for (auto& allocation : allocations) {
if (!allocation->IsCompatible(flags, requirements.memoryTypeBits)) {
continue;
}
if (auto commit = allocation->Commit(requirements.size, requirements.alignment)) {
if (auto commit = allocation->Commit(requirements.size, eff_align)) {
return commit;
}
}
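Worked through with plausible numbers: alignment 16, nonCoherentAtomSize 64 on HOST_VISIBLE non-coherent memory, and bufferImageGranularity 1024 widen the effective alignment to 1024; std::bit_ceil only changes the result when a driver reports a non-power-of-two limit. A standalone sketch of the same computation:

```cpp
#include <algorithm>
#include <bit>
#include <cstdint>

// Mirrors TryCommit's alignment widening with plain integers (illustrative).
constexpr std::uint64_t EffectiveAlignment(std::uint64_t alignment,
                                           std::uint64_t non_coherent_atom_size,  // 0 if coherent
                                           std::uint64_t buffer_image_granularity) {
    std::uint64_t eff = alignment;
    eff = std::max(eff, non_coherent_atom_size);
    eff = std::max(eff, buffer_image_granularity);
    return std::bit_ceil(eff);  // round up to a power of two
}

static_assert(EffectiveAlignment(16, 64, 1024) == 1024);
static_assert(std::bit_ceil(std::uint64_t{96}) == 128);
```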