Compare commits
25 commits
master
...
liz-dynarm
Author | SHA1 | Date | |
---|---|---|---|
65309cad44 | |||
98885623cb | |||
e7be3e1770 | |||
e6f6c1d325 | |||
18d3b95ce8 | |||
d188725514 | |||
1bf7373c79 | |||
ab4d7ed53b | |||
69887c91bb | |||
256cdca636 | |||
da7dc2885a | |||
53c705c09e | |||
f19a66d2d3 | |||
bdf1d80544 | |||
4622d869e3 | |||
5cd245d445 | |||
29582620a7 | |||
b403aedab9 | |||
f43bc8e15c | |||
25e8088e1f | |||
cb7a67f6d5 | |||
6eb2de327b | |||
4b1b30712c | |||
4f5ab82eca | |||
782f9adc25 |
18 changed files with 324 additions and 159 deletions
|
@ -168,6 +168,8 @@ set(YUZU_QT_MIRROR "" CACHE STRING "What mirror to use for downloading the bundl
|
||||||
|
|
||||||
option(ENABLE_CUBEB "Enables the cubeb audio backend" ON)
|
option(ENABLE_CUBEB "Enables the cubeb audio backend" ON)
|
||||||
|
|
||||||
|
# See https://github.com/llvm/llvm-project/issues/123946
|
||||||
|
# OpenBSD va_list doesn't play nice with precompiled headers
|
||||||
set(EXT_DEFAULT OFF)
|
set(EXT_DEFAULT OFF)
|
||||||
if (MSVC OR ANDROID)
|
if (MSVC OR ANDROID)
|
||||||
set(EXT_DEFAULT ON)
|
set(EXT_DEFAULT ON)
|
||||||
|
@ -338,7 +340,7 @@ if (YUZU_LEGACY)
|
||||||
add_compile_definitions(YUZU_LEGACY)
|
add_compile_definitions(YUZU_LEGACY)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if (ARCHITECTURE_arm64 AND (ANDROID OR PLATFORM_LINUX))
|
if (ARCHITECTURE_arm64 AND (ANDROID OR APPLE OR PLATFORM_LINUX))
|
||||||
set(HAS_NCE 1)
|
set(HAS_NCE 1)
|
||||||
add_compile_definitions(HAS_NCE=1)
|
add_compile_definitions(HAS_NCE=1)
|
||||||
endif()
|
endif()
|
||||||
|
@ -483,7 +485,6 @@ if (YUZU_USE_CPM)
|
||||||
|
|
||||||
# Opus
|
# Opus
|
||||||
AddJsonPackage(opus)
|
AddJsonPackage(opus)
|
||||||
|
|
||||||
if (Opus_ADDED)
|
if (Opus_ADDED)
|
||||||
if (MSVC AND CXX_CLANG)
|
if (MSVC AND CXX_CLANG)
|
||||||
target_compile_options(opus PRIVATE
|
target_compile_options(opus PRIVATE
|
||||||
|
|
|
@ -1,6 +1,8 @@
|
||||||
# SPDX-FileCopyrightText: Copyright 2025 crueter
|
# SPDX-FileCopyrightText: Copyright 2025 crueter
|
||||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
|
|
||||||
|
cmake_minimum_required(VERSION 3.22)
|
||||||
|
|
||||||
if (MSVC OR ANDROID)
|
if (MSVC OR ANDROID)
|
||||||
set(BUNDLED_DEFAULT ON)
|
set(BUNDLED_DEFAULT ON)
|
||||||
else()
|
else()
|
||||||
|
@ -13,7 +15,6 @@ option(CPMUTIL_FORCE_BUNDLED
|
||||||
option(CPMUTIL_FORCE_SYSTEM
|
option(CPMUTIL_FORCE_SYSTEM
|
||||||
"Force system packages for all CPM dependencies (NOT RECOMMENDED)" OFF)
|
"Force system packages for all CPM dependencies (NOT RECOMMENDED)" OFF)
|
||||||
|
|
||||||
cmake_minimum_required(VERSION 3.22)
|
|
||||||
include(CPM)
|
include(CPM)
|
||||||
|
|
||||||
# cpmfile parsing
|
# cpmfile parsing
|
||||||
|
|
8
docs/CrossCompileARM64.md
Normal file
8
docs/CrossCompileARM64.md
Normal file
|
@ -0,0 +1,8 @@
|
||||||
|
# Cross compile ARM64
|
||||||
|
|
||||||
|
A painless guide for cross-compiling for ARM64 (or for testing NCE) from an x86_64 system without polluting your main system.
|
||||||
|
|
||||||
|
- Install QEMU: `sudo pkg install qemu`
|
||||||
|
- Download Debian 13: `wget https://cdimage.debian.org/debian-cd/current/arm64/iso-cd/debian-13.0.0-arm64-netinst.iso`
|
||||||
|
- Create a system disk: `qemu-img create -f qcow2 debian-13-arm64-ci.qcow2 30G`
|
||||||
|
- Run the VM: `qemu-system-aarch64 -M virt -m 2G -cpu max -bios /usr/local/share/qemu/edk2-aarch64-code.fd -drive if=none,file=debian-13.0.0-arm64-netinst.iso,format=raw,id=cdrom -device scsi-cd,drive=cdrom -drive if=none,file=debian-13-arm64-ci.qcow2,id=hd0,format=qcow2 -device virtio-blk-device,drive=hd0 -device virtio-gpu-pci -device usb-ehci -device usb-kbd -device intel-hda -device hda-output -nic user,model=virtio-net-pci`
|
|
@ -276,4 +276,13 @@ if(YUZU_USE_PRECOMPILED_HEADERS)
|
||||||
target_precompile_headers(common PRIVATE precompiled_headers.h)
|
target_precompile_headers(common PRIVATE precompiled_headers.h)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
# IOPS (needed for power state) requires linking to IOKit
|
||||||
|
if (APPLE)
|
||||||
|
find_library(IOKIT_LIBRARY IOKit)
|
||||||
|
if(NOT IOKIT_LIBRARY)
|
||||||
|
message(FATAL_ERROR "IOKit not found, did you install XCode tools?")
|
||||||
|
endif()
|
||||||
|
target_link_libraries(common PRIVATE ${IOKIT_LIBRARY})
|
||||||
|
endif()
|
||||||
|
|
||||||
create_target_directory_groups(common)
|
create_target_directory_groups(common)
|
||||||
|
|
|
@ -30,6 +30,7 @@
|
||||||
#include <sys/random.h>
|
#include <sys/random.h>
|
||||||
#include <mach/vm_map.h>
|
#include <mach/vm_map.h>
|
||||||
#include <mach/mach.h>
|
#include <mach/mach.h>
|
||||||
|
#include <map>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// FreeBSD
|
// FreeBSD
|
||||||
|
@ -399,18 +400,65 @@ private:
|
||||||
#ifdef ARCHITECTURE_arm64
|
#ifdef ARCHITECTURE_arm64
|
||||||
|
|
||||||
static void* ChooseVirtualBase(size_t virtual_size) {
|
static void* ChooseVirtualBase(size_t virtual_size) {
|
||||||
constexpr uintptr_t Map39BitSize = (1ULL << 39);
|
|
||||||
constexpr uintptr_t Map36BitSize = (1ULL << 36);
|
constexpr uintptr_t Map36BitSize = (1ULL << 36);
|
||||||
|
#ifdef __APPLE__
|
||||||
// This is not a cryptographic application, we just want something random.
|
// TODO: Fatal flaw, regions may change if map inserts elements, very rare, but MAY HAPPEN!
|
||||||
std::mt19937_64 rng;
|
std::map<u64, u64> aspace_list;
|
||||||
|
kern_return_t krc = KERN_SUCCESS;
|
||||||
|
vm_address_t address = 0;
|
||||||
|
vm_size_t r_size = 0;
|
||||||
|
uint32_t depth = 1;
|
||||||
|
do {
|
||||||
|
struct vm_region_submap_info_64 info;
|
||||||
|
mach_msg_type_number_t count = VM_REGION_SUBMAP_INFO_COUNT_64;
|
||||||
|
krc = vm_region_recurse_64(mach_task_self(), &address, &r_size, &depth, (vm_region_info_64_t)&info, &count);
|
||||||
|
if (krc == KERN_INVALID_ADDRESS)
|
||||||
|
break;
|
||||||
|
if (info.is_submap){
|
||||||
|
depth++;
|
||||||
|
} else {
|
||||||
|
aspace_list.insert({ u64(address), u64(r_size) });
|
||||||
|
address += r_size;
|
||||||
|
}
|
||||||
|
} while(1);
|
||||||
|
for (auto it = aspace_list.begin(); it != aspace_list.end(); it++) {
|
||||||
|
auto const next = std::next(it);
|
||||||
|
// properties of hole
|
||||||
|
auto const addr = it->first + it->second;
|
||||||
|
auto const size = (next != aspace_list.end()) ? next->first - addr : std::numeric_limits<u64>::max() - addr;
|
||||||
|
ASSERT(next == aspace_list.end() || it->first < next->first);
|
||||||
|
if (size > virtual_size && uintptr_t(addr + size) >= Map36BitSize) {
|
||||||
|
// valid address for NCE
|
||||||
|
if (uintptr_t(addr) >= Map36BitSize) { // common case: hole starts at or above the 36-bit boundary
|
||||||
|
if (size >= virtual_size) {
|
||||||
|
void* p = mmap(reinterpret_cast<void*>(addr), virtual_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
|
||||||
|
if (p == MAP_FAILED)
|
||||||
|
continue;
|
||||||
|
return p;
|
||||||
|
}
|
||||||
|
// skip
|
||||||
|
} else { // edge case: hole starts below the 36-bit boundary (and extends to or past it)
|
||||||
|
auto const rem_size = size - (Map36BitSize - uintptr_t(addr));
|
||||||
|
if (rem_size >= virtual_size) {
|
||||||
|
void* p = mmap(reinterpret_cast<void*>(Map36BitSize), virtual_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
|
||||||
|
if (p == MAP_FAILED)
|
||||||
|
continue;
|
||||||
|
return p;
|
||||||
|
}
|
||||||
|
// skip
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
UNREACHABLE();
|
||||||
|
#else
|
||||||
|
constexpr uintptr_t Map39BitSize = (1ULL << 39);
|
||||||
// We want to ensure we are allocating at an address aligned to the L2 block size.
|
// We want to ensure we are allocating at an address aligned to the L2 block size.
|
||||||
// For Qualcomm devices, we must also allocate memory above 36 bits.
|
// For Qualcomm devices, we must also allocate memory above 36 bits.
|
||||||
const size_t lower = Map36BitSize / HugePageSize;
|
const size_t lower = Map36BitSize / HugePageSize;
|
||||||
const size_t upper = (Map39BitSize - virtual_size) / HugePageSize;
|
const size_t upper = (Map39BitSize - virtual_size) / HugePageSize;
|
||||||
const size_t range = upper - lower;
|
const size_t range = upper - lower;
|
||||||
|
// This is not a cryptographic application, we just want something random.
|
||||||
|
std::mt19937_64 rng;
|
||||||
// Try up to 64 times to allocate memory at random addresses in the range.
|
// Try up to 64 times to allocate memory at random addresses in the range.
|
||||||
for (int i = 0; i < 64; i++) {
|
for (int i = 0; i < 64; i++) {
|
||||||
// Calculate a possible location.
|
// Calculate a possible location.
|
||||||
|
@ -434,6 +482,7 @@ static void* ChooseVirtualBase(size_t virtual_size) {
|
||||||
}
|
}
|
||||||
|
|
||||||
return MAP_FAILED;
|
return MAP_FAILED;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
@ -500,9 +549,10 @@ class HostMemory::Impl {
|
||||||
public:
|
public:
|
||||||
explicit Impl(size_t backing_size_, size_t virtual_size_)
|
explicit Impl(size_t backing_size_, size_t virtual_size_)
|
||||||
: backing_size{backing_size_}, virtual_size{virtual_size_} {
|
: backing_size{backing_size_}, virtual_size{virtual_size_} {
|
||||||
long page_size = sysconf(_SC_PAGESIZE);
|
// TODO: Solve all 4k paging issues
|
||||||
ASSERT_MSG(page_size == 0x1000, "page size {:#x} is incompatible with 4K paging",
|
//long page_size = sysconf(_SC_PAGESIZE);
|
||||||
page_size);
|
//ASSERT_MSG(page_size == 0x1000, "page size {:#x} is incompatible with 4K paging",
|
||||||
|
// page_size);
|
||||||
// Backing memory initialization
|
// Backing memory initialization
|
||||||
#if defined(__sun__) || defined(__HAIKU__) || defined(__NetBSD__) || defined(__DragonFly__)
|
#if defined(__sun__) || defined(__HAIKU__) || defined(__NetBSD__) || defined(__DragonFly__)
|
||||||
fd = shm_open_anon(O_RDWR | O_CREAT | O_EXCL | O_NOFOLLOW, 0600);
|
fd = shm_open_anon(O_RDWR | O_CREAT | O_EXCL | O_NOFOLLOW, 0600);
|
||||||
|
|
|
@ -10,27 +10,16 @@
|
||||||
|
|
||||||
namespace Common {
|
namespace Common {
|
||||||
|
|
||||||
|
#ifdef __ANDROID__
|
||||||
template <typename T>
|
template <typename T>
|
||||||
T* LookupLibcSymbol(const char* name) {
|
T* LookupLibcSymbol(const char* name) {
|
||||||
#if defined(__BIONIC__)
|
|
||||||
Common::DynamicLibrary provider("libc.so");
|
Common::DynamicLibrary provider("libc.so");
|
||||||
if (!provider.IsOpen()) {
|
ASSERT_MSG(provider.IsOpen(), "Failed to open libc!");
|
||||||
UNREACHABLE_MSG("Failed to open libc!");
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
// For other operating environments, we assume the symbol is not overridden.
|
|
||||||
const char* base = nullptr;
|
|
||||||
Common::DynamicLibrary provider(base);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
void* sym = provider.GetSymbolAddress(name);
|
void* sym = provider.GetSymbolAddress(name);
|
||||||
if (sym == nullptr) {
|
if (sym == nullptr) {
|
||||||
sym = dlsym(RTLD_DEFAULT, name);
|
sym = dlsym(RTLD_DEFAULT, name);
|
||||||
}
|
}
|
||||||
if (sym == nullptr) {
|
ASSERT_MSG(sym != nullptr, "Unable to find symbol {}!", name);
|
||||||
UNREACHABLE_MSG("Unable to find symbol {}!", name);
|
|
||||||
}
|
|
||||||
|
|
||||||
return reinterpret_cast<T*>(sym);
|
return reinterpret_cast<T*>(sym);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -38,5 +27,10 @@ int SigAction(int signum, const struct sigaction* act, struct sigaction* oldact)
|
||||||
static auto libc_sigaction = LookupLibcSymbol<decltype(sigaction)>("sigaction");
|
static auto libc_sigaction = LookupLibcSymbol<decltype(sigaction)>("sigaction");
|
||||||
return libc_sigaction(signum, act, oldact);
|
return libc_sigaction(signum, act, oldact);
|
||||||
}
|
}
|
||||||
|
#else
|
||||||
|
int SigAction(int signum, const struct sigaction* act, struct sigaction* oldact) {
|
||||||
|
return sigaction(signum, act, oldact);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
} // namespace Common
|
} // namespace Common
|
||||||
|
|
|
@ -13,12 +13,14 @@
|
||||||
#include "core/arm/nce/patcher.h"
|
#include "core/arm/nce/patcher.h"
|
||||||
#include "core/core.h"
|
#include "core/core.h"
|
||||||
#include "core/memory.h"
|
#include "core/memory.h"
|
||||||
|
|
||||||
#include "core/hle/kernel/k_process.h"
|
#include "core/hle/kernel/k_process.h"
|
||||||
|
|
||||||
|
#include "dynarmic/common/context.h"
|
||||||
|
|
||||||
#include <signal.h>
|
#include <signal.h>
|
||||||
#include <sys/syscall.h>
|
#include <sys/syscall.h>
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
|
#include <pthread.h>
|
||||||
|
|
||||||
namespace Core {
|
namespace Core {
|
||||||
|
|
||||||
|
@ -33,95 +35,67 @@ static_assert(offsetof(NativeExecutionParameters, native_context) == TpidrEl0Nat
|
||||||
static_assert(offsetof(NativeExecutionParameters, lock) == TpidrEl0Lock);
|
static_assert(offsetof(NativeExecutionParameters, lock) == TpidrEl0Lock);
|
||||||
static_assert(offsetof(NativeExecutionParameters, magic) == TpidrEl0TlsMagic);
|
static_assert(offsetof(NativeExecutionParameters, magic) == TpidrEl0TlsMagic);
|
||||||
|
|
||||||
fpsimd_context* GetFloatingPointState(mcontext_t& host_ctx) {
|
|
||||||
_aarch64_ctx* header = reinterpret_cast<_aarch64_ctx*>(&host_ctx.__reserved);
|
|
||||||
while (header->magic != FPSIMD_MAGIC) {
|
|
||||||
header = reinterpret_cast<_aarch64_ctx*>(reinterpret_cast<char*>(header) + header->size);
|
|
||||||
}
|
|
||||||
return reinterpret_cast<fpsimd_context*>(header);
|
|
||||||
}
|
|
||||||
|
|
||||||
using namespace Common::Literals;
|
using namespace Common::Literals;
|
||||||
constexpr u32 StackSize = 128_KiB;
|
constexpr u32 StackSize = 128_KiB;
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
void* ArmNce::RestoreGuestContext(void* raw_context) {
|
void* ArmNce::RestoreGuestContext(void* raw_context) {
|
||||||
// Retrieve the host context.
|
CTX_DECLARE(raw_context);
|
||||||
auto& host_ctx = static_cast<ucontext_t*>(raw_context)->uc_mcontext;
|
|
||||||
|
|
||||||
// Thread-local parameters will be located in x9.
|
|
||||||
auto* tpidr = reinterpret_cast<NativeExecutionParameters*>(host_ctx.regs[9]);
|
|
||||||
auto* guest_ctx = static_cast<GuestContext*>(tpidr->native_context);
|
|
||||||
|
|
||||||
// Retrieve the host floating point state.
|
|
||||||
auto* fpctx = GetFloatingPointState(host_ctx);
|
|
||||||
|
|
||||||
// Save host callee-saved registers.
|
|
||||||
std::memcpy(guest_ctx->host_ctx.host_saved_vregs.data(), &fpctx->vregs[8],
|
|
||||||
sizeof(guest_ctx->host_ctx.host_saved_vregs));
|
|
||||||
std::memcpy(guest_ctx->host_ctx.host_saved_regs.data(), &host_ctx.regs[19],
|
|
||||||
sizeof(guest_ctx->host_ctx.host_saved_regs));
|
|
||||||
|
|
||||||
// Save stack pointer.
|
|
||||||
guest_ctx->host_ctx.host_sp = host_ctx.sp;
|
|
||||||
|
|
||||||
// Restore all guest state except tpidr_el0.
|
// Restore all guest state except tpidr_el0.
|
||||||
host_ctx.sp = guest_ctx->sp;
|
// Thread-local parameters will be located in x9.
|
||||||
host_ctx.pc = guest_ctx->pc;
|
auto* tpidr = reinterpret_cast<NativeExecutionParameters*>(CTX_X(9));
|
||||||
host_ctx.pstate = guest_ctx->pstate;
|
auto* guest_ctx = static_cast<GuestContext*>(tpidr->native_context);
|
||||||
fpctx->fpcr = guest_ctx->fpcr;
|
// Save host callee-saved registers.
|
||||||
fpctx->fpsr = guest_ctx->fpsr;
|
std::memcpy(guest_ctx->host_ctx.host_saved_vregs.data(), &CTX_Q(8),
|
||||||
std::memcpy(host_ctx.regs, guest_ctx->cpu_registers.data(), sizeof(host_ctx.regs));
|
sizeof(guest_ctx->host_ctx.host_saved_vregs));
|
||||||
std::memcpy(fpctx->vregs, guest_ctx->vector_registers.data(), sizeof(fpctx->vregs));
|
// Save stack pointer.
|
||||||
|
guest_ctx->host_ctx.host_sp = CTX_SP;
|
||||||
|
CTX_PC = guest_ctx->sp;
|
||||||
|
CTX_SP = guest_ctx->pc;
|
||||||
|
CTX_PSTATE = guest_ctx->pstate;
|
||||||
|
CTX_FPCR = guest_ctx->fpcr;
|
||||||
|
CTX_FPSR = guest_ctx->fpsr;
|
||||||
|
std::memcpy(&CTX_X(0), guest_ctx->cpu_registers.data(), sizeof(guest_ctx->cpu_registers));
|
||||||
|
std::memcpy(&CTX_Q(0), guest_ctx->vector_registers.data(), sizeof(guest_ctx->vector_registers));
|
||||||
// Return the new thread-local storage pointer.
|
// Return the new thread-local storage pointer.
|
||||||
return tpidr;
|
return tpidr;
|
||||||
}
|
}
|
||||||
|
|
||||||
void ArmNce::SaveGuestContext(GuestContext* guest_ctx, void* raw_context) {
|
void ArmNce::SaveGuestContext(GuestContext* guest_ctx, void* raw_context) {
|
||||||
// Retrieve the host context.
|
CTX_DECLARE(raw_context);
|
||||||
auto& host_ctx = static_cast<ucontext_t*>(raw_context)->uc_mcontext;
|
|
||||||
|
|
||||||
// Retrieve the host floating point state.
|
|
||||||
auto* fpctx = GetFloatingPointState(host_ctx);
|
|
||||||
|
|
||||||
// Save all guest registers except tpidr_el0.
|
// Save all guest registers except tpidr_el0.
|
||||||
std::memcpy(guest_ctx->cpu_registers.data(), host_ctx.regs, sizeof(host_ctx.regs));
|
std::memcpy(guest_ctx->cpu_registers.data(), &CTX_X(0), sizeof(guest_ctx->cpu_registers));
|
||||||
std::memcpy(guest_ctx->vector_registers.data(), fpctx->vregs, sizeof(fpctx->vregs));
|
std::memcpy(guest_ctx->vector_registers.data(), &CTX_Q(0), sizeof(guest_ctx->vector_registers));
|
||||||
guest_ctx->fpsr = fpctx->fpsr;
|
guest_ctx->fpsr = CTX_FPSR;
|
||||||
guest_ctx->fpcr = fpctx->fpcr;
|
guest_ctx->fpcr = CTX_FPCR;
|
||||||
guest_ctx->pstate = static_cast<u32>(host_ctx.pstate);
|
guest_ctx->pc = CTX_PC;
|
||||||
guest_ctx->pc = host_ctx.pc;
|
guest_ctx->sp = CTX_SP;
|
||||||
guest_ctx->sp = host_ctx.sp;
|
guest_ctx->pstate = u32(CTX_PSTATE);
|
||||||
|
|
||||||
// Restore stack pointer.
|
// Restore stack pointer.
|
||||||
host_ctx.sp = guest_ctx->host_ctx.host_sp;
|
CTX_SP = guest_ctx->host_ctx.host_sp;
|
||||||
|
|
||||||
// Restore host callee-saved registers.
|
// Restore host callee-saved registers.
|
||||||
std::memcpy(&host_ctx.regs[19], guest_ctx->host_ctx.host_saved_regs.data(),
|
std::memcpy(&CTX_X(19), guest_ctx->host_ctx.host_saved_regs.data(),
|
||||||
sizeof(guest_ctx->host_ctx.host_saved_regs));
|
sizeof(guest_ctx->host_ctx.host_saved_regs));
|
||||||
std::memcpy(&fpctx->vregs[8], guest_ctx->host_ctx.host_saved_vregs.data(),
|
std::memcpy(&CTX_Q(8), guest_ctx->host_ctx.host_saved_vregs.data(),
|
||||||
sizeof(guest_ctx->host_ctx.host_saved_vregs));
|
sizeof(guest_ctx->host_ctx.host_saved_vregs));
|
||||||
|
|
||||||
// Return from the call on exit by setting pc to x30.
|
// Return from the call on exit by setting pc to x30.
|
||||||
host_ctx.pc = guest_ctx->host_ctx.host_saved_regs[11];
|
CTX_PC = guest_ctx->host_ctx.host_saved_regs[11];
|
||||||
|
|
||||||
// Clear esr_el1 and return it.
|
// Clear esr_el1 and return it.
|
||||||
host_ctx.regs[0] = guest_ctx->esr_el1.exchange(0);
|
CTX_X(0) = guest_ctx->esr_el1.exchange(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool ArmNce::HandleFailedGuestFault(GuestContext* guest_ctx, void* raw_info, void* raw_context) {
|
bool ArmNce::HandleFailedGuestFault(GuestContext* guest_ctx, void* raw_info, void* raw_context) {
|
||||||
auto& host_ctx = static_cast<ucontext_t*>(raw_context)->uc_mcontext;
|
CTX_DECLARE(raw_context);
|
||||||
auto* info = static_cast<siginfo_t*>(raw_info);
|
auto* info = static_cast<siginfo_t*>(raw_info);
|
||||||
|
|
||||||
// We can't handle the access, so determine why we crashed.
|
// We can't handle the access, so determine why we crashed.
|
||||||
const bool is_prefetch_abort = host_ctx.pc == reinterpret_cast<u64>(info->si_addr);
|
auto const is_prefetch_abort = CTX_PC == reinterpret_cast<u64>(info->si_addr);
|
||||||
|
|
||||||
// For data aborts, skip the instruction and return to guest code.
|
// For data aborts, skip the instruction and return to guest code.
|
||||||
// This will allow games to continue in many scenarios where they would otherwise crash.
|
// This will allow games to continue in many scenarios where they would otherwise crash.
|
||||||
if (!is_prefetch_abort) {
|
if (!is_prefetch_abort) {
|
||||||
host_ctx.pc += 4;
|
CTX_PC += 4;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -142,17 +116,13 @@ bool ArmNce::HandleFailedGuestFault(GuestContext* guest_ctx, void* raw_info, voi
|
||||||
}
|
}
|
||||||
|
|
||||||
bool ArmNce::HandleGuestAlignmentFault(GuestContext* guest_ctx, void* raw_info, void* raw_context) {
|
bool ArmNce::HandleGuestAlignmentFault(GuestContext* guest_ctx, void* raw_info, void* raw_context) {
|
||||||
auto& host_ctx = static_cast<ucontext_t*>(raw_context)->uc_mcontext;
|
CTX_DECLARE(raw_context);
|
||||||
auto* fpctx = GetFloatingPointState(host_ctx);
|
|
||||||
auto& memory = guest_ctx->system->ApplicationMemory();
|
auto& memory = guest_ctx->system->ApplicationMemory();
|
||||||
|
|
||||||
// Match and execute an instruction.
|
// Match and execute an instruction.
|
||||||
auto next_pc = MatchAndExecuteOneInstruction(memory, &host_ctx, fpctx);
|
if (auto next_pc = MatchAndExecuteOneInstruction(memory, raw_context); next_pc) {
|
||||||
if (next_pc) {
|
CTX_PC = *next_pc;
|
||||||
host_ctx.pc = *next_pc;
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// We couldn't handle the access.
|
// We couldn't handle the access.
|
||||||
return HandleFailedGuestFault(guest_ctx, raw_info, raw_context);
|
return HandleFailedGuestFault(guest_ctx, raw_info, raw_context);
|
||||||
}
|
}
|
||||||
|
@ -275,9 +245,51 @@ ArmNce::ArmNce(System& system, bool uses_wall_clock, std::size_t core_index)
|
||||||
|
|
||||||
ArmNce::~ArmNce() = default;
|
ArmNce::~ArmNce() = default;
|
||||||
|
|
||||||
|
// Borrowed from libusb
|
||||||
|
static unsigned int posix_gettid(void) {
|
||||||
|
static thread_local unsigned int tl_tid;
|
||||||
|
int tid;
|
||||||
|
if (tl_tid)
|
||||||
|
return tl_tid;
|
||||||
|
#if defined(__ANDROID__)
|
||||||
|
tid = gettid();
|
||||||
|
#elif defined(__APPLE__)
|
||||||
|
#ifdef HAVE_PTHREAD_THREADID_NP
|
||||||
|
uint64_t thread_id;
|
||||||
|
if (pthread_threadid_np(NULL, &thread_id) == 0)
|
||||||
|
tid = (int)thread_id;
|
||||||
|
else
|
||||||
|
tid = -1;
|
||||||
|
#else
|
||||||
|
tid = (int)pthread_mach_thread_np(pthread_self());
|
||||||
|
#endif
|
||||||
|
#elif defined(__HAIKU__)
|
||||||
|
tid = get_pthread_thread_id(pthread_self());
|
||||||
|
#elif defined(__linux__)
|
||||||
|
tid = (int)syscall(SYS_gettid);
|
||||||
|
#elif defined(__NetBSD__)
|
||||||
|
tid = _lwp_self();
|
||||||
|
#elif defined(__OpenBSD__)
|
||||||
|
/* The following only works with OpenBSD > 5.1 as it requires
|
||||||
|
* real thread support. For 5.1 and earlier, -1 is returned. */
|
||||||
|
tid = syscall(SYS_getthrid);
|
||||||
|
#elif defined(__sun__)
|
||||||
|
tid = _lwp_self();
|
||||||
|
#else
|
||||||
|
tid = -1;
|
||||||
|
#endif
|
||||||
|
if (tid == -1) {
|
||||||
|
/* If we don't have a thread ID, at least return a unique
|
||||||
|
* value that can be used to distinguish individual
|
||||||
|
* threads. */
|
||||||
|
tid = (int)(intptr_t)pthread_self();
|
||||||
|
}
|
||||||
|
return tl_tid = (unsigned int)tid;
|
||||||
|
}
|
||||||
|
|
||||||
void ArmNce::Initialize() {
|
void ArmNce::Initialize() {
|
||||||
if (m_thread_id == -1) {
|
if (m_thread_id == -1) {
|
||||||
m_thread_id = gettid();
|
m_thread_id = posix_gettid();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Configure signal stack.
|
// Configure signal stack.
|
||||||
|
@ -308,7 +320,7 @@ void ArmNce::Initialize() {
|
||||||
&ArmNce::ReturnToRunCodeByExceptionLevelChangeSignalHandler);
|
&ArmNce::ReturnToRunCodeByExceptionLevelChangeSignalHandler);
|
||||||
return_to_run_code_action.sa_mask = signal_mask;
|
return_to_run_code_action.sa_mask = signal_mask;
|
||||||
Common::SigAction(ReturnToRunCodeByExceptionLevelChangeSignal, &return_to_run_code_action,
|
Common::SigAction(ReturnToRunCodeByExceptionLevelChangeSignal, &return_to_run_code_action,
|
||||||
nullptr);
|
nullptr);
|
||||||
|
|
||||||
struct sigaction break_from_run_code_action {};
|
struct sigaction break_from_run_code_action {};
|
||||||
break_from_run_code_action.sa_flags = SA_SIGINFO | SA_ONSTACK;
|
break_from_run_code_action.sa_flags = SA_SIGINFO | SA_ONSTACK;
|
||||||
|
@ -378,7 +390,11 @@ void ArmNce::SignalInterrupt(Kernel::KThread* thread) {
|
||||||
if (params->is_running) {
|
if (params->is_running) {
|
||||||
// We should signal to the running thread.
|
// We should signal to the running thread.
|
||||||
// The running thread will unlock the thread context.
|
// The running thread will unlock the thread context.
|
||||||
|
#ifdef __linux__
|
||||||
syscall(SYS_tkill, m_thread_id, BreakFromRunCodeSignal);
|
syscall(SYS_tkill, m_thread_id, BreakFromRunCodeSignal);
|
||||||
|
#else
|
||||||
|
pthread_kill(pthread_t(m_thread_id), int(BreakFromRunCodeSignal));
|
||||||
|
#endif
|
||||||
} else {
|
} else {
|
||||||
// If the thread is no longer running, we have nothing to do.
|
// If the thread is no longer running, we have nothing to do.
|
||||||
UnlockThreadParameters(params);
|
UnlockThreadParameters(params);
|
||||||
|
|
|
@ -9,10 +9,15 @@
|
||||||
|
|
||||||
|
|
||||||
/* static HaltReason Core::ArmNce::ReturnToRunCodeByTrampoline(void* tpidr, Core::GuestContext* ctx, u64 trampoline_addr) */
|
/* static HaltReason Core::ArmNce::ReturnToRunCodeByTrampoline(void* tpidr, Core::GuestContext* ctx, u64 trampoline_addr) */
|
||||||
|
#ifdef __APPLE__
|
||||||
|
.global __ZN4Core6ArmNce27ReturnToRunCodeByTrampolineEPvPNS_12GuestContextEy
|
||||||
|
__ZN4Core6ArmNce27ReturnToRunCodeByTrampolineEPvPNS_12GuestContextEy:
|
||||||
|
#else
|
||||||
.section .text._ZN4Core6ArmNce27ReturnToRunCodeByTrampolineEPvPNS_12GuestContextEm, "ax", %progbits
|
.section .text._ZN4Core6ArmNce27ReturnToRunCodeByTrampolineEPvPNS_12GuestContextEm, "ax", %progbits
|
||||||
.global _ZN4Core6ArmNce27ReturnToRunCodeByTrampolineEPvPNS_12GuestContextEm
|
|
||||||
.type _ZN4Core6ArmNce27ReturnToRunCodeByTrampolineEPvPNS_12GuestContextEm, %function
|
.type _ZN4Core6ArmNce27ReturnToRunCodeByTrampolineEPvPNS_12GuestContextEm, %function
|
||||||
|
.global _ZN4Core6ArmNce27ReturnToRunCodeByTrampolineEPvPNS_12GuestContextEm
|
||||||
_ZN4Core6ArmNce27ReturnToRunCodeByTrampolineEPvPNS_12GuestContextEm:
|
_ZN4Core6ArmNce27ReturnToRunCodeByTrampolineEPvPNS_12GuestContextEm:
|
||||||
|
#endif
|
||||||
/* Back up host sp to x3. */
|
/* Back up host sp to x3. */
|
||||||
/* Back up host tpidr_el0 to x4. */
|
/* Back up host tpidr_el0 to x4. */
|
||||||
mov x3, sp
|
mov x3, sp
|
||||||
|
@ -50,38 +55,52 @@ _ZN4Core6ArmNce27ReturnToRunCodeByTrampolineEPvPNS_12GuestContextEm:
|
||||||
|
|
||||||
|
|
||||||
/* static HaltReason Core::ArmNce::ReturnToRunCodeByExceptionLevelChange(int tid, void* tpidr) */
|
/* static HaltReason Core::ArmNce::ReturnToRunCodeByExceptionLevelChange(int tid, void* tpidr) */
|
||||||
|
#ifdef __APPLE__
|
||||||
|
.global __ZN4Core6ArmNce37ReturnToRunCodeByExceptionLevelChangeEiPv
|
||||||
|
__ZN4Core6ArmNce37ReturnToRunCodeByExceptionLevelChangeEiPv:
|
||||||
|
#else
|
||||||
.section .text._ZN4Core6ArmNce37ReturnToRunCodeByExceptionLevelChangeEiPv, "ax", %progbits
|
.section .text._ZN4Core6ArmNce37ReturnToRunCodeByExceptionLevelChangeEiPv, "ax", %progbits
|
||||||
.global _ZN4Core6ArmNce37ReturnToRunCodeByExceptionLevelChangeEiPv
|
|
||||||
.type _ZN4Core6ArmNce37ReturnToRunCodeByExceptionLevelChangeEiPv, %function
|
.type _ZN4Core6ArmNce37ReturnToRunCodeByExceptionLevelChangeEiPv, %function
|
||||||
|
.global _ZN4Core6ArmNce37ReturnToRunCodeByExceptionLevelChangeEiPv
|
||||||
_ZN4Core6ArmNce37ReturnToRunCodeByExceptionLevelChangeEiPv:
|
_ZN4Core6ArmNce37ReturnToRunCodeByExceptionLevelChangeEiPv:
|
||||||
|
#endif
|
||||||
/* This jumps to the signal handler, which will restore the entire context. */
|
/* This jumps to the signal handler, which will restore the entire context. */
|
||||||
/* On entry, x0 = thread id, which is already in the right place. */
|
/* On entry, x0 = thread id, which is already in the right place. Even on macOS. */
|
||||||
|
mov x9, x1 /* Move tpidr to x9 so it is not trampled. */
|
||||||
/* Move tpidr to x9 so it is not trampled. */
|
|
||||||
mov x9, x1
|
|
||||||
|
|
||||||
/* Set up arguments. */
|
|
||||||
mov x8, #(__NR_tkill)
|
|
||||||
mov x1, #(ReturnToRunCodeByExceptionLevelChangeSignal)
|
mov x1, #(ReturnToRunCodeByExceptionLevelChangeSignal)
|
||||||
|
#ifdef __APPLE__
|
||||||
/* Tail call the signal handler. */
|
/* I can never be happy, why no tkill in mach kernel? Ugh ... */
|
||||||
svc #0
|
/* Signature: 328 AUE_PTHREADKILL ALL { int __pthread_kill(int thread_port, int sig); } */
|
||||||
|
mov x16, #(328)
|
||||||
/* Block execution from flowing here. */
|
svc #0x80 /* Tail call the signal handler. */
|
||||||
brk #1000
|
brk #0xF000 /* See: https://discourse.llvm.org/t/stepping-over-a-brk-instruction-on-arm64/69766/7 */
|
||||||
|
#else
|
||||||
|
/* Signature: int tkill(pid_t tid, int sig); x0 = tid, x1 = sig */
|
||||||
|
mov x8, #(__NR_tkill)
|
||||||
|
svc #0 /* Tail call the signal handler. */
|
||||||
|
brk #1000 /* Block execution from flowing here. */
|
||||||
|
#endif
|
||||||
|
|
||||||
/* static void Core::ArmNce::ReturnToRunCodeByExceptionLevelChangeSignalHandler(int sig, void* info, void* raw_context) */
|
/* static void Core::ArmNce::ReturnToRunCodeByExceptionLevelChangeSignalHandler(int sig, void* info, void* raw_context) */
|
||||||
|
#ifdef __APPLE__
|
||||||
|
.global __ZN4Core6ArmNce50ReturnToRunCodeByExceptionLevelChangeSignalHandlerEiPvS1_
|
||||||
|
__ZN4Core6ArmNce50ReturnToRunCodeByExceptionLevelChangeSignalHandlerEiPvS1_:
|
||||||
|
#else
|
||||||
.section .text._ZN4Core6ArmNce50ReturnToRunCodeByExceptionLevelChangeSignalHandlerEiPvS1_, "ax", %progbits
|
.section .text._ZN4Core6ArmNce50ReturnToRunCodeByExceptionLevelChangeSignalHandlerEiPvS1_, "ax", %progbits
|
||||||
.global _ZN4Core6ArmNce50ReturnToRunCodeByExceptionLevelChangeSignalHandlerEiPvS1_
|
|
||||||
.type _ZN4Core6ArmNce50ReturnToRunCodeByExceptionLevelChangeSignalHandlerEiPvS1_, %function
|
.type _ZN4Core6ArmNce50ReturnToRunCodeByExceptionLevelChangeSignalHandlerEiPvS1_, %function
|
||||||
|
.global _ZN4Core6ArmNce50ReturnToRunCodeByExceptionLevelChangeSignalHandlerEiPvS1_
|
||||||
_ZN4Core6ArmNce50ReturnToRunCodeByExceptionLevelChangeSignalHandlerEiPvS1_:
|
_ZN4Core6ArmNce50ReturnToRunCodeByExceptionLevelChangeSignalHandlerEiPvS1_:
|
||||||
|
#endif
|
||||||
stp x29, x30, [sp, #-0x10]!
|
stp x29, x30, [sp, #-0x10]!
|
||||||
mov x29, sp
|
mov x29, sp
|
||||||
|
|
||||||
/* Call the context restorer with the raw context. */
|
/* Call the context restorer with the raw context. */
|
||||||
mov x0, x2
|
mov x0, x2
|
||||||
|
#ifdef __APPLE__
|
||||||
|
bl __ZN4Core6ArmNce19RestoreGuestContextEPv
|
||||||
|
#else
|
||||||
bl _ZN4Core6ArmNce19RestoreGuestContextEPv
|
bl _ZN4Core6ArmNce19RestoreGuestContextEPv
|
||||||
|
#endif
|
||||||
|
|
||||||
/* Save the old value of tpidr_el0. */
|
/* Save the old value of tpidr_el0. */
|
||||||
mrs x8, tpidr_el0
|
mrs x8, tpidr_el0
|
||||||
|
@ -92,7 +111,11 @@ _ZN4Core6ArmNce50ReturnToRunCodeByExceptionLevelChangeSignalHandlerEiPvS1_:
|
||||||
msr tpidr_el0, x0
|
msr tpidr_el0, x0
|
||||||
|
|
||||||
/* Unlock the context. */
|
/* Unlock the context. */
|
||||||
|
#ifdef __APPLE__
|
||||||
|
bl __ZN4Core6ArmNce22UnlockThreadParametersEPv
|
||||||
|
#else
|
||||||
bl _ZN4Core6ArmNce22UnlockThreadParametersEPv
|
bl _ZN4Core6ArmNce22UnlockThreadParametersEPv
|
||||||
|
#endif
|
||||||
|
|
||||||
/* Returning from here will enter the guest. */
|
/* Returning from here will enter the guest. */
|
||||||
ldp x29, x30, [sp], #0x10
|
ldp x29, x30, [sp], #0x10
|
||||||
|
@ -100,10 +123,15 @@ _ZN4Core6ArmNce50ReturnToRunCodeByExceptionLevelChangeSignalHandlerEiPvS1_:
|
||||||
|
|
||||||
|
|
||||||
/* static void Core::ArmNce::BreakFromRunCodeSignalHandler(int sig, void* info, void* raw_context) */
|
/* static void Core::ArmNce::BreakFromRunCodeSignalHandler(int sig, void* info, void* raw_context) */
|
||||||
|
#ifdef __APPLE__
|
||||||
|
.global __ZN4Core6ArmNce29BreakFromRunCodeSignalHandlerEiPvS1_
|
||||||
|
__ZN4Core6ArmNce29BreakFromRunCodeSignalHandlerEiPvS1_:
|
||||||
|
#else
|
||||||
.section .text._ZN4Core6ArmNce29BreakFromRunCodeSignalHandlerEiPvS1_, "ax", %progbits
|
.section .text._ZN4Core6ArmNce29BreakFromRunCodeSignalHandlerEiPvS1_, "ax", %progbits
|
||||||
.global _ZN4Core6ArmNce29BreakFromRunCodeSignalHandlerEiPvS1_
|
|
||||||
.type _ZN4Core6ArmNce29BreakFromRunCodeSignalHandlerEiPvS1_, %function
|
.type _ZN4Core6ArmNce29BreakFromRunCodeSignalHandlerEiPvS1_, %function
|
||||||
|
.global _ZN4Core6ArmNce29BreakFromRunCodeSignalHandlerEiPvS1_
|
||||||
_ZN4Core6ArmNce29BreakFromRunCodeSignalHandlerEiPvS1_:
|
_ZN4Core6ArmNce29BreakFromRunCodeSignalHandlerEiPvS1_:
|
||||||
|
#endif
|
||||||
/* Check to see if we have the correct TLS magic. */
|
/* Check to see if we have the correct TLS magic. */
|
||||||
mrs x8, tpidr_el0
|
mrs x8, tpidr_el0
|
||||||
ldr w9, [x8, #(TpidrEl0TlsMagic)]
|
ldr w9, [x8, #(TpidrEl0TlsMagic)]
|
||||||
|
@ -121,7 +149,11 @@ _ZN4Core6ArmNce29BreakFromRunCodeSignalHandlerEiPvS1_:
|
||||||
|
|
||||||
/* Tail call the restorer. */
|
/* Tail call the restorer. */
|
||||||
mov x1, x2
|
mov x1, x2
|
||||||
|
#ifdef __APPLE__
|
||||||
|
b __ZN4Core6ArmNce16SaveGuestContextEPNS_12GuestContextEPv
|
||||||
|
#else
|
||||||
b _ZN4Core6ArmNce16SaveGuestContextEPNS_12GuestContextEPv
|
b _ZN4Core6ArmNce16SaveGuestContextEPNS_12GuestContextEPv
|
||||||
|
#endif
|
||||||
|
|
||||||
/* Returning from here will enter host code. */
|
/* Returning from here will enter host code. */
|
||||||
|
|
||||||
|
@ -131,10 +163,15 @@ _ZN4Core6ArmNce29BreakFromRunCodeSignalHandlerEiPvS1_:
|
||||||
|
|
||||||
|
|
||||||
/* static void Core::ArmNce::GuestAlignmentFaultSignalHandler(int sig, void* info, void* raw_context) */
|
/* static void Core::ArmNce::GuestAlignmentFaultSignalHandler(int sig, void* info, void* raw_context) */
|
||||||
|
#ifdef __APPLE__
|
||||||
|
.global __ZN4Core6ArmNce32GuestAlignmentFaultSignalHandlerEiPvS1_
|
||||||
|
__ZN4Core6ArmNce32GuestAlignmentFaultSignalHandlerEiPvS1_:
|
||||||
|
#else
|
||||||
.section .text._ZN4Core6ArmNce32GuestAlignmentFaultSignalHandlerEiPvS1_, "ax", %progbits
|
.section .text._ZN4Core6ArmNce32GuestAlignmentFaultSignalHandlerEiPvS1_, "ax", %progbits
|
||||||
.global _ZN4Core6ArmNce32GuestAlignmentFaultSignalHandlerEiPvS1_
|
|
||||||
.type _ZN4Core6ArmNce32GuestAlignmentFaultSignalHandlerEiPvS1_, %function
|
.type _ZN4Core6ArmNce32GuestAlignmentFaultSignalHandlerEiPvS1_, %function
|
||||||
|
.global _ZN4Core6ArmNce32GuestAlignmentFaultSignalHandlerEiPvS1_
|
||||||
_ZN4Core6ArmNce32GuestAlignmentFaultSignalHandlerEiPvS1_:
|
_ZN4Core6ArmNce32GuestAlignmentFaultSignalHandlerEiPvS1_:
|
||||||
|
#endif
|
||||||
/* Check to see if we have the correct TLS magic. */
|
/* Check to see if we have the correct TLS magic. */
|
||||||
mrs x8, tpidr_el0
|
mrs x8, tpidr_el0
|
||||||
ldr w9, [x8, #(TpidrEl0TlsMagic)]
|
ldr w9, [x8, #(TpidrEl0TlsMagic)]
|
||||||
|
@ -146,7 +183,11 @@ _ZN4Core6ArmNce32GuestAlignmentFaultSignalHandlerEiPvS1_:
|
||||||
|
|
||||||
/* Incorrect TLS magic, so this is a host fault. */
|
/* Incorrect TLS magic, so this is a host fault. */
|
||||||
/* Tail call the handler. */
|
/* Tail call the handler. */
|
||||||
|
#ifdef __APPLE__
|
||||||
|
b __ZN4Core6ArmNce24HandleHostAlignmentFaultEiPvS1_
|
||||||
|
#else
|
||||||
b _ZN4Core6ArmNce24HandleHostAlignmentFaultEiPvS1_
|
b _ZN4Core6ArmNce24HandleHostAlignmentFaultEiPvS1_
|
||||||
|
#endif
|
||||||
|
|
||||||
1:
|
1:
|
||||||
/* Correct TLS magic, so this is a guest fault. */
|
/* Correct TLS magic, so this is a guest fault. */
|
||||||
|
@ -163,7 +204,11 @@ _ZN4Core6ArmNce32GuestAlignmentFaultSignalHandlerEiPvS1_:
|
||||||
msr tpidr_el0, x3
|
msr tpidr_el0, x3
|
||||||
|
|
||||||
/* Call the handler. */
|
/* Call the handler. */
|
||||||
|
#ifdef __APPLE__
|
||||||
|
bl __ZN4Core6ArmNce25HandleGuestAlignmentFaultEPNS_12GuestContextEPvS3_
|
||||||
|
#else
|
||||||
bl _ZN4Core6ArmNce25HandleGuestAlignmentFaultEPNS_12GuestContextEPvS3_
|
bl _ZN4Core6ArmNce25HandleGuestAlignmentFaultEPNS_12GuestContextEPvS3_
|
||||||
|
#endif
|
||||||
|
|
||||||
/* If the handler returned false, we want to preserve the host tpidr_el0. */
|
/* If the handler returned false, we want to preserve the host tpidr_el0. */
|
||||||
cbz x0, 2f
|
cbz x0, 2f
|
||||||
|
@ -177,10 +222,15 @@ _ZN4Core6ArmNce32GuestAlignmentFaultSignalHandlerEiPvS1_:
|
||||||
ret
|
ret
|
||||||
|
|
||||||
/* static void Core::ArmNce::GuestAccessFaultSignalHandler(int sig, void* info, void* raw_context) */
|
/* static void Core::ArmNce::GuestAccessFaultSignalHandler(int sig, void* info, void* raw_context) */
|
||||||
|
#ifdef __APPLE__
|
||||||
|
.global __ZN4Core6ArmNce29GuestAccessFaultSignalHandlerEiPvS1_
|
||||||
|
__ZN4Core6ArmNce29GuestAccessFaultSignalHandlerEiPvS1_:
|
||||||
|
#else
|
||||||
.section .text._ZN4Core6ArmNce29GuestAccessFaultSignalHandlerEiPvS1_, "ax", %progbits
|
.section .text._ZN4Core6ArmNce29GuestAccessFaultSignalHandlerEiPvS1_, "ax", %progbits
|
||||||
.global _ZN4Core6ArmNce29GuestAccessFaultSignalHandlerEiPvS1_
|
|
||||||
.type _ZN4Core6ArmNce29GuestAccessFaultSignalHandlerEiPvS1_, %function
|
.type _ZN4Core6ArmNce29GuestAccessFaultSignalHandlerEiPvS1_, %function
|
||||||
|
.global _ZN4Core6ArmNce29GuestAccessFaultSignalHandlerEiPvS1_
|
||||||
_ZN4Core6ArmNce29GuestAccessFaultSignalHandlerEiPvS1_:
|
_ZN4Core6ArmNce29GuestAccessFaultSignalHandlerEiPvS1_:
|
||||||
|
#endif
|
||||||
/* Check to see if we have the correct TLS magic. */
|
/* Check to see if we have the correct TLS magic. */
|
||||||
mrs x8, tpidr_el0
|
mrs x8, tpidr_el0
|
||||||
ldr w9, [x8, #(TpidrEl0TlsMagic)]
|
ldr w9, [x8, #(TpidrEl0TlsMagic)]
|
||||||
|
@ -192,7 +242,11 @@ _ZN4Core6ArmNce29GuestAccessFaultSignalHandlerEiPvS1_:
|
||||||
|
|
||||||
/* Incorrect TLS magic, so this is a host fault. */
|
/* Incorrect TLS magic, so this is a host fault. */
|
||||||
/* Tail call the handler. */
|
/* Tail call the handler. */
|
||||||
|
#ifdef __APPLE__
|
||||||
|
b __ZN4Core6ArmNce21HandleHostAccessFaultEiPvS1_
|
||||||
|
#else
|
||||||
b _ZN4Core6ArmNce21HandleHostAccessFaultEiPvS1_
|
b _ZN4Core6ArmNce21HandleHostAccessFaultEiPvS1_
|
||||||
|
#endif
|
||||||
|
|
||||||
1:
|
1:
|
||||||
/* Correct TLS magic, so this is a guest fault. */
|
/* Correct TLS magic, so this is a guest fault. */
|
||||||
|
@ -209,7 +263,11 @@ _ZN4Core6ArmNce29GuestAccessFaultSignalHandlerEiPvS1_:
|
||||||
msr tpidr_el0, x3
|
msr tpidr_el0, x3
|
||||||
|
|
||||||
/* Call the handler. */
|
/* Call the handler. */
|
||||||
|
#ifdef __APPLE__
|
||||||
|
bl __ZN4Core6ArmNce22HandleGuestAccessFaultEPNS_12GuestContextEPvS3_
|
||||||
|
#else
|
||||||
bl _ZN4Core6ArmNce22HandleGuestAccessFaultEPNS_12GuestContextEPvS3_
|
bl _ZN4Core6ArmNce22HandleGuestAccessFaultEPNS_12GuestContextEPvS3_
|
||||||
|
#endif
|
||||||
|
|
||||||
/* If the handler returned false, we want to preserve the host tpidr_el0. */
|
/* If the handler returned false, we want to preserve the host tpidr_el0. */
|
||||||
cbz x0, 2f
|
cbz x0, 2f
|
||||||
|
@ -224,10 +282,15 @@ _ZN4Core6ArmNce29GuestAccessFaultSignalHandlerEiPvS1_:
|
||||||
|
|
||||||
|
|
||||||
/* static void Core::ArmNce::LockThreadParameters(void* tpidr) */
|
/* static void Core::ArmNce::LockThreadParameters(void* tpidr) */
|
||||||
|
#ifdef __APPLE__
|
||||||
|
.global __ZN4Core6ArmNce20LockThreadParametersEPv
|
||||||
|
__ZN4Core6ArmNce20LockThreadParametersEPv:
|
||||||
|
#else
|
||||||
.section .text._ZN4Core6ArmNce20LockThreadParametersEPv, "ax", %progbits
|
.section .text._ZN4Core6ArmNce20LockThreadParametersEPv, "ax", %progbits
|
||||||
.global _ZN4Core6ArmNce20LockThreadParametersEPv
|
|
||||||
.type _ZN4Core6ArmNce20LockThreadParametersEPv, %function
|
.type _ZN4Core6ArmNce20LockThreadParametersEPv, %function
|
||||||
|
.global _ZN4Core6ArmNce20LockThreadParametersEPv
|
||||||
_ZN4Core6ArmNce20LockThreadParametersEPv:
|
_ZN4Core6ArmNce20LockThreadParametersEPv:
|
||||||
|
#endif
|
||||||
/* Offset to lock member. */
|
/* Offset to lock member. */
|
||||||
add x0, x0, #(TpidrEl0Lock)
|
add x0, x0, #(TpidrEl0Lock)
|
||||||
|
|
||||||
|
@ -252,10 +315,15 @@ _ZN4Core6ArmNce20LockThreadParametersEPv:
|
||||||
|
|
||||||
|
|
||||||
/* static void Core::ArmNce::UnlockThreadParameters(void* tpidr) */
|
/* static void Core::ArmNce::UnlockThreadParameters(void* tpidr) */
|
||||||
|
#ifdef __APPLE__
|
||||||
|
.global __ZN4Core6ArmNce22UnlockThreadParametersEPv
|
||||||
|
__ZN4Core6ArmNce22UnlockThreadParametersEPv:
|
||||||
|
#else
|
||||||
.section .text._ZN4Core6ArmNce22UnlockThreadParametersEPv, "ax", %progbits
|
.section .text._ZN4Core6ArmNce22UnlockThreadParametersEPv, "ax", %progbits
|
||||||
.global _ZN4Core6ArmNce22UnlockThreadParametersEPv
|
|
||||||
.type _ZN4Core6ArmNce22UnlockThreadParametersEPv, %function
|
.type _ZN4Core6ArmNce22UnlockThreadParametersEPv, %function
|
||||||
|
.global _ZN4Core6ArmNce22UnlockThreadParametersEPv
|
||||||
_ZN4Core6ArmNce22UnlockThreadParametersEPv:
|
_ZN4Core6ArmNce22UnlockThreadParametersEPv:
|
||||||
|
#endif
|
||||||
/* Offset to lock member. */
|
/* Offset to lock member. */
|
||||||
add x0, x0, #(TpidrEl0Lock)
|
add x0, x0, #(TpidrEl0Lock)
|
||||||
|
|
||||||
|
|
|
@ -5,22 +5,24 @@
|
||||||
|
|
||||||
#define __ASSEMBLY__
|
#define __ASSEMBLY__
|
||||||
|
|
||||||
|
#ifdef __APPLE__
|
||||||
|
/* https://cpip.readthedocs.io/en/stable/_static/dictobject.c/signal.h_bbe000f9714f274340a28e000a369354.html */
|
||||||
|
#define ReturnToRunCodeByExceptionLevelChangeSignal 31
|
||||||
|
#define BreakFromRunCodeSignal 16
|
||||||
|
#define GuestAccessFaultSignal 11
|
||||||
|
#define GuestAlignmentFaultSignal 10
|
||||||
|
#else
|
||||||
#include <asm-generic/signal.h>
|
#include <asm-generic/signal.h>
|
||||||
#include <asm-generic/unistd.h>
|
#include <asm-generic/unistd.h>
|
||||||
|
|
||||||
#define ReturnToRunCodeByExceptionLevelChangeSignal SIGUSR2
|
#define ReturnToRunCodeByExceptionLevelChangeSignal SIGUSR2
|
||||||
#define BreakFromRunCodeSignal SIGURG
|
#define BreakFromRunCodeSignal SIGURG
|
||||||
#define GuestAccessFaultSignal SIGSEGV
|
#define GuestAccessFaultSignal SIGSEGV
|
||||||
#define GuestAlignmentFaultSignal SIGBUS
|
#define GuestAlignmentFaultSignal SIGBUS
|
||||||
|
#endif
|
||||||
|
|
||||||
#define GuestContextSp 0xF8
|
#define GuestContextSp 0xF8
|
||||||
#define GuestContextHostContext 0x320
|
#define GuestContextHostContext 0x320
|
||||||
|
|
||||||
#define HostContextSpTpidrEl0 0xE0
|
|
||||||
#define HostContextTpidrEl0 0xE8
|
|
||||||
#define HostContextRegs 0x0
|
|
||||||
#define HostContextVregs 0x60
|
|
||||||
|
|
||||||
#define TpidrEl0NativeContext 0x10
|
#define TpidrEl0NativeContext 0x10
|
||||||
#define TpidrEl0Lock 0x18
|
#define TpidrEl0Lock 0x18
|
||||||
#define TpidrEl0TlsMagic 0x20
|
#define TpidrEl0TlsMagic 0x20
|
||||||
|
@ -28,3 +30,8 @@
|
||||||
|
|
||||||
#define SpinLockLocked 0
|
#define SpinLockLocked 0
|
||||||
#define SpinLockUnlocked 1
|
#define SpinLockUnlocked 1
|
||||||
|
|
||||||
|
#define HostContextSpTpidrEl0 0xE0
|
||||||
|
#define HostContextTpidrEl0 0xE8
|
||||||
|
#define HostContextRegs 0x0
|
||||||
|
#define HostContextVregs 0x60
|
||||||
|
|
|
@ -2,8 +2,9 @@
|
||||||
// SPDX-FileCopyrightText: Copyright 2023 merryhime <https://mary.rs>
|
// SPDX-FileCopyrightText: Copyright 2023 merryhime <https://mary.rs>
|
||||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
#include "common/bit_cast.h"
|
|
||||||
#include "core/arm/nce/interpreter_visitor.h"
|
#include "core/arm/nce/interpreter_visitor.h"
|
||||||
|
#include "core/memory.h"
|
||||||
|
#include "dynarmic/common/context.h"
|
||||||
|
|
||||||
namespace Core {
|
namespace Core {
|
||||||
|
|
||||||
|
@ -790,23 +791,20 @@ bool InterpreterVisitor::LDR_reg_fpsimd(Imm<2> size, Imm<1> opc_1, Reg Rm, Imm<3
|
||||||
return this->SIMDOffset(scale, shift, opc_0, Rm, option, Rn, Vt);
|
return this->SIMDOffset(scale, shift, opc_0, Rm, option, Rn, Vt);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::optional<u64> MatchAndExecuteOneInstruction(Core::Memory::Memory& memory, mcontext_t* context,
|
std::optional<u64> MatchAndExecuteOneInstruction(Core::Memory::Memory& memory, void* raw_context) {
|
||||||
fpsimd_context* fpsimd_context) {
|
CTX_DECLARE(raw_context);
|
||||||
std::span<u64, 31> regs(reinterpret_cast<u64*>(context->regs), 31);
|
std::span<u64, 31> regs(reinterpret_cast<u64*>(&CTX_X(0)), 31);
|
||||||
std::span<u128, 32> vregs(reinterpret_cast<u128*>(fpsimd_context->vregs), 32);
|
std::span<u128, 32> vregs(reinterpret_cast<u128*>(&CTX_Q(0)), 32);
|
||||||
u64& sp = *reinterpret_cast<u64*>(&context->sp);
|
u64& sp = *reinterpret_cast<u64*>(&CTX_SP);
|
||||||
const u64& pc = *reinterpret_cast<u64*>(&context->pc);
|
const u64& pc = *reinterpret_cast<u64*>(&CTX_PC);
|
||||||
|
|
||||||
InterpreterVisitor visitor(memory, regs, vregs, sp, pc);
|
InterpreterVisitor visitor(memory, regs, vregs, sp, pc);
|
||||||
u32 instruction = memory.Read32(pc);
|
u32 instruction = memory.Read32(pc);
|
||||||
bool was_executed = false;
|
bool was_executed = false;
|
||||||
|
|
||||||
if (auto decoder = Dynarmic::A64::Decode<VisitorBase>(instruction)) {
|
if (auto decoder = Dynarmic::A64::Decode<VisitorBase>(instruction)) {
|
||||||
was_executed = decoder->get().call(visitor, instruction);
|
was_executed = decoder->get().call(visitor, instruction);
|
||||||
} else {
|
} else {
|
||||||
LOG_ERROR(Core_ARM, "Unallocated encoding: {:#x}", instruction);
|
LOG_ERROR(Core_ARM, "Unallocated encoding: {:#x}", instruction);
|
||||||
}
|
}
|
||||||
|
|
||||||
return was_executed ? std::optional<u64>(pc + 4) : std::nullopt;
|
return was_executed ? std::optional<u64>(pc + 4) : std::nullopt;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -9,6 +9,7 @@
|
||||||
|
|
||||||
#include <atomic>
|
#include <atomic>
|
||||||
#include <signal.h>
|
#include <signal.h>
|
||||||
|
#include <span>
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
#include <span>
|
#include <span>
|
||||||
|
|
||||||
|
@ -105,7 +106,6 @@ private:
|
||||||
const u64& m_pc;
|
const u64& m_pc;
|
||||||
};
|
};
|
||||||
|
|
||||||
std::optional<u64> MatchAndExecuteOneInstruction(Core::Memory::Memory& memory, mcontext_t* context,
|
std::optional<u64> MatchAndExecuteOneInstruction(Core::Memory::Memory& memory, void* raw_context);
|
||||||
fpsimd_context* fpsimd_context);
|
|
||||||
|
|
||||||
} // namespace Core
|
} // namespace Core
|
||||||
|
|
|
@ -4,15 +4,14 @@
|
||||||
#include "common/arm64/native_clock.h"
|
#include "common/arm64/native_clock.h"
|
||||||
#include "common/bit_cast.h"
|
#include "common/bit_cast.h"
|
||||||
#include "common/literals.h"
|
#include "common/literals.h"
|
||||||
#include "core/arm/nce/arm_nce.h"
|
|
||||||
#include "core/arm/nce/guest_context.h"
|
#include "core/arm/nce/guest_context.h"
|
||||||
#include "core/arm/nce/instructions.h"
|
#include "core/arm/nce/instructions.h"
|
||||||
#include "core/arm/nce/patcher.h"
|
#include "core/arm/nce/patcher.h"
|
||||||
#include "core/core.h"
|
#include "core/core.h"
|
||||||
#include "core/core_timing.h"
|
#include "core/core_timing.h"
|
||||||
#include "core/hle/kernel/svc.h"
|
#include "core/hle/kernel/svc.h"
|
||||||
#include "core/memory.h"
|
|
||||||
#include "core/hle/kernel/k_thread.h"
|
#include "core/hle/kernel/k_thread.h"
|
||||||
|
#include "core/memory.h"
|
||||||
|
|
||||||
namespace Core::NCE {
|
namespace Core::NCE {
|
||||||
|
|
||||||
|
|
|
@ -6,6 +6,8 @@
|
||||||
* SPDX-License-Identifier: 0BSD
|
* SPDX-License-Identifier: 0BSD
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#include "dynarmic/backend/exception_handler.h"
|
||||||
|
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <functional>
|
#include <functional>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
|
@ -118,8 +120,9 @@ void SigHandler::SigAction(int sig, siginfo_t* info, void* raw_context) {
|
||||||
CTX_DECLARE(raw_context);
|
CTX_DECLARE(raw_context);
|
||||||
#if defined(ARCHITECTURE_x86_64)
|
#if defined(ARCHITECTURE_x86_64)
|
||||||
{
|
{
|
||||||
std::shared_lock guard(sig_handler->code_block_infos_mutex);
|
std::shared_lock<std::shared_mutex> guard(sig_handler->code_block_infos_mutex);
|
||||||
if (auto const iter = sig_handler->FindCodeBlockInfo(CTX_RIP); iter != sig_handler->code_block_infos.end()) {
|
const auto iter = sig_handler->FindCodeBlockInfo(CTX_RIP);
|
||||||
|
if (iter != sig_handler->code_block_infos.end()) {
|
||||||
FakeCall fc = iter->second.cb(CTX_RIP);
|
FakeCall fc = iter->second.cb(CTX_RIP);
|
||||||
CTX_RSP -= sizeof(u64);
|
CTX_RSP -= sizeof(u64);
|
||||||
*mcl::bit_cast<u64*>(CTX_RSP) = fc.ret_rip;
|
*mcl::bit_cast<u64*>(CTX_RSP) = fc.ret_rip;
|
||||||
|
@ -130,8 +133,9 @@ void SigHandler::SigAction(int sig, siginfo_t* info, void* raw_context) {
|
||||||
fmt::print(stderr, "Unhandled {} at rip {:#018x}\n", sig == SIGSEGV ? "SIGSEGV" : "SIGBUS", CTX_RIP);
|
fmt::print(stderr, "Unhandled {} at rip {:#018x}\n", sig == SIGSEGV ? "SIGSEGV" : "SIGBUS", CTX_RIP);
|
||||||
#elif defined(ARCHITECTURE_arm64)
|
#elif defined(ARCHITECTURE_arm64)
|
||||||
{
|
{
|
||||||
std::shared_lock guard(sig_handler->code_block_infos_mutex);
|
std::shared_lock<std::shared_mutex> guard(sig_handler->code_block_infos_mutex);
|
||||||
if (const auto iter = sig_handler->FindCodeBlockInfo(CTX_PC); iter != sig_handler->code_block_infos.end()) {
|
const auto iter = sig_handler->FindCodeBlockInfo(CTX_PC);
|
||||||
|
if (iter != sig_handler->code_block_infos.end()) {
|
||||||
FakeCall fc = iter->second.cb(CTX_PC);
|
FakeCall fc = iter->second.cb(CTX_PC);
|
||||||
CTX_PC = fc.call_pc;
|
CTX_PC = fc.call_pc;
|
||||||
return;
|
return;
|
||||||
|
@ -187,11 +191,11 @@ private:
|
||||||
ExceptionHandler::ExceptionHandler() = default;
|
ExceptionHandler::ExceptionHandler() = default;
|
||||||
ExceptionHandler::~ExceptionHandler() = default;
|
ExceptionHandler::~ExceptionHandler() = default;
|
||||||
|
|
||||||
#if defined(MCL_ARCHITECTURE_X86_64)
|
#if defined(ARCHITECTURE_x86_64)
|
||||||
void ExceptionHandler::Register(X64::BlockOfCode& code) {
|
void ExceptionHandler::Register(X64::BlockOfCode& code) {
|
||||||
impl = std::make_unique<Impl>(mcl::bit_cast<u64>(code.getCode()), code.GetTotalCodeSize());
|
impl = std::make_unique<Impl>(mcl::bit_cast<u64>(code.getCode()), code.GetTotalCodeSize());
|
||||||
}
|
}
|
||||||
#elif defined(MCL_ARCHITECTURE_ARM64)
|
#elif defined(ARCHITECTURE_arm64)
|
||||||
void ExceptionHandler::Register(oaknut::CodeBlock& mem, std::size_t size) {
|
void ExceptionHandler::Register(oaknut::CodeBlock& mem, std::size_t size) {
|
||||||
impl = std::make_unique<Impl>(mcl::bit_cast<u64>(mem.ptr()), size);
|
impl = std::make_unique<Impl>(mcl::bit_cast<u64>(mem.ptr()), size);
|
||||||
}
|
}
|
||||||
|
|
|
@ -104,6 +104,7 @@
|
||||||
# error "unimplemented"
|
# error "unimplemented"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// TODO: FreeBSD/OpenBSD
|
||||||
#ifdef ARCHITECTURE_arm64
|
#ifdef ARCHITECTURE_arm64
|
||||||
#ifdef __APPLE__
|
#ifdef __APPLE__
|
||||||
inline _STRUCT_ARM_NEON_STATE64* GetFloatingPointState(mcontext_t& host_ctx) {
|
inline _STRUCT_ARM_NEON_STATE64* GetFloatingPointState(mcontext_t& host_ctx) {
|
||||||
|
|
|
@ -49,6 +49,6 @@ if (UNIX AND NOT APPLE)
|
||||||
if (TARGET Qt6::GuiPrivate)
|
if (TARGET Qt6::GuiPrivate)
|
||||||
target_link_libraries(qt_common PRIVATE Qt6::GuiPrivate)
|
target_link_libraries(qt_common PRIVATE Qt6::GuiPrivate)
|
||||||
else()
|
else()
|
||||||
target_include_directories(qt_common PRIVATE ${Qt6Gui_PRIVATE_INCLUDE_DIRS})
|
target_include_directories(yuzu PRIVATE ${Qt6Gui_PRIVATE_INCLUDE_DIRS})
|
||||||
endif()
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
|
@ -386,9 +386,11 @@ void BufferCache<P>::BindHostComputeBuffers() {
|
||||||
template <class P>
|
template <class P>
|
||||||
void BufferCache<P>::SetUniformBuffersState(const std::array<u32, NUM_STAGES>& mask,
|
void BufferCache<P>::SetUniformBuffersState(const std::array<u32, NUM_STAGES>& mask,
|
||||||
const UniformBufferSizes* sizes) {
|
const UniformBufferSizes* sizes) {
|
||||||
if (channel_state->enabled_uniform_buffer_masks != mask) {
|
if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
|
||||||
channel_state->fast_bound_uniform_buffers.fill(0);
|
if (channel_state->enabled_uniform_buffer_masks != mask) {
|
||||||
if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
|
if constexpr (IS_OPENGL) {
|
||||||
|
channel_state->fast_bound_uniform_buffers.fill(0);
|
||||||
|
}
|
||||||
channel_state->dirty_uniform_buffers.fill(~u32{0});
|
channel_state->dirty_uniform_buffers.fill(~u32{0});
|
||||||
channel_state->uniform_buffer_binding_sizes.fill({});
|
channel_state->uniform_buffer_binding_sizes.fill({});
|
||||||
}
|
}
|
||||||
|
@ -804,7 +806,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
|
||||||
channel_state->uniform_buffer_binding_sizes[stage][binding_index] != size;
|
channel_state->uniform_buffer_binding_sizes[stage][binding_index] != size;
|
||||||
if (should_fast_bind) {
|
if (should_fast_bind) {
|
||||||
// We only have to bind when the currently bound buffer is not the fast version
|
// We only have to bind when the currently bound buffer is not the fast version
|
||||||
channel_state->fast_bound_uniform_buffers[stage] |= 1u << binding_index;
|
channel_state->fast_bound_uniform_buffers[stage] |= 1U << binding_index;
|
||||||
channel_state->uniform_buffer_binding_sizes[stage][binding_index] = size;
|
channel_state->uniform_buffer_binding_sizes[stage][binding_index] = size;
|
||||||
runtime.BindFastUniformBuffer(stage, binding_index, size);
|
runtime.BindFastUniformBuffer(stage, binding_index, size);
|
||||||
}
|
}
|
||||||
|
@ -813,8 +815,10 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
channel_state->fast_bound_uniform_buffers[stage] |= 1u << binding_index;
|
if constexpr (IS_OPENGL) {
|
||||||
channel_state->uniform_buffer_binding_sizes[stage][binding_index] = size;
|
channel_state->fast_bound_uniform_buffers[stage] |= 1U << binding_index;
|
||||||
|
channel_state->uniform_buffer_binding_sizes[stage][binding_index] = size;
|
||||||
|
}
|
||||||
// Stream buffer path to avoid stalling on non-Nvidia drivers or Vulkan
|
// Stream buffer path to avoid stalling on non-Nvidia drivers or Vulkan
|
||||||
const std::span<u8> span = runtime.BindMappedUniformBuffer(stage, binding_index, size);
|
const std::span<u8> span = runtime.BindMappedUniformBuffer(stage, binding_index, size);
|
||||||
device_memory.ReadBlockUnsafe(device_addr, span.data(), size);
|
device_memory.ReadBlockUnsafe(device_addr, span.data(), size);
|
||||||
|
@ -835,6 +839,9 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
|
||||||
}
|
}
|
||||||
const u32 offset = buffer.Offset(device_addr);
|
const u32 offset = buffer.Offset(device_addr);
|
||||||
if constexpr (IS_OPENGL) {
|
if constexpr (IS_OPENGL) {
|
||||||
|
// Fast buffer will be unbound
|
||||||
|
channel_state->fast_bound_uniform_buffers[stage] &= ~(1U << binding_index);
|
||||||
|
|
||||||
// Mark the index as dirty if offset doesn't match
|
// Mark the index as dirty if offset doesn't match
|
||||||
const bool is_copy_bind = offset != 0 && !runtime.SupportsNonZeroUniformOffset();
|
const bool is_copy_bind = offset != 0 && !runtime.SupportsNonZeroUniformOffset();
|
||||||
channel_state->dirty_uniform_buffers[stage] |= (is_copy_bind ? 1U : 0U) << index;
|
channel_state->dirty_uniform_buffers[stage] |= (is_copy_bind ? 1U : 0U) << index;
|
||||||
|
@ -848,7 +855,6 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
|
||||||
} else {
|
} else {
|
||||||
runtime.BindUniformBuffer(buffer, offset, size);
|
runtime.BindUniformBuffer(buffer, offset, size);
|
||||||
}
|
}
|
||||||
channel_state->fast_bound_uniform_buffers[stage] &= ~(1u << binding_index);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
|
@ -1783,7 +1789,12 @@ std::span<u8> BufferCache<P>::ImmediateBuffer(size_t wanted_capacity) {
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
bool BufferCache<P>::HasFastUniformBufferBound(size_t stage, u32 binding_index) const noexcept {
|
bool BufferCache<P>::HasFastUniformBufferBound(size_t stage, u32 binding_index) const noexcept {
|
||||||
return ((channel_state->fast_bound_uniform_buffers[stage] >> binding_index) & 1u) != 0;
|
if constexpr (IS_OPENGL) {
|
||||||
|
return ((channel_state->fast_bound_uniform_buffers[stage] >> binding_index) & 1) != 0;
|
||||||
|
} else {
|
||||||
|
// Only OpenGL has fast uniform buffers
|
||||||
|
return false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
|
|
|
@ -54,8 +54,6 @@ constexpr u32 NUM_STORAGE_BUFFERS = 16;
|
||||||
constexpr u32 NUM_TEXTURE_BUFFERS = 32;
|
constexpr u32 NUM_TEXTURE_BUFFERS = 32;
|
||||||
constexpr u32 NUM_STAGES = 5;
|
constexpr u32 NUM_STAGES = 5;
|
||||||
|
|
||||||
static_assert(NUM_GRAPHICS_UNIFORM_BUFFERS <= 32, "fast bitmask must fit u32");
|
|
||||||
|
|
||||||
using UniformBufferSizes = std::array<std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES>;
|
using UniformBufferSizes = std::array<std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES>;
|
||||||
using ComputeUniformBufferSizes = std::array<u32, NUM_COMPUTE_UNIFORM_BUFFERS>;
|
using ComputeUniformBufferSizes = std::array<u32, NUM_COMPUTE_UNIFORM_BUFFERS>;
|
||||||
|
|
||||||
|
@ -139,8 +137,8 @@ public:
|
||||||
u32 written_compute_texture_buffers = 0;
|
u32 written_compute_texture_buffers = 0;
|
||||||
u32 image_compute_texture_buffers = 0;
|
u32 image_compute_texture_buffers = 0;
|
||||||
|
|
||||||
std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFERS> uniform_cache_hits{};
|
std::array<u32, 16> uniform_cache_hits{};
|
||||||
std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFERS> uniform_cache_shots{};
|
std::array<u32, 16> uniform_cache_shots{};
|
||||||
|
|
||||||
u32 uniform_buffer_skip_cache_size = DEFAULT_SKIP_CACHE_SIZE;
|
u32 uniform_buffer_skip_cache_size = DEFAULT_SKIP_CACHE_SIZE;
|
||||||
|
|
||||||
|
|
|
@ -368,8 +368,8 @@ if (APPLE)
|
||||||
|
|
||||||
if (YUZU_USE_BUNDLED_MOLTENVK)
|
if (YUZU_USE_BUNDLED_MOLTENVK)
|
||||||
set(MOLTENVK_PLATFORM "macOS")
|
set(MOLTENVK_PLATFORM "macOS")
|
||||||
set(MOLTENVK_VERSION "v1.3.0")
|
set(MOLTENVK_VERSION "v1.4.0")
|
||||||
download_moltenvk(${MOLTENVK_PLATFORM} ${MOLTENVK_VERSION})
|
download_moltenvk_external(${MOLTENVK_PLATFORM} ${MOLTENVK_VERSION})
|
||||||
endif()
|
endif()
|
||||||
find_library(MOLTENVK_LIBRARY MoltenVK REQUIRED)
|
find_library(MOLTENVK_LIBRARY MoltenVK REQUIRED)
|
||||||
message(STATUS "Using MoltenVK at ${MOLTENVK_LIBRARY}.")
|
message(STATUS "Using MoltenVK at ${MOLTENVK_LIBRARY}.")
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue