Compare commits

..

25 commits

Author SHA1 Message Date
65309cad44
cleanup cmake
Some checks failed
eden-license / license-header (pull_request) Failing after 23s
Signed-off-by: crueter <crueter@eden-emu.dev>
2025-10-11 00:08:13 +00:00
98885623cb
fix comp
Signed-off-by: crueter <crueter@eden-emu.dev>
2025-10-11 00:07:43 +00:00
e7be3e1770
fix cpm-fetch
Signed-off-by: crueter <crueter@eden-emu.dev>
2025-10-11 00:07:43 +00:00
e6f6c1d325
fix android, macos, linux
Signed-off-by: crueter <crueter@eden-emu.dev>
2025-10-11 00:07:37 +00:00
18d3b95ce8
16k page size for apple
Signed-off-by: lizzie <lizzie@eden-emu.dev>
2025-10-11 00:07:37 +00:00
d188725514
better virtual base lookup algo for apple
Signed-off-by: lizzie <lizzie@eden-emu.dev>
2025-10-11 00:07:37 +00:00
1bf7373c79
fix apple clang
Signed-off-by: lizzie <lizzie@eden-emu.dev>
2025-10-11 00:07:37 +00:00
ab4d7ed53b
fix cmake defaults/include stuff for Sequoia
Signed-off-by: crueter <crueter@eden-emu.dev>
2025-10-11 00:07:19 +00:00
69887c91bb
[nce] signal hanlder fixes for sigaction
Signed-off-by: lizzie <lizzie@eden-emu.dev>
2025-10-11 00:05:17 +00:00
256cdca636
[docs, nce] use macos handler, cross arm inst
Signed-off-by: lizzie <lizzie@eden-emu.dev>
2025-10-11 00:05:17 +00:00
da7dc2885a
[nce] fix tls using c23 kw
Signed-off-by: lizzie <lizzie@eden-emu.dev>
2025-10-11 00:05:17 +00:00
53c705c09e
[nce] fix yoruself amd64
Signed-off-by: lizzie <lizzie@eden-emu.dev>
2025-10-11 00:05:17 +00:00
f19a66d2d3
[nce] fix macos
Signed-off-by: lizzie <lizzie@eden-emu.dev>
2025-10-11 00:05:17 +00:00
bdf1d80544
[nce] fix linux build
Signed-off-by: lizzie <lizzie@eden-emu.dev>
2025-10-11 00:05:17 +00:00
4622d869e3
[nce] fix thread kill
Signed-off-by: lizzie <lizzie@eden-emu.dev>
2025-10-11 00:05:17 +00:00
5cd245d445
[cmake] enable nce on macos
Signed-off-by: crueter <crueter@eden-emu.dev>
2025-10-11 00:05:17 +00:00
29582620a7
[nce] fix extra mangled symbols i forgot
Signed-off-by: lizzie <lizzie@eden-emu.dev>
2025-10-11 00:04:53 +00:00
b403aedab9
[nce] add extra underscore, bother with preventing mangling later
Signed-off-by: lizzie <lizzie@eden-emu.dev>
2025-10-11 00:04:53 +00:00
f43bc8e15c
[nce] more annoying syscalls and stuff
Signed-off-by: lizzie <lizzie@eden-emu.dev>
2025-10-11 00:04:53 +00:00
25e8088e1f
[nce] fix apple gettid and tkill
Signed-off-by: lizzie <lizzie@eden-emu.dev>
2025-10-11 00:04:52 +00:00
cb7a67f6d5
[nce] add syscall number for nearest thing to a tkill
Signed-off-by: lizzie <lizzie@eden-emu.dev>
2025-10-11 00:04:52 +00:00
6eb2de327b
[nce] more arm macos fixes
Signed-off-by: lizzie <lizzie@eden-emu.dev>
2025-10-11 00:04:52 +00:00
4b1b30712c
[nce] more apple fixes
Signed-off-by: lizzie <lizzie@eden-emu.dev>
2025-10-11 00:04:52 +00:00
4f5ab82eca
[nce] common ctx
Signed-off-by: lizzie <lizzie@eden-emu.dev>
2025-10-11 00:04:52 +00:00
782f9adc25
[nce, dynarmic] macOS port
Signed-off-by: lizzie <lizzie@eden-emu.dev>
2025-10-11 00:04:52 +00:00
15 changed files with 302 additions and 146 deletions

View file

@ -168,6 +168,8 @@ set(YUZU_QT_MIRROR "" CACHE STRING "What mirror to use for downloading the bundl
option(ENABLE_CUBEB "Enables the cubeb audio backend" ON)
# See https://github.com/llvm/llvm-project/issues/123946
# OpenBSD va_list doesn't play nice with precompiled headers
set(EXT_DEFAULT OFF)
if (MSVC OR ANDROID)
set(EXT_DEFAULT ON)
@ -338,7 +340,7 @@ if (YUZU_LEGACY)
add_compile_definitions(YUZU_LEGACY)
endif()
if (ARCHITECTURE_arm64 AND (ANDROID OR PLATFORM_LINUX))
if (ARCHITECTURE_arm64 AND (ANDROID OR APPLE OR PLATFORM_LINUX))
set(HAS_NCE 1)
add_compile_definitions(HAS_NCE=1)
endif()
@ -483,7 +485,6 @@ if (YUZU_USE_CPM)
# Opus
AddJsonPackage(opus)
if (Opus_ADDED)
if (MSVC AND CXX_CLANG)
target_compile_options(opus PRIVATE

View file

@ -1,6 +1,8 @@
# SPDX-FileCopyrightText: Copyright 2025 crueter
# SPDX-License-Identifier: GPL-3.0-or-later
cmake_minimum_required(VERSION 3.22)
if (MSVC OR ANDROID)
set(BUNDLED_DEFAULT ON)
else()
@ -13,7 +15,6 @@ option(CPMUTIL_FORCE_BUNDLED
option(CPMUTIL_FORCE_SYSTEM
"Force system packages for all CPM dependencies (NOT RECOMMENDED)" OFF)
cmake_minimum_required(VERSION 3.22)
include(CPM)
# cpmfile parsing

View file

@ -0,0 +1,8 @@
# Cross compile ARM64
A painless guide to cross-compiling (or testing NCE) from an x86_64 system without polluting your main system.
- Install QEMU: `sudo pkg install qemu`
- Download Debian 13: `wget https://cdimage.debian.org/debian-cd/current/arm64/iso-cd/debian-13.0.0-arm64-netinst.iso`
- Create a system disk: `qemu-img create -f qcow2 debian-13-arm64-ci.qcow2 30G`
- Run the VM: `qemu-system-aarch64 -M virt -m 2G -cpu max -bios /usr/local/share/qemu/edk2-aarch64-code.fd -drive if=none,file=debian-13.0.0-arm64-netinst.iso,format=raw,id=cdrom -device scsi-cd,drive=cdrom -drive if=none,file=debian-13-arm64-ci.qcow2,id=hd0,format=qcow2 -device virtio-blk-device,drive=hd0 -device virtio-gpu-pci -device usb-ehci -device usb-kbd -device intel-hda -device hda-output -nic user,model=virtio-net-pci`

View file

@ -276,4 +276,13 @@ if(YUZU_USE_PRECOMPILED_HEADERS)
target_precompile_headers(common PRIVATE precompiled_headers.h)
endif()
# IOPS (needed for power state) requires linking to IOKit
if (APPLE)
find_library(IOKIT_LIBRARY IOKit)
if(NOT IOKIT_LIBRARY)
message(FATAL_ERROR "IOKit not found, did you install XCode tools?")
endif()
target_link_libraries(common PRIVATE ${IOKIT_LIBRARY})
endif()
create_target_directory_groups(common)

View file

@ -30,6 +30,7 @@
#include <sys/random.h>
#include <mach/vm_map.h>
#include <mach/mach.h>
#include <map>
#endif
// FreeBSD
@ -399,18 +400,65 @@ private:
#ifdef ARCHITECTURE_arm64
static void* ChooseVirtualBase(size_t virtual_size) {
constexpr uintptr_t Map39BitSize = (1ULL << 39);
constexpr uintptr_t Map36BitSize = (1ULL << 36);
// This is not a cryptographic application, we just want something random.
std::mt19937_64 rng;
#ifdef __APPLE__
// TODO: Fatal flaw, regions may change if map inserts elements, very rare, but MAY HAPPEN!
std::map<u64, u64> aspace_list;
kern_return_t krc = KERN_SUCCESS;
vm_address_t address = 0;
vm_size_t r_size = 0;
uint32_t depth = 1;
do {
struct vm_region_submap_info_64 info;
mach_msg_type_number_t count = VM_REGION_SUBMAP_INFO_COUNT_64;
krc = vm_region_recurse_64(mach_task_self(), &address, &r_size, &depth, (vm_region_info_64_t)&info, &count);
if (krc == KERN_INVALID_ADDRESS)
break;
if (info.is_submap){
depth++;
} else {
aspace_list.insert({ u64(address), u64(r_size) });
address += r_size;
}
} while(1);
for (auto it = aspace_list.begin(); it != aspace_list.end(); it++) {
auto const next = std::next(it);
// properties of hole
auto const addr = it->first + it->second;
auto const size = (next != aspace_list.end()) ? next->first - addr : std::numeric_limits<u64>::max() - addr;
ASSERT(next == aspace_list.end() || it->first < next->first);
if (size > virtual_size && uintptr_t(addr + size) >= Map36BitSize) {
// valid address for NCE
if (uintptr_t(addr) >= Map36BitSize) { //common case: hole after 39 bit
if (size >= virtual_size) {
void* p = mmap(reinterpret_cast<void*>(addr), virtual_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (p == MAP_FAILED)
continue;
return p;
}
// skip
} else { //edge case: hole before 39 bit
auto const rem_size = size - (Map36BitSize - uintptr_t(addr));
if (rem_size >= virtual_size) {
void* p = mmap(reinterpret_cast<void*>(Map36BitSize), virtual_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (p == MAP_FAILED)
continue;
return p;
}
// skip
}
}
}
UNREACHABLE();
#else
constexpr uintptr_t Map39BitSize = (1ULL << 39);
// We want to ensure we are allocating at an address aligned to the L2 block size.
// For Qualcomm devices, we must also allocate memory above 36 bits.
const size_t lower = Map36BitSize / HugePageSize;
const size_t upper = (Map39BitSize - virtual_size) / HugePageSize;
const size_t range = upper - lower;
// This is not a cryptographic application, we just want something random.
std::mt19937_64 rng;
// Try up to 64 times to allocate memory at random addresses in the range.
for (int i = 0; i < 64; i++) {
// Calculate a possible location.
@ -434,6 +482,7 @@ static void* ChooseVirtualBase(size_t virtual_size) {
}
return MAP_FAILED;
#endif
}
#else
@ -500,9 +549,10 @@ class HostMemory::Impl {
public:
explicit Impl(size_t backing_size_, size_t virtual_size_)
: backing_size{backing_size_}, virtual_size{virtual_size_} {
long page_size = sysconf(_SC_PAGESIZE);
ASSERT_MSG(page_size == 0x1000, "page size {:#x} is incompatible with 4K paging",
page_size);
// TODO: Solve all 4k paging issues
//long page_size = sysconf(_SC_PAGESIZE);
//ASSERT_MSG(page_size == 0x1000, "page size {:#x} is incompatible with 4K paging",
// page_size);
// Backing memory initialization
#if defined(__sun__) || defined(__HAIKU__) || defined(__NetBSD__) || defined(__DragonFly__)
fd = shm_open_anon(O_RDWR | O_CREAT | O_EXCL | O_NOFOLLOW, 0600);

View file

@ -10,27 +10,16 @@
namespace Common {
#ifdef __ANDROID__
template <typename T>
T* LookupLibcSymbol(const char* name) {
#if defined(__BIONIC__)
Common::DynamicLibrary provider("libc.so");
if (!provider.IsOpen()) {
UNREACHABLE_MSG("Failed to open libc!");
}
#else
// For other operating environments, we assume the symbol is not overridden.
const char* base = nullptr;
Common::DynamicLibrary provider(base);
#endif
ASSERT_MSG(provider.IsOpen(), "Failed to open libc!");
void* sym = provider.GetSymbolAddress(name);
if (sym == nullptr) {
sym = dlsym(RTLD_DEFAULT, name);
}
if (sym == nullptr) {
UNREACHABLE_MSG("Unable to find symbol {}!", name);
}
ASSERT_MSG(sym != nullptr, "Unable to find symbol {}!", name);
return reinterpret_cast<T*>(sym);
}
@ -38,5 +27,10 @@ int SigAction(int signum, const struct sigaction* act, struct sigaction* oldact)
static auto libc_sigaction = LookupLibcSymbol<decltype(sigaction)>("sigaction");
return libc_sigaction(signum, act, oldact);
}
#else
// Fallback variant selected by the surrounding #else: performs no libc symbol
// lookup and simply forwards all three arguments to the platform's sigaction,
// returning its result unchanged.
int SigAction(int signum, const struct sigaction* act, struct sigaction* oldact) {
return sigaction(signum, act, oldact);
}
#endif
} // namespace Common

View file

@ -13,12 +13,14 @@
#include "core/arm/nce/patcher.h"
#include "core/core.h"
#include "core/memory.h"
#include "core/hle/kernel/k_process.h"
#include "dynarmic/common/context.h"
#include <signal.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <pthread.h>
namespace Core {
@ -33,95 +35,67 @@ static_assert(offsetof(NativeExecutionParameters, native_context) == TpidrEl0Nat
static_assert(offsetof(NativeExecutionParameters, lock) == TpidrEl0Lock);
static_assert(offsetof(NativeExecutionParameters, magic) == TpidrEl0TlsMagic);
fpsimd_context* GetFloatingPointState(mcontext_t& host_ctx) {
_aarch64_ctx* header = reinterpret_cast<_aarch64_ctx*>(&host_ctx.__reserved);
while (header->magic != FPSIMD_MAGIC) {
header = reinterpret_cast<_aarch64_ctx*>(reinterpret_cast<char*>(header) + header->size);
}
return reinterpret_cast<fpsimd_context*>(header);
}
using namespace Common::Literals;
constexpr u32 StackSize = 128_KiB;
} // namespace
// Rewrites the signal frame in `raw_context` so that it describes the guest instead of the host.
//
// The host's callee-saved general-purpose registers (x19..x30), callee-saved vector registers
// (starting at q8) and stack pointer are stashed in guest_ctx->host_ctx first, because
// SaveGuestContext later restores them from there and takes the host return address from
// host_saved_regs[11]. After that, every guest register except tpidr_el0 is written into the
// frame.
//
// @param raw_context Opaque signal context pointer handed to the signal handler.
// @return The thread-local parameter block (taken from x9), which the caller installs as the new
//         thread-local storage pointer.
void* ArmNce::RestoreGuestContext(void* raw_context) {
    CTX_DECLARE(raw_context);

    // Thread-local parameters will be located in x9.
    auto* tpidr = reinterpret_cast<NativeExecutionParameters*>(CTX_X(9));
    auto* guest_ctx = static_cast<GuestContext*>(tpidr->native_context);

    // Save host callee-saved registers. Both the vector and the general-purpose set must be
    // captured here; SaveGuestContext copies them back verbatim.
    std::memcpy(guest_ctx->host_ctx.host_saved_vregs.data(), &CTX_Q(8),
                sizeof(guest_ctx->host_ctx.host_saved_vregs));
    std::memcpy(guest_ctx->host_ctx.host_saved_regs.data(), &CTX_X(19),
                sizeof(guest_ctx->host_ctx.host_saved_regs));

    // Save stack pointer.
    guest_ctx->host_ctx.host_sp = CTX_SP;

    // Restore all guest state except tpidr_el0.
    // NOTE: sp goes to SP and pc goes to PC; these two must not be crossed.
    CTX_SP = guest_ctx->sp;
    CTX_PC = guest_ctx->pc;
    CTX_PSTATE = guest_ctx->pstate;
    CTX_FPCR = guest_ctx->fpcr;
    CTX_FPSR = guest_ctx->fpsr;
    std::memcpy(&CTX_X(0), guest_ctx->cpu_registers.data(), sizeof(guest_ctx->cpu_registers));
    std::memcpy(&CTX_Q(0), guest_ctx->vector_registers.data(),
                sizeof(guest_ctx->vector_registers));

    // Return the new thread-local storage pointer.
    return tpidr;
}
void ArmNce::SaveGuestContext(GuestContext* guest_ctx, void* raw_context) {
// Retrieve the host context.
auto& host_ctx = static_cast<ucontext_t*>(raw_context)->uc_mcontext;
// Retrieve the host floating point state.
auto* fpctx = GetFloatingPointState(host_ctx);
CTX_DECLARE(raw_context);
// Save all guest registers except tpidr_el0.
std::memcpy(guest_ctx->cpu_registers.data(), host_ctx.regs, sizeof(host_ctx.regs));
std::memcpy(guest_ctx->vector_registers.data(), fpctx->vregs, sizeof(fpctx->vregs));
guest_ctx->fpsr = fpctx->fpsr;
guest_ctx->fpcr = fpctx->fpcr;
guest_ctx->pstate = static_cast<u32>(host_ctx.pstate);
guest_ctx->pc = host_ctx.pc;
guest_ctx->sp = host_ctx.sp;
std::memcpy(guest_ctx->cpu_registers.data(), &CTX_X(0), sizeof(guest_ctx->cpu_registers));
std::memcpy(guest_ctx->vector_registers.data(), &CTX_Q(0), sizeof(guest_ctx->vector_registers));
guest_ctx->fpsr = CTX_FPSR;
guest_ctx->fpcr = CTX_FPCR;
guest_ctx->pc = CTX_PC;
guest_ctx->sp = CTX_SP;
guest_ctx->pstate = u32(CTX_PSTATE);
// Restore stack pointer.
host_ctx.sp = guest_ctx->host_ctx.host_sp;
CTX_SP = guest_ctx->host_ctx.host_sp;
// Restore host callee-saved registers.
std::memcpy(&host_ctx.regs[19], guest_ctx->host_ctx.host_saved_regs.data(),
std::memcpy(&CTX_X(19), guest_ctx->host_ctx.host_saved_regs.data(),
sizeof(guest_ctx->host_ctx.host_saved_regs));
std::memcpy(&fpctx->vregs[8], guest_ctx->host_ctx.host_saved_vregs.data(),
std::memcpy(&CTX_Q(8), guest_ctx->host_ctx.host_saved_vregs.data(),
sizeof(guest_ctx->host_ctx.host_saved_vregs));
// Return from the call on exit by setting pc to x30.
host_ctx.pc = guest_ctx->host_ctx.host_saved_regs[11];
CTX_PC = guest_ctx->host_ctx.host_saved_regs[11];
// Clear esr_el1 and return it.
host_ctx.regs[0] = guest_ctx->esr_el1.exchange(0);
CTX_X(0) = guest_ctx->esr_el1.exchange(0);
}
bool ArmNce::HandleFailedGuestFault(GuestContext* guest_ctx, void* raw_info, void* raw_context) {
auto& host_ctx = static_cast<ucontext_t*>(raw_context)->uc_mcontext;
CTX_DECLARE(raw_context);
auto* info = static_cast<siginfo_t*>(raw_info);
// We can't handle the access, so determine why we crashed.
const bool is_prefetch_abort = host_ctx.pc == reinterpret_cast<u64>(info->si_addr);
auto const is_prefetch_abort = CTX_PC == reinterpret_cast<u64>(info->si_addr);
// For data aborts, skip the instruction and return to guest code.
// This will allow games to continue in many scenarios where they would otherwise crash.
if (!is_prefetch_abort) {
host_ctx.pc += 4;
CTX_PC += 4;
return true;
}
@ -142,17 +116,13 @@ bool ArmNce::HandleFailedGuestFault(GuestContext* guest_ctx, void* raw_info, voi
}
bool ArmNce::HandleGuestAlignmentFault(GuestContext* guest_ctx, void* raw_info, void* raw_context) {
auto& host_ctx = static_cast<ucontext_t*>(raw_context)->uc_mcontext;
auto* fpctx = GetFloatingPointState(host_ctx);
CTX_DECLARE(raw_context);
auto& memory = guest_ctx->system->ApplicationMemory();
// Match and execute an instruction.
auto next_pc = MatchAndExecuteOneInstruction(memory, &host_ctx, fpctx);
if (next_pc) {
host_ctx.pc = *next_pc;
if (auto next_pc = MatchAndExecuteOneInstruction(memory, raw_context); next_pc) {
CTX_PC = *next_pc;
return true;
}
// We couldn't handle the access.
return HandleFailedGuestFault(guest_ctx, raw_info, raw_context);
}
@ -275,9 +245,51 @@ ArmNce::ArmNce(System& system, bool uses_wall_clock, std::size_t core_index)
ArmNce::~ArmNce() = default;
// Borrowed from libusb
// Returns an identifier for the calling thread. The value is computed once per
// thread using whichever platform primitive is available and is then served
// from a thread-local cache on every later call.
static unsigned int posix_gettid(void) {
    // Zero doubles as the "not yet computed" marker for the per-thread cache.
    static thread_local unsigned int cached_id;
    if (cached_id != 0) {
        return cached_id;
    }

    // -1 means "no platform-specific id obtained"; platforms without a
    // dedicated branch below keep this value.
    int native_id = -1;
#if defined(__ANDROID__)
    native_id = gettid();
#elif defined(__APPLE__)
#ifdef HAVE_PTHREAD_THREADID_NP
    uint64_t wide_id;
    native_id = (pthread_threadid_np(nullptr, &wide_id) == 0) ? static_cast<int>(wide_id) : -1;
#else
    native_id = static_cast<int>(pthread_mach_thread_np(pthread_self()));
#endif
#elif defined(__HAIKU__)
    native_id = get_pthread_thread_id(pthread_self());
#elif defined(__linux__)
    native_id = static_cast<int>(syscall(SYS_gettid));
#elif defined(__NetBSD__)
    native_id = _lwp_self();
#elif defined(__OpenBSD__)
    // Only works on OpenBSD > 5.1, which has real thread support; on 5.1 and
    // earlier the call yields -1.
    native_id = syscall(SYS_getthrid);
#elif defined(__sun__)
    native_id = _lwp_self();
#endif

    if (native_id == -1) {
        // Without a real thread id, fall back to a value that at least tells
        // threads apart. pthread_t is an integer on some platforms and a
        // pointer on others, so a C-style cast is kept here on purpose.
        native_id = (int)(intptr_t)pthread_self();
    }

    cached_id = static_cast<unsigned int>(native_id);
    return cached_id;
}
void ArmNce::Initialize() {
if (m_thread_id == -1) {
m_thread_id = gettid();
m_thread_id = posix_gettid();
}
// Configure signal stack.
@ -308,7 +320,7 @@ void ArmNce::Initialize() {
&ArmNce::ReturnToRunCodeByExceptionLevelChangeSignalHandler);
return_to_run_code_action.sa_mask = signal_mask;
Common::SigAction(ReturnToRunCodeByExceptionLevelChangeSignal, &return_to_run_code_action,
nullptr);
nullptr);
struct sigaction break_from_run_code_action {};
break_from_run_code_action.sa_flags = SA_SIGINFO | SA_ONSTACK;
@ -378,7 +390,11 @@ void ArmNce::SignalInterrupt(Kernel::KThread* thread) {
if (params->is_running) {
// We should signal to the running thread.
// The running thread will unlock the thread context.
#ifdef __linux__
syscall(SYS_tkill, m_thread_id, BreakFromRunCodeSignal);
#else
pthread_kill(pthread_t(m_thread_id), int(BreakFromRunCodeSignal));
#endif
} else {
// If the thread is no longer running, we have nothing to do.
UnlockThreadParameters(params);

View file

@ -9,10 +9,15 @@
/* static HaltReason Core::ArmNce::ReturnToRunCodeByTrampoline(void* tpidr, Core::GuestContext* ctx, u64 trampoline_addr) */
#ifdef __APPLE__
.global __ZN4Core6ArmNce27ReturnToRunCodeByTrampolineEPvPNS_12GuestContextEy
__ZN4Core6ArmNce27ReturnToRunCodeByTrampolineEPvPNS_12GuestContextEy:
#else
.section .text._ZN4Core6ArmNce27ReturnToRunCodeByTrampolineEPvPNS_12GuestContextEm, "ax", %progbits
.global _ZN4Core6ArmNce27ReturnToRunCodeByTrampolineEPvPNS_12GuestContextEm
.type _ZN4Core6ArmNce27ReturnToRunCodeByTrampolineEPvPNS_12GuestContextEm, %function
.global _ZN4Core6ArmNce27ReturnToRunCodeByTrampolineEPvPNS_12GuestContextEm
_ZN4Core6ArmNce27ReturnToRunCodeByTrampolineEPvPNS_12GuestContextEm:
#endif
/* Back up host sp to x3. */
/* Back up host tpidr_el0 to x4. */
mov x3, sp
@ -50,38 +55,52 @@ _ZN4Core6ArmNce27ReturnToRunCodeByTrampolineEPvPNS_12GuestContextEm:
/* static HaltReason Core::ArmNce::ReturnToRunCodeByExceptionLevelChange(int tid, void* tpidr) */
#ifdef __APPLE__
.global __ZN4Core6ArmNce37ReturnToRunCodeByExceptionLevelChangeEiPv
__ZN4Core6ArmNce37ReturnToRunCodeByExceptionLevelChangeEiPv:
#else
.section .text._ZN4Core6ArmNce37ReturnToRunCodeByExceptionLevelChangeEiPv, "ax", %progbits
.global _ZN4Core6ArmNce37ReturnToRunCodeByExceptionLevelChangeEiPv
.type _ZN4Core6ArmNce37ReturnToRunCodeByExceptionLevelChangeEiPv, %function
.global _ZN4Core6ArmNce37ReturnToRunCodeByExceptionLevelChangeEiPv
_ZN4Core6ArmNce37ReturnToRunCodeByExceptionLevelChangeEiPv:
#endif
/* This jumps to the signal handler, which will restore the entire context. */
/* On entry, x0 = thread id, which is already in the right place. */
/* Move tpidr to x9 so it is not trampled. */
mov x9, x1
/* Set up arguments. */
mov x8, #(__NR_tkill)
/* On entry, x0 = thread id, which is already in the right place. Even on macOS. */
mov x9, x1 /* Move tpidr to x9 so it is not trampled. */
mov x1, #(ReturnToRunCodeByExceptionLevelChangeSignal)
/* Tail call the signal handler. */
svc #0
/* Block execution from flowing here. */
brk #1000
#ifdef __APPLE__
/* I can never be happy, why no tkill in mach kernel? Ugh ... */
/* Signature: 328 AUE_PTHREADKILL ALL { int __pthread_kill(int thread_port, int sig); } */
mov x16, #(328)
svc #0x80 /* Tail call the signal handler. */
brk #0xF000 /* See: https://discourse.llvm.org/t/stepping-over-a-brk-instruction-on-arm64/69766/7 */
#else
/* Signature: int tkill(pid_t tid, int sig); x0 = tid, x1 = sig */
mov x8, #(__NR_tkill)
svc #0 /* Tail call the signal handler. */
brk #1000 /* Block execution from flowing here. */
#endif
/* static void Core::ArmNce::ReturnToRunCodeByExceptionLevelChangeSignalHandler(int sig, void* info, void* raw_context) */
#ifdef __APPLE__
.global __ZN4Core6ArmNce50ReturnToRunCodeByExceptionLevelChangeSignalHandlerEiPvS1_
__ZN4Core6ArmNce50ReturnToRunCodeByExceptionLevelChangeSignalHandlerEiPvS1_:
#else
.section .text._ZN4Core6ArmNce50ReturnToRunCodeByExceptionLevelChangeSignalHandlerEiPvS1_, "ax", %progbits
.global _ZN4Core6ArmNce50ReturnToRunCodeByExceptionLevelChangeSignalHandlerEiPvS1_
.type _ZN4Core6ArmNce50ReturnToRunCodeByExceptionLevelChangeSignalHandlerEiPvS1_, %function
.global _ZN4Core6ArmNce50ReturnToRunCodeByExceptionLevelChangeSignalHandlerEiPvS1_
_ZN4Core6ArmNce50ReturnToRunCodeByExceptionLevelChangeSignalHandlerEiPvS1_:
#endif
stp x29, x30, [sp, #-0x10]!
mov x29, sp
/* Call the context restorer with the raw context. */
mov x0, x2
#ifdef __APPLE__
bl __ZN4Core6ArmNce19RestoreGuestContextEPv
#else
bl _ZN4Core6ArmNce19RestoreGuestContextEPv
#endif
/* Save the old value of tpidr_el0. */
mrs x8, tpidr_el0
@ -92,7 +111,11 @@ _ZN4Core6ArmNce50ReturnToRunCodeByExceptionLevelChangeSignalHandlerEiPvS1_:
msr tpidr_el0, x0
/* Unlock the context. */
#ifdef __APPLE__
bl __ZN4Core6ArmNce22UnlockThreadParametersEPv
#else
bl _ZN4Core6ArmNce22UnlockThreadParametersEPv
#endif
/* Returning from here will enter the guest. */
ldp x29, x30, [sp], #0x10
@ -100,10 +123,15 @@ _ZN4Core6ArmNce50ReturnToRunCodeByExceptionLevelChangeSignalHandlerEiPvS1_:
/* static void Core::ArmNce::BreakFromRunCodeSignalHandler(int sig, void* info, void* raw_context) */
#ifdef __APPLE__
.global __ZN4Core6ArmNce29BreakFromRunCodeSignalHandlerEiPvS1_
__ZN4Core6ArmNce29BreakFromRunCodeSignalHandlerEiPvS1_:
#else
.section .text._ZN4Core6ArmNce29BreakFromRunCodeSignalHandlerEiPvS1_, "ax", %progbits
.global _ZN4Core6ArmNce29BreakFromRunCodeSignalHandlerEiPvS1_
.type _ZN4Core6ArmNce29BreakFromRunCodeSignalHandlerEiPvS1_, %function
.global _ZN4Core6ArmNce29BreakFromRunCodeSignalHandlerEiPvS1_
_ZN4Core6ArmNce29BreakFromRunCodeSignalHandlerEiPvS1_:
#endif
/* Check to see if we have the correct TLS magic. */
mrs x8, tpidr_el0
ldr w9, [x8, #(TpidrEl0TlsMagic)]
@ -121,7 +149,11 @@ _ZN4Core6ArmNce29BreakFromRunCodeSignalHandlerEiPvS1_:
/* Tail call the restorer. */
mov x1, x2
#ifdef __APPLE__
b __ZN4Core6ArmNce16SaveGuestContextEPNS_12GuestContextEPv
#else
b _ZN4Core6ArmNce16SaveGuestContextEPNS_12GuestContextEPv
#endif
/* Returning from here will enter host code. */
@ -131,10 +163,15 @@ _ZN4Core6ArmNce29BreakFromRunCodeSignalHandlerEiPvS1_:
/* static void Core::ArmNce::GuestAlignmentFaultSignalHandler(int sig, void* info, void* raw_context) */
#ifdef __APPLE__
.global __ZN4Core6ArmNce32GuestAlignmentFaultSignalHandlerEiPvS1_
__ZN4Core6ArmNce32GuestAlignmentFaultSignalHandlerEiPvS1_:
#else
.section .text._ZN4Core6ArmNce32GuestAlignmentFaultSignalHandlerEiPvS1_, "ax", %progbits
.global _ZN4Core6ArmNce32GuestAlignmentFaultSignalHandlerEiPvS1_
.type _ZN4Core6ArmNce32GuestAlignmentFaultSignalHandlerEiPvS1_, %function
.global _ZN4Core6ArmNce32GuestAlignmentFaultSignalHandlerEiPvS1_
_ZN4Core6ArmNce32GuestAlignmentFaultSignalHandlerEiPvS1_:
#endif
/* Check to see if we have the correct TLS magic. */
mrs x8, tpidr_el0
ldr w9, [x8, #(TpidrEl0TlsMagic)]
@ -146,7 +183,11 @@ _ZN4Core6ArmNce32GuestAlignmentFaultSignalHandlerEiPvS1_:
/* Incorrect TLS magic, so this is a host fault. */
/* Tail call the handler. */
#ifdef __APPLE__
b __ZN4Core6ArmNce24HandleHostAlignmentFaultEiPvS1_
#else
b _ZN4Core6ArmNce24HandleHostAlignmentFaultEiPvS1_
#endif
1:
/* Correct TLS magic, so this is a guest fault. */
@ -163,7 +204,11 @@ _ZN4Core6ArmNce32GuestAlignmentFaultSignalHandlerEiPvS1_:
msr tpidr_el0, x3
/* Call the handler. */
#ifdef __APPLE__
bl __ZN4Core6ArmNce25HandleGuestAlignmentFaultEPNS_12GuestContextEPvS3_
#else
bl _ZN4Core6ArmNce25HandleGuestAlignmentFaultEPNS_12GuestContextEPvS3_
#endif
/* If the handler returned false, we want to preserve the host tpidr_el0. */
cbz x0, 2f
@ -177,10 +222,15 @@ _ZN4Core6ArmNce32GuestAlignmentFaultSignalHandlerEiPvS1_:
ret
/* static void Core::ArmNce::GuestAccessFaultSignalHandler(int sig, void* info, void* raw_context) */
#ifdef __APPLE__
.global __ZN4Core6ArmNce29GuestAccessFaultSignalHandlerEiPvS1_
__ZN4Core6ArmNce29GuestAccessFaultSignalHandlerEiPvS1_:
#else
.section .text._ZN4Core6ArmNce29GuestAccessFaultSignalHandlerEiPvS1_, "ax", %progbits
.global _ZN4Core6ArmNce29GuestAccessFaultSignalHandlerEiPvS1_
.type _ZN4Core6ArmNce29GuestAccessFaultSignalHandlerEiPvS1_, %function
.global _ZN4Core6ArmNce29GuestAccessFaultSignalHandlerEiPvS1_
_ZN4Core6ArmNce29GuestAccessFaultSignalHandlerEiPvS1_:
#endif
/* Check to see if we have the correct TLS magic. */
mrs x8, tpidr_el0
ldr w9, [x8, #(TpidrEl0TlsMagic)]
@ -192,7 +242,11 @@ _ZN4Core6ArmNce29GuestAccessFaultSignalHandlerEiPvS1_:
/* Incorrect TLS magic, so this is a host fault. */
/* Tail call the handler. */
#ifdef __APPLE__
b __ZN4Core6ArmNce21HandleHostAccessFaultEiPvS1_
#else
b _ZN4Core6ArmNce21HandleHostAccessFaultEiPvS1_
#endif
1:
/* Correct TLS magic, so this is a guest fault. */
@ -209,7 +263,11 @@ _ZN4Core6ArmNce29GuestAccessFaultSignalHandlerEiPvS1_:
msr tpidr_el0, x3
/* Call the handler. */
#ifdef __APPLE__
bl __ZN4Core6ArmNce22HandleGuestAccessFaultEPNS_12GuestContextEPvS3_
#else
bl _ZN4Core6ArmNce22HandleGuestAccessFaultEPNS_12GuestContextEPvS3_
#endif
/* If the handler returned false, we want to preserve the host tpidr_el0. */
cbz x0, 2f
@ -224,10 +282,15 @@ _ZN4Core6ArmNce29GuestAccessFaultSignalHandlerEiPvS1_:
/* static void Core::ArmNce::LockThreadParameters(void* tpidr) */
#ifdef __APPLE__
.global __ZN4Core6ArmNce20LockThreadParametersEPv
__ZN4Core6ArmNce20LockThreadParametersEPv:
#else
.section .text._ZN4Core6ArmNce20LockThreadParametersEPv, "ax", %progbits
.global _ZN4Core6ArmNce20LockThreadParametersEPv
.type _ZN4Core6ArmNce20LockThreadParametersEPv, %function
.global _ZN4Core6ArmNce20LockThreadParametersEPv
_ZN4Core6ArmNce20LockThreadParametersEPv:
#endif
/* Offset to lock member. */
add x0, x0, #(TpidrEl0Lock)
@ -252,10 +315,15 @@ _ZN4Core6ArmNce20LockThreadParametersEPv:
/* static void Core::ArmNce::UnlockThreadParameters(void* tpidr) */
#ifdef __APPLE__
.global __ZN4Core6ArmNce22UnlockThreadParametersEPv
__ZN4Core6ArmNce22UnlockThreadParametersEPv:
#else
.section .text._ZN4Core6ArmNce22UnlockThreadParametersEPv, "ax", %progbits
.global _ZN4Core6ArmNce22UnlockThreadParametersEPv
.type _ZN4Core6ArmNce22UnlockThreadParametersEPv, %function
.global _ZN4Core6ArmNce22UnlockThreadParametersEPv
_ZN4Core6ArmNce22UnlockThreadParametersEPv:
#endif
/* Offset to lock member. */
add x0, x0, #(TpidrEl0Lock)

View file

@ -5,22 +5,24 @@
#define __ASSEMBLY__
#ifdef __APPLE__
/* https://cpip.readthedocs.io/en/stable/_static/dictobject.c/signal.h_bbe000f9714f274340a28e000a369354.html */
#define ReturnToRunCodeByExceptionLevelChangeSignal 31
#define BreakFromRunCodeSignal 16
#define GuestAccessFaultSignal 11
#define GuestAlignmentFaultSignal 10
#else
#include <asm-generic/signal.h>
#include <asm-generic/unistd.h>
#define ReturnToRunCodeByExceptionLevelChangeSignal SIGUSR2
#define BreakFromRunCodeSignal SIGURG
#define GuestAccessFaultSignal SIGSEGV
#define GuestAlignmentFaultSignal SIGBUS
#endif
#define GuestContextSp 0xF8
#define GuestContextHostContext 0x320
#define HostContextSpTpidrEl0 0xE0
#define HostContextTpidrEl0 0xE8
#define HostContextRegs 0x0
#define HostContextVregs 0x60
#define TpidrEl0NativeContext 0x10
#define TpidrEl0Lock 0x18
#define TpidrEl0TlsMagic 0x20
@ -28,3 +30,8 @@
#define SpinLockLocked 0
#define SpinLockUnlocked 1
#define HostContextSpTpidrEl0 0xE0
#define HostContextTpidrEl0 0xE8
#define HostContextRegs 0x0
#define HostContextVregs 0x60

View file

@ -2,8 +2,9 @@
// SPDX-FileCopyrightText: Copyright 2023 merryhime <https://mary.rs>
// SPDX-License-Identifier: GPL-2.0-or-later
#include "common/bit_cast.h"
#include "core/arm/nce/interpreter_visitor.h"
#include "core/memory.h"
#include "dynarmic/common/context.h"
namespace Core {
@ -790,23 +791,20 @@ bool InterpreterVisitor::LDR_reg_fpsimd(Imm<2> size, Imm<1> opc_1, Reg Rm, Imm<3
return this->SIMDOffset(scale, shift, opc_0, Rm, option, Rn, Vt);
}
std::optional<u64> MatchAndExecuteOneInstruction(Core::Memory::Memory& memory, mcontext_t* context,
fpsimd_context* fpsimd_context) {
std::span<u64, 31> regs(reinterpret_cast<u64*>(context->regs), 31);
std::span<u128, 32> vregs(reinterpret_cast<u128*>(fpsimd_context->vregs), 32);
u64& sp = *reinterpret_cast<u64*>(&context->sp);
const u64& pc = *reinterpret_cast<u64*>(&context->pc);
std::optional<u64> MatchAndExecuteOneInstruction(Core::Memory::Memory& memory, void* raw_context) {
CTX_DECLARE(raw_context);
std::span<u64, 31> regs(reinterpret_cast<u64*>(&CTX_X(0)), 31);
std::span<u128, 32> vregs(reinterpret_cast<u128*>(&CTX_Q(0)), 32);
u64& sp = *reinterpret_cast<u64*>(&CTX_SP);
const u64& pc = *reinterpret_cast<u64*>(&CTX_PC);
InterpreterVisitor visitor(memory, regs, vregs, sp, pc);
u32 instruction = memory.Read32(pc);
bool was_executed = false;
if (auto decoder = Dynarmic::A64::Decode<VisitorBase>(instruction)) {
was_executed = decoder->get().call(visitor, instruction);
} else {
LOG_ERROR(Core_ARM, "Unallocated encoding: {:#x}", instruction);
}
return was_executed ? std::optional<u64>(pc + 4) : std::nullopt;
}

View file

@ -9,6 +9,7 @@
#include <atomic>
#include <signal.h>
#include <span>
#include <unistd.h>
#include <span>
@ -105,7 +106,6 @@ private:
const u64& m_pc;
};
std::optional<u64> MatchAndExecuteOneInstruction(Core::Memory::Memory& memory, mcontext_t* context,
fpsimd_context* fpsimd_context);
std::optional<u64> MatchAndExecuteOneInstruction(Core::Memory::Memory& memory, void* raw_context);
} // namespace Core

View file

@ -4,15 +4,14 @@
#include "common/arm64/native_clock.h"
#include "common/bit_cast.h"
#include "common/literals.h"
#include "core/arm/nce/arm_nce.h"
#include "core/arm/nce/guest_context.h"
#include "core/arm/nce/instructions.h"
#include "core/arm/nce/patcher.h"
#include "core/core.h"
#include "core/core_timing.h"
#include "core/hle/kernel/svc.h"
#include "core/memory.h"
#include "core/hle/kernel/k_thread.h"
#include "core/memory.h"
namespace Core::NCE {

View file

@ -6,6 +6,8 @@
* SPDX-License-Identifier: 0BSD
*/
#include "dynarmic/backend/exception_handler.h"
#include <cstring>
#include <functional>
#include <memory>
@ -118,8 +120,9 @@ void SigHandler::SigAction(int sig, siginfo_t* info, void* raw_context) {
CTX_DECLARE(raw_context);
#if defined(ARCHITECTURE_x86_64)
{
std::shared_lock guard(sig_handler->code_block_infos_mutex);
if (auto const iter = sig_handler->FindCodeBlockInfo(CTX_RIP); iter != sig_handler->code_block_infos.end()) {
std::shared_lock<std::shared_mutex> guard(sig_handler->code_block_infos_mutex);
const auto iter = sig_handler->FindCodeBlockInfo(CTX_RIP);
if (iter != sig_handler->code_block_infos.end()) {
FakeCall fc = iter->second.cb(CTX_RIP);
CTX_RSP -= sizeof(u64);
*mcl::bit_cast<u64*>(CTX_RSP) = fc.ret_rip;
@ -130,8 +133,9 @@ void SigHandler::SigAction(int sig, siginfo_t* info, void* raw_context) {
fmt::print(stderr, "Unhandled {} at rip {:#018x}\n", sig == SIGSEGV ? "SIGSEGV" : "SIGBUS", CTX_RIP);
#elif defined(ARCHITECTURE_arm64)
{
std::shared_lock guard(sig_handler->code_block_infos_mutex);
if (const auto iter = sig_handler->FindCodeBlockInfo(CTX_PC); iter != sig_handler->code_block_infos.end()) {
std::shared_lock<std::shared_mutex> guard(sig_handler->code_block_infos_mutex);
const auto iter = sig_handler->FindCodeBlockInfo(CTX_PC);
if (iter != sig_handler->code_block_infos.end()) {
FakeCall fc = iter->second.cb(CTX_PC);
CTX_PC = fc.call_pc;
return;
@ -187,11 +191,11 @@ private:
ExceptionHandler::ExceptionHandler() = default;
ExceptionHandler::~ExceptionHandler() = default;
#if defined(MCL_ARCHITECTURE_X86_64)
#if defined(ARCHITECTURE_x86_64)
void ExceptionHandler::Register(X64::BlockOfCode& code) {
impl = std::make_unique<Impl>(mcl::bit_cast<u64>(code.getCode()), code.GetTotalCodeSize());
}
#elif defined(MCL_ARCHITECTURE_ARM64)
#elif defined(ARCHITECTURE_arm64)
void ExceptionHandler::Register(oaknut::CodeBlock& mem, std::size_t size) {
impl = std::make_unique<Impl>(mcl::bit_cast<u64>(mem.ptr()), size);
}

View file

@ -104,6 +104,7 @@
# error "unimplemented"
#endif
// TODO: FreeBSD/OpenBSD
#ifdef ARCHITECTURE_arm64
#ifdef __APPLE__
inline _STRUCT_ARM_NEON_STATE64* GetFloatingPointState(mcontext_t& host_ctx) {

View file

@ -368,8 +368,8 @@ if (APPLE)
if (YUZU_USE_BUNDLED_MOLTENVK)
set(MOLTENVK_PLATFORM "macOS")
set(MOLTENVK_VERSION "v1.3.0")
download_moltenvk(${MOLTENVK_PLATFORM} ${MOLTENVK_VERSION})
set(MOLTENVK_VERSION "v1.4.0")
download_moltenvk_external(${MOLTENVK_PLATFORM} ${MOLTENVK_VERSION})
endif()
find_library(MOLTENVK_LIBRARY MoltenVK REQUIRED)
message(STATUS "Using MoltenVK at ${MOLTENVK_LIBRARY}.")