From 5534cebe1cc2874ec0100d737caca2d904e3228e Mon Sep 17 00:00:00 2001 From: lizzie Date: Mon, 29 Sep 2025 08:30:12 +0000 Subject: [PATCH] [dynarmic] backport WAITPKG based spinlocks Signed-off-by: lizzie --- .../dynarmic/backend/x64/block_of_code.cpp | 2 + .../backend/x64/emit_x64_memory.cpp.inc | 3 +- .../dynarmic/backend/x64/emit_x64_memory.h | 5 ++- .../src/dynarmic/backend/x64/host_feature.h | 3 +- .../src/dynarmic/common/spin_lock_x64.cpp | 37 +++++++++++++++++-- .../src/dynarmic/common/spin_lock_x64.h | 5 ++- 6 files changed, 47 insertions(+), 8 deletions(-) diff --git a/src/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp b/src/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp index d5d5f089ff..4a8de6475e 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp @@ -188,6 +188,8 @@ HostFeature GetHostFeatures() { features |= HostFeature::LZCNT; if (cpu_info.has(Cpu::tGFNI)) features |= HostFeature::GFNI; + if (cpu_info.has(Cpu::tWAITPKG)) + features |= HostFeature::WAITPKG; if (cpu_info.has(Cpu::tBMI2)) { // BMI2 instructions such as pdep and pext have been very slow up until Zen 3. 
diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.cpp.inc b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.cpp.inc index 34f77b0446..36a2d40de6 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.cpp.inc +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.cpp.inc @@ -430,10 +430,11 @@ void AxxEmitX64::EmitExclusiveWriteMemoryInline(AxxEmitContext& ctx, IR::Inst* i const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[1]); const Xbyak::Reg32 status = ctx.reg_alloc.ScratchGpr().cvt32(); const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr(); + const Xbyak::Reg64 tmp2 = ctx.reg_alloc.ScratchGpr(); const auto wrapped_fn = exclusive_write_fallbacks[std::make_tuple(ordered, bitsize, vaddr.getIdx(), value.getIdx())]; - EmitExclusiveLock(code, conf, tmp, eax); + EmitExclusiveLock(code, conf, tmp, tmp2); SharedLabel end = GenSharedLabel(); diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.h b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.h index 75a47c6a80..c363ea1b6b 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.h +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + /* This file is part of the dynarmic project. 
* Copyright (c) 2022 MerryMage * SPDX-License-Identifier: 0BSD @@ -343,7 +346,7 @@ void EmitExclusiveLock(BlockOfCode& code, const UserConfig& conf, Xbyak::Reg64 p } code.mov(pointer, mcl::bit_cast(GetExclusiveMonitorLockPointer(conf.global_monitor))); - EmitSpinLockLock(code, pointer, tmp); + EmitSpinLockLock(code, pointer, tmp, code.HasHostFeature(HostFeature::WAITPKG)); } template diff --git a/src/dynarmic/src/dynarmic/backend/x64/host_feature.h b/src/dynarmic/src/dynarmic/backend/x64/host_feature.h index 7246ed18d4..34dca971cb 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/host_feature.h +++ b/src/dynarmic/src/dynarmic/backend/x64/host_feature.h @@ -35,9 +35,10 @@ enum class HostFeature : u64 { BMI2 = 1ULL << 19, LZCNT = 1ULL << 20, GFNI = 1ULL << 21, + WAITPKG = 1ULL << 22, // Zen-based BMI2 - FastBMI2 = 1ULL << 22, + FastBMI2 = 1ULL << 23, // Orthographic AVX512 features on 128 and 256 vectors AVX512_Ortho = AVX512F | AVX512VL, diff --git a/src/dynarmic/src/dynarmic/common/spin_lock_x64.cpp b/src/dynarmic/src/dynarmic/common/spin_lock_x64.cpp index da50179de9..5307672bfe 100644 --- a/src/dynarmic/src/dynarmic/common/spin_lock_x64.cpp +++ b/src/dynarmic/src/dynarmic/common/spin_lock_x64.cpp @@ -22,17 +22,46 @@ static const auto default_cg_mode = nullptr; //Allow RWE namespace Dynarmic { -void EmitSpinLockLock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32 tmp) { +void EmitSpinLockLock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32 tmp, bool waitpkg) { + // No regalloc here, so the WAITPKG path saves RAX/RBX/RDX itself; push/pop must use the 64-bit names because x86-64 has no 32-bit push/pop encoding. NOTE(review): ptr must not be rax, rbx or rdx - they are overwritten inside the wait loop before ptr is dereferenced again; confirm against callers + if (waitpkg) { + code.push(Xbyak::util::rax); + code.push(Xbyak::util::rbx); + code.push(Xbyak::util::rdx); + } Xbyak::Label start, loop; code.jmp(start, code.T_NEAR); code.L(loop); - code.pause(); + if (waitpkg) { + // EAX, EBX and EDX are overwritten below; the push/pop pair around the loop restores them for the caller
+ // Arm address monitoring on ptr. NOTE(review): the lock word is not re-checked between umonitor and umwait, so a release landing in that window is not a wake-up event - confirm the timeout-bounded delay is acceptable + code.umonitor(ptr); + // control.bit[0] = 0: C0.2 | Slower Wakeup | Larger Savings + // control.bit[0] = 1: C0.1 | Faster Wakeup | Smaller Savings + // edx:eax is implicitly used as a 64-bit deadline timestamp + // Use the maximum so that we use the operating system's maximum + // allowed wait time within the IA32_UMWAIT_CONTROL register + // Enter the power state selected by ebx and wait for a write to ptr + code.mov(Xbyak::util::eax, 0xFFFFFFFF); + code.mov(Xbyak::util::edx, Xbyak::util::eax); + // TODO: We can only be here because tmp is 1 already - however we repeatedly overwrite it... + code.mov(Xbyak::util::ebx, 1); + code.umwait(Xbyak::util::ebx); + // CF == 1 if we hit the OS-timeout in IA32_UMWAIT_CONTROL without a write + // CF == 0 if we exited the wait for any other reason + } else { + code.pause(); + } code.L(start); code.mov(tmp, 1); /*code.lock();*/ code.xchg(code.dword[ptr], tmp); code.test(tmp, tmp); code.jnz(loop, code.T_NEAR); + if (waitpkg) { + code.pop(Xbyak::util::rdx); + code.pop(Xbyak::util::rbx); + code.pop(Xbyak::util::rax); + } } void EmitSpinLockUnlock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32 tmp) { @@ -60,7 +89,7 @@ void SpinLockImpl::Initialize() { code.align(); lock = code.getCurr(); - EmitSpinLockLock(code, ABI_PARAM1, code.eax); + EmitSpinLockLock(code, ABI_PARAM1, code.eax, false); code.ret(); code.align(); diff --git a/src/dynarmic/src/dynarmic/common/spin_lock_x64.h b/src/dynarmic/src/dynarmic/common/spin_lock_x64.h index df6a3d7407..df6860e2f2 100644 --- a/src/dynarmic/src/dynarmic/common/spin_lock_x64.h +++ b/src/dynarmic/src/dynarmic/common/spin_lock_x64.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + /* This file is part of the dynarmic project. 
* Copyright (c) 2022 MerryMage * SPDX-License-Identifier: 0BSD @@ -9,7 +12,7 @@ namespace Dynarmic { -void EmitSpinLockLock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32 tmp); +void EmitSpinLockLock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32 tmp, bool waitpkg); void EmitSpinLockUnlock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32 tmp); } // namespace Dynarmic