[dynarmic] backport WAITPKG based spinlocks #2614
6 changed files with 47 additions and 8 deletions
|
@ -188,6 +188,8 @@ HostFeature GetHostFeatures() {
|
||||||
features |= HostFeature::LZCNT;
|
features |= HostFeature::LZCNT;
|
||||||
if (cpu_info.has(Cpu::tGFNI))
|
if (cpu_info.has(Cpu::tGFNI))
|
||||||
features |= HostFeature::GFNI;
|
features |= HostFeature::GFNI;
|
||||||
|
if (cpu_info.has(Cpu::tWAITPKG))
|
||||||
|
features |= HostFeature::WAITPKG;
|
||||||
|
|
||||||
if (cpu_info.has(Cpu::tBMI2)) {
|
if (cpu_info.has(Cpu::tBMI2)) {
|
||||||
// BMI2 instructions such as pdep and pext have been very slow up until Zen 3.
|
// BMI2 instructions such as pdep and pext have been very slow up until Zen 3.
|
||||||
|
|
|
@ -430,10 +430,11 @@ void AxxEmitX64::EmitExclusiveWriteMemoryInline(AxxEmitContext& ctx, IR::Inst* i
|
||||||
const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[1]);
|
const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[1]);
|
||||||
const Xbyak::Reg32 status = ctx.reg_alloc.ScratchGpr().cvt32();
|
const Xbyak::Reg32 status = ctx.reg_alloc.ScratchGpr().cvt32();
|
||||||
const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr();
|
const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr();
|
||||||
|
const Xbyak::Reg64 tmp2 = ctx.reg_alloc.ScratchGpr();
|
||||||
|
|
||||||
const auto wrapped_fn = exclusive_write_fallbacks[std::make_tuple(ordered, bitsize, vaddr.getIdx(), value.getIdx())];
|
const auto wrapped_fn = exclusive_write_fallbacks[std::make_tuple(ordered, bitsize, vaddr.getIdx(), value.getIdx())];
|
||||||
|
|
||||||
EmitExclusiveLock(code, conf, tmp, eax);
|
EmitExclusiveLock(code, conf, tmp, tmp2);
|
||||||
|
|
||||||
SharedLabel end = GenSharedLabel();
|
SharedLabel end = GenSharedLabel();
|
||||||
|
|
||||||
|
|
|
@ -1,3 +1,6 @@
|
||||||
|
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||||
|
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
|
|
||||||
/* This file is part of the dynarmic project.
|
/* This file is part of the dynarmic project.
|
||||||
* Copyright (c) 2022 MerryMage
|
* Copyright (c) 2022 MerryMage
|
||||||
* SPDX-License-Identifier: 0BSD
|
* SPDX-License-Identifier: 0BSD
|
||||||
|
@ -343,7 +346,7 @@ void EmitExclusiveLock(BlockOfCode& code, const UserConfig& conf, Xbyak::Reg64 p
|
||||||
}
|
}
|
||||||
|
|
||||||
code.mov(pointer, mcl::bit_cast<u64>(GetExclusiveMonitorLockPointer(conf.global_monitor)));
|
code.mov(pointer, mcl::bit_cast<u64>(GetExclusiveMonitorLockPointer(conf.global_monitor)));
|
||||||
EmitSpinLockLock(code, pointer, tmp);
|
EmitSpinLockLock(code, pointer, tmp, code.HasHostFeature(HostFeature::WAITPKG));
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename UserConfig>
|
template<typename UserConfig>
|
||||||
|
|
|
@ -35,9 +35,10 @@ enum class HostFeature : u64 {
|
||||||
BMI2 = 1ULL << 19,
|
BMI2 = 1ULL << 19,
|
||||||
LZCNT = 1ULL << 20,
|
LZCNT = 1ULL << 20,
|
||||||
GFNI = 1ULL << 21,
|
GFNI = 1ULL << 21,
|
||||||
|
WAITPKG = 1ULL << 22,
|
||||||
|
|
||||||
// Zen-based BMI2
|
// Zen-based BMI2
|
||||||
FastBMI2 = 1ULL << 22,
|
FastBMI2 = 1ULL << 23,
|
||||||
|
|
||||||
// Orthographic AVX512 features on 128 and 256 vectors
|
// Orthographic AVX512 features on 128 and 256 vectors
|
||||||
AVX512_Ortho = AVX512F | AVX512VL,
|
AVX512_Ortho = AVX512F | AVX512VL,
|
||||||
|
|
|
@ -22,17 +22,46 @@ static const auto default_cg_mode = nullptr; //Allow RWE
|
||||||
|
|
||||||
namespace Dynarmic {
|
namespace Dynarmic {
|
||||||
|
|
||||||
void EmitSpinLockLock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32 tmp) {
|
void EmitSpinLockLock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32 tmp, bool waitpkg) {
|
||||||
|
// TODO: EAX/EBX/EDX are saved manually here because this helper has no access to the regalloc - so better to be safe :(
|
||||||
|
if (waitpkg) {
|
||||||
|
code.push(Xbyak::util::eax);
|
||||||
|
code.push(Xbyak::util::ebx);
|
||||||
|
code.push(Xbyak::util::edx);
|
||||||
|
}
|
||||||
Xbyak::Label start, loop;
|
Xbyak::Label start, loop;
|
||||||
|
|
||||||
code.jmp(start, code.T_NEAR);
|
code.jmp(start, code.T_NEAR);
|
||||||
code.L(loop);
|
code.L(loop);
|
||||||
code.pause();
|
if (waitpkg) {
|
||||||
|
// TODO: This clobbers EAX, EBX and EDX - did we tell the regalloc?
|
||||||
|
// Arm address monitoring on the cache line containing ptr
|
||||||
|
code.umonitor(ptr);
|
||||||
|
// ebx.bit[0] = 0: C0.2 | Slower Wakeup | Better Savings
|
||||||
|
// ebx.bit[0] = 1: C0.1 | Faster Wakeup | Lesser Savings
|
||||||
|
// edx:eax is implicitly used as a 64-bit deadline timestamp
|
||||||
|
// Use the maximum so that we use the operating system's maximum
|
||||||
|
// allowed wait time within the IA32_UMWAIT_CONTROL register
|
||||||
|
// Enter the power state designated by ebx and wait for a write to the monitored ptr
|
||||||
|
code.mov(Xbyak::util::eax, 0xFFFFFFFF);
|
||||||
|
code.mov(Xbyak::util::edx, Xbyak::util::eax);
|
||||||
|
// TODO: We can only be here because tmp is 1 already - however we repeatedly overwrite it...
|
||||||
|
code.mov(Xbyak::util::ebx, 1);
|
||||||
|
code.umwait(Xbyak::util::ebx);
|
||||||
|
// CF == 1 if we hit the OS-timeout in IA32_UMWAIT_CONTROL without a write
|
||||||
|
// CF == 0 if we exited the wait for any other reason
|
||||||
|
} else {
|
||||||
|
code.pause();
|
||||||
|
}
|
||||||
code.L(start);
|
code.L(start);
|
||||||
code.mov(tmp, 1);
|
code.mov(tmp, 1);
|
||||||
/*code.lock();*/ code.xchg(code.dword[ptr], tmp);
|
/*code.lock();*/ code.xchg(code.dword[ptr], tmp);
|
||||||
code.test(tmp, tmp);
|
code.test(tmp, tmp);
|
||||||
code.jnz(loop, code.T_NEAR);
|
code.jnz(loop, code.T_NEAR);
|
||||||
|
if (waitpkg) {
|
||||||
|
code.pop(Xbyak::util::edx);
|
||||||
|
code.pop(Xbyak::util::ebx);
|
||||||
|
code.pop(Xbyak::util::eax);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void EmitSpinLockUnlock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32 tmp) {
|
void EmitSpinLockUnlock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32 tmp) {
|
||||||
|
@ -60,7 +89,7 @@ void SpinLockImpl::Initialize() {
|
||||||
|
|
||||||
code.align();
|
code.align();
|
||||||
lock = code.getCurr<void (*)(volatile int*)>();
|
lock = code.getCurr<void (*)(volatile int*)>();
|
||||||
EmitSpinLockLock(code, ABI_PARAM1, code.eax);
|
EmitSpinLockLock(code, ABI_PARAM1, code.eax, false);
|
||||||
code.ret();
|
code.ret();
|
||||||
|
|
||||||
code.align();
|
code.align();
|
||||||
|
|
|
@ -1,3 +1,6 @@
|
||||||
|
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||||
|
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
|
|
||||||
/* This file is part of the dynarmic project.
|
/* This file is part of the dynarmic project.
|
||||||
* Copyright (c) 2022 MerryMage
|
* Copyright (c) 2022 MerryMage
|
||||||
* SPDX-License-Identifier: 0BSD
|
* SPDX-License-Identifier: 0BSD
|
||||||
|
@ -9,7 +12,7 @@
|
||||||
|
|
||||||
namespace Dynarmic {
|
namespace Dynarmic {
|
||||||
|
|
||||||
void EmitSpinLockLock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32 tmp);
|
void EmitSpinLockLock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32 tmp, bool waitpkg);
|
||||||
void EmitSpinLockUnlock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32 tmp);
|
void EmitSpinLockUnlock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32 tmp);
|
||||||
|
|
||||||
} // namespace Dynarmic
|
} // namespace Dynarmic
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue