From d3b89f1a0444ec7e6d65b8b5a3eeb610eaf55bad Mon Sep 17 00:00:00 2001 From: lizzie Date: Mon, 29 Sep 2025 08:30:12 +0000 Subject: [PATCH 01/12] [dynarmic] backport WAITPKG based spinlocks Signed-off-by: lizzie --- .../dynarmic/backend/x64/block_of_code.cpp | 2 + .../backend/x64/emit_x64_memory.cpp.inc | 3 +- .../dynarmic/backend/x64/emit_x64_memory.h | 5 ++- .../src/dynarmic/backend/x64/host_feature.h | 3 +- .../src/dynarmic/common/spin_lock_x64.cpp | 37 +++++++++++++++++-- .../src/dynarmic/common/spin_lock_x64.h | 5 ++- 6 files changed, 47 insertions(+), 8 deletions(-) diff --git a/src/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp b/src/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp index d5d5f089ff..4a8de6475e 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp @@ -188,6 +188,8 @@ HostFeature GetHostFeatures() { features |= HostFeature::LZCNT; if (cpu_info.has(Cpu::tGFNI)) features |= HostFeature::GFNI; + if (cpu_info.has(Cpu::tWAITPKG)) + features |= HostFeature::WAITPKG; if (cpu_info.has(Cpu::tBMI2)) { // BMI2 instructions such as pdep and pext have been very slow up until Zen 3. diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.cpp.inc b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.cpp.inc index 34f77b0446..36a2d40de6 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.cpp.inc +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.cpp.inc @@ -430,10 +430,11 @@ void AxxEmitX64::EmitExclusiveWriteMemoryInline(AxxEmitContext& ctx, IR::Inst* i const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[1]); const Xbyak::Reg32 status = ctx.reg_alloc.ScratchGpr().cvt32(); const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr(); + const Xbyak::Reg64 tmp2 = ctx.reg_alloc.ScratchGpr(); const auto wrapped_fn = exclusive_write_fallbacks[std::make_tuple(ordered, bitsize, vaddr.getIdx(), value.getIdx())]; - EmitExclusiveLock(code, conf, tmp, eax); + EmitExclusiveLock(code, conf, tmp, tmp2); SharedLabel end = GenSharedLabel(); diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.h b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.h index 75a47c6a80..c363ea1b6b 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.h +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + /* This file is part of the dynarmic project. * Copyright (c) 2022 MerryMage * SPDX-License-Identifier: 0BSD @@ -343,7 +346,7 @@ void EmitExclusiveLock(BlockOfCode& code, const UserConfig& conf, Xbyak::Reg64 p } code.mov(pointer, mcl::bit_cast(GetExclusiveMonitorLockPointer(conf.global_monitor))); - EmitSpinLockLock(code, pointer, tmp); + EmitSpinLockLock(code, pointer, tmp, code.HasHostFeature(HostFeature::WAITPKG)); } template diff --git a/src/dynarmic/src/dynarmic/backend/x64/host_feature.h b/src/dynarmic/src/dynarmic/backend/x64/host_feature.h index 7246ed18d4..34dca971cb 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/host_feature.h +++ b/src/dynarmic/src/dynarmic/backend/x64/host_feature.h @@ -35,9 +35,10 @@ enum class HostFeature : u64 { BMI2 = 1ULL << 19, LZCNT = 1ULL << 20, GFNI = 1ULL << 21, + WAITPKG = 1ULL << 22, // Zen-based BMI2 - FastBMI2 = 1ULL << 22, + FastBMI2 = 1ULL << 23, // Orthographic AVX512 features on 128 and 256 vectors AVX512_Ortho = AVX512F | AVX512VL, diff --git a/src/dynarmic/src/dynarmic/common/spin_lock_x64.cpp b/src/dynarmic/src/dynarmic/common/spin_lock_x64.cpp index da50179de9..5307672bfe 100644 --- a/src/dynarmic/src/dynarmic/common/spin_lock_x64.cpp +++ b/src/dynarmic/src/dynarmic/common/spin_lock_x64.cpp @@ -22,17 +22,46 @@ static const auto default_cg_mode = nullptr; //Allow RWE namespace Dynarmic { -void EmitSpinLockLock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32 tmp) { +void EmitSpinLockLock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32 tmp, bool waitpkg) { + // TODO: this is because we lack regalloc - so better to be safe :( + if (waitpkg) { + code.push(Xbyak::util::eax); + code.push(Xbyak::util::ebx); + code.push(Xbyak::util::edx); + } Xbyak::Label start, loop; - code.jmp(start, code.T_NEAR); code.L(loop); - code.pause(); + if (waitpkg) { + // TODO: This clobbers EAX and EDX did we tell the regalloc? + // ARM ptr for address-monitoring + code.umonitor(ptr); + // tmp.bit[0] = 0: C0.1 | Slow Wakup | Better Savings + // tmp.bit[0] = 1: C0.2 | Fast Wakup | Lesser Savings + // edx:eax is implicitly used as a 64-bit deadline timestamp + // Use the maximum so that we use the operating system's maximum + // allowed wait time within the IA32_UMWAIT_CONTROL register + // Enter power state designated by tmp and wait for a write to lock_ptr + code.mov(Xbyak::util::eax, 0xFFFFFFFF); + code.mov(Xbyak::util::edx, Xbyak::util::eax); + // TODO: We can only be here because tmp is 1 already - however we repeatedly overwrite it... + code.mov(Xbyak::util::ebx, 1); + code.umwait(Xbyak::util::ebx); + // CF == 1 if we hit the OS-timeout in IA32_UMWAIT_CONTROL without a write + // CF == 0 if we exited the wait for any other reason + } else { + code.pause(); + } code.L(start); code.mov(tmp, 1); /*code.lock();*/ code.xchg(code.dword[ptr], tmp); code.test(tmp, tmp); code.jnz(loop, code.T_NEAR); + if (waitpkg) { + code.pop(Xbyak::util::edx); + code.pop(Xbyak::util::ebx); + code.pop(Xbyak::util::eax); + } } void EmitSpinLockUnlock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32 tmp) { @@ -60,7 +89,7 @@ void SpinLockImpl::Initialize() { code.align(); lock = code.getCurr(); - EmitSpinLockLock(code, ABI_PARAM1, code.eax); + EmitSpinLockLock(code, ABI_PARAM1, code.eax, false); code.ret(); code.align(); diff --git a/src/dynarmic/src/dynarmic/common/spin_lock_x64.h b/src/dynarmic/src/dynarmic/common/spin_lock_x64.h index df6a3d7407..df6860e2f2 100644 --- a/src/dynarmic/src/dynarmic/common/spin_lock_x64.h +++ b/src/dynarmic/src/dynarmic/common/spin_lock_x64.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + /* This file is part of the dynarmic project. * Copyright (c) 2022 MerryMage * SPDX-License-Identifier: 0BSD @@ -9,7 +12,7 @@ namespace Dynarmic { -void EmitSpinLockLock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32 tmp); +void EmitSpinLockLock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32 tmp, bool waitpkg); void EmitSpinLockUnlock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32 tmp); } // namespace Dynarmic From 020f1cdb1f2c60d8caad424c2319b721b167b181 Mon Sep 17 00:00:00 2001 From: lizzie Date: Wed, 1 Oct 2025 12:16:42 +0200 Subject: [PATCH 02/12] [qt] fix ci missing build_id (#2638) Signed-off-by: lizzie Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/2638 Co-authored-by: lizzie Co-committed-by: lizzie --- src/yuzu/main.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/yuzu/main.cpp b/src/yuzu/main.cpp index 71cc0a7e6b..fc7a953d77 100644 --- a/src/yuzu/main.cpp +++ b/src/yuzu/main.cpp @@ -4204,8 +4204,8 @@ void GMainWindow::OnEmulatorUpdateAvailable() { } #endif -void GMainWindow::UpdateWindowTitle(std::string_view title_name, std::string_view title_version, - std::string_view gpu_vendor) { +void GMainWindow::UpdateWindowTitle(std::string_view title_name, std::string_view title_version, std::string_view gpu_vendor) { + static const std::string build_id = std::string{Common::g_build_id}; static const std::string yuzu_title = fmt::format("{} | {} | {}", std::string{Common::g_build_name}, std::string{Common::g_build_version}, From 4be6d30cd95634777e0ea0790d7c51a3d09bb773 Mon Sep 17 00:00:00 2001 From: Caio Oliveira Date: Wed, 1 Oct 2025 16:36:07 +0200 Subject: [PATCH 03/12] [fixup] fix bad variable names (#2642) * Mo[l]tenVK is only for apple, so desc is unnecessary * fix mistipo on BUILD_AUTO_UPDATE_WEB[SI]TE Signed-off-by: Caio Oliveira Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/2642 Reviewed-by: MaranBr Reviewed-by: CamilleLaVey Co-authored-by: Caio Oliveira Co-committed-by: Caio Oliveira --- CMakeLists.txt | 2 +- docs/Options.md | 2 +- src/common/scm_rev.cpp.in | 4 ++-- src/yuzu/CMakeLists.txt | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index eda6969f31..a9ff2e9458 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -224,7 +224,7 @@ set(YUZU_TZDB_PATH "" CACHE STRING "Path to a pre-downloaded timezone database") cmake_dependent_option(YUZU_USE_FASTER_LD "Check if a faster linker is available" ON "LINUX" OFF) -cmake_dependent_option(YUZU_APPLE_USE_BUNDLED_MONTENVK "Download bundled MoltenVK lib" ON "APPLE" OFF) +cmake_dependent_option(YUZU_USE_BUNDLED_MOLTENVK "Download bundled MoltenVK lib" ON "APPLE" OFF) option(YUZU_DISABLE_LLVM "Disable LLVM (useful for CI)" OFF) diff --git a/docs/Options.md b/docs/Options.md index 6af91e4918..3dd84ea645 100644 --- a/docs/Options.md +++ b/docs/Options.md @@ -31,7 +31,7 @@ Notes: * Currently, build fails without this - `YUZU_USE_FASTER_LD` (ON) Check if a faster linker is available * Only available on UNIX -- `YUZU_APPLE_USE_BUNDLED_MONTENVK` (ON, macOS only) Download bundled MoltenVK lib) +- `YUZU_USE_BUNDLED_MOLTENVK` (ON, macOS only) Download bundled MoltenVK lib) - `YUZU_TZDB_PATH` (string) Path to a pre-downloaded timezone database (useful for nixOS) - `ENABLE_OPENSSL` (ON for Linux and *BSD) Enable OpenSSL backend for the ssl service * Always enabled if the web service is enabled diff --git a/src/common/scm_rev.cpp.in b/src/common/scm_rev.cpp.in index 08b8c68835..60c9c119f9 100644 --- a/src/common/scm_rev.cpp.in +++ b/src/common/scm_rev.cpp.in @@ -18,7 +18,7 @@ #define TITLE_BAR_FORMAT_RUNNING "@TITLE_BAR_FORMAT_RUNNING@" #define IS_DEV_BUILD @IS_DEV_BUILD@ #define COMPILER_ID "@CXX_COMPILER@" -#define BUILD_AUTO_UPDATE_WEBISTE "@BUILD_AUTO_UPDATE_WEBISTE@" +#define BUILD_AUTO_UPDATE_WEBSITE "@BUILD_AUTO_UPDATE_WEBSITE@" #define BUILD_AUTO_UPDATE_API "@BUILD_AUTO_UPDATE_API@" #define BUILD_AUTO_UPDATE_REPO "@BUILD_AUTO_UPDATE_REPO@" @@ -37,7 +37,7 @@ constexpr const char g_title_bar_format_running[] = TITLE_BAR_FORMAT_RUNNING; constexpr const char g_compiler_id[] = COMPILER_ID; constexpr const bool g_is_dev_build = IS_DEV_BUILD; -constexpr const char g_build_auto_update_website[] = BUILD_AUTO_UPDATE_WEBISTE; +constexpr const char g_build_auto_update_website[] = BUILD_AUTO_UPDATE_WEBSITE; constexpr const char g_build_auto_update_api[] = BUILD_AUTO_UPDATE_API; constexpr const char g_build_auto_update_repo[] = BUILD_AUTO_UPDATE_REPO; diff --git a/src/yuzu/CMakeLists.txt b/src/yuzu/CMakeLists.txt index c3d8f5387a..00e03bd935 100644 --- a/src/yuzu/CMakeLists.txt +++ b/src/yuzu/CMakeLists.txt @@ -366,7 +366,7 @@ if (APPLE) set_target_properties(yuzu PROPERTIES MACOSX_BUNDLE TRUE) set_target_properties(yuzu PROPERTIES MACOSX_BUNDLE_INFO_PLIST ${CMAKE_CURRENT_SOURCE_DIR}/Info.plist) - if (YUZU_APPLE_USE_BUNDLED_MONTENVK) + if (YUZU_USE_BUNDLED_MOLTENVK) set(MOLTENVK_PLATFORM "macOS") set(MOLTENVK_VERSION "v1.3.0") download_moltenvk(${MOLTENVK_PLATFORM} ${MOLTENVK_VERSION}) From 61adc85c4ba4d87c5dcf99212a94b9d9d4a6fe14 Mon Sep 17 00:00:00 2001 From: Shinmegumi Date: Wed, 1 Oct 2025 21:09:27 +0200 Subject: [PATCH 04/12] [ci] Minor change to fix building (#2644) MSVC did not like that one of our variables was a constexpr since it was defined in the externals as a constexpr. Changed to const auto like the rest to ensure it built properly. Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/2644 Reviewed-by: MaranBr Reviewed-by: Maufeat Co-authored-by: Shinmegumi Co-committed-by: Shinmegumi --- src/yuzu/update_checker.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/yuzu/update_checker.cpp b/src/yuzu/update_checker.cpp index 76b436d1d1..e54eb8d7f8 100644 --- a/src/yuzu/update_checker.cpp +++ b/src/yuzu/update_checker.cpp @@ -58,7 +58,7 @@ std::optional UpdateChecker::GetResponse(std::string url, std::stri std::optional UpdateChecker::GetLatestRelease(bool include_prereleases) { - constexpr auto update_check_url = std::string{Common::g_build_auto_update_api}; + const auto update_check_url = std::string{Common::g_build_auto_update_api}; std::string update_check_path = fmt::format("/repos/{}", std::string{Common::g_build_auto_update_repo}); try { if (include_prereleases) { // This can return either a prerelease or a stable release, From 76b5d6778ec6245979a9f6963616d7fadb814d76 Mon Sep 17 00:00:00 2001 From: lizzie Date: Wed, 1 Oct 2025 23:18:37 +0200 Subject: [PATCH 05/12] [common/logging] faster logging by avoiding constructing unused strings/results (and filtering first) (#2603) basically std::string would be invoked even when the logging was filtered, then destroyed instantly, invoking malloc/free and polluting mem arenas for no good reason Signed-off-by: lizzie Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/2603 Reviewed-by: MaranBr Reviewed-by: crueter Co-authored-by: lizzie Co-committed-by: lizzie --- src/common/logging/backend.cpp | 38 +++++++++++-------- src/common/logging/filter.cpp | 26 ++++++------- src/common/logging/log.h | 21 +++------- .../hle/service/ssl/ssl_backend_openssl.cpp | 5 ++- 4 files changed, 45 insertions(+), 45 deletions(-) diff --git a/src/common/logging/backend.cpp b/src/common/logging/backend.cpp index 4621771090..c2cbf3f4a5 100644 --- a/src/common/logging/backend.cpp +++ b/src/common/logging/backend.cpp @@ -39,9 +39,17 @@ namespace Common::Log { namespace { -/** - * Interface for logging backends. - */ +/// @brief Trims up to and including the last of ../, ..\, src/, src\ in a string +/// do not be fooled this isn't generating new strings on .rodata :) +constexpr const char* TrimSourcePath(std::string_view source) { + const auto rfind = [source](const std::string_view match) { + return source.rfind(match) == source.npos ? 0 : (source.rfind(match) + match.size()); + }; + auto idx = (std::max)({rfind("src/"), rfind("src\\"), rfind("../"), rfind("..\\")}); + return source.data() + idx; +} + +/// @brief Interface for logging backends. class Backend { public: virtual ~Backend() = default; @@ -53,9 +61,7 @@ public: virtual void Flush() = 0; }; -/** - * Backend that writes to stderr and with color - */ +/// @brief Backend that writes to stderr and with color class ColorConsoleBackend final : public Backend { public: explicit ColorConsoleBackend() = default; @@ -84,9 +90,7 @@ private: std::atomic_bool enabled{false}; }; -/** - * Backend that writes to a file passed into the constructor - */ +/// @brief Backend that writes to a file passed into the constructor class FileBackend final : public Backend { public: explicit FileBackend(const std::filesystem::path& filename) { @@ -248,13 +252,14 @@ public: color_console_backend.SetEnabled(enabled); } + bool CanPushEntry(Class log_class, Level log_level) const noexcept { + return filter.CheckMessage(log_class, log_level); + } + void PushEntry(Class log_class, Level log_level, const char* filename, unsigned int line_num, - const char* function, std::string&& message) { - if (!filter.CheckMessage(log_class, log_level)) { - return; - } + const char* function, std::string&& message) noexcept { message_queue.EmplaceWait( - CreateEntry(log_class, log_level, filename, line_num, function, std::move(message))); + CreateEntry(log_class, log_level, TrimSourcePath(filename), line_num, function, std::move(message))); } private: @@ -368,8 +373,9 @@ void FmtLogMessageImpl(Class log_class, Level log_level, const char* filename, unsigned int line_num, const char* function, fmt::string_view format, const fmt::format_args& args) { if (!initialization_in_progress_suppress_logging) { - Impl::Instance().PushEntry(log_class, log_level, filename, line_num, function, - fmt::vformat(format, args)); + auto& instance = Impl::Instance(); + if (instance.CanPushEntry(log_class, log_level)) + instance.PushEntry(log_class, log_level, filename, line_num, function, fmt::vformat(format, args)); } } } // namespace Common::Log diff --git a/src/common/logging/filter.cpp b/src/common/logging/filter.cpp index 4e3a614a45..813e812780 100644 --- a/src/common/logging/filter.cpp +++ b/src/common/logging/filter.cpp @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: 2014 Citra Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later @@ -9,22 +12,20 @@ namespace Common::Log { namespace { template Level GetLevelByName(const It begin, const It end) { - for (u8 i = 0; i < static_cast(Level::Count); ++i) { - const char* level_name = GetLevelName(static_cast(i)); - if (Common::ComparePartialString(begin, end, level_name)) { - return static_cast(i); - } + for (u32 i = 0; i < u32(Level::Count); ++i) { + const char* level_name = GetLevelName(Level(i)); + if (Common::ComparePartialString(begin, end, level_name)) + return Level(i); } return Level::Count; } template Class GetClassByName(const It begin, const It end) { - for (u8 i = 0; i < static_cast(Class::Count); ++i) { - const char* level_name = GetLogClassName(static_cast(i)); - if (Common::ComparePartialString(begin, end, level_name)) { - return static_cast(i); - } + for (u32 i = 0; i < u32(Class::Count); ++i) { + const char* level_name = GetLogClassName(Class(i)); + if (Common::ComparePartialString(begin, end, level_name)) + return Class(i); } return Class::Count; } @@ -229,13 +230,12 @@ void Filter::ParseFilterString(std::string_view filter_view) { } bool Filter::CheckMessage(Class log_class, Level level) const { - return static_cast(level) >= - static_cast(class_levels[static_cast(log_class)]); + return u8(level) >= u8(class_levels[std::size_t(log_class)]); } bool Filter::IsDebug() const { return std::any_of(class_levels.begin(), class_levels.end(), [](const Level& l) { - return static_cast(l) <= static_cast(Level::Debug); + return u8(l) <= u8(Level::Debug); }); } diff --git a/src/common/logging/log.h b/src/common/logging/log.h index bd7a7d7f49..7b23b59aab 100644 --- a/src/common/logging/log.h +++ b/src/common/logging/log.h @@ -16,15 +16,6 @@ namespace Common::Log { -// trims up to and including the last of ../, ..\, src/, src\ in a string -constexpr const char* TrimSourcePath(std::string_view source) { - const auto rfind = [source](const std::string_view match) { - return source.rfind(match) == source.npos ? 0 : (source.rfind(match) + match.size()); - }; - auto idx = (std::max)({rfind("src/"), rfind("src\\"), rfind("../"), rfind("..\\")}); - return source.data() + idx; -} - /// Logs a message to the global logger, using fmt void FmtLogMessageImpl(Class log_class, Level log_level, const char* filename, unsigned int line_num, const char* function, fmt::string_view format, @@ -42,7 +33,7 @@ void FmtLogMessage(Class log_class, Level log_level, const char* filename, unsig #ifdef _DEBUG #define LOG_TRACE(log_class, ...) \ Common::Log::FmtLogMessage(Common::Log::Class::log_class, Common::Log::Level::Trace, \ - Common::Log::TrimSourcePath(__FILE__), __LINE__, __func__, \ + __FILE__, __LINE__, __func__, \ __VA_ARGS__) #else #define LOG_TRACE(log_class, fmt, ...) (void(0)) @@ -50,21 +41,21 @@ void FmtLogMessage(Class log_class, Level log_level, const char* filename, unsig #define LOG_DEBUG(log_class, ...) \ Common::Log::FmtLogMessage(Common::Log::Class::log_class, Common::Log::Level::Debug, \ - Common::Log::TrimSourcePath(__FILE__), __LINE__, __func__, \ + __FILE__, __LINE__, __func__, \ __VA_ARGS__) #define LOG_INFO(log_class, ...) \ Common::Log::FmtLogMessage(Common::Log::Class::log_class, Common::Log::Level::Info, \ - Common::Log::TrimSourcePath(__FILE__), __LINE__, __func__, \ + __FILE__, __LINE__, __func__, \ __VA_ARGS__) #define LOG_WARNING(log_class, ...) \ Common::Log::FmtLogMessage(Common::Log::Class::log_class, Common::Log::Level::Warning, \ - Common::Log::TrimSourcePath(__FILE__), __LINE__, __func__, \ + __FILE__, __LINE__, __func__, \ __VA_ARGS__) #define LOG_ERROR(log_class, ...) \ Common::Log::FmtLogMessage(Common::Log::Class::log_class, Common::Log::Level::Error, \ - Common::Log::TrimSourcePath(__FILE__), __LINE__, __func__, \ + __FILE__, __LINE__, __func__, \ __VA_ARGS__) #define LOG_CRITICAL(log_class, ...) \ Common::Log::FmtLogMessage(Common::Log::Class::log_class, Common::Log::Level::Critical, \ - Common::Log::TrimSourcePath(__FILE__), __LINE__, __func__, \ + __FILE__, __LINE__, __func__, \ __VA_ARGS__) diff --git a/src/core/hle/service/ssl/ssl_backend_openssl.cpp b/src/core/hle/service/ssl/ssl_backend_openssl.cpp index 5714e6f3c5..795b69a873 100644 --- a/src/core/hle/service/ssl/ssl_backend_openssl.cpp +++ b/src/core/hle/service/ssl/ssl_backend_openssl.cpp @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later @@ -286,7 +289,7 @@ Result CheckOpenSSLErrors() { msg.append(data); } Common::Log::FmtLogMessage(Common::Log::Class::Service_SSL, Common::Log::Level::Error, - Common::Log::TrimSourcePath(file), line, func, "OpenSSL: {}", + file, line, func, "OpenSSL: {}", msg); } return ResultInternalError; From 326865cba2641f693d8500506594c387259dfc11 Mon Sep 17 00:00:00 2001 From: MaranBr Date: Thu, 2 Oct 2025 00:15:14 +0200 Subject: [PATCH 06/12] [host1x] Improve FFmpeg error handling (#2643) This improves the FFmpeg error handling. Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/2643 Co-authored-by: MaranBr Co-committed-by: MaranBr --- .../nvdrv/devices/nvhost_nvdec_common.cpp | 4 ++-- src/video_core/host1x/codecs/decoder.cpp | 4 ++++ src/video_core/host1x/ffmpeg/ffmpeg.cpp | 22 +++++++++---------- src/video_core/host1x/ffmpeg/ffmpeg.h | 2 +- src/video_core/host1x/vic.cpp | 5 +++++ 5 files changed, 23 insertions(+), 14 deletions(-) diff --git a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp index de4197c52d..f7d6c33f77 100644 --- a/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp +++ b/src/core/hle/service/nvdrv/devices/nvhost_nvdec_common.cpp @@ -135,8 +135,8 @@ NvResult nvhost_nvdec_common::GetSyncpoint(IoctlGetSyncpoint& params) { } NvResult nvhost_nvdec_common::GetWaitbase(IoctlGetWaitbase& params) { - LOG_CRITICAL(Service_NVDRV, "called WAITBASE"); - params.value = 0; // Seems to be hard coded at 0 + LOG_DEBUG(Service_NVDRV, "called WAITBASE"); + params.value = 0; return NvResult::Success; } diff --git a/src/video_core/host1x/codecs/decoder.cpp b/src/video_core/host1x/codecs/decoder.cpp index cb17784b19..887eb28c8c 100755 --- a/src/video_core/host1x/codecs/decoder.cpp +++ b/src/video_core/host1x/codecs/decoder.cpp @@ -38,6 +38,10 @@ void Decoder::Decode() { // Receive output frames from decoder. auto frame = decode_api.ReceiveFrame(); + if (!frame) { + return; + } + if (IsInterlaced()) { auto [luma_top, luma_bottom, chroma_top, chroma_bottom] = GetInterlacedOffsets(); auto frame_copy = frame; diff --git a/src/video_core/host1x/ffmpeg/ffmpeg.cpp b/src/video_core/host1x/ffmpeg/ffmpeg.cpp index 536a01fcc8..bbbbe615ce 100644 --- a/src/video_core/host1x/ffmpeg/ffmpeg.cpp +++ b/src/video_core/host1x/ffmpeg/ffmpeg.cpp @@ -233,7 +233,7 @@ bool DecoderContext::OpenContext(const Decoder& decoder) { } bool DecoderContext::SendPacket(const Packet& packet) { - if (const int ret = avcodec_send_packet(m_codec_context, packet.GetPacket()); ret < 0 && ret != AVERROR_EOF) { + if (const int ret = avcodec_send_packet(m_codec_context, packet.GetPacket()); ret < 0 && ret != AVERROR_EOF && ret != AVERROR(EAGAIN)) { LOG_ERROR(HW_GPU, "avcodec_send_packet error: {}", AVError(ret)); return false; } @@ -242,31 +242,31 @@ bool DecoderContext::SendPacket(const Packet& packet) { } std::shared_ptr DecoderContext::ReceiveFrame() { - auto ReceiveImpl = [&](AVFrame* frame) -> bool { - if (const int ret = avcodec_receive_frame(m_codec_context, frame); ret < 0 && ret != AVERROR_EOF) { + auto ReceiveImpl = [&](AVFrame* frame) -> int { + const int ret = avcodec_receive_frame(m_codec_context, frame); + if (ret < 0 && ret != AVERROR_EOF && ret != AVERROR(EAGAIN)) { LOG_ERROR(HW_GPU, "avcodec_receive_frame error: {}", AVError(ret)); - return false; } - return true; + return ret; }; std::shared_ptr intermediate_frame = std::make_shared(); - if (!ReceiveImpl(intermediate_frame->GetFrame())) { + if (ReceiveImpl(intermediate_frame->GetFrame()) < 0) { return {}; } - m_temp_frame = std::make_shared(); + m_final_frame = std::make_shared(); if (m_codec_context->hw_device_ctx) { - m_temp_frame->SetFormat(PreferredGpuFormat); - if (int ret = av_hwframe_transfer_data(m_temp_frame->GetFrame(), intermediate_frame->GetFrame(), 0); ret < 0) { + m_final_frame->SetFormat(PreferredGpuFormat); + if (const int ret = av_hwframe_transfer_data(m_final_frame->GetFrame(), intermediate_frame->GetFrame(), 0); ret < 0) { LOG_ERROR(HW_GPU, "av_hwframe_transfer_data error: {}", AVError(ret)); return {}; } } else { - m_temp_frame = std::move(intermediate_frame); + m_final_frame = std::move(intermediate_frame); } - return std::move(m_temp_frame); + return std::move(m_final_frame); } void DecodeApi::Reset() { diff --git a/src/video_core/host1x/ffmpeg/ffmpeg.h b/src/video_core/host1x/ffmpeg/ffmpeg.h index 0dd7c7cb04..d60a8ac4a7 100644 --- a/src/video_core/host1x/ffmpeg/ffmpeg.h +++ b/src/video_core/host1x/ffmpeg/ffmpeg.h @@ -194,7 +194,7 @@ public: private: const Decoder& m_decoder; AVCodecContext* m_codec_context{}; - std::shared_ptr m_temp_frame{}; + std::shared_ptr m_final_frame{}; bool m_decode_order{}; }; diff --git a/src/video_core/host1x/vic.cpp b/src/video_core/host1x/vic.cpp index 9c33370337..3dbbfa5552 100644 --- a/src/video_core/host1x/vic.cpp +++ b/src/video_core/host1x/vic.cpp @@ -146,6 +146,11 @@ void Vic::Execute() { } auto frame = frame_queue.GetFrame(nvdec_id, luma_offset); + + if (!frame) { + continue; + } + if (!frame.get()) { LOG_ERROR(HW_GPU, "Vic {} failed to get frame with offset {:#X}", id, luma_offset); continue; From 24e6c62109cd30e09dd72a4af58f132dd7c0d359 Mon Sep 17 00:00:00 2001 From: lizzie Date: Thu, 2 Oct 2025 00:25:41 +0200 Subject: [PATCH 07/12] [vk, ogl] invalidate pipeline caches from <=0.0.3 (#2637) Invalidates caches before next upcoming release, this will make transitions smoother especially for users whom do not know how to clear caches. The reasoning behind this is the recent changes to async shaders and other pipeline stuffs that may break compat Signed-off-by: lizzie Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/2637 Reviewed-by: MaranBr Reviewed-by: Maufeat Co-authored-by: lizzie Co-committed-by: lizzie --- src/video_core/renderer_opengl/gl_shader_cache.cpp | 2 +- src/video_core/renderer_vulkan/vk_pipeline_cache.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index c2ead26bd9..45f729698e 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -54,7 +54,7 @@ using VideoCommon::LoadPipelines; using VideoCommon::SerializePipeline; using Context = ShaderContext::Context; -constexpr u32 CACHE_VERSION = 10; +constexpr u32 CACHE_VERSION = 13; template auto MakeSpan(Container& container) { diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 2fd0b59b3a..9cdbe5611b 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -55,7 +55,7 @@ using VideoCommon::FileEnvironment; using VideoCommon::GenericEnvironment; using VideoCommon::GraphicsEnvironment; -constexpr u32 CACHE_VERSION = 12; +constexpr u32 CACHE_VERSION = 13; constexpr std::array VULKAN_CACHE_MAGIC_NUMBER{'y', 'u', 'z', 'u', 'v', 'k', 'c', 'h'}; template From 1a5b3fb23934cfb9647120fc2a055bc116e67460 Mon Sep 17 00:00:00 2001 From: MaranBr Date: Thu, 2 Oct 2025 01:30:05 +0200 Subject: [PATCH 08/12] [audio_core] Fix audio reverb effect (#2646) This fixes the audio reverb effect that was causing loud noise in some games and on some platforms. Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/2646 Co-authored-by: MaranBr Co-committed-by: MaranBr --- src/audio_core/renderer/behavior/info_updater.cpp | 12 ------------ src/audio_core/renderer/command/effect/reverb.cpp | 2 -- 2 files changed, 14 deletions(-) diff --git a/src/audio_core/renderer/behavior/info_updater.cpp b/src/audio_core/renderer/behavior/info_updater.cpp index 48fe1f8975..20f6cda3a2 100644 --- a/src/audio_core/renderer/behavior/info_updater.cpp +++ b/src/audio_core/renderer/behavior/info_updater.cpp @@ -165,12 +165,6 @@ Result InfoUpdater::UpdateEffectsVersion1(EffectContext& effect_context, const b reinterpret_cast(output), effect_count}; for (u32 i = 0; i < effect_count; i++) { -#ifdef _WIN32 - // There's a bug in Windows where using this effect causes extreme noise. So let's skip it. - if (in_params[i].type == EffectInfoBase::Type::Reverb) { - continue; - } -#endif auto effect_info{&effect_context.GetInfo(i)}; if (effect_info->GetType() != in_params[i].type) { effect_info->ForceUnmapBuffers(pool_mapper); @@ -218,12 +212,6 @@ Result InfoUpdater::UpdateEffectsVersion2(EffectContext& effect_context, const b reinterpret_cast(output), effect_count}; for (u32 i = 0; i < effect_count; i++) { -#ifdef _WIN32 - // There's a bug in Windows where using this effect causes extreme noise. So let's skip it. - if (in_params[i].type == EffectInfoBase::Type::Reverb) { - continue; - } -#endif auto effect_info{&effect_context.GetInfo(i)}; if (effect_info->GetType() != in_params[i].type) { effect_info->ForceUnmapBuffers(pool_mapper); diff --git a/src/audio_core/renderer/command/effect/reverb.cpp b/src/audio_core/renderer/command/effect/reverb.cpp index 46b85b8945..87eab1adda 100644 --- a/src/audio_core/renderer/command/effect/reverb.cpp +++ b/src/audio_core/renderer/command/effect/reverb.cpp @@ -191,8 +191,6 @@ static void InitializeReverbEffect(const ReverbInfo::ParameterVersion2& params, const auto center_delay_time{(5 * delay).to_uint_floor()}; state.center_delay_line.Initialize(center_delay_time, 1.0f); - UpdateReverbEffectParameter(params, state); - for (u32 i = 0; i < ReverbInfo::MaxDelayLines; i++) { std::ranges::fill(state.fdn_delay_lines[i].buffer, 0); std::ranges::fill(state.decay_delay_lines[i].buffer, 0); From 990a43a48c77816f0f9f5edad40ec905f05df62b Mon Sep 17 00:00:00 2001 From: Ribbit Date: Thu, 2 Oct 2025 20:00:34 +0200 Subject: [PATCH 09/12] [vk] Add missing flush per spec (#2624) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We copy pixels into a CPU-side staging buffer and then ask the GPU to read from it. On some systems those CPU writes aren’t automatically visible to the GPU unless explicitly flushed, so the GPU can sometimes read stale data. By calling buffer.Flush() immediately after writing, we force those CPU changes to become visible to the device, ensuring the GPU sees the latest frame. However, this is an emulator, so sometimes what spec says may not work cause reasons. Co-authored-by: Ribbit Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/2624 Reviewed-by: Shinmegumi Reviewed-by: MaranBr Co-authored-by: Ribbit Co-committed-by: Ribbit --- src/video_core/renderer_vulkan/present/layer.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/video_core/renderer_vulkan/present/layer.cpp b/src/video_core/renderer_vulkan/present/layer.cpp index fa7c457573..5676dfe62a 100644 --- a/src/video_core/renderer_vulkan/present/layer.cpp +++ b/src/video_core/renderer_vulkan/present/layer.cpp @@ -280,6 +280,7 @@ void Layer::UpdateRawImage(const Tegra::FramebufferConfig& framebuffer, size_t i Tegra::Texture::UnswizzleTexture( mapped_span.subspan(image_offset, linear_size), std::span(host_ptr, tiled_size), bytes_per_pixel, framebuffer.width, framebuffer.height, 1, block_height_log2, 0); + buffer.Flush(); // Ensure host writes are visible before the GPU copy. } const VkBufferImageCopy copy{ From 2d8cb2d4570b35985648079d2cfd69e96481ff2a Mon Sep 17 00:00:00 2001 From: MaranBr Date: Thu, 2 Oct 2025 22:48:52 +0200 Subject: [PATCH 10/12] [file_sys] Properly fix the installation of new updates (#2651) This removes the workaround and properly fix the installation of new updates. Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/2651 Co-authored-by: MaranBr Co-committed-by: MaranBr --- src/core/file_sys/content_archive.cpp | 4 --- src/core/file_sys/fssystem/fs_types.h | 3 ++ .../fssystem_nca_file_system_driver.cpp | 34 ++++++++++++++----- 3 files changed, 29 insertions(+), 12 deletions(-) diff --git a/src/core/file_sys/content_archive.cpp b/src/core/file_sys/content_archive.cpp index edd25644ac..4e3313f83c 100644 --- a/src/core/file_sys/content_archive.cpp +++ b/src/core/file_sys/content_archive.cpp @@ -38,9 +38,6 @@ NCA::NCA(VirtualFile file_, const NCA* base_nca) reader = std::make_shared(); if (Result rc = reader->Initialize(file, GetCryptoConfiguration(), GetNcaCompressionConfiguration()); R_FAILED(rc)) { - if (rc != ResultInvalidNcaSignature) { - LOG_ERROR(Loader, "File reader errored out during header read: {:#x}", rc.GetInnerValue()); - } status = Loader::ResultStatus::ErrorBadNCAHeader; return; } @@ -85,7 +82,6 @@ NCA::NCA(VirtualFile file_, const NCA* base_nca) for (s32 i = 0; i < fs_count; i++) { NcaFsHeaderReader header_reader; if (Result rc = fs.OpenStorage(&filesystems[i], &header_reader, i); R_FAILED(rc)) { - LOG_DEBUG(Loader, "File reader errored out during read of section {}: {:#x}", i, rc.GetInnerValue()); status = Loader::ResultStatus::ErrorBadNCAHeader; return; } diff --git a/src/core/file_sys/fssystem/fs_types.h b/src/core/file_sys/fssystem/fs_types.h index 43aeaf447b..f11b7f1dae 100644 --- a/src/core/file_sys/fssystem/fs_types.h +++ b/src/core/file_sys/fssystem/fs_types.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later diff --git a/src/core/file_sys/fssystem/fssystem_nca_file_system_driver.cpp b/src/core/file_sys/fssystem/fssystem_nca_file_system_driver.cpp index 25036b02c1..d496d58cec 100644 --- a/src/core/file_sys/fssystem/fssystem_nca_file_system_driver.cpp +++ b/src/core/file_sys/fssystem/fssystem_nca_file_system_driver.cpp @@ -1049,13 +1049,11 @@ Result NcaFileSystemDriver::CreatePatchMetaStorage( ASSERT(out_aes_ctr_ex_meta != nullptr); ASSERT(out_indirect_meta != nullptr); ASSERT(base_storage != nullptr); - //ASSERT(patch_info.HasAesCtrExTable()); - //ASSERT(patch_info.HasIndirectTable()); ASSERT(Common::IsAligned(patch_info.aes_ctr_ex_size, NcaHeader::XtsBlockSize)); // Validate patch info extents. - R_UNLESS(patch_info.indirect_size > 0, ResultInvalidNcaPatchInfoIndirectSize); - R_UNLESS(patch_info.aes_ctr_ex_size >= 0, ResultInvalidNcaPatchInfoAesCtrExSize); + R_UNLESS(patch_info.aes_ctr_ex_size >= 0 && patch_info.HasAesCtrExTable(), ResultInvalidNcaPatchInfoAesCtrExSize); + R_UNLESS(patch_info.indirect_size > 0 && patch_info.HasIndirectTable(), ResultInvalidNcaPatchInfoIndirectSize); R_UNLESS(patch_info.indirect_size + patch_info.indirect_offset <= patch_info.aes_ctr_ex_offset, ResultInvalidNcaPatchInfoAesCtrExOffset); R_UNLESS(patch_info.aes_ctr_ex_offset + patch_info.aes_ctr_ex_size <= @@ -1333,10 +1331,30 @@ Result NcaFileSystemDriver::CreateIntegrityVerificationStorageImpl( R_UNLESS(last_layer_info_offset + layer_info.size <= layer_info_offset, ResultRomNcaInvalidIntegrityLayerInfoOffset); } - storage_info[level_hash_info.max_layers - 1] - = std::make_shared(std::move(base_storage), - layer_info.size, - last_layer_info_offset); + + switch (level_hash_info.max_layers - 1) { + case FileSys::HierarchicalIntegrityVerificationStorage::HierarchicalStorageInformation::MasterStorage: + storage_info.SetMasterHashStorage(std::make_shared(std::move(base_storage), layer_info.size, last_layer_info_offset)); + break; + case FileSys::HierarchicalIntegrityVerificationStorage::HierarchicalStorageInformation::Layer1Storage: + storage_info.SetLayer1HashStorage(std::make_shared(std::move(base_storage), layer_info.size, last_layer_info_offset)); + break; + case FileSys::HierarchicalIntegrityVerificationStorage::HierarchicalStorageInformation::Layer2Storage: + storage_info.SetLayer2HashStorage(std::make_shared(std::move(base_storage), layer_info.size, last_layer_info_offset)); + break; + case FileSys::HierarchicalIntegrityVerificationStorage::HierarchicalStorageInformation::Layer3Storage: + storage_info.SetLayer3HashStorage(std::make_shared(std::move(base_storage), layer_info.size, last_layer_info_offset)); + break; + case FileSys::HierarchicalIntegrityVerificationStorage::HierarchicalStorageInformation::Layer4Storage: + storage_info.SetLayer4HashStorage(std::make_shared(std::move(base_storage), layer_info.size, last_layer_info_offset)); + break; + case FileSys::HierarchicalIntegrityVerificationStorage::HierarchicalStorageInformation::Layer5Storage: + storage_info.SetLayer5HashStorage(std::make_shared(std::move(base_storage), layer_info.size, last_layer_info_offset)); + break; + case FileSys::HierarchicalIntegrityVerificationStorage::HierarchicalStorageInformation::DataStorage: + storage_info.SetDataStorage(std::make_shared(std::move(base_storage), layer_info.size, last_layer_info_offset)); + break; + } // Make the integrity romfs storage. auto integrity_storage = std::make_shared(); From de594c8792622caea4400c49df2cadac8f8b1143 Mon Sep 17 00:00:00 2001 From: lizzie Date: Thu, 2 Oct 2025 23:20:45 +0200 Subject: [PATCH 11/12] [dynarmic] add safe-opt to skip IR verification (#2613) Most programs are well behaved and don't cause internal IR issues. Hence, verification can be safely skipped. Signed-off-by: lizzie Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/2613 Reviewed-by: MaranBr Co-authored-by: lizzie Co-committed-by: lizzie --- src/dynarmic/src/dynarmic/interface/optimization_flags.h | 5 +++++ src/dynarmic/src/dynarmic/ir/opt_passes.cpp | 8 ++++---- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/src/dynarmic/src/dynarmic/interface/optimization_flags.h b/src/dynarmic/src/dynarmic/interface/optimization_flags.h index 743d902767..9e58197b47 100644 --- a/src/dynarmic/src/dynarmic/interface/optimization_flags.h +++ b/src/dynarmic/src/dynarmic/interface/optimization_flags.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + /* This file is part of the dynarmic project. * Copyright (c) 2020 MerryMage * SPDX-License-Identifier: 0BSD @@ -34,6 +37,8 @@ enum class OptimizationFlag : std::uint32_t { MiscIROpt = 0x00000020, /// Optimize for code speed rather than for code size (this serves well for tight loops) CodeSpeed = 0x00000040, + /// Disable verification passes + DisableVerification = 0x00000080, /// This is an UNSAFE optimization that reduces accuracy of fused multiply-add operations. /// This unfuses fused instructions to improve performance on host CPUs without FMA support. diff --git a/src/dynarmic/src/dynarmic/ir/opt_passes.cpp b/src/dynarmic/src/dynarmic/ir/opt_passes.cpp index 844e29023c..e9175f0e6b 100644 --- a/src/dynarmic/src/dynarmic/ir/opt_passes.cpp +++ b/src/dynarmic/src/dynarmic/ir/opt_passes.cpp @@ -1491,9 +1491,9 @@ void Optimize(IR::Block& block, const A32::UserConfig& conf, const Optimization: Optimization::DeadCodeElimination(block); } Optimization::IdentityRemovalPass(block); - //if (!conf.HasOptimization(OptimizationFlag::DisableVerification)) { + if (!conf.HasOptimization(OptimizationFlag::DisableVerification)) { Optimization::VerificationPass(block); - //} + } } void Optimize(IR::Block& block, const A64::UserConfig& conf, const Optimization::PolyfillOptions& polyfill_options) { @@ -1511,9 +1511,9 @@ void Optimize(IR::Block& block, const A64::UserConfig& conf, const Optimization: if (conf.HasOptimization(OptimizationFlag::MiscIROpt)) [[likely]] { Optimization::A64MergeInterpretBlocksPass(block, conf.callbacks); } - //if (!conf.HasOptimization(OptimizationFlag::DisableVerification)) { + if (!conf.HasOptimization(OptimizationFlag::DisableVerification)) { Optimization::VerificationPass(block); - //} + } } } // namespace Dynarmic::Optimization From d1bac2308f1d854df0dec83c810be647c692ab5c Mon Sep 17 00:00:00 2001 From: lizzie Date: Mon, 29 Sep 2025 08:30:12 +0000 Subject: [PATCH 12/12] [dynarmic] backport WAITPKG based spinlocks Signed-off-by: lizzie --- .../dynarmic/backend/x64/block_of_code.cpp | 2 + .../backend/x64/emit_x64_memory.cpp.inc | 3 +- .../dynarmic/backend/x64/emit_x64_memory.h | 5 ++- .../src/dynarmic/backend/x64/host_feature.h | 3 +- .../src/dynarmic/common/spin_lock_x64.cpp | 37 +++++++++++++++++-- .../src/dynarmic/common/spin_lock_x64.h | 5 ++- 6 files changed, 47 insertions(+), 8 deletions(-) diff --git a/src/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp b/src/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp index d5d5f089ff..4a8de6475e 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp +++ b/src/dynarmic/src/dynarmic/backend/x64/block_of_code.cpp @@ -188,6 +188,8 @@ HostFeature GetHostFeatures() { features |= HostFeature::LZCNT; if (cpu_info.has(Cpu::tGFNI)) features |= HostFeature::GFNI; + if (cpu_info.has(Cpu::tWAITPKG)) + features |= HostFeature::WAITPKG; if (cpu_info.has(Cpu::tBMI2)) { // BMI2 instructions such as pdep and pext have been very slow up until Zen 3. diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.cpp.inc b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.cpp.inc index 34f77b0446..36a2d40de6 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.cpp.inc +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.cpp.inc @@ -430,10 +430,11 @@ void AxxEmitX64::EmitExclusiveWriteMemoryInline(AxxEmitContext& ctx, IR::Inst* i const Xbyak::Reg64 vaddr = ctx.reg_alloc.UseGpr(args[1]); const Xbyak::Reg32 status = ctx.reg_alloc.ScratchGpr().cvt32(); const Xbyak::Reg64 tmp = ctx.reg_alloc.ScratchGpr(); + const Xbyak::Reg64 tmp2 = ctx.reg_alloc.ScratchGpr(); const auto wrapped_fn = exclusive_write_fallbacks[std::make_tuple(ordered, bitsize, vaddr.getIdx(), value.getIdx())]; - EmitExclusiveLock(code, conf, tmp, eax); + EmitExclusiveLock(code, conf, tmp, tmp2); SharedLabel end = GenSharedLabel(); diff --git a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.h b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.h index 75a47c6a80..c363ea1b6b 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.h +++ b/src/dynarmic/src/dynarmic/backend/x64/emit_x64_memory.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + /* This file is part of the dynarmic project. * Copyright (c) 2022 MerryMage * SPDX-License-Identifier: 0BSD @@ -343,7 +346,7 @@ void EmitExclusiveLock(BlockOfCode& code, const UserConfig& conf, Xbyak::Reg64 p } code.mov(pointer, mcl::bit_cast(GetExclusiveMonitorLockPointer(conf.global_monitor))); - EmitSpinLockLock(code, pointer, tmp); + EmitSpinLockLock(code, pointer, tmp, code.HasHostFeature(HostFeature::WAITPKG)); } template diff --git a/src/dynarmic/src/dynarmic/backend/x64/host_feature.h b/src/dynarmic/src/dynarmic/backend/x64/host_feature.h index 7246ed18d4..34dca971cb 100644 --- a/src/dynarmic/src/dynarmic/backend/x64/host_feature.h +++ b/src/dynarmic/src/dynarmic/backend/x64/host_feature.h @@ -35,9 +35,10 @@ enum class HostFeature : u64 { BMI2 = 1ULL << 19, LZCNT = 1ULL << 20, GFNI = 1ULL << 21, + WAITPKG = 1ULL << 22, // Zen-based BMI2 - FastBMI2 = 1ULL << 22, + FastBMI2 = 1ULL << 23, // Orthographic AVX512 features on 128 and 256 vectors AVX512_Ortho = AVX512F | AVX512VL, diff --git a/src/dynarmic/src/dynarmic/common/spin_lock_x64.cpp b/src/dynarmic/src/dynarmic/common/spin_lock_x64.cpp index da50179de9..5307672bfe 100644 --- a/src/dynarmic/src/dynarmic/common/spin_lock_x64.cpp +++ b/src/dynarmic/src/dynarmic/common/spin_lock_x64.cpp @@ -22,17 +22,46 @@ static const auto default_cg_mode = nullptr; //Allow RWE namespace Dynarmic { -void EmitSpinLockLock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32 tmp) { +void EmitSpinLockLock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32 tmp, bool waitpkg) { + // TODO: this is because we lack regalloc - so better to be safe :( + if (waitpkg) { + code.push(Xbyak::util::eax); + code.push(Xbyak::util::ebx); + code.push(Xbyak::util::edx); + } Xbyak::Label start, loop; - code.jmp(start, code.T_NEAR); code.L(loop); - code.pause(); + if (waitpkg) { + // TODO: This clobbers EAX and EDX did we tell the regalloc? + // ARM ptr for address-monitoring + code.umonitor(ptr); + // tmp.bit[0] = 0: C0.1 | Slow Wakup | Better Savings + // tmp.bit[0] = 1: C0.2 | Fast Wakup | Lesser Savings + // edx:eax is implicitly used as a 64-bit deadline timestamp + // Use the maximum so that we use the operating system's maximum + // allowed wait time within the IA32_UMWAIT_CONTROL register + // Enter power state designated by tmp and wait for a write to lock_ptr + code.mov(Xbyak::util::eax, 0xFFFFFFFF); + code.mov(Xbyak::util::edx, Xbyak::util::eax); + // TODO: We can only be here because tmp is 1 already - however we repeatedly overwrite it... + code.mov(Xbyak::util::ebx, 1); + code.umwait(Xbyak::util::ebx); + // CF == 1 if we hit the OS-timeout in IA32_UMWAIT_CONTROL without a write + // CF == 0 if we exited the wait for any other reason + } else { + code.pause(); + } code.L(start); code.mov(tmp, 1); /*code.lock();*/ code.xchg(code.dword[ptr], tmp); code.test(tmp, tmp); code.jnz(loop, code.T_NEAR); + if (waitpkg) { + code.pop(Xbyak::util::edx); + code.pop(Xbyak::util::ebx); + code.pop(Xbyak::util::eax); + } } void EmitSpinLockUnlock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32 tmp) { @@ -60,7 +89,7 @@ void SpinLockImpl::Initialize() { code.align(); lock = code.getCurr(); - EmitSpinLockLock(code, ABI_PARAM1, code.eax); + EmitSpinLockLock(code, ABI_PARAM1, code.eax, false); code.ret(); code.align(); diff --git a/src/dynarmic/src/dynarmic/common/spin_lock_x64.h b/src/dynarmic/src/dynarmic/common/spin_lock_x64.h index df6a3d7407..df6860e2f2 100644 --- a/src/dynarmic/src/dynarmic/common/spin_lock_x64.h +++ b/src/dynarmic/src/dynarmic/common/spin_lock_x64.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + /* This file is part of the dynarmic project. * Copyright (c) 2022 MerryMage * SPDX-License-Identifier: 0BSD @@ -9,7 +12,7 @@ namespace Dynarmic { -void EmitSpinLockLock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32 tmp); +void EmitSpinLockLock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32 tmp, bool waitpkg); void EmitSpinLockUnlock(Xbyak::CodeGenerator& code, Xbyak::Reg64 ptr, Xbyak::Reg32 tmp); } // namespace Dynarmic