diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/BooleanSetting.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/BooleanSetting.kt index 92a49a1de7..ec2984e434 100644 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/BooleanSetting.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/BooleanSetting.kt @@ -17,6 +17,7 @@ enum class BooleanSetting(override val key: String) : AbstractBooleanSetting { RENDERER_USE_SPEED_LIMIT("use_speed_limit"), USE_FAST_CPU_TIME("use_fast_cpu_time"), USE_CUSTOM_CPU_TICKS("use_custom_cpu_ticks"), + FAST_GPU_PATH("fast_gpu_path"), SKIP_CPU_INNER_INVALIDATION("skip_cpu_inner_invalidation"), USE_DOCKED_MODE("use_docked_mode"), USE_AUTO_STUB("use_auto_stub"), diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/view/SettingsItem.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/view/SettingsItem.kt index d4335ddcd8..589efd5c58 100644 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/view/SettingsItem.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/view/SettingsItem.kt @@ -652,6 +652,13 @@ abstract class SettingsItem( max = 65535 ) ) + put( + SwitchSetting( + BooleanSetting.FAST_GPU_PATH, + titleId = R.string.fast_gpu_path, + descriptionId = R.string.fast_gpu_path_description + ) + ) put( SwitchSetting( BooleanSetting.SKIP_CPU_INNER_INVALIDATION, diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/ui/SettingsFragmentPresenter.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/ui/SettingsFragmentPresenter.kt index 8555b334ee..adabf67744 100644 --- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/ui/SettingsFragmentPresenter.kt +++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/ui/SettingsFragmentPresenter.kt @@ -464,6 +464,7 @@ class SettingsFragmentPresenter( add(IntSetting.FAST_CPU_TIME.key) add(BooleanSetting.USE_CUSTOM_CPU_TICKS.key) add(IntSetting.CPU_TICKS.key) + add(BooleanSetting.FAST_GPU_PATH.key) add(BooleanSetting.SKIP_CPU_INNER_INVALIDATION.key) add(BooleanSetting.USE_LRU_CACHE.key) add(BooleanSetting.CORE_SYNC_CORE_SPEED.key) diff --git a/src/android/app/src/main/res/values/strings.xml b/src/android/app/src/main/res/values/strings.xml index c78487e327..a73f0a1a15 100644 --- a/src/android/app/src/main/res/values/strings.xml +++ b/src/android/app/src/main/res/values/strings.xml @@ -101,6 +101,8 @@ Custom CPU Ticks Set a custom value of CPU ticks. Higher values can increase performance, but may also cause the game to freeze. A range of 77–21000 is recommended. Ticks + Fast GPU Path + Bypasses all CPU–GPU synchronization and fence handling, reducing overhead and improving the performance. This may cause glitches or crashes on some games. Skip CPU Inner Invalidation Skips certain CPU-side cache invalidations during memory updates, reducing CPU usage and improving it\'s performance. This may cause glitches or crashes on some games. CPU Clock diff --git a/src/common/settings.h b/src/common/settings.h index e3c2bd57cc..9ac06e526e 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -450,6 +450,13 @@ struct Values { VramUsageMode::Aggressive, "vram_usage_mode", Category::RendererAdvanced}; + SwitchableSetting fast_gpu_path{linkage, + false, + "fast_gpu_path", + Category::RendererAdvanced, + Specialization::Default, + true, + true}; SwitchableSetting skip_cpu_inner_invalidation{linkage, true, "skip_cpu_inner_invalidation", diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 7c34005a12..59d26dba6e 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later @@ -40,7 +43,8 @@ struct GPU::Impl { explicit Impl(GPU& gpu_, Core::System& system_, bool is_async_, bool use_nvdec_) : gpu{gpu_}, system{system_}, host1x{system.Host1x()}, use_nvdec{use_nvdec_}, shader_notify{std::make_unique()}, is_async{is_async_}, - gpu_thread{system_, is_async_}, scheduler{std::make_unique(gpu)} {} + gpu_thread{system_, is_async_}, scheduler{std::make_unique(gpu)}, + fast_path{Settings::values.fast_gpu_path.GetValue()} {} ~Impl() = default; @@ -96,6 +100,7 @@ struct GPU::Impl { /// Synchronizes CPU writes with Host GPU memory. void InvalidateGPUCache() { + if (fast_path) return; std::function callback_writes( [this](PAddr address, size_t size) { rasterizer->OnCacheInvalidation(address, size); }); system.GatherGPUDirtyMemory(callback_writes); @@ -110,6 +115,16 @@ struct GPU::Impl { /// Request a host GPU memory flush from the CPU. template [[nodiscard]] u64 RequestSyncOperation(Func&& action) { + if (fast_path) { + // Execute immediately and publish the result + action(); + gpu_thread.TickGPU(); + const u64 fence = ++last_sync_fence; + // Mirror the normal path: advance current and wake any waiters + current_sync_fence.store(fence, std::memory_order_release); + sync_request_cv.notify_all(); + return fence; + } std::unique_lock lck{sync_request_mutex}; const u64 fence = ++last_sync_fence; sync_requests.emplace_back(action); @@ -122,12 +137,27 @@ struct GPU::Impl { } void WaitForSyncOperation(const u64 fence) { + if (fast_path) { + // Don’t block when the hack is on + return; + } std::unique_lock lck{sync_request_mutex}; sync_request_cv.wait(lck, [this, fence] { return CurrentSyncRequestFence() >= fence; }); } /// Tick pending requests within the GPU. void TickWork() { + if (fast_path) { + // Drain queue without waiting on condition variables + while (!sync_requests.empty()) { + auto request = std::move(sync_requests.front()); + sync_requests.pop_front(); + request(); + current_sync_fence.fetch_add(1, std::memory_order_release); + } + return; + } + std::unique_lock lck{sync_request_mutex}; while (!sync_requests.empty()) { auto request = std::move(sync_requests.front()); @@ -259,6 +289,15 @@ struct GPU::Impl { } VideoCore::RasterizerDownloadArea OnCPURead(DAddr addr, u64 size) { + if (fast_path) { + // Bypass fence/tick entirely + auto raster_area = rasterizer->GetFlushArea(addr, size); + rasterizer->FlushRegion(raster_area.start_address, raster_area.end_address - raster_area.start_address); + raster_area.preemtive = true; + // Give GPU thread a chance to run that flush + gpu_thread.TickGPU(); + return raster_area; + } auto raster_area = rasterizer->GetFlushArea(addr, size); if (raster_area.preemtive) { return raster_area; @@ -372,6 +411,9 @@ struct GPU::Impl { std::unique_ptr cpu_context; std::unique_ptr scheduler; + + const bool fast_path; + std::unordered_map> channels; Tegra::Control::ChannelState* current_channel; s32 bound_channel{-1}; diff --git a/src/yuzu/configuration/shared_translation.cpp b/src/yuzu/configuration/shared_translation.cpp index 770a16a481..9af0b71210 100644 --- a/src/yuzu/configuration/shared_translation.cpp +++ b/src/yuzu/configuration/shared_translation.cpp @@ -250,6 +250,11 @@ std::unique_ptr InitializeTranslations(QWidget* parent) "of available video memory for performance. Has no effect on integrated graphics. " "Aggressive mode may severely impact the performance of other applications such as " "recording software.")); + INSERT(Settings, + fast_gpu_path, + tr("Fast GPU Path"), + tr("Bypasses all CPU–GPU synchronization and fence handling, reducing overhead and improving " + "the performance. This may cause glitches or crashes on some games.")); INSERT(Settings, skip_cpu_inner_invalidation, tr("Skip CPU Inner Invalidation"),