diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/BooleanSetting.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/BooleanSetting.kt
index 92a49a1de7..ec2984e434 100644
--- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/BooleanSetting.kt
+++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/BooleanSetting.kt
@@ -17,6 +17,7 @@ enum class BooleanSetting(override val key: String) : AbstractBooleanSetting {
RENDERER_USE_SPEED_LIMIT("use_speed_limit"),
USE_FAST_CPU_TIME("use_fast_cpu_time"),
USE_CUSTOM_CPU_TICKS("use_custom_cpu_ticks"),
+ FAST_GPU_PATH("fast_gpu_path"),
SKIP_CPU_INNER_INVALIDATION("skip_cpu_inner_invalidation"),
USE_DOCKED_MODE("use_docked_mode"),
USE_AUTO_STUB("use_auto_stub"),
diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/view/SettingsItem.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/view/SettingsItem.kt
index d4335ddcd8..589efd5c58 100644
--- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/view/SettingsItem.kt
+++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/model/view/SettingsItem.kt
@@ -652,6 +652,13 @@ abstract class SettingsItem(
max = 65535
)
)
+ put(
+ SwitchSetting(
+ BooleanSetting.FAST_GPU_PATH,
+ titleId = R.string.fast_gpu_path,
+ descriptionId = R.string.fast_gpu_path_description
+ )
+ )
put(
SwitchSetting(
BooleanSetting.SKIP_CPU_INNER_INVALIDATION,
diff --git a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/ui/SettingsFragmentPresenter.kt b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/ui/SettingsFragmentPresenter.kt
index 8555b334ee..adabf67744 100644
--- a/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/ui/SettingsFragmentPresenter.kt
+++ b/src/android/app/src/main/java/org/yuzu/yuzu_emu/features/settings/ui/SettingsFragmentPresenter.kt
@@ -464,6 +464,7 @@ class SettingsFragmentPresenter(
add(IntSetting.FAST_CPU_TIME.key)
add(BooleanSetting.USE_CUSTOM_CPU_TICKS.key)
add(IntSetting.CPU_TICKS.key)
+ add(BooleanSetting.FAST_GPU_PATH.key)
add(BooleanSetting.SKIP_CPU_INNER_INVALIDATION.key)
add(BooleanSetting.USE_LRU_CACHE.key)
add(BooleanSetting.CORE_SYNC_CORE_SPEED.key)
diff --git a/src/android/app/src/main/res/values/strings.xml b/src/android/app/src/main/res/values/strings.xml
index c78487e327..a73f0a1a15 100644
--- a/src/android/app/src/main/res/values/strings.xml
+++ b/src/android/app/src/main/res/values/strings.xml
@@ -101,6 +101,8 @@
Custom CPU Ticks
Set a custom value of CPU ticks. Higher values can increase performance, but may also cause the game to freeze. A range of 77–21000 is recommended.
Ticks
+ Fast GPU Path
+ Bypasses all CPU–GPU synchronization and fence handling, reducing overhead and improving the performance. This may cause glitches or crashes on some games.
Skip CPU Inner Invalidation
Skips certain CPU-side cache invalidations during memory updates, reducing CPU usage and improving it\'s performance. This may cause glitches or crashes on some games.
CPU Clock
diff --git a/src/common/settings.h b/src/common/settings.h
index e3c2bd57cc..9ac06e526e 100644
--- a/src/common/settings.h
+++ b/src/common/settings.h
@@ -450,6 +450,13 @@ struct Values {
VramUsageMode::Aggressive,
"vram_usage_mode",
Category::RendererAdvanced};
+ SwitchableSetting fast_gpu_path{linkage,
+ false,
+ "fast_gpu_path",
+ Category::RendererAdvanced,
+ Specialization::Default,
+ true,
+ true};
SwitchableSetting skip_cpu_inner_invalidation{linkage,
true,
"skip_cpu_inner_invalidation",
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 7c34005a12..59d26dba6e 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -1,3 +1,6 @@
+// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@@ -40,7 +43,8 @@ struct GPU::Impl {
explicit Impl(GPU& gpu_, Core::System& system_, bool is_async_, bool use_nvdec_)
: gpu{gpu_}, system{system_}, host1x{system.Host1x()}, use_nvdec{use_nvdec_},
shader_notify{std::make_unique()}, is_async{is_async_},
- gpu_thread{system_, is_async_}, scheduler{std::make_unique(gpu)} {}
+ gpu_thread{system_, is_async_}, scheduler{std::make_unique(gpu)},
+ fast_path{Settings::values.fast_gpu_path.GetValue()} {}
~Impl() = default;
@@ -96,6 +100,7 @@ struct GPU::Impl {
/// Synchronizes CPU writes with Host GPU memory.
void InvalidateGPUCache() {
+ if (fast_path) return;
std::function callback_writes(
[this](PAddr address, size_t size) { rasterizer->OnCacheInvalidation(address, size); });
system.GatherGPUDirtyMemory(callback_writes);
@@ -110,6 +115,16 @@ struct GPU::Impl {
/// Request a host GPU memory flush from the CPU.
template
[[nodiscard]] u64 RequestSyncOperation(Func&& action) {
+ if (fast_path) {
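+ // Run the action inline on the calling thread, then publish a new fence. NOTE(review): last_sync_fence is incremented below without sync_request_mutex, unlike the normal path - confirm this cannot race.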
+ action();
+ gpu_thread.TickGPU();
+ const u64 fence = ++last_sync_fence;
+ // Mirror the normal path: advance current and wake any waiters
+ current_sync_fence.store(fence, std::memory_order_release);
+ sync_request_cv.notify_all();
+ return fence;
+ }
std::unique_lock lck{sync_request_mutex};
const u64 fence = ++last_sync_fence;
sync_requests.emplace_back(action);
@@ -122,12 +137,27 @@ struct GPU::Impl {
}
void WaitForSyncOperation(const u64 fence) {
+ if (fast_path) {
+ // Fast path never blocks: the fast-path RequestSyncOperation runs its action inline before it returns the fence.
+ return;
+ }
std::unique_lock lck{sync_request_mutex};
sync_request_cv.wait(lck, [this, fence] { return CurrentSyncRequestFence() >= fence; });
}
/// Tick pending requests within the GPU.
void TickWork() {
+ if (fast_path) {
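+ // Drain any queued requests without taking sync_request_mutex or notifying sync_request_cv. NOTE(review): the fast-path RequestSyncOperation never enqueues, so this loop is presumably a no-op - confirm.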
+ while (!sync_requests.empty()) {
+ auto request = std::move(sync_requests.front());
+ sync_requests.pop_front();
+ request();
+ current_sync_fence.fetch_add(1, std::memory_order_release);
+ }
+ return;
+ }
+
std::unique_lock lck{sync_request_mutex};
while (!sync_requests.empty()) {
auto request = std::move(sync_requests.front());
@@ -259,6 +289,15 @@ struct GPU::Impl {
}
VideoCore::RasterizerDownloadArea OnCPURead(DAddr addr, u64 size) {
+ if (fast_path) {
+ // Flush the region directly from the calling thread; no sync fence is requested or waited on
+ auto raster_area = rasterizer->GetFlushArea(addr, size);
+ rasterizer->FlushRegion(raster_area.start_address, raster_area.end_address - raster_area.start_address);
+ raster_area.preemtive = true;
+ // Tick the GPU thread after the flush issued above. NOTE(review): FlushRegion was already called directly - confirm this tick is still needed.
+ gpu_thread.TickGPU();
+ return raster_area;
+ }
auto raster_area = rasterizer->GetFlushArea(addr, size);
if (raster_area.preemtive) {
return raster_area;
@@ -372,6 +411,9 @@ struct GPU::Impl {
std::unique_ptr cpu_context;
std::unique_ptr scheduler;
+
+ const bool fast_path; // Value of Settings::values.fast_gpu_path read once in the constructor; const, so later setting changes do not affect this instance.
+
std::unordered_map> channels;
Tegra::Control::ChannelState* current_channel;
s32 bound_channel{-1};
diff --git a/src/yuzu/configuration/shared_translation.cpp b/src/yuzu/configuration/shared_translation.cpp
index 770a16a481..9af0b71210 100644
--- a/src/yuzu/configuration/shared_translation.cpp
+++ b/src/yuzu/configuration/shared_translation.cpp
@@ -250,6 +250,11 @@ std::unique_ptr InitializeTranslations(QWidget* parent)
"of available video memory for performance. Has no effect on integrated graphics. "
"Aggressive mode may severely impact the performance of other applications such as "
"recording software."));
+ INSERT(Settings,
+ fast_gpu_path,
+ tr("Fast GPU Path"),
+ tr("Bypasses all CPU–GPU synchronization and fence handling, reducing overhead and improving "
+ "the performance. This may cause glitches or crashes on some games."));
INSERT(Settings,
skip_cpu_inner_invalidation,
tr("Skip CPU Inner Invalidation"),