Compare commits
5 commits
master
...
fast-gpu-p
Author | SHA1 | Date | |
---|---|---|---|
![]() |
c0a1818bbf | ||
![]() |
eed010964e | ||
![]() |
6103d2b7f4 | ||
![]() |
50f6bfb6bd | ||
![]() |
435483f7c1 |
7 changed files with 66 additions and 1 deletion
|
@ -17,6 +17,7 @@ enum class BooleanSetting(override val key: String) : AbstractBooleanSetting {
|
||||||
RENDERER_USE_SPEED_LIMIT("use_speed_limit"),
|
RENDERER_USE_SPEED_LIMIT("use_speed_limit"),
|
||||||
USE_FAST_CPU_TIME("use_fast_cpu_time"),
|
USE_FAST_CPU_TIME("use_fast_cpu_time"),
|
||||||
USE_CUSTOM_CPU_TICKS("use_custom_cpu_ticks"),
|
USE_CUSTOM_CPU_TICKS("use_custom_cpu_ticks"),
|
||||||
|
FAST_GPU_PATH("fast_gpu_path"),
|
||||||
SKIP_CPU_INNER_INVALIDATION("skip_cpu_inner_invalidation"),
|
SKIP_CPU_INNER_INVALIDATION("skip_cpu_inner_invalidation"),
|
||||||
USE_DOCKED_MODE("use_docked_mode"),
|
USE_DOCKED_MODE("use_docked_mode"),
|
||||||
USE_AUTO_STUB("use_auto_stub"),
|
USE_AUTO_STUB("use_auto_stub"),
|
||||||
|
|
|
@ -652,6 +652,13 @@ abstract class SettingsItem(
|
||||||
max = 65535
|
max = 65535
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
put(
|
||||||
|
SwitchSetting(
|
||||||
|
BooleanSetting.FAST_GPU_PATH,
|
||||||
|
titleId = R.string.fast_gpu_path,
|
||||||
|
descriptionId = R.string.fast_gpu_path_description
|
||||||
|
)
|
||||||
|
)
|
||||||
put(
|
put(
|
||||||
SwitchSetting(
|
SwitchSetting(
|
||||||
BooleanSetting.SKIP_CPU_INNER_INVALIDATION,
|
BooleanSetting.SKIP_CPU_INNER_INVALIDATION,
|
||||||
|
|
|
@ -464,6 +464,7 @@ class SettingsFragmentPresenter(
|
||||||
add(IntSetting.FAST_CPU_TIME.key)
|
add(IntSetting.FAST_CPU_TIME.key)
|
||||||
add(BooleanSetting.USE_CUSTOM_CPU_TICKS.key)
|
add(BooleanSetting.USE_CUSTOM_CPU_TICKS.key)
|
||||||
add(IntSetting.CPU_TICKS.key)
|
add(IntSetting.CPU_TICKS.key)
|
||||||
|
add(BooleanSetting.FAST_GPU_PATH.key)
|
||||||
add(BooleanSetting.SKIP_CPU_INNER_INVALIDATION.key)
|
add(BooleanSetting.SKIP_CPU_INNER_INVALIDATION.key)
|
||||||
add(BooleanSetting.USE_LRU_CACHE.key)
|
add(BooleanSetting.USE_LRU_CACHE.key)
|
||||||
add(BooleanSetting.CORE_SYNC_CORE_SPEED.key)
|
add(BooleanSetting.CORE_SYNC_CORE_SPEED.key)
|
||||||
|
|
|
@ -101,6 +101,8 @@
|
||||||
<string name="custom_cpu_ticks">Custom CPU Ticks</string>
|
<string name="custom_cpu_ticks">Custom CPU Ticks</string>
|
||||||
<string name="custom_cpu_ticks_description">Set a custom value of CPU ticks. Higher values can increase performance, but may also cause the game to freeze. A range of 77–21000 is recommended.</string>
|
<string name="custom_cpu_ticks_description">Set a custom value of CPU ticks. Higher values can increase performance, but may also cause the game to freeze. A range of 77–21000 is recommended.</string>
|
||||||
<string name="cpu_ticks">Ticks</string>
|
<string name="cpu_ticks">Ticks</string>
|
||||||
|
<string name="fast_gpu_path">Fast GPU Path</string>
|
||||||
|
<string name="fast_gpu_path_description">Bypasses all CPU–GPU synchronization and fence handling, reducing overhead and improving the performance. This may cause glitches or crashes on some games.</string>
|
||||||
<string name="skip_cpu_inner_invalidation">Skip CPU Inner Invalidation</string>
|
<string name="skip_cpu_inner_invalidation">Skip CPU Inner Invalidation</string>
|
||||||
<string name="skip_cpu_inner_invalidation_description">Skips certain CPU-side cache invalidations during memory updates, reducing CPU usage and improving its performance. This may cause glitches or crashes on some games.</string>
|
<string name="skip_cpu_inner_invalidation_description">Skips certain CPU-side cache invalidations during memory updates, reducing CPU usage and improving its performance. This may cause glitches or crashes on some games.</string>
|
||||||
<string name="fast_cpu_time">CPU Clock</string>
|
<string name="fast_cpu_time">CPU Clock</string>
|
||||||
|
|
|
@ -450,6 +450,13 @@ struct Values {
|
||||||
VramUsageMode::Aggressive,
|
VramUsageMode::Aggressive,
|
||||||
"vram_usage_mode",
|
"vram_usage_mode",
|
||||||
Category::RendererAdvanced};
|
Category::RendererAdvanced};
|
||||||
|
SwitchableSetting<bool> fast_gpu_path{linkage,
|
||||||
|
false,
|
||||||
|
"fast_gpu_path",
|
||||||
|
Category::RendererAdvanced,
|
||||||
|
Specialization::Default,
|
||||||
|
true,
|
||||||
|
true};
|
||||||
SwitchableSetting<bool> skip_cpu_inner_invalidation{linkage,
|
SwitchableSetting<bool> skip_cpu_inner_invalidation{linkage,
|
||||||
true,
|
true,
|
||||||
"skip_cpu_inner_invalidation",
|
"skip_cpu_inner_invalidation",
|
||||||
|
|
|
@ -1,3 +1,6 @@
|
||||||
|
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||||
|
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
|
|
||||||
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
|
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
|
||||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||||
|
|
||||||
|
@ -40,7 +43,8 @@ struct GPU::Impl {
|
||||||
explicit Impl(GPU& gpu_, Core::System& system_, bool is_async_, bool use_nvdec_)
|
explicit Impl(GPU& gpu_, Core::System& system_, bool is_async_, bool use_nvdec_)
|
||||||
: gpu{gpu_}, system{system_}, host1x{system.Host1x()}, use_nvdec{use_nvdec_},
|
: gpu{gpu_}, system{system_}, host1x{system.Host1x()}, use_nvdec{use_nvdec_},
|
||||||
shader_notify{std::make_unique<VideoCore::ShaderNotify>()}, is_async{is_async_},
|
shader_notify{std::make_unique<VideoCore::ShaderNotify>()}, is_async{is_async_},
|
||||||
gpu_thread{system_, is_async_}, scheduler{std::make_unique<Control::Scheduler>(gpu)} {}
|
gpu_thread{system_, is_async_}, scheduler{std::make_unique<Control::Scheduler>(gpu)},
|
||||||
|
fast_path{Settings::values.fast_gpu_path.GetValue()} {}
|
||||||
|
|
||||||
~Impl() = default;
|
~Impl() = default;
|
||||||
|
|
||||||
|
@ -96,6 +100,7 @@ struct GPU::Impl {
|
||||||
|
|
||||||
/// Synchronizes CPU writes with Host GPU memory.
|
/// Synchronizes CPU writes with Host GPU memory.
|
||||||
void InvalidateGPUCache() {
|
void InvalidateGPUCache() {
|
||||||
|
if (fast_path) return;
|
||||||
std::function<void(PAddr, size_t)> callback_writes(
|
std::function<void(PAddr, size_t)> callback_writes(
|
||||||
[this](PAddr address, size_t size) { rasterizer->OnCacheInvalidation(address, size); });
|
[this](PAddr address, size_t size) { rasterizer->OnCacheInvalidation(address, size); });
|
||||||
system.GatherGPUDirtyMemory(callback_writes);
|
system.GatherGPUDirtyMemory(callback_writes);
|
||||||
|
@ -110,6 +115,16 @@ struct GPU::Impl {
|
||||||
/// Request a host GPU memory flush from the CPU.
|
/// Request a host GPU memory flush from the CPU.
|
||||||
template <typename Func>
|
template <typename Func>
|
||||||
[[nodiscard]] u64 RequestSyncOperation(Func&& action) {
|
[[nodiscard]] u64 RequestSyncOperation(Func&& action) {
|
||||||
|
if (fast_path) {
|
||||||
|
// Execute immediately and publish the result
|
||||||
|
action();
|
||||||
|
gpu_thread.TickGPU();
|
||||||
|
const u64 fence = ++last_sync_fence;
|
||||||
|
// Mirror the normal path: advance current and wake any waiters
|
||||||
|
current_sync_fence.store(fence, std::memory_order_release);
|
||||||
|
sync_request_cv.notify_all();
|
||||||
|
return fence;
|
||||||
|
}
|
||||||
std::unique_lock lck{sync_request_mutex};
|
std::unique_lock lck{sync_request_mutex};
|
||||||
const u64 fence = ++last_sync_fence;
|
const u64 fence = ++last_sync_fence;
|
||||||
sync_requests.emplace_back(action);
|
sync_requests.emplace_back(action);
|
||||||
|
@ -122,12 +137,27 @@ struct GPU::Impl {
|
||||||
}
|
}
|
||||||
|
|
||||||
void WaitForSyncOperation(const u64 fence) {
|
void WaitForSyncOperation(const u64 fence) {
|
||||||
|
if (fast_path) {
|
||||||
|
// Do not block on the fence when the fast GPU path is enabled
|
||||||
|
return;
|
||||||
|
}
|
||||||
std::unique_lock lck{sync_request_mutex};
|
std::unique_lock lck{sync_request_mutex};
|
||||||
sync_request_cv.wait(lck, [this, fence] { return CurrentSyncRequestFence() >= fence; });
|
sync_request_cv.wait(lck, [this, fence] { return CurrentSyncRequestFence() >= fence; });
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Tick pending requests within the GPU.
|
/// Tick pending requests within the GPU.
|
||||||
void TickWork() {
|
void TickWork() {
|
||||||
|
if (fast_path) {
|
||||||
|
// Drain queue without waiting on condition variables
|
||||||
|
while (!sync_requests.empty()) {
|
||||||
|
auto request = std::move(sync_requests.front());
|
||||||
|
sync_requests.pop_front();
|
||||||
|
request();
|
||||||
|
current_sync_fence.fetch_add(1, std::memory_order_release);
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
std::unique_lock lck{sync_request_mutex};
|
std::unique_lock lck{sync_request_mutex};
|
||||||
while (!sync_requests.empty()) {
|
while (!sync_requests.empty()) {
|
||||||
auto request = std::move(sync_requests.front());
|
auto request = std::move(sync_requests.front());
|
||||||
|
@ -259,6 +289,15 @@ struct GPU::Impl {
|
||||||
}
|
}
|
||||||
|
|
||||||
VideoCore::RasterizerDownloadArea OnCPURead(DAddr addr, u64 size) {
|
VideoCore::RasterizerDownloadArea OnCPURead(DAddr addr, u64 size) {
|
||||||
|
if (fast_path) {
|
||||||
|
// Bypass fence/tick entirely
|
||||||
|
auto raster_area = rasterizer->GetFlushArea(addr, size);
|
||||||
|
rasterizer->FlushRegion(raster_area.start_address, raster_area.end_address - raster_area.start_address);
|
||||||
|
raster_area.preemtive = true;
|
||||||
|
// Give GPU thread a chance to run that flush
|
||||||
|
gpu_thread.TickGPU();
|
||||||
|
return raster_area;
|
||||||
|
}
|
||||||
auto raster_area = rasterizer->GetFlushArea(addr, size);
|
auto raster_area = rasterizer->GetFlushArea(addr, size);
|
||||||
if (raster_area.preemtive) {
|
if (raster_area.preemtive) {
|
||||||
return raster_area;
|
return raster_area;
|
||||||
|
@ -372,6 +411,9 @@ struct GPU::Impl {
|
||||||
std::unique_ptr<Core::Frontend::GraphicsContext> cpu_context;
|
std::unique_ptr<Core::Frontend::GraphicsContext> cpu_context;
|
||||||
|
|
||||||
std::unique_ptr<Tegra::Control::Scheduler> scheduler;
|
std::unique_ptr<Tegra::Control::Scheduler> scheduler;
|
||||||
|
|
||||||
|
const bool fast_path;
|
||||||
|
|
||||||
std::unordered_map<s32, std::shared_ptr<Tegra::Control::ChannelState>> channels;
|
std::unordered_map<s32, std::shared_ptr<Tegra::Control::ChannelState>> channels;
|
||||||
Tegra::Control::ChannelState* current_channel;
|
Tegra::Control::ChannelState* current_channel;
|
||||||
s32 bound_channel{-1};
|
s32 bound_channel{-1};
|
||||||
|
|
|
@ -250,6 +250,11 @@ std::unique_ptr<TranslationMap> InitializeTranslations(QWidget* parent)
|
||||||
"of available video memory for performance. Has no effect on integrated graphics. "
|
"of available video memory for performance. Has no effect on integrated graphics. "
|
||||||
"Aggressive mode may severely impact the performance of other applications such as "
|
"Aggressive mode may severely impact the performance of other applications such as "
|
||||||
"recording software."));
|
"recording software."));
|
||||||
|
INSERT(Settings,
|
||||||
|
fast_gpu_path,
|
||||||
|
tr("Fast GPU Path"),
|
||||||
|
tr("Bypasses all CPU–GPU synchronization and fence handling, reducing overhead and improving "
|
||||||
|
"the performance. This may cause glitches or crashes on some games."));
|
||||||
INSERT(Settings,
|
INSERT(Settings,
|
||||||
skip_cpu_inner_invalidation,
|
skip_cpu_inner_invalidation,
|
||||||
tr("Skip CPU Inner Invalidation"),
|
tr("Skip CPU Inner Invalidation"),
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue