Compare commits
5 commits
master
...
fast-gpu-p
Author | SHA1 | Date | |
---|---|---|---|
![]() |
c0a1818bbf | ||
![]() |
eed010964e | ||
![]() |
6103d2b7f4 | ||
![]() |
50f6bfb6bd | ||
![]() |
435483f7c1 |
7 changed files with 66 additions and 1 deletion
|
@ -17,6 +17,7 @@ enum class BooleanSetting(override val key: String) : AbstractBooleanSetting {
|
|||
RENDERER_USE_SPEED_LIMIT("use_speed_limit"),
|
||||
USE_FAST_CPU_TIME("use_fast_cpu_time"),
|
||||
USE_CUSTOM_CPU_TICKS("use_custom_cpu_ticks"),
|
||||
FAST_GPU_PATH("fast_gpu_path"),
|
||||
SKIP_CPU_INNER_INVALIDATION("skip_cpu_inner_invalidation"),
|
||||
USE_DOCKED_MODE("use_docked_mode"),
|
||||
USE_AUTO_STUB("use_auto_stub"),
|
||||
|
|
|
@ -652,6 +652,13 @@ abstract class SettingsItem(
|
|||
max = 65535
|
||||
)
|
||||
)
|
||||
put(
|
||||
SwitchSetting(
|
||||
BooleanSetting.FAST_GPU_PATH,
|
||||
titleId = R.string.fast_gpu_path,
|
||||
descriptionId = R.string.fast_gpu_path_description
|
||||
)
|
||||
)
|
||||
put(
|
||||
SwitchSetting(
|
||||
BooleanSetting.SKIP_CPU_INNER_INVALIDATION,
|
||||
|
|
|
@ -464,6 +464,7 @@ class SettingsFragmentPresenter(
|
|||
add(IntSetting.FAST_CPU_TIME.key)
|
||||
add(BooleanSetting.USE_CUSTOM_CPU_TICKS.key)
|
||||
add(IntSetting.CPU_TICKS.key)
|
||||
add(BooleanSetting.FAST_GPU_PATH.key)
|
||||
add(BooleanSetting.SKIP_CPU_INNER_INVALIDATION.key)
|
||||
add(BooleanSetting.USE_LRU_CACHE.key)
|
||||
add(BooleanSetting.CORE_SYNC_CORE_SPEED.key)
|
||||
|
|
|
@ -101,6 +101,8 @@
|
|||
<string name="custom_cpu_ticks">Custom CPU Ticks</string>
|
||||
<string name="custom_cpu_ticks_description">Set a custom value of CPU ticks. Higher values can increase performance, but may also cause the game to freeze. A range of 77–21000 is recommended.</string>
|
||||
<string name="cpu_ticks">Ticks</string>
|
||||
<string name="fast_gpu_path">Fast GPU Path</string>
|
||||
<string name="fast_gpu_path_description">Bypasses all CPU–GPU synchronization and fence handling, reducing overhead and improving the performance. This may cause glitches or crashes on some games.</string>
|
||||
<string name="skip_cpu_inner_invalidation">Skip CPU Inner Invalidation</string>
|
||||
<string name="skip_cpu_inner_invalidation_description">Skips certain CPU-side cache invalidations during memory updates, reducing CPU usage and improving its performance. This may cause glitches or crashes on some games.</string>
|
||||
<string name="fast_cpu_time">CPU Clock</string>
|
||||
|
|
|
@ -450,6 +450,13 @@ struct Values {
|
|||
VramUsageMode::Aggressive,
|
||||
"vram_usage_mode",
|
||||
Category::RendererAdvanced};
|
||||
// Toggle for the "fast GPU path" hack; the second initializer argument makes it default to false.
// NOTE(review): GPU::Impl copies this value into a const member in its constructor, so a change
// made afterwards is presumably not picked up until the GPU is re-created — confirm that the
// trailing `true, true` flags are consistent with that.
SwitchableSetting<bool> fast_gpu_path{linkage,
|
||||
false,
|
||||
"fast_gpu_path",
|
||||
Category::RendererAdvanced,
|
||||
Specialization::Default,
|
||||
true,
|
||||
true};
|
||||
SwitchableSetting<bool> skip_cpu_inner_invalidation{linkage,
|
||||
true,
|
||||
"skip_cpu_inner_invalidation",
|
||||
|
|
|
@ -1,3 +1,6 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
|
@ -40,7 +43,8 @@ struct GPU::Impl {
|
|||
explicit Impl(GPU& gpu_, Core::System& system_, bool is_async_, bool use_nvdec_)
|
||||
: gpu{gpu_}, system{system_}, host1x{system.Host1x()}, use_nvdec{use_nvdec_},
|
||||
shader_notify{std::make_unique<VideoCore::ShaderNotify>()}, is_async{is_async_},
|
||||
gpu_thread{system_, is_async_}, scheduler{std::make_unique<Control::Scheduler>(gpu)} {}
|
||||
gpu_thread{system_, is_async_}, scheduler{std::make_unique<Control::Scheduler>(gpu)},
|
||||
fast_path{Settings::values.fast_gpu_path.GetValue()} {}
|
||||
|
||||
~Impl() = default;
|
||||
|
||||
|
@ -96,6 +100,7 @@ struct GPU::Impl {
|
|||
|
||||
/// Synchronizes CPU writes with Host GPU memory.
|
||||
void InvalidateGPUCache() {
|
||||
if (fast_path) return;
|
||||
std::function<void(PAddr, size_t)> callback_writes(
|
||||
[this](PAddr address, size_t size) { rasterizer->OnCacheInvalidation(address, size); });
|
||||
system.GatherGPUDirtyMemory(callback_writes);
|
||||
|
@ -110,6 +115,16 @@ struct GPU::Impl {
|
|||
/// Request a host GPU memory flush from the CPU.
|
||||
template <typename Func>
|
||||
[[nodiscard]] u64 RequestSyncOperation(Func&& action) {
|
||||
if (fast_path) {
|
||||
// Execute immediately and publish the result
|
||||
action();
|
||||
gpu_thread.TickGPU();
|
||||
const u64 fence = ++last_sync_fence;
|
||||
// Mirror the normal path: advance current and wake any waiters
|
||||
current_sync_fence.store(fence, std::memory_order_release);
|
||||
sync_request_cv.notify_all();
|
||||
return fence;
|
||||
}
|
||||
std::unique_lock lck{sync_request_mutex};
|
||||
const u64 fence = ++last_sync_fence;
|
||||
sync_requests.emplace_back(action);
|
||||
|
@ -122,12 +137,27 @@ struct GPU::Impl {
|
|||
}
|
||||
|
||||
/// Blocks the caller until the current sync request fence is at least `fence`.
/// When fast_path is set, returns immediately without waiting.
/// @param fence Fence value to wait for; unused on the fast path.
void WaitForSyncOperation(const u64 fence) {
|
||||
if (fast_path) {
|
||||
// Fast path enabled: skip the wait entirely and return to the caller right away.
|
||||
return;
|
||||
}
|
||||
// The condition variable wait below needs sync_request_mutex held.
std::unique_lock lck{sync_request_mutex};
|
||||
// Sleep until CurrentSyncRequestFence() has caught up with the requested fence.
sync_request_cv.wait(lck, [this, fence] { return CurrentSyncRequestFence() >= fence; });
|
||||
}
|
||||
|
||||
/// Tick pending requests within the GPU.
|
||||
void TickWork() {
|
||||
if (fast_path) {
|
||||
// Drain queue without waiting on condition variables
|
||||
while (!sync_requests.empty()) {
|
||||
auto request = std::move(sync_requests.front());
|
||||
sync_requests.pop_front();
|
||||
request();
|
||||
current_sync_fence.fetch_add(1, std::memory_order_release);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
std::unique_lock lck{sync_request_mutex};
|
||||
while (!sync_requests.empty()) {
|
||||
auto request = std::move(sync_requests.front());
|
||||
|
@ -259,6 +289,15 @@ struct GPU::Impl {
|
|||
}
|
||||
|
||||
VideoCore::RasterizerDownloadArea OnCPURead(DAddr addr, u64 size) {
|
||||
if (fast_path) {
|
||||
// Bypass fence/tick entirely
|
||||
auto raster_area = rasterizer->GetFlushArea(addr, size);
|
||||
rasterizer->FlushRegion(raster_area.start_address, raster_area.end_address - raster_area.start_address);
|
||||
raster_area.preemtive = true;
|
||||
// Give GPU thread a chance to run that flush
|
||||
gpu_thread.TickGPU();
|
||||
return raster_area;
|
||||
}
|
||||
auto raster_area = rasterizer->GetFlushArea(addr, size);
|
||||
if (raster_area.preemtive) {
|
||||
return raster_area;
|
||||
|
@ -372,6 +411,9 @@ struct GPU::Impl {
|
|||
std::unique_ptr<Core::Frontend::GraphicsContext> cpu_context;
|
||||
|
||||
std::unique_ptr<Tegra::Control::Scheduler> scheduler;
|
||||
|
||||
const bool fast_path;
|
||||
|
||||
std::unordered_map<s32, std::shared_ptr<Tegra::Control::ChannelState>> channels;
|
||||
Tegra::Control::ChannelState* current_channel;
|
||||
s32 bound_channel{-1};
|
||||
|
|
|
@ -250,6 +250,11 @@ std::unique_ptr<TranslationMap> InitializeTranslations(QWidget* parent)
|
|||
"of available video memory for performance. Has no effect on integrated graphics. "
|
||||
"Aggressive mode may severely impact the performance of other applications such as "
|
||||
"recording software."));
|
||||
INSERT(Settings,
|
||||
fast_gpu_path,
|
||||
tr("Fast GPU Path"),
|
||||
tr("Bypasses all CPU–GPU synchronization and fence handling, reducing overhead and improving "
|
||||
"the performance. This may cause glitches or crashes on some games."));
|
||||
INSERT(Settings,
|
||||
skip_cpu_inner_invalidation,
|
||||
tr("Skip CPU Inner Invalidation"),
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue