Compare commits

...
Sign in to create a new pull request.

5 commits

Author SHA1 Message Date
Gamer64
c0a1818bbf [GPU]: Call TickGPU
All checks were successful
eden-license / license-header (pull_request) Successful in 28s
This is my last try lol
2025-07-25 07:52:57 +02:00
Gamer64
eed010964e [GPU]: Try to fix deadlock 2025-07-25 07:52:57 +02:00
Gamer64
6103d2b7f4 [GPU]: Hack rewrite
Hopefully this works properly now
2025-07-25 07:52:57 +02:00
Gamer64
50f6bfb6bd [GPU]: Add license header 2025-07-25 07:52:57 +02:00
Gamer64
435483f7c1 [GPU]: Implement Fast GPU Path 2025-07-25 07:52:57 +02:00
7 changed files with 66 additions and 1 deletions

View file

@ -17,6 +17,7 @@ enum class BooleanSetting(override val key: String) : AbstractBooleanSetting {
RENDERER_USE_SPEED_LIMIT("use_speed_limit"),
USE_FAST_CPU_TIME("use_fast_cpu_time"),
USE_CUSTOM_CPU_TICKS("use_custom_cpu_ticks"),
FAST_GPU_PATH("fast_gpu_path"),
SKIP_CPU_INNER_INVALIDATION("skip_cpu_inner_invalidation"),
USE_DOCKED_MODE("use_docked_mode"),
USE_AUTO_STUB("use_auto_stub"),

View file

@ -652,6 +652,13 @@ abstract class SettingsItem(
max = 65535
)
)
put(
SwitchSetting(
BooleanSetting.FAST_GPU_PATH,
titleId = R.string.fast_gpu_path,
descriptionId = R.string.fast_gpu_path_description
)
)
put(
SwitchSetting(
BooleanSetting.SKIP_CPU_INNER_INVALIDATION,

View file

@ -464,6 +464,7 @@ class SettingsFragmentPresenter(
add(IntSetting.FAST_CPU_TIME.key)
add(BooleanSetting.USE_CUSTOM_CPU_TICKS.key)
add(IntSetting.CPU_TICKS.key)
add(BooleanSetting.FAST_GPU_PATH.key)
add(BooleanSetting.SKIP_CPU_INNER_INVALIDATION.key)
add(BooleanSetting.USE_LRU_CACHE.key)
add(BooleanSetting.CORE_SYNC_CORE_SPEED.key)

View file

@ -101,6 +101,8 @@
<string name="custom_cpu_ticks">Custom CPU Ticks</string>
<string name="custom_cpu_ticks_description">Set a custom value of CPU ticks. Higher values can increase performance, but may also cause the game to freeze. A range of 77–21000 is recommended.</string>
<string name="cpu_ticks">Ticks</string>
<string name="fast_gpu_path">Fast GPU Path</string>
<string name="fast_gpu_path_description">Bypasses all CPU–GPU synchronization and fence handling, reducing overhead and improving performance. This may cause glitches or crashes on some games.</string>
<string name="skip_cpu_inner_invalidation">Skip CPU Inner Invalidation</string>
<string name="skip_cpu_inner_invalidation_description">Skips certain CPU-side cache invalidations during memory updates, reducing CPU usage and improving its performance. This may cause glitches or crashes on some games.</string>
<string name="fast_cpu_time">CPU Clock</string>

View file

@ -450,6 +450,13 @@ struct Values {
VramUsageMode::Aggressive,
"vram_usage_mode",
Category::RendererAdvanced};
// Toggle for the "Fast GPU Path" hack. Off by default, stored under the key
// "fast_gpu_path" in the RendererAdvanced category.
// NOTE(review): GPU::Impl copies this value into a const member in its
// constructor, so a change made after the GPU is constructed is not picked up
// by that instance.
// NOTE(review): the two trailing `true` arguments are presumably the
// save / runtime-modifiable flags of the Setting constructor -- confirm
// against the Setting declaration.
SwitchableSetting<bool> fast_gpu_path{linkage,
false,
"fast_gpu_path",
Category::RendererAdvanced,
Specialization::Default,
true,
true};
SwitchableSetting<bool> skip_cpu_inner_invalidation{linkage,
true,
"skip_cpu_inner_invalidation",

View file

@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@ -40,7 +43,8 @@ struct GPU::Impl {
explicit Impl(GPU& gpu_, Core::System& system_, bool is_async_, bool use_nvdec_)
: gpu{gpu_}, system{system_}, host1x{system.Host1x()}, use_nvdec{use_nvdec_},
shader_notify{std::make_unique<VideoCore::ShaderNotify>()}, is_async{is_async_},
gpu_thread{system_, is_async_}, scheduler{std::make_unique<Control::Scheduler>(gpu)} {}
gpu_thread{system_, is_async_}, scheduler{std::make_unique<Control::Scheduler>(gpu)},
fast_path{Settings::values.fast_gpu_path.GetValue()} {}
~Impl() = default;
@ -96,6 +100,7 @@ struct GPU::Impl {
/// Synchronizes CPU writes with Host GPU memory.
void InvalidateGPUCache() {
if (fast_path) return;
std::function<void(PAddr, size_t)> callback_writes(
[this](PAddr address, size_t size) { rasterizer->OnCacheInvalidation(address, size); });
system.GatherGPUDirtyMemory(callback_writes);
@ -110,6 +115,16 @@ struct GPU::Impl {
/// Request a host GPU memory flush from the CPU.
template <typename Func>
[[nodiscard]] u64 RequestSyncOperation(Func&& action) {
if (fast_path) {
// Execute immediately and publish the result
action();
gpu_thread.TickGPU();
const u64 fence = ++last_sync_fence;
// Mirror the normal path: advance current and wake any waiters
current_sync_fence.store(fence, std::memory_order_release);
sync_request_cv.notify_all();
return fence;
}
std::unique_lock lck{sync_request_mutex};
const u64 fence = ++last_sync_fence;
sync_requests.emplace_back(action);
@ -122,12 +137,27 @@ struct GPU::Impl {
}
/// Blocks the caller until the sync request identified by `fence` has been processed.
///
/// When `fast_path` is set this returns immediately without taking the mutex or
/// touching the condition variable. Otherwise it holds `sync_request_mutex` and
/// waits on `sync_request_cv` until CurrentSyncRequestFence() reaches `fence`.
///
/// @param fence Fence value to wait for, as returned by RequestSyncOperation.
void WaitForSyncOperation(const u64 fence) {
if (fast_path) {
// Don't block when the hack is on: the fast path of RequestSyncOperation
// runs the action before returning the fence, so there is nothing to wait for.
return;
}
std::unique_lock lck{sync_request_mutex};
// The predicate form re-checks the fence after every wakeup, so spurious
// wakeups and notifications for earlier fences do not end the wait early.
sync_request_cv.wait(lck, [this, fence] { return CurrentSyncRequestFence() >= fence; });
}
/// Tick pending requests within the GPU.
void TickWork() {
if (fast_path) {
// Drain queue without waiting on condition variables
while (!sync_requests.empty()) {
auto request = std::move(sync_requests.front());
sync_requests.pop_front();
request();
current_sync_fence.fetch_add(1, std::memory_order_release);
}
return;
}
std::unique_lock lck{sync_request_mutex};
while (!sync_requests.empty()) {
auto request = std::move(sync_requests.front());
@ -259,6 +289,15 @@ struct GPU::Impl {
}
VideoCore::RasterizerDownloadArea OnCPURead(DAddr addr, u64 size) {
if (fast_path) {
// Bypass fence/tick entirely
auto raster_area = rasterizer->GetFlushArea(addr, size);
rasterizer->FlushRegion(raster_area.start_address, raster_area.end_address - raster_area.start_address);
raster_area.preemtive = true;
// Give GPU thread a chance to run that flush
gpu_thread.TickGPU();
return raster_area;
}
auto raster_area = rasterizer->GetFlushArea(addr, size);
if (raster_area.preemtive) {
return raster_area;
@ -372,6 +411,9 @@ struct GPU::Impl {
std::unique_ptr<Core::Frontend::GraphicsContext> cpu_context;
std::unique_ptr<Tegra::Control::Scheduler> scheduler;
const bool fast_path;
std::unordered_map<s32, std::shared_ptr<Tegra::Control::ChannelState>> channels;
Tegra::Control::ChannelState* current_channel;
s32 bound_channel{-1};

View file

@ -250,6 +250,11 @@ std::unique_ptr<TranslationMap> InitializeTranslations(QWidget* parent)
"of available video memory for performance. Has no effect on integrated graphics. "
"Aggressive mode may severely impact the performance of other applications such as "
"recording software."));
INSERT(Settings,
fast_gpu_path,
tr("Fast GPU Path"),
tr("Bypasses all CPU–GPU synchronization and fence handling, reducing overhead and improving "
"the performance. This may cause glitches or crashes on some games."));
INSERT(Settings,
skip_cpu_inner_invalidation,
tr("Skip CPU Inner Invalidation"),