forked from eden-emu/eden
[GPU]: Implement Fast GPU Path
This commit is contained in:
parent
bdfcb6c950
commit
435483f7c1
7 changed files with 55 additions and 1 deletions
|
@ -17,6 +17,7 @@ enum class BooleanSetting(override val key: String) : AbstractBooleanSetting {
|
|||
RENDERER_USE_SPEED_LIMIT("use_speed_limit"),
|
||||
USE_FAST_CPU_TIME("use_fast_cpu_time"),
|
||||
USE_CUSTOM_CPU_TICKS("use_custom_cpu_ticks"),
|
||||
FAST_GPU_PATH("fast_gpu_path"),
|
||||
SKIP_CPU_INNER_INVALIDATION("skip_cpu_inner_invalidation"),
|
||||
USE_DOCKED_MODE("use_docked_mode"),
|
||||
USE_AUTO_STUB("use_auto_stub"),
|
||||
|
|
|
@ -652,6 +652,13 @@ abstract class SettingsItem(
|
|||
max = 65535
|
||||
)
|
||||
)
|
||||
put(
|
||||
SwitchSetting(
|
||||
BooleanSetting.FAST_GPU_PATH,
|
||||
titleId = R.string.fast_gpu_path,
|
||||
descriptionId = R.string.fast_gpu_path_description
|
||||
)
|
||||
)
|
||||
put(
|
||||
SwitchSetting(
|
||||
BooleanSetting.SKIP_CPU_INNER_INVALIDATION,
|
||||
|
|
|
@ -464,6 +464,7 @@ class SettingsFragmentPresenter(
|
|||
add(IntSetting.FAST_CPU_TIME.key)
|
||||
add(BooleanSetting.USE_CUSTOM_CPU_TICKS.key)
|
||||
add(IntSetting.CPU_TICKS.key)
|
||||
add(BooleanSetting.FAST_GPU_PATH.key)
|
||||
add(BooleanSetting.SKIP_CPU_INNER_INVALIDATION.key)
|
||||
add(BooleanSetting.USE_LRU_CACHE.key)
|
||||
add(BooleanSetting.CORE_SYNC_CORE_SPEED.key)
|
||||
|
|
|
@ -101,6 +101,8 @@
|
|||
<string name="custom_cpu_ticks">Custom CPU Ticks</string>
|
||||
<string name="custom_cpu_ticks_description">Set a custom value of CPU ticks. Higher values can increase performance, but may also cause the game to freeze. A range of 77–21000 is recommended.</string>
|
||||
<string name="cpu_ticks">Ticks</string>
|
||||
<string name="fast_gpu_path">Fast GPU Path</string>
|
||||
<string name="fast_gpu_path_description">Bypasses all CPU–GPU synchronization and fence handling, reducing overhead and improving the performance. This may cause glitches or crashes on some games.</string>
|
||||
<string name="skip_cpu_inner_invalidation">Skip CPU Inner Invalidation</string>
|
||||
<string name="skip_cpu_inner_invalidation_description">Skips certain CPU-side cache invalidations during memory updates, reducing CPU usage and improving its performance. This may cause glitches or crashes on some games.</string>
|
||||
<string name="fast_cpu_time">CPU Clock</string>
|
||||
|
|
|
@ -450,6 +450,13 @@ struct Values {
|
|||
VramUsageMode::Aggressive,
|
||||
"vram_usage_mode",
|
||||
Category::RendererAdvanced};
|
||||
// Toggle for the "fast GPU path" (key "fast_gpu_path", category RendererAdvanced).
// The second argument (false) is presumably the default value — confirm against the
// SwitchableSetting constructor.
// GPU::Impl reads this once through GetValue() in its constructor and keeps the result in a
// const member, so a change made afterwards is not observed by an already-constructed GPU::Impl.
// NOTE(review): the meaning of the two trailing `true` arguments is not visible here — confirm.
SwitchableSetting<bool> fast_gpu_path{linkage,
|
||||
false,
|
||||
"fast_gpu_path",
|
||||
Category::RendererAdvanced,
|
||||
Specialization::Default,
|
||||
true,
|
||||
true};
|
||||
SwitchableSetting<bool> skip_cpu_inner_invalidation{linkage,
|
||||
true,
|
||||
"skip_cpu_inner_invalidation",
|
||||
|
|
|
@ -40,7 +40,8 @@ struct GPU::Impl {
|
|||
explicit Impl(GPU& gpu_, Core::System& system_, bool is_async_, bool use_nvdec_)
|
||||
: gpu{gpu_}, system{system_}, host1x{system.Host1x()}, use_nvdec{use_nvdec_},
|
||||
shader_notify{std::make_unique<VideoCore::ShaderNotify>()}, is_async{is_async_},
|
||||
gpu_thread{system_, is_async_}, scheduler{std::make_unique<Control::Scheduler>(gpu)} {}
|
||||
gpu_thread{system_, is_async_}, scheduler{std::make_unique<Control::Scheduler>(gpu)},
|
||||
fast_path{Settings::values.fast_gpu_path.GetValue()} {}
|
||||
|
||||
~Impl() = default;
|
||||
|
||||
|
@ -110,6 +111,11 @@ struct GPU::Impl {
|
|||
/// Request a host GPU memory flush from the CPU.
|
||||
template <typename Func>
|
||||
[[nodiscard]] u64 RequestSyncOperation(Func&& action) {
|
||||
if (fast_path) {
|
||||
// Just bump the fence counter, but do NOT enqueue
|
||||
return ++last_sync_fence;
|
||||
}
|
||||
|
||||
std::unique_lock lck{sync_request_mutex};
|
||||
const u64 fence = ++last_sync_fence;
|
||||
sync_requests.emplace_back(action);
|
||||
|
@ -122,12 +128,25 @@ struct GPU::Impl {
|
|||
}
|
||||
|
||||
/// Wait until the sync request identified by `fence` has been processed.
/// Blocks on sync_request_cv until CurrentSyncRequestFence() >= fence.
/// @param fence Fence value to wait for (presumably one returned by RequestSyncOperation —
///              verify against callers).
/// When fast_path is set this returns immediately and gives no completion guarantee.
void WaitForSyncOperation(const u64 fence) {
|
||||
if (fast_path) {
|
||||
// Never block: in fast-path mode RequestSyncOperation returns a fence without enqueuing
// the action, so there is no queued work for this call to wait on.
|
||||
return;
|
||||
}
|
||||
|
||||
// The predicate is evaluated with sync_request_mutex held, as condition_variable::wait requires.
std::unique_lock lck{sync_request_mutex};
|
||||
sync_request_cv.wait(lck, [this, fence] { return CurrentSyncRequestFence() >= fence; });
|
||||
}
|
||||
|
||||
/// Tick pending requests within the GPU.
|
||||
void TickWork() {
|
||||
if (fast_path) {
|
||||
// Drop all pending requests in one go
|
||||
sync_requests.clear();
|
||||
current_sync_fence.store(last_sync_fence, std::memory_order_relaxed);
|
||||
sync_request_cv.notify_all();
|
||||
return;
|
||||
}
|
||||
|
||||
std::unique_lock lck{sync_request_mutex};
|
||||
while (!sync_requests.empty()) {
|
||||
auto request = std::move(sync_requests.front());
|
||||
|
@ -289,6 +308,11 @@ struct GPU::Impl {
|
|||
|
||||
void RequestComposite(std::vector<Tegra::FramebufferConfig>&& layers,
|
||||
std::vector<Service::Nvidia::NvFence>&& fences) {
|
||||
if (fast_path) {
|
||||
renderer->Composite(layers);
|
||||
return;
|
||||
}
|
||||
|
||||
size_t num_fences{fences.size()};
|
||||
size_t current_request_counter{};
|
||||
{
|
||||
|
@ -327,6 +351,10 @@ struct GPU::Impl {
|
|||
}
|
||||
|
||||
std::vector<u8> GetAppletCaptureBuffer() {
|
||||
if (fast_path) {
|
||||
return renderer->GetAppletCaptureBuffer();
|
||||
}
|
||||
|
||||
std::vector<u8> out;
|
||||
|
||||
const auto wait_fence =
|
||||
|
@ -372,6 +400,9 @@ struct GPU::Impl {
|
|||
std::unique_ptr<Core::Frontend::GraphicsContext> cpu_context;
|
||||
|
||||
std::unique_ptr<Tegra::Control::Scheduler> scheduler;
|
||||
|
||||
const bool fast_path;
|
||||
|
||||
std::unordered_map<s32, std::shared_ptr<Tegra::Control::ChannelState>> channels;
|
||||
Tegra::Control::ChannelState* current_channel;
|
||||
s32 bound_channel{-1};
|
||||
|
|
|
@ -250,6 +250,11 @@ std::unique_ptr<TranslationMap> InitializeTranslations(QWidget* parent)
|
|||
"of available video memory for performance. Has no effect on integrated graphics. "
|
||||
"Aggressive mode may severely impact the performance of other applications such as "
|
||||
"recording software."));
|
||||
INSERT(Settings,
|
||||
fast_gpu_path,
|
||||
tr("Fast GPU Path"),
|
||||
tr("Bypasses all CPU–GPU synchronization and fence handling, reducing overhead and improving "
|
||||
"the performance. This may cause glitches or crashes on some games."));
|
||||
INSERT(Settings,
|
||||
skip_cpu_inner_invalidation,
|
||||
tr("Skip CPU Inner Invalidation"),
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue