[GPU]: Hack rewrite

Hopefully this works properly now
Gamer64 2025-07-24 01:48:39 +02:00 committed by crueter
parent 50f6bfb6bd
commit 6103d2b7f4
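
Note: the hunks below gate behavior on a `fast_path` flag whose declaration is outside this diff. A minimal sketch of the state they rely on, with the member names taken from the hunks themselves and all types assumed:

```cpp
#include <atomic>
#include <cstdint>
#include <deque>
#include <functional>

using u64 = std::uint64_t;

struct Impl {
    bool fast_path{false};                           // hack toggle; real declaration not shown here
    u64 last_sync_fence{};                           // last fence issued by RequestSyncOperation
    std::atomic<u64> current_sync_fence{};           // last fence completed by TickWork
    std::deque<std::function<void()>> sync_requests; // pending operations (front/pop_front in TickWork)
};
```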

@@ -100,6 +100,7 @@ struct GPU::Impl {
     /// Synchronizes CPU writes with Host GPU memory.
     void InvalidateGPUCache() {
+        if (fast_path) return;
         std::function<void(PAddr, size_t)> callback_writes(
             [this](PAddr address, size_t size) { rasterizer->OnCacheInvalidation(address, size); });
         system.GatherGPUDirtyMemory(callback_writes);
@@ -115,10 +116,10 @@ struct GPU::Impl {
     template <typename Func>
     [[nodiscard]] u64 RequestSyncOperation(Func&& action) {
         if (fast_path) {
-            // Just bump the fence counter, but do NOT enqueue
+            // Execute immediately, increment fence, skip queueing
+            action();
             return ++last_sync_fence;
         }
         std::unique_lock lck{sync_request_mutex};
         const u64 fence = ++last_sync_fence;
         sync_requests.emplace_back(action);
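
With the hack active, the operation now runs inline on the calling thread before the (already satisfied) fence is handed back; previously the fence was bumped without the action ever executing. An illustrative caller, hypothetical but matching the signatures in this hunk:

```cpp
const u64 fence = impl.RequestSyncOperation([&] {
    // Slow path: runs later on the GPU thread via TickWork.
    // Fast path: runs right here, before RequestSyncOperation returns.
});
impl.WaitForSyncOperation(fence); // see the next hunk: this now always goes through the CV
```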
@@ -131,11 +132,6 @@ struct GPU::Impl {
     }

     void WaitForSyncOperation(const u64 fence) {
-        if (fast_path) {
-            // Never block
-            return;
-        }
         std::unique_lock lck{sync_request_mutex};
         sync_request_cv.wait(lck, [this, fence] { return CurrentSyncRequestFence() >= fence; });
     }
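
The fast-path early return is dropped, so waiting always goes through the condition variable. `CurrentSyncRequestFence()` is not part of this diff; presumably it is just a load of the atomic counter, along the lines of:

```cpp
[[nodiscard]] u64 CurrentSyncRequestFence() const {
    return current_sync_fence.load(std::memory_order_acquire);
}
```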
@@ -143,10 +139,13 @@ struct GPU::Impl {

     /// Tick pending requests within the GPU.
     void TickWork() {
         if (fast_path) {
-            // Drop all pending requests in one go
-            sync_requests.clear();
-            current_sync_fence.store(last_sync_fence, std::memory_order_relaxed);
-            sync_request_cv.notify_all();
+            // Drain queue without waiting on condition variables
+            while (!sync_requests.empty()) {
+                auto req = std::move(sync_requests.front());
+                sync_requests.pop_front();
+                req();
+                current_sync_fence.fetch_add(1, std::memory_order_release);
+            }
             return;
         }
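
The old fast path dropped queued requests and jumped the fence counter in a single store; the rewrite executes every queued request and bumps the fence once per request, so the fence sequence stays consistent with the work actually done. A stand-alone model of that arithmetic (assumed types, not the real class):

```cpp
#include <atomic>
#include <cassert>
#include <cstdint>
#include <deque>
#include <functional>

int main() {
    std::deque<std::function<void()>> sync_requests;
    std::atomic<std::uint64_t> current_sync_fence{0};
    std::uint64_t last_sync_fence = 0;

    // Three requests enqueued as the slow-path RequestSyncOperation would.
    for (int i = 0; i < 3; ++i) {
        ++last_sync_fence;
        sync_requests.emplace_back([] { /* flush / invalidate work */ });
    }

    // Drain exactly as the fast-path TickWork above does.
    while (!sync_requests.empty()) {
        auto req = std::move(sync_requests.front());
        sync_requests.pop_front();
        req();
        current_sync_fence.fetch_add(1, std::memory_order_release);
    }

    assert(current_sync_fence.load() == last_sync_fence); // fully drained
    return 0;
}
```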
@@ -281,6 +280,13 @@ struct GPU::Impl {
     }

     VideoCore::RasterizerDownloadArea OnCPURead(DAddr addr, u64 size) {
+        if (fast_path) {
+            // Bypass fence/tick entirely
+            auto raster_area = rasterizer->GetFlushArea(addr, size);
+            rasterizer->FlushRegion(raster_area.start_address, raster_area.end_address - raster_area.start_address);
+            raster_area.preemtive = true;
+            return raster_area;
+        }
         auto raster_area = rasterizer->GetFlushArea(addr, size);
         if (raster_area.preemtive) {
             return raster_area;
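
The new fast path flushes eagerly and marks the area `preemtive` (spelled as in the codebase), which the slow path below treats as "already synchronized, return immediately". For reference, an assumed shape of the return type, inferred only from the fields this hunk touches:

```cpp
using DAddr = std::uint64_t; // assumption; the real alias lives elsewhere

struct RasterizerDownloadArea {
    DAddr start_address;
    DAddr end_address;
    bool preemtive; // true => caller can read without waiting on a fence
};
```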
@@ -311,11 +317,6 @@ struct GPU::Impl {
     void RequestComposite(std::vector<Tegra::FramebufferConfig>&& layers,
                           std::vector<Service::Nvidia::NvFence>&& fences) {
-        if (fast_path) {
-            renderer->Composite(layers);
-            return;
-        }
         size_t num_fences{fences.size()};
         size_t current_request_counter{};
         {
@@ -354,10 +355,6 @@ struct GPU::Impl {
     }

     std::vector<u8> GetAppletCaptureBuffer() {
-        if (fast_path) {
-            return renderer->GetAppletCaptureBuffer();
-        }
-
         std::vector<u8> out;
         const auto wait_fence =