forked from eden-emu/eden
		
	VideoCore: Refactor fencing system.
This commit is contained in:
		
							parent
							
								
									4d60410dd9
								
							
						
					
					
						commit
						bc8b3d225e
					
				
					 20 changed files with 154 additions and 167 deletions
				
			
		|  | @ -40,7 +40,8 @@ void nvdisp_disp0::OnClose(DeviceFD fd) {} | |||
| 
 | ||||
| void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, android::PixelFormat format, u32 width, | ||||
|                         u32 height, u32 stride, android::BufferTransformFlags transform, | ||||
|                         const Common::Rectangle<int>& crop_rect) { | ||||
|                         const Common::Rectangle<int>& crop_rect, | ||||
|                         std::array<Service::Nvidia::NvFence, 4>& fences, u32 num_fences) { | ||||
|     const VAddr addr = nvmap.GetHandleAddress(buffer_handle); | ||||
|     LOG_TRACE(Service, | ||||
|               "Drawing from address {:X} offset {:08X} Width {} Height {} Stride {} Format {}", | ||||
|  | @ -50,7 +51,7 @@ void nvdisp_disp0::flip(u32 buffer_handle, u32 offset, android::PixelFormat form | |||
|                                                stride, format, transform, crop_rect}; | ||||
| 
 | ||||
|     system.GetPerfStats().EndSystemFrame(); | ||||
|     system.GPU().RequestSwapBuffers(&framebuffer, nullptr, 0); | ||||
|     system.GPU().RequestSwapBuffers(&framebuffer, fences, num_fences); | ||||
|     system.SpeedLimiter().DoSpeedLimiting(system.CoreTiming().GetGlobalTimeUs()); | ||||
|     system.GetPerfStats().BeginSystemFrame(); | ||||
| } | ||||
|  |  | |||
|  | @ -38,7 +38,8 @@ public: | |||
|     /// Performs a screen flip, drawing the buffer pointed to by the handle.
 | ||||
|     void flip(u32 buffer_handle, u32 offset, android::PixelFormat format, u32 width, u32 height, | ||||
|               u32 stride, android::BufferTransformFlags transform, | ||||
|               const Common::Rectangle<int>& crop_rect); | ||||
|               const Common::Rectangle<int>& crop_rect, | ||||
|               std::array<Service::Nvidia::NvFence, 4>& fences, u32 num_fences); | ||||
| 
 | ||||
|     Kernel::KEvent* QueryEvent(u32 event_id) override; | ||||
| 
 | ||||
|  |  | |||
|  | @ -269,17 +269,6 @@ void NVFlinger::Compose() { | |||
|             return; // We are likely shutting down
 | ||||
|         } | ||||
| 
 | ||||
|         auto& syncpoint_manager = system.Host1x().GetSyncpointManager(); | ||||
|         const auto& multi_fence = buffer.fence; | ||||
|         guard->unlock(); | ||||
|         for (u32 fence_id = 0; fence_id < multi_fence.num_fences; fence_id++) { | ||||
|             const auto& fence = multi_fence.fences[fence_id]; | ||||
|             syncpoint_manager.WaitGuest(fence.id, fence.value); | ||||
|         } | ||||
|         guard->lock(); | ||||
| 
 | ||||
|         MicroProfileFlip(); | ||||
| 
 | ||||
|         // Now send the buffer to the GPU for drawing.
 | ||||
|         // TODO(Subv): Support more than just disp0. The display device selection is probably based
 | ||||
|         // on which display we're drawing (Default, Internal, External, etc)
 | ||||
|  | @ -293,8 +282,10 @@ void NVFlinger::Compose() { | |||
| 
 | ||||
|         nvdisp->flip(igbp_buffer.BufferId(), igbp_buffer.Offset(), igbp_buffer.ExternalFormat(), | ||||
|                      igbp_buffer.Width(), igbp_buffer.Height(), igbp_buffer.Stride(), | ||||
|                      static_cast<android::BufferTransformFlags>(buffer.transform), crop_rect); | ||||
|                      static_cast<android::BufferTransformFlags>(buffer.transform), crop_rect, | ||||
|                      buffer.fence.fences, buffer.fence.num_fences); | ||||
| 
 | ||||
|         MicroProfileFlip(); | ||||
|         guard->lock(); | ||||
| 
 | ||||
|         swap_interval = buffer.swap_interval; | ||||
|  |  | |||
|  | @ -826,6 +826,19 @@ void BufferCache<P>::CommitAsyncFlushesHigh() { | |||
|     const bool is_accuracy_normal = | ||||
|         Settings::values.gpu_accuracy.GetValue() == Settings::GPUAccuracy::Normal; | ||||
| 
 | ||||
|     auto it = committed_ranges.begin(); | ||||
|     while (it != committed_ranges.end()) { | ||||
|         auto& current_intervals = *it; | ||||
|         auto next_it = std::next(it); | ||||
|         while (next_it != committed_ranges.end()) { | ||||
|             for (auto& interval : *next_it) { | ||||
|                 current_intervals.subtract(interval); | ||||
|             } | ||||
|             next_it++; | ||||
|         } | ||||
|         it++; | ||||
|     } | ||||
| 
 | ||||
|     boost::container::small_vector<std::pair<BufferCopy, BufferId>, 1> downloads; | ||||
|     u64 total_size_bytes = 0; | ||||
|     u64 largest_copy = 0; | ||||
|  |  | |||
|  | @ -24,8 +24,6 @@ MICROPROFILE_DEFINE(DispatchCalls, "GPU", "Execute command buffer", MP_RGB(128, | |||
| void DmaPusher::DispatchCalls() { | ||||
|     MICROPROFILE_SCOPE(DispatchCalls); | ||||
| 
 | ||||
|     gpu.SyncGuestHost(); | ||||
| 
 | ||||
|     dma_pushbuffer_subindex = 0; | ||||
| 
 | ||||
|     dma_state.is_last_call = true; | ||||
|  | @ -36,7 +34,6 @@ void DmaPusher::DispatchCalls() { | |||
|         } | ||||
|     } | ||||
|     gpu.FlushCommands(); | ||||
|     gpu.SyncGuestHost(); | ||||
|     gpu.OnCommandListEnd(); | ||||
| } | ||||
| 
 | ||||
|  |  | |||
|  | @ -242,6 +242,9 @@ void Maxwell3D::ProcessMethodCall(u32 method, u32 argument, u32 nonshadow_argume | |||
|         return; | ||||
|     case MAXWELL3D_REG_INDEX(fragment_barrier): | ||||
|         return rasterizer->FragmentBarrier(); | ||||
|     case MAXWELL3D_REG_INDEX(invalidate_texture_data_cache): | ||||
|         rasterizer->InvalidateGPUCache(); | ||||
|         return rasterizer->WaitForIdle(); | ||||
|     case MAXWELL3D_REG_INDEX(tiled_cache_barrier): | ||||
|         return rasterizer->TiledCacheBarrier(); | ||||
|     } | ||||
|  | @ -472,10 +475,25 @@ void Maxwell3D::ProcessQueryGet() { | |||
| 
 | ||||
|     switch (regs.query.query_get.operation) { | ||||
|     case Regs::QueryOperation::Release: | ||||
|         if (regs.query.query_get.fence == 1) { | ||||
|             rasterizer->SignalSemaphore(regs.query.QueryAddress(), regs.query.query_sequence); | ||||
|         if (regs.query.query_get.fence == 1 || regs.query.query_get.short_query != 0) { | ||||
|             const GPUVAddr sequence_address{regs.query.QueryAddress()}; | ||||
|             const u32 payload = regs.query.query_sequence; | ||||
|             std::function<void()> operation([this, sequence_address, payload] { | ||||
|                 memory_manager.Write<u32>(sequence_address, payload); | ||||
|             }); | ||||
|             rasterizer->SignalFence(std::move(operation)); | ||||
|         } else { | ||||
|             StampQueryResult(regs.query.query_sequence, regs.query.query_get.short_query == 0); | ||||
|             struct LongQueryResult { | ||||
|                 u64_le value; | ||||
|                 u64_le timestamp; | ||||
|             }; | ||||
|             const GPUVAddr sequence_address{regs.query.QueryAddress()}; | ||||
|             const u32 payload = regs.query.query_sequence; | ||||
|             std::function<void()> operation([this, sequence_address, payload] { | ||||
|                 LongQueryResult query_result{payload, system.GPU().GetTicks()}; | ||||
|                 memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result)); | ||||
|             }); | ||||
|             rasterizer->SignalFence(std::move(operation)); | ||||
|         } | ||||
|         break; | ||||
|     case Regs::QueryOperation::Acquire: | ||||
|  |  | |||
|  | @ -79,12 +79,15 @@ void Puller::ProcessSemaphoreTriggerMethod() { | |||
|             u64 timestamp; | ||||
|         }; | ||||
| 
 | ||||
|         const GPUVAddr sequence_address{regs.semaphore_address.SemaphoreAddress()}; | ||||
|         const u32 payload = regs.semaphore_sequence; | ||||
|         std::function<void()> operation([this, sequence_address, payload] { | ||||
|             Block block{}; | ||||
|         block.sequence = regs.semaphore_sequence; | ||||
|         // TODO(Kmather73): Generate a real GPU timestamp and write it here instead of
 | ||||
|         // CoreTiming
 | ||||
|             block.sequence = payload; | ||||
|             block.timestamp = gpu.GetTicks(); | ||||
|         memory_manager.WriteBlock(regs.semaphore_address.SemaphoreAddress(), &block, sizeof(block)); | ||||
|             memory_manager.WriteBlock(sequence_address, &block, sizeof(block)); | ||||
|         }); | ||||
|         rasterizer->SignalFence(std::move(operation)); | ||||
|     } else { | ||||
|         do { | ||||
|             const u32 word{memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress())}; | ||||
|  | @ -94,6 +97,7 @@ void Puller::ProcessSemaphoreTriggerMethod() { | |||
|                 regs.acquire_active = true; | ||||
|                 regs.acquire_mode = false; | ||||
|                 if (word != regs.acquire_value) { | ||||
|                     rasterizer->ReleaseFences(); | ||||
|                     std::this_thread::sleep_for(std::chrono::milliseconds(1)); | ||||
|                     continue; | ||||
|                 } | ||||
|  | @ -101,11 +105,13 @@ void Puller::ProcessSemaphoreTriggerMethod() { | |||
|                 regs.acquire_active = true; | ||||
|                 regs.acquire_mode = true; | ||||
|                 if (word < regs.acquire_value) { | ||||
|                     rasterizer->ReleaseFences(); | ||||
|                     std::this_thread::sleep_for(std::chrono::milliseconds(1)); | ||||
|                     continue; | ||||
|                 } | ||||
|             } else if (op == GpuSemaphoreOperation::AcquireMask) { | ||||
|                 if (word & regs.semaphore_sequence == 0) { | ||||
|                 if (word && regs.semaphore_sequence == 0) { | ||||
|                     rasterizer->ReleaseFences(); | ||||
|                     std::this_thread::sleep_for(std::chrono::milliseconds(1)); | ||||
|                     continue; | ||||
|                 } | ||||
|  | @ -117,16 +123,23 @@ void Puller::ProcessSemaphoreTriggerMethod() { | |||
| } | ||||
| 
 | ||||
| void Puller::ProcessSemaphoreRelease() { | ||||
|     rasterizer->SignalSemaphore(regs.semaphore_address.SemaphoreAddress(), regs.semaphore_release); | ||||
|     const GPUVAddr sequence_address{regs.semaphore_address.SemaphoreAddress()}; | ||||
|     const u32 payload = regs.semaphore_release; | ||||
|     std::function<void()> operation([this, sequence_address, payload] { | ||||
|         memory_manager.Write<u32>(sequence_address, payload); | ||||
|     }); | ||||
|     rasterizer->SignalFence(std::move(operation)); | ||||
| } | ||||
| 
 | ||||
| void Puller::ProcessSemaphoreAcquire() { | ||||
|     const u32 word = memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress()); | ||||
|     u32 word = memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress()); | ||||
|     const auto value = regs.semaphore_acquire; | ||||
|     std::this_thread::sleep_for(std::chrono::milliseconds(5)); | ||||
|     if (word != value) { | ||||
|     while (word != value) { | ||||
|         regs.acquire_active = true; | ||||
|         regs.acquire_value = value; | ||||
|         std::this_thread::sleep_for(std::chrono::milliseconds(1)); | ||||
|         rasterizer->ReleaseFences(); | ||||
|         word = memory_manager.Read<u32>(regs.semaphore_address.SemaphoreAddress()); | ||||
|         // TODO(kemathe73) figure out how to do the acquire_timeout
 | ||||
|         regs.acquire_mode = false; | ||||
|         regs.acquire_source = false; | ||||
|  | @ -147,9 +160,9 @@ void Puller::CallPullerMethod(const MethodCall& method_call) { | |||
|     case BufferMethods::SemaphoreAddressHigh: | ||||
|     case BufferMethods::SemaphoreAddressLow: | ||||
|     case BufferMethods::SemaphoreSequencePayload: | ||||
|     case BufferMethods::WrcacheFlush: | ||||
|     case BufferMethods::SyncpointPayload: | ||||
|         break; | ||||
|     case BufferMethods::WrcacheFlush: | ||||
|     case BufferMethods::RefCnt: | ||||
|         rasterizer->SignalReference(); | ||||
|         break; | ||||
|  | @ -173,7 +186,7 @@ void Puller::CallPullerMethod(const MethodCall& method_call) { | |||
|     } | ||||
|     case BufferMethods::MemOpB: { | ||||
|         // Implement this better.
 | ||||
|         rasterizer->SyncGuestHost(); | ||||
|         rasterizer->InvalidateGPUCache(); | ||||
|         break; | ||||
|     } | ||||
|     case BufferMethods::MemOpC: | ||||
|  |  | |||
|  | @ -5,6 +5,8 @@ | |||
| 
 | ||||
| #include <algorithm> | ||||
| #include <cstring> | ||||
| #include <deque> | ||||
| #include <functional> | ||||
| #include <memory> | ||||
| #include <queue> | ||||
| 
 | ||||
|  | @ -19,28 +21,7 @@ namespace VideoCommon { | |||
| 
 | ||||
| class FenceBase { | ||||
| public: | ||||
|     explicit FenceBase(u32 payload_, bool is_stubbed_) | ||||
|         : address{}, payload{payload_}, is_semaphore{false}, is_stubbed{is_stubbed_} {} | ||||
| 
 | ||||
|     explicit FenceBase(u8* address_, u32 payload_, bool is_stubbed_) | ||||
|         : address{address_}, payload{payload_}, is_semaphore{true}, is_stubbed{is_stubbed_} {} | ||||
| 
 | ||||
|     u8* GetAddress() const { | ||||
|         return address; | ||||
|     } | ||||
| 
 | ||||
|     u32 GetPayload() const { | ||||
|         return payload; | ||||
|     } | ||||
| 
 | ||||
|     bool IsSemaphore() const { | ||||
|         return is_semaphore; | ||||
|     } | ||||
| 
 | ||||
| private: | ||||
|     u8* address; | ||||
|     u32 payload; | ||||
|     bool is_semaphore; | ||||
|     explicit FenceBase(bool is_stubbed_) : is_stubbed{is_stubbed_} {} | ||||
| 
 | ||||
| protected: | ||||
|     bool is_stubbed; | ||||
|  | @ -60,31 +41,28 @@ public: | |||
|         buffer_cache.AccumulateFlushes(); | ||||
|     } | ||||
| 
 | ||||
|     void SignalSemaphore(u8* addr, u32 value) { | ||||
|     void SyncOperation(std::function<void()>&& func) { | ||||
|         uncommitted_operations.emplace_back(std::move(func)); | ||||
|     } | ||||
| 
 | ||||
|     void SignalFence(std::function<void()>&& func) { | ||||
|         TryReleasePendingFences(); | ||||
|         const bool should_flush = ShouldFlush(); | ||||
|         CommitAsyncFlushes(); | ||||
|         TFence new_fence = CreateFence(addr, value, !should_flush); | ||||
|         uncommitted_operations.emplace_back(std::move(func)); | ||||
|         CommitOperations(); | ||||
|         TFence new_fence = CreateFence(!should_flush); | ||||
|         fences.push(new_fence); | ||||
|         QueueFence(new_fence); | ||||
|         if (should_flush) { | ||||
|             rasterizer.FlushCommands(); | ||||
|         } | ||||
|         rasterizer.SyncGuestHost(); | ||||
|     } | ||||
| 
 | ||||
|     void SignalSyncPoint(u32 value) { | ||||
|         syncpoint_manager.IncrementGuest(value); | ||||
|         TryReleasePendingFences(); | ||||
|         const bool should_flush = ShouldFlush(); | ||||
|         CommitAsyncFlushes(); | ||||
|         TFence new_fence = CreateFence(value, !should_flush); | ||||
|         fences.push(new_fence); | ||||
|         QueueFence(new_fence); | ||||
|         if (should_flush) { | ||||
|             rasterizer.FlushCommands(); | ||||
|         } | ||||
|         rasterizer.SyncGuestHost(); | ||||
|         std::function<void()> func([this, value] { syncpoint_manager.IncrementHost(value); }); | ||||
|         SignalFence(std::move(func)); | ||||
|     } | ||||
| 
 | ||||
|     void WaitPendingFences() { | ||||
|  | @ -94,12 +72,10 @@ public: | |||
|                 WaitFence(current_fence); | ||||
|             } | ||||
|             PopAsyncFlushes(); | ||||
|             if (current_fence->IsSemaphore()) { | ||||
|                 char* address = reinterpret_cast<char*>(current_fence->GetAddress()); | ||||
|                 auto payload = current_fence->GetPayload(); | ||||
|                 std::memcpy(address, &payload, sizeof(payload)); | ||||
|             } else { | ||||
|                 syncpoint_manager.IncrementHost(current_fence->GetPayload()); | ||||
|             auto operations = std::move(pending_operations.front()); | ||||
|             pending_operations.pop_front(); | ||||
|             for (auto& operation : operations) { | ||||
|                 operation(); | ||||
|             } | ||||
|             PopFence(); | ||||
|         } | ||||
|  | @ -114,11 +90,9 @@ protected: | |||
| 
 | ||||
|     virtual ~FenceManager() = default; | ||||
| 
 | ||||
|     /// Creates a Sync Point Fence Interface, does not create a backend fence if 'is_stubbed' is
 | ||||
|     /// Creates a Fence Interface, does not create a backend fence if 'is_stubbed' is
 | ||||
|     /// true
 | ||||
|     virtual TFence CreateFence(u32 value, bool is_stubbed) = 0; | ||||
|     /// Creates a Semaphore Fence Interface, does not create a backend fence if 'is_stubbed' is true
 | ||||
|     virtual TFence CreateFence(u8* addr, u32 value, bool is_stubbed) = 0; | ||||
|     virtual TFence CreateFence(bool is_stubbed) = 0; | ||||
|     /// Queues a fence into the backend if the fence isn't stubbed.
 | ||||
|     virtual void QueueFence(TFence& fence) = 0; | ||||
|     /// Notifies that the backend fence has been signaled/reached in host GPU.
 | ||||
|  | @ -141,12 +115,10 @@ private: | |||
|                 return; | ||||
|             } | ||||
|             PopAsyncFlushes(); | ||||
|             if (current_fence->IsSemaphore()) { | ||||
|                 char* address = reinterpret_cast<char*>(current_fence->GetAddress()); | ||||
|                 const auto payload = current_fence->GetPayload(); | ||||
|                 std::memcpy(address, &payload, sizeof(payload)); | ||||
|             } else { | ||||
|                 syncpoint_manager.IncrementHost(current_fence->GetPayload()); | ||||
|             auto operations = std::move(pending_operations.front()); | ||||
|             pending_operations.pop_front(); | ||||
|             for (auto& operation : operations) { | ||||
|                 operation(); | ||||
|             } | ||||
|             PopFence(); | ||||
|         } | ||||
|  | @ -165,16 +137,20 @@ private: | |||
|     } | ||||
| 
 | ||||
|     void PopAsyncFlushes() { | ||||
|         { | ||||
|             std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; | ||||
|             texture_cache.PopAsyncFlushes(); | ||||
|             buffer_cache.PopAsyncFlushes(); | ||||
|         } | ||||
|         query_cache.PopAsyncFlushes(); | ||||
|     } | ||||
| 
 | ||||
|     void CommitAsyncFlushes() { | ||||
|         { | ||||
|             std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; | ||||
|             texture_cache.CommitAsyncFlushes(); | ||||
|             buffer_cache.CommitAsyncFlushes(); | ||||
|         } | ||||
|         query_cache.CommitAsyncFlushes(); | ||||
|     } | ||||
| 
 | ||||
|  | @ -183,7 +159,13 @@ private: | |||
|         fences.pop(); | ||||
|     } | ||||
| 
 | ||||
|     void CommitOperations() { | ||||
|         pending_operations.emplace_back(std::move(uncommitted_operations)); | ||||
|     } | ||||
| 
 | ||||
|     std::queue<TFence> fences; | ||||
|     std::deque<std::function<void()>> uncommitted_operations; | ||||
|     std::deque<std::deque<std::function<void()>>> pending_operations; | ||||
| 
 | ||||
|     DelayedDestructionRing<TFence, 6> delayed_destruction_ring; | ||||
| }; | ||||
|  |  | |||
|  | @ -93,17 +93,14 @@ struct GPU::Impl { | |||
|     } | ||||
| 
 | ||||
|     /// Synchronizes CPU writes with Host GPU memory.
 | ||||
|     void SyncGuestHost() { | ||||
|         rasterizer->SyncGuestHost(); | ||||
|     void InvalidateGPUCache() { | ||||
|         rasterizer->InvalidateGPUCache(); | ||||
|     } | ||||
| 
 | ||||
|     /// Signal the ending of command list.
 | ||||
|     void OnCommandListEnd() { | ||||
|         if (is_async) { | ||||
|             // This command only applies to asynchronous GPU mode
 | ||||
|         gpu_thread.OnCommandListEnd(); | ||||
|     } | ||||
|     } | ||||
| 
 | ||||
|     /// Request a host GPU memory flush from the CPU.
 | ||||
|     template <typename Func> | ||||
|  | @ -296,7 +293,7 @@ struct GPU::Impl { | |||
|     } | ||||
| 
 | ||||
|     void RequestSwapBuffers(const Tegra::FramebufferConfig* framebuffer, | ||||
|                             Service::Nvidia::NvFence* fences, size_t num_fences) { | ||||
|                             std::array<Service::Nvidia::NvFence, 4>& fences, size_t num_fences) { | ||||
|         size_t current_request_counter{}; | ||||
|         { | ||||
|             std::unique_lock<std::mutex> lk(request_swap_mutex); | ||||
|  | @ -412,8 +409,8 @@ void GPU::FlushCommands() { | |||
|     impl->FlushCommands(); | ||||
| } | ||||
| 
 | ||||
| void GPU::SyncGuestHost() { | ||||
|     impl->SyncGuestHost(); | ||||
| void GPU::InvalidateGPUCache() { | ||||
|     impl->InvalidateGPUCache(); | ||||
| } | ||||
| 
 | ||||
| void GPU::OnCommandListEnd() { | ||||
|  | @ -488,7 +485,7 @@ const VideoCore::ShaderNotify& GPU::ShaderNotify() const { | |||
| } | ||||
| 
 | ||||
| void GPU::RequestSwapBuffers(const Tegra::FramebufferConfig* framebuffer, | ||||
|                              Service::Nvidia::NvFence* fences, size_t num_fences) { | ||||
|                              std::array<Service::Nvidia::NvFence, 4>& fences, size_t num_fences) { | ||||
|     impl->RequestSwapBuffers(framebuffer, fences, num_fences); | ||||
| } | ||||
| 
 | ||||
|  |  | |||
|  | @ -110,7 +110,7 @@ public: | |||
|     /// Flush all current written commands into the host GPU for execution.
 | ||||
|     void FlushCommands(); | ||||
|     /// Synchronizes CPU writes with Host GPU memory.
 | ||||
|     void SyncGuestHost(); | ||||
|     void InvalidateGPUCache(); | ||||
|     /// Signal the ending of command list.
 | ||||
|     void OnCommandListEnd(); | ||||
| 
 | ||||
|  | @ -180,7 +180,7 @@ public: | |||
|     void RendererFrameEndNotify(); | ||||
| 
 | ||||
|     void RequestSwapBuffers(const Tegra::FramebufferConfig* framebuffer, | ||||
|                             Service::Nvidia::NvFence* fences, size_t num_fences); | ||||
|                             std::array<Service::Nvidia::NvFence, 4>& fences, size_t num_fences); | ||||
| 
 | ||||
|     /// Performs any additional setup necessary in order to begin GPU emulation.
 | ||||
|     /// This can be used to launch any necessary threads and register any necessary
 | ||||
|  |  | |||
|  | @ -98,7 +98,7 @@ void ThreadManager::FlushRegion(VAddr addr, u64 size) { | |||
| } | ||||
| 
 | ||||
| void ThreadManager::TickGPU() { | ||||
|     PushCommand(GPUTickCommand(), true); | ||||
|     PushCommand(GPUTickCommand()); | ||||
| } | ||||
| 
 | ||||
| void ThreadManager::InvalidateRegion(VAddr addr, u64 size) { | ||||
|  |  | |||
|  | @ -62,7 +62,10 @@ public: | |||
|     virtual void DisableGraphicsUniformBuffer(size_t stage, u32 index) = 0; | ||||
| 
 | ||||
|     /// Signal a GPU based semaphore as a fence
 | ||||
|     virtual void SignalSemaphore(GPUVAddr addr, u32 value) = 0; | ||||
|     virtual void SignalFence(std::function<void()>&& func) = 0; | ||||
| 
 | ||||
|     /// Send an operation to be done after a certain amount of flushes.
 | ||||
|     virtual void SyncOperation(std::function<void()>&& func) = 0; | ||||
| 
 | ||||
|     /// Signal a GPU based syncpoint as a fence
 | ||||
|     virtual void SignalSyncPoint(u32 value) = 0; | ||||
|  | @ -89,7 +92,7 @@ public: | |||
|     virtual void OnCPUWrite(VAddr addr, u64 size) = 0; | ||||
| 
 | ||||
|     /// Sync memory between guest and host.
 | ||||
|     virtual void SyncGuestHost() = 0; | ||||
|     virtual void InvalidateGPUCache() = 0; | ||||
| 
 | ||||
|     /// Unmap memory range
 | ||||
|     virtual void UnmapMemory(VAddr addr, u64 size) = 0; | ||||
|  |  | |||
|  | @ -10,10 +10,7 @@ | |||
| 
 | ||||
| namespace OpenGL { | ||||
| 
 | ||||
| GLInnerFence::GLInnerFence(u32 payload_, bool is_stubbed_) : FenceBase{payload_, is_stubbed_} {} | ||||
| 
 | ||||
| GLInnerFence::GLInnerFence(u8* address_, u32 payload_, bool is_stubbed_) | ||||
|     : FenceBase{address_, payload_, is_stubbed_} {} | ||||
| GLInnerFence::GLInnerFence(bool is_stubbed_) : FenceBase{is_stubbed_} {} | ||||
| 
 | ||||
| GLInnerFence::~GLInnerFence() = default; | ||||
| 
 | ||||
|  | @ -48,12 +45,8 @@ FenceManagerOpenGL::FenceManagerOpenGL(VideoCore::RasterizerInterface& rasterize | |||
|                                        BufferCache& buffer_cache_, QueryCache& query_cache_) | ||||
|     : GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_} {} | ||||
| 
 | ||||
| Fence FenceManagerOpenGL::CreateFence(u32 value, bool is_stubbed) { | ||||
|     return std::make_shared<GLInnerFence>(value, is_stubbed); | ||||
| } | ||||
| 
 | ||||
| Fence FenceManagerOpenGL::CreateFence(u8* addr, u32 value, bool is_stubbed) { | ||||
|     return std::make_shared<GLInnerFence>(addr, value, is_stubbed); | ||||
| Fence FenceManagerOpenGL::CreateFence(bool is_stubbed) { | ||||
|     return std::make_shared<GLInnerFence>(is_stubbed); | ||||
| } | ||||
| 
 | ||||
| void FenceManagerOpenGL::QueueFence(Fence& fence) { | ||||
|  |  | |||
|  | @ -16,8 +16,7 @@ namespace OpenGL { | |||
| 
 | ||||
| class GLInnerFence : public VideoCommon::FenceBase { | ||||
| public: | ||||
|     explicit GLInnerFence(u32 payload_, bool is_stubbed_); | ||||
|     explicit GLInnerFence(u8* address_, u32 payload_, bool is_stubbed_); | ||||
|     explicit GLInnerFence(bool is_stubbed_); | ||||
|     ~GLInnerFence(); | ||||
| 
 | ||||
|     void Queue(); | ||||
|  | @ -40,8 +39,7 @@ public: | |||
|                                 QueryCache& query_cache); | ||||
| 
 | ||||
| protected: | ||||
|     Fence CreateFence(u32 value, bool is_stubbed) override; | ||||
|     Fence CreateFence(u8* addr, u32 value, bool is_stubbed) override; | ||||
|     Fence CreateFence(bool is_stubbed) override; | ||||
|     void QueueFence(Fence& fence) override; | ||||
|     bool IsFenceSignaled(Fence& fence) const override; | ||||
|     void WaitFence(Fence& fence) override; | ||||
|  |  | |||
|  | @ -358,7 +358,7 @@ void RasterizerOpenGL::OnCPUWrite(VAddr addr, u64 size) { | |||
|     } | ||||
| } | ||||
| 
 | ||||
| void RasterizerOpenGL::SyncGuestHost() { | ||||
| void RasterizerOpenGL::InvalidateGPUCache() { | ||||
|     MICROPROFILE_SCOPE(OpenGL_CacheManagement); | ||||
|     shader_cache.SyncGuestHost(); | ||||
|     { | ||||
|  | @ -386,13 +386,12 @@ void RasterizerOpenGL::ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) { | |||
|     } | ||||
| } | ||||
| 
 | ||||
| void RasterizerOpenGL::SignalSemaphore(GPUVAddr addr, u32 value) { | ||||
|     if (!gpu.IsAsync()) { | ||||
|         gpu_memory->Write<u32>(addr, value); | ||||
|         return; | ||||
| void RasterizerOpenGL::SignalFence(std::function<void()>&& func) { | ||||
|     fence_manager.SignalFence(std::move(func)); | ||||
| } | ||||
|     auto paddr = gpu_memory->GetPointer(addr); | ||||
|     fence_manager.SignalSemaphore(paddr, value); | ||||
| 
 | ||||
| void RasterizerOpenGL::SyncOperation(std::function<void()>&& func) { | ||||
|     fence_manager.SyncOperation(std::move(func)); | ||||
| } | ||||
| 
 | ||||
| void RasterizerOpenGL::SignalSyncPoint(u32 value) { | ||||
|  | @ -400,16 +399,10 @@ void RasterizerOpenGL::SignalSyncPoint(u32 value) { | |||
| } | ||||
| 
 | ||||
| void RasterizerOpenGL::SignalReference() { | ||||
|     if (!gpu.IsAsync()) { | ||||
|         return; | ||||
|     } | ||||
|     fence_manager.SignalOrdering(); | ||||
| } | ||||
| 
 | ||||
| void RasterizerOpenGL::ReleaseFences() { | ||||
|     if (!gpu.IsAsync()) { | ||||
|         return; | ||||
|     } | ||||
|     fence_manager.WaitPendingFences(); | ||||
| } | ||||
| 
 | ||||
|  | @ -426,6 +419,7 @@ void RasterizerOpenGL::WaitForIdle() { | |||
| } | ||||
| 
 | ||||
| void RasterizerOpenGL::FragmentBarrier() { | ||||
|     glTextureBarrier(); | ||||
|     glMemoryBarrier(GL_FRAMEBUFFER_BARRIER_BIT | GL_TEXTURE_FETCH_BARRIER_BIT); | ||||
| } | ||||
| 
 | ||||
|  |  | |||
|  | @ -80,10 +80,11 @@ public: | |||
|     bool MustFlushRegion(VAddr addr, u64 size) override; | ||||
|     void InvalidateRegion(VAddr addr, u64 size) override; | ||||
|     void OnCPUWrite(VAddr addr, u64 size) override; | ||||
|     void SyncGuestHost() override; | ||||
|     void InvalidateGPUCache() override; | ||||
|     void UnmapMemory(VAddr addr, u64 size) override; | ||||
|     void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override; | ||||
|     void SignalSemaphore(GPUVAddr addr, u32 value) override; | ||||
|     void SignalFence(std::function<void()>&& func) override; | ||||
|     void SyncOperation(std::function<void()>&& func) override; | ||||
|     void SignalSyncPoint(u32 value) override; | ||||
|     void SignalReference() override; | ||||
|     void ReleaseFences() override; | ||||
|  |  | |||
|  | @ -11,11 +11,8 @@ | |||
| 
 | ||||
| namespace Vulkan { | ||||
| 
 | ||||
| InnerFence::InnerFence(Scheduler& scheduler_, u32 payload_, bool is_stubbed_) | ||||
|     : FenceBase{payload_, is_stubbed_}, scheduler{scheduler_} {} | ||||
| 
 | ||||
| InnerFence::InnerFence(Scheduler& scheduler_, u8* address_, u32 payload_, bool is_stubbed_) | ||||
|     : FenceBase{address_, payload_, is_stubbed_}, scheduler{scheduler_} {} | ||||
| InnerFence::InnerFence(Scheduler& scheduler_, bool is_stubbed_) | ||||
|     : FenceBase{is_stubbed_}, scheduler{scheduler_} {} | ||||
| 
 | ||||
| InnerFence::~InnerFence() = default; | ||||
| 
 | ||||
|  | @ -48,12 +45,8 @@ FenceManager::FenceManager(VideoCore::RasterizerInterface& rasterizer_, Tegra::G | |||
|     : GenericFenceManager{rasterizer_, gpu_, texture_cache_, buffer_cache_, query_cache_}, | ||||
|       scheduler{scheduler_} {} | ||||
| 
 | ||||
| Fence FenceManager::CreateFence(u32 value, bool is_stubbed) { | ||||
|     return std::make_shared<InnerFence>(scheduler, value, is_stubbed); | ||||
| } | ||||
| 
 | ||||
| Fence FenceManager::CreateFence(u8* addr, u32 value, bool is_stubbed) { | ||||
|     return std::make_shared<InnerFence>(scheduler, addr, value, is_stubbed); | ||||
| Fence FenceManager::CreateFence(bool is_stubbed) { | ||||
|     return std::make_shared<InnerFence>(scheduler, is_stubbed); | ||||
| } | ||||
| 
 | ||||
| void FenceManager::QueueFence(Fence& fence) { | ||||
|  |  | |||
|  | @ -25,8 +25,7 @@ class Scheduler; | |||
| 
 | ||||
| class InnerFence : public VideoCommon::FenceBase { | ||||
| public: | ||||
|     explicit InnerFence(Scheduler& scheduler_, u32 payload_, bool is_stubbed_); | ||||
|     explicit InnerFence(Scheduler& scheduler_, u8* address_, u32 payload_, bool is_stubbed_); | ||||
|     explicit InnerFence(Scheduler& scheduler_, bool is_stubbed_); | ||||
|     ~InnerFence(); | ||||
| 
 | ||||
|     void Queue(); | ||||
|  | @ -50,8 +49,7 @@ public: | |||
|                           QueryCache& query_cache, const Device& device, Scheduler& scheduler); | ||||
| 
 | ||||
| protected: | ||||
|     Fence CreateFence(u32 value, bool is_stubbed) override; | ||||
|     Fence CreateFence(u8* addr, u32 value, bool is_stubbed) override; | ||||
|     Fence CreateFence(bool is_stubbed) override; | ||||
|     void QueueFence(Fence& fence) override; | ||||
|     bool IsFenceSignaled(Fence& fence) const override; | ||||
|     void WaitFence(Fence& fence) override; | ||||
|  |  | |||
|  | @ -428,7 +428,7 @@ void RasterizerVulkan::OnCPUWrite(VAddr addr, u64 size) { | |||
|     } | ||||
| } | ||||
| 
 | ||||
| void RasterizerVulkan::SyncGuestHost() { | ||||
| void RasterizerVulkan::InvalidateGPUCache() { | ||||
|     pipeline_cache.SyncGuestHost(); | ||||
|     { | ||||
|         std::scoped_lock lock{buffer_cache.mutex}; | ||||
|  | @ -455,13 +455,12 @@ void RasterizerVulkan::ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) { | |||
|     } | ||||
| } | ||||
| 
 | ||||
| void RasterizerVulkan::SignalSemaphore(GPUVAddr addr, u32 value) { | ||||
|     if (!gpu.IsAsync()) { | ||||
|         gpu_memory->Write<u32>(addr, value); | ||||
|         return; | ||||
| void RasterizerVulkan::SignalFence(std::function<void()>&& func) { | ||||
|     fence_manager.SignalFence(std::move(func)); | ||||
| } | ||||
|     auto paddr = gpu_memory->GetPointer(addr); | ||||
|     fence_manager.SignalSemaphore(paddr, value); | ||||
| 
 | ||||
| void RasterizerVulkan::SyncOperation(std::function<void()>&& func) { | ||||
|     fence_manager.SyncOperation(std::move(func)); | ||||
| } | ||||
| 
 | ||||
| void RasterizerVulkan::SignalSyncPoint(u32 value) { | ||||
|  | @ -469,16 +468,10 @@ void RasterizerVulkan::SignalSyncPoint(u32 value) { | |||
| } | ||||
| 
 | ||||
| void RasterizerVulkan::SignalReference() { | ||||
|     if (!gpu.IsAsync()) { | ||||
|         return; | ||||
|     } | ||||
|     fence_manager.SignalOrdering(); | ||||
| } | ||||
| 
 | ||||
| void RasterizerVulkan::ReleaseFences() { | ||||
|     if (!gpu.IsAsync()) { | ||||
|         return; | ||||
|     } | ||||
|     fence_manager.WaitPendingFences(); | ||||
| } | ||||
| 
 | ||||
|  |  | |||
|  | @ -76,10 +76,11 @@ public: | |||
|     bool MustFlushRegion(VAddr addr, u64 size) override; | ||||
|     void InvalidateRegion(VAddr addr, u64 size) override; | ||||
|     void OnCPUWrite(VAddr addr, u64 size) override; | ||||
|     void SyncGuestHost() override; | ||||
|     void InvalidateGPUCache() override; | ||||
|     void UnmapMemory(VAddr addr, u64 size) override; | ||||
|     void ModifyGPUMemory(size_t as_id, GPUVAddr addr, u64 size) override; | ||||
|     void SignalSemaphore(GPUVAddr addr, u32 value) override; | ||||
|     void SignalFence(std::function<void()>&& func) override; | ||||
|     void SyncOperation(std::function<void()>&& func) override; | ||||
|     void SignalSyncPoint(u32 value) override; | ||||
|     void SignalReference() override; | ||||
|     void ReleaseFences() override; | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Fernando Sahmkow
						Fernando Sahmkow