forked from eden-emu/eden

gpu: Use host address for caching instead of guest address.

This commit is contained in:

parent e400f8b65f
commit 7fccfc3ee7

26 changed files with 393 additions and 293 deletions

		|  | @ -10,6 +10,7 @@ | ||||||
| #include "core/core.h" | #include "core/core.h" | ||||||
| #include "core/hle/service/nvdrv/devices/nvhost_as_gpu.h" | #include "core/hle/service/nvdrv/devices/nvhost_as_gpu.h" | ||||||
| #include "core/hle/service/nvdrv/devices/nvmap.h" | #include "core/hle/service/nvdrv/devices/nvmap.h" | ||||||
|  | #include "core/memory.h" | ||||||
| #include "video_core/memory_manager.h" | #include "video_core/memory_manager.h" | ||||||
| #include "video_core/rasterizer_interface.h" | #include "video_core/rasterizer_interface.h" | ||||||
| #include "video_core/renderer_base.h" | #include "video_core/renderer_base.h" | ||||||
|  | @ -178,7 +179,7 @@ u32 nvhost_as_gpu::UnmapBuffer(const std::vector<u8>& input, std::vector<u8>& ou | ||||||
|     auto& gpu = system_instance.GPU(); |     auto& gpu = system_instance.GPU(); | ||||||
|     auto cpu_addr = gpu.MemoryManager().GpuToCpuAddress(params.offset); |     auto cpu_addr = gpu.MemoryManager().GpuToCpuAddress(params.offset); | ||||||
|     ASSERT(cpu_addr); |     ASSERT(cpu_addr); | ||||||
|     gpu.FlushAndInvalidateRegion(*cpu_addr, itr->second.size); |     gpu.FlushAndInvalidateRegion(ToCacheAddr(Memory::GetPointer(*cpu_addr)), itr->second.size); | ||||||
| 
 | 
 | ||||||
|     params.offset = gpu.MemoryManager().UnmapBuffer(params.offset, itr->second.size); |     params.offset = gpu.MemoryManager().UnmapBuffer(params.offset, itr->second.size); | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -67,8 +67,11 @@ static void MapPages(PageTable& page_table, VAddr base, u64 size, u8* memory, Pa | ||||||
|     LOG_DEBUG(HW_Memory, "Mapping {} onto {:016X}-{:016X}", fmt::ptr(memory), base * PAGE_SIZE, |     LOG_DEBUG(HW_Memory, "Mapping {} onto {:016X}-{:016X}", fmt::ptr(memory), base * PAGE_SIZE, | ||||||
|               (base + size) * PAGE_SIZE); |               (base + size) * PAGE_SIZE); | ||||||
| 
 | 
 | ||||||
|  |     // During boot, current_page_table might not be set yet, in which case we need not flush
 | ||||||
|  |     if (current_page_table) { | ||||||
|         RasterizerFlushVirtualRegion(base << PAGE_BITS, size * PAGE_SIZE, |         RasterizerFlushVirtualRegion(base << PAGE_BITS, size * PAGE_SIZE, | ||||||
|                                      FlushMode::FlushAndInvalidate); |                                      FlushMode::FlushAndInvalidate); | ||||||
|  |     } | ||||||
| 
 | 
 | ||||||
|     VAddr end = base + size; |     VAddr end = base + size; | ||||||
|     ASSERT_MSG(end <= page_table.pointers.size(), "out of range mapping at {:016X}", |     ASSERT_MSG(end <= page_table.pointers.size(), "out of range mapping at {:016X}", | ||||||
|  | @ -359,13 +362,13 @@ void RasterizerFlushVirtualRegion(VAddr start, u64 size, FlushMode mode) { | ||||||
|         auto& gpu = system_instance.GPU(); |         auto& gpu = system_instance.GPU(); | ||||||
|         switch (mode) { |         switch (mode) { | ||||||
|         case FlushMode::Flush: |         case FlushMode::Flush: | ||||||
|             gpu.FlushRegion(overlap_start, overlap_size); |             gpu.FlushRegion(ToCacheAddr(GetPointer(overlap_start)), overlap_size); | ||||||
|             break; |             break; | ||||||
|         case FlushMode::Invalidate: |         case FlushMode::Invalidate: | ||||||
|             gpu.InvalidateRegion(overlap_start, overlap_size); |             gpu.InvalidateRegion(ToCacheAddr(GetPointer(overlap_start)), overlap_size); | ||||||
|             break; |             break; | ||||||
|         case FlushMode::FlushAndInvalidate: |         case FlushMode::FlushAndInvalidate: | ||||||
|             gpu.FlushAndInvalidateRegion(overlap_start, overlap_size); |             gpu.FlushAndInvalidateRegion(ToCacheAddr(GetPointer(overlap_start)), overlap_size); | ||||||
|             break; |             break; | ||||||
|         } |         } | ||||||
|     }; |     }; | ||||||
|  |  | ||||||
|  | @ -9,6 +9,7 @@ | ||||||
| #include "video_core/engines/kepler_memory.h" | #include "video_core/engines/kepler_memory.h" | ||||||
| #include "video_core/engines/maxwell_3d.h" | #include "video_core/engines/maxwell_3d.h" | ||||||
| #include "video_core/rasterizer_interface.h" | #include "video_core/rasterizer_interface.h" | ||||||
|  | #include "video_core/renderer_base.h" | ||||||
| 
 | 
 | ||||||
| namespace Tegra::Engines { | namespace Tegra::Engines { | ||||||
| 
 | 
 | ||||||
|  | @ -48,7 +49,8 @@ void KeplerMemory::ProcessData(u32 data) { | ||||||
|     // We have to invalidate the destination region to evict any outdated surfaces from the cache.
 |     // We have to invalidate the destination region to evict any outdated surfaces from the cache.
 | ||||||
|     // We do this before actually writing the new data because the destination address might contain
 |     // We do this before actually writing the new data because the destination address might contain
 | ||||||
|     // a dirty surface that will have to be written back to memory.
 |     // a dirty surface that will have to be written back to memory.
 | ||||||
|     Core::System::GetInstance().GPU().InvalidateRegion(*dest_address, sizeof(u32)); |     system.Renderer().Rasterizer().InvalidateRegion(ToCacheAddr(Memory::GetPointer(*dest_address)), | ||||||
|  |                                                     sizeof(u32)); | ||||||
| 
 | 
 | ||||||
|     Memory::Write32(*dest_address, data); |     Memory::Write32(*dest_address, data); | ||||||
|     system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); |     system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); | ||||||
|  |  | ||||||
|  | @ -396,7 +396,10 @@ void Maxwell3D::ProcessCBData(u32 value) { | ||||||
|     const auto address = memory_manager.GpuToCpuAddress(buffer_address + regs.const_buffer.cb_pos); |     const auto address = memory_manager.GpuToCpuAddress(buffer_address + regs.const_buffer.cb_pos); | ||||||
|     ASSERT_MSG(address, "Invalid GPU address"); |     ASSERT_MSG(address, "Invalid GPU address"); | ||||||
| 
 | 
 | ||||||
|     Memory::Write32(*address, value); |     u8* ptr{Memory::GetPointer(*address)}; | ||||||
|  |     rasterizer.InvalidateRegion(ToCacheAddr(ptr), sizeof(u32)); | ||||||
|  |     std::memcpy(ptr, &value, sizeof(u32)); | ||||||
|  | 
 | ||||||
|     dirty_flags.OnMemoryWrite(); |     dirty_flags.OnMemoryWrite(); | ||||||
| 
 | 
 | ||||||
|     // Increment the current buffer position.
 |     // Increment the current buffer position.
 | ||||||
|  |  | ||||||
|  | @ -9,6 +9,7 @@ | ||||||
| #include "video_core/engines/maxwell_3d.h" | #include "video_core/engines/maxwell_3d.h" | ||||||
| #include "video_core/engines/maxwell_dma.h" | #include "video_core/engines/maxwell_dma.h" | ||||||
| #include "video_core/rasterizer_interface.h" | #include "video_core/rasterizer_interface.h" | ||||||
|  | #include "video_core/renderer_base.h" | ||||||
| #include "video_core/textures/decoders.h" | #include "video_core/textures/decoders.h" | ||||||
| 
 | 
 | ||||||
| namespace Tegra::Engines { | namespace Tegra::Engines { | ||||||
|  | @ -92,12 +93,14 @@ void MaxwellDMA::HandleCopy() { | ||||||
|     const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) { |     const auto FlushAndInvalidate = [&](u32 src_size, u64 dst_size) { | ||||||
|         // TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated
 |         // TODO(Subv): For now, manually flush the regions until we implement GPU-accelerated
 | ||||||
|         // copying.
 |         // copying.
 | ||||||
|         Core::System::GetInstance().GPU().FlushRegion(*source_cpu, src_size); |         Core::System::GetInstance().Renderer().Rasterizer().FlushRegion( | ||||||
|  |             ToCacheAddr(Memory::GetPointer(*source_cpu)), src_size); | ||||||
| 
 | 
 | ||||||
|         // We have to invalidate the destination region to evict any outdated surfaces from the
 |         // We have to invalidate the destination region to evict any outdated surfaces from the
 | ||||||
|         // cache. We do this before actually writing the new data because the destination address
 |         // cache. We do this before actually writing the new data because the destination address
 | ||||||
|         // might contain a dirty surface that will have to be written back to memory.
 |         // might contain a dirty surface that will have to be written back to memory.
 | ||||||
|         Core::System::GetInstance().GPU().InvalidateRegion(*dest_cpu, dst_size); |         Core::System::GetInstance().Renderer().Rasterizer().InvalidateRegion( | ||||||
|  |             ToCacheAddr(Memory::GetPointer(*dest_cpu)), dst_size); | ||||||
|     }; |     }; | ||||||
| 
 | 
 | ||||||
|     if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) { |     if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) { | ||||||
|  |  | ||||||
|  | @ -11,6 +11,11 @@ | ||||||
| #include "video_core/dma_pusher.h" | #include "video_core/dma_pusher.h" | ||||||
| #include "video_core/memory_manager.h" | #include "video_core/memory_manager.h" | ||||||
| 
 | 
 | ||||||
|  | using CacheAddr = std::uintptr_t; | ||||||
|  | inline CacheAddr ToCacheAddr(const void* host_ptr) { | ||||||
|  |     return reinterpret_cast<CacheAddr>(host_ptr); | ||||||
|  | } | ||||||
|  | 
 | ||||||
| namespace Core { | namespace Core { | ||||||
| class System; | class System; | ||||||
| } | } | ||||||
|  | @ -209,13 +214,13 @@ public: | ||||||
|         std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) = 0; |         std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) = 0; | ||||||
| 
 | 
 | ||||||
|     /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
 |     /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
 | ||||||
|     virtual void FlushRegion(VAddr addr, u64 size) = 0; |     virtual void FlushRegion(CacheAddr addr, u64 size) = 0; | ||||||
| 
 | 
 | ||||||
|     /// Notify rasterizer that any caches of the specified region should be invalidated
 |     /// Notify rasterizer that any caches of the specified region should be invalidated
 | ||||||
|     virtual void InvalidateRegion(VAddr addr, u64 size) = 0; |     virtual void InvalidateRegion(CacheAddr addr, u64 size) = 0; | ||||||
| 
 | 
 | ||||||
|     /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
 |     /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
 | ||||||
|     virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0; |     virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0; | ||||||
| 
 | 
 | ||||||
| private: | private: | ||||||
|     void ProcessBindMethod(const MethodCall& method_call); |     void ProcessBindMethod(const MethodCall& method_call); | ||||||
|  |  | ||||||
|  | @ -22,15 +22,15 @@ void GPUAsynch::SwapBuffers( | ||||||
|     gpu_thread.SwapBuffers(std::move(framebuffer)); |     gpu_thread.SwapBuffers(std::move(framebuffer)); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void GPUAsynch::FlushRegion(VAddr addr, u64 size) { | void GPUAsynch::FlushRegion(CacheAddr addr, u64 size) { | ||||||
|     gpu_thread.FlushRegion(addr, size); |     gpu_thread.FlushRegion(addr, size); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void GPUAsynch::InvalidateRegion(VAddr addr, u64 size) { | void GPUAsynch::InvalidateRegion(CacheAddr addr, u64 size) { | ||||||
|     gpu_thread.InvalidateRegion(addr, size); |     gpu_thread.InvalidateRegion(addr, size); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void GPUAsynch::FlushAndInvalidateRegion(VAddr addr, u64 size) { | void GPUAsynch::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { | ||||||
|     gpu_thread.FlushAndInvalidateRegion(addr, size); |     gpu_thread.FlushAndInvalidateRegion(addr, size); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -26,9 +26,9 @@ public: | ||||||
|     void PushGPUEntries(Tegra::CommandList&& entries) override; |     void PushGPUEntries(Tegra::CommandList&& entries) override; | ||||||
|     void SwapBuffers( |     void SwapBuffers( | ||||||
|         std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override; |         std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override; | ||||||
|     void FlushRegion(VAddr addr, u64 size) override; |     void FlushRegion(CacheAddr addr, u64 size) override; | ||||||
|     void InvalidateRegion(VAddr addr, u64 size) override; |     void InvalidateRegion(CacheAddr addr, u64 size) override; | ||||||
|     void FlushAndInvalidateRegion(VAddr addr, u64 size) override; |     void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; | ||||||
| 
 | 
 | ||||||
| private: | private: | ||||||
|     GPUThread::ThreadManager gpu_thread; |     GPUThread::ThreadManager gpu_thread; | ||||||
|  |  | ||||||
|  | @ -22,15 +22,15 @@ void GPUSynch::SwapBuffers( | ||||||
|     renderer.SwapBuffers(std::move(framebuffer)); |     renderer.SwapBuffers(std::move(framebuffer)); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void GPUSynch::FlushRegion(VAddr addr, u64 size) { | void GPUSynch::FlushRegion(CacheAddr addr, u64 size) { | ||||||
|     renderer.Rasterizer().FlushRegion(addr, size); |     renderer.Rasterizer().FlushRegion(addr, size); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void GPUSynch::InvalidateRegion(VAddr addr, u64 size) { | void GPUSynch::InvalidateRegion(CacheAddr addr, u64 size) { | ||||||
|     renderer.Rasterizer().InvalidateRegion(addr, size); |     renderer.Rasterizer().InvalidateRegion(addr, size); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void GPUSynch::FlushAndInvalidateRegion(VAddr addr, u64 size) { | void GPUSynch::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { | ||||||
|     renderer.Rasterizer().FlushAndInvalidateRegion(addr, size); |     renderer.Rasterizer().FlushAndInvalidateRegion(addr, size); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -21,9 +21,9 @@ public: | ||||||
|     void PushGPUEntries(Tegra::CommandList&& entries) override; |     void PushGPUEntries(Tegra::CommandList&& entries) override; | ||||||
|     void SwapBuffers( |     void SwapBuffers( | ||||||
|         std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override; |         std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) override; | ||||||
|     void FlushRegion(VAddr addr, u64 size) override; |     void FlushRegion(CacheAddr addr, u64 size) override; | ||||||
|     void InvalidateRegion(VAddr addr, u64 size) override; |     void InvalidateRegion(CacheAddr addr, u64 size) override; | ||||||
|     void FlushAndInvalidateRegion(VAddr addr, u64 size) override; |     void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| } // namespace VideoCommon
 | } // namespace VideoCommon
 | ||||||
|  |  | ||||||
|  | @ -5,7 +5,6 @@ | ||||||
| #include "common/assert.h" | #include "common/assert.h" | ||||||
| #include "common/microprofile.h" | #include "common/microprofile.h" | ||||||
| #include "core/frontend/scope_acquire_window_context.h" | #include "core/frontend/scope_acquire_window_context.h" | ||||||
| #include "core/settings.h" |  | ||||||
| #include "video_core/dma_pusher.h" | #include "video_core/dma_pusher.h" | ||||||
| #include "video_core/gpu.h" | #include "video_core/gpu.h" | ||||||
| #include "video_core/gpu_thread.h" | #include "video_core/gpu_thread.h" | ||||||
|  | @ -13,38 +12,13 @@ | ||||||
| 
 | 
 | ||||||
| namespace VideoCommon::GPUThread { | namespace VideoCommon::GPUThread { | ||||||
| 
 | 
 | ||||||
| /// Executes a single GPU thread command
 |  | ||||||
| static void ExecuteCommand(CommandData* command, VideoCore::RendererBase& renderer, |  | ||||||
|                            Tegra::DmaPusher& dma_pusher) { |  | ||||||
|     if (const auto submit_list = std::get_if<SubmitListCommand>(command)) { |  | ||||||
|         dma_pusher.Push(std::move(submit_list->entries)); |  | ||||||
|         dma_pusher.DispatchCalls(); |  | ||||||
|     } else if (const auto data = std::get_if<SwapBuffersCommand>(command)) { |  | ||||||
|         renderer.SwapBuffers(data->framebuffer); |  | ||||||
|     } else if (const auto data = std::get_if<FlushRegionCommand>(command)) { |  | ||||||
|         renderer.Rasterizer().FlushRegion(data->addr, data->size); |  | ||||||
|     } else if (const auto data = std::get_if<InvalidateRegionCommand>(command)) { |  | ||||||
|         renderer.Rasterizer().InvalidateRegion(data->addr, data->size); |  | ||||||
|     } else if (const auto data = std::get_if<FlushAndInvalidateRegionCommand>(command)) { |  | ||||||
|         renderer.Rasterizer().FlushAndInvalidateRegion(data->addr, data->size); |  | ||||||
|     } else { |  | ||||||
|         UNREACHABLE(); |  | ||||||
|     } |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| /// Runs the GPU thread
 | /// Runs the GPU thread
 | ||||||
| static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher, | static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher, | ||||||
|                       SynchState& state) { |                       SynchState& state) { | ||||||
| 
 |  | ||||||
|     MicroProfileOnThreadCreate("GpuThread"); |     MicroProfileOnThreadCreate("GpuThread"); | ||||||
| 
 | 
 | ||||||
|     auto WaitForWakeup = [&]() { |  | ||||||
|         std::unique_lock<std::mutex> lock{state.signal_mutex}; |  | ||||||
|         state.signal_condition.wait(lock, [&] { return !state.is_idle || !state.is_running; }); |  | ||||||
|     }; |  | ||||||
| 
 |  | ||||||
|     // Wait for first GPU command before acquiring the window context
 |     // Wait for first GPU command before acquiring the window context
 | ||||||
|     WaitForWakeup(); |     state.WaitForCommands(); | ||||||
| 
 | 
 | ||||||
|     // If emulation was stopped during disk shader loading, abort before trying to acquire context
 |     // If emulation was stopped during disk shader loading, abort before trying to acquire context
 | ||||||
|     if (!state.is_running) { |     if (!state.is_running) { | ||||||
|  | @ -53,100 +27,72 @@ static void RunThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_p | ||||||
| 
 | 
 | ||||||
|     Core::Frontend::ScopeAcquireWindowContext acquire_context{renderer.GetRenderWindow()}; |     Core::Frontend::ScopeAcquireWindowContext acquire_context{renderer.GetRenderWindow()}; | ||||||
| 
 | 
 | ||||||
|  |     CommandDataContainer next; | ||||||
|     while (state.is_running) { |     while (state.is_running) { | ||||||
|         if (!state.is_running) { |         state.WaitForCommands(); | ||||||
|  |         while (!state.queue.Empty()) { | ||||||
|  |             state.queue.Pop(next); | ||||||
|  |             if (const auto submit_list = std::get_if<SubmitListCommand>(&next.data)) { | ||||||
|  |                 dma_pusher.Push(std::move(submit_list->entries)); | ||||||
|  |                 dma_pusher.DispatchCalls(); | ||||||
|  |             } else if (const auto data = std::get_if<SwapBuffersCommand>(&next.data)) { | ||||||
|  |                 state.DecrementFramesCounter(); | ||||||
|  |                 renderer.SwapBuffers(std::move(data->framebuffer)); | ||||||
|  |             } else if (const auto data = std::get_if<FlushRegionCommand>(&next.data)) { | ||||||
|  |                 renderer.Rasterizer().FlushRegion(data->addr, data->size); | ||||||
|  |             } else if (const auto data = std::get_if<InvalidateRegionCommand>(&next.data)) { | ||||||
|  |                 renderer.Rasterizer().InvalidateRegion(data->addr, data->size); | ||||||
|  |             } else if (const auto data = std::get_if<EndProcessingCommand>(&next.data)) { | ||||||
|                 return; |                 return; | ||||||
|  |             } else { | ||||||
|  |                 UNREACHABLE(); | ||||||
|             } |             } | ||||||
| 
 |  | ||||||
|         { |  | ||||||
|             // Thread has been woken up, so make the previous write queue the next read queue
 |  | ||||||
|             std::lock_guard<std::mutex> lock{state.signal_mutex}; |  | ||||||
|             std::swap(state.push_queue, state.pop_queue); |  | ||||||
|         } |         } | ||||||
| 
 |  | ||||||
|         // Execute all of the GPU commands
 |  | ||||||
|         while (!state.pop_queue->empty()) { |  | ||||||
|             ExecuteCommand(&state.pop_queue->front(), renderer, dma_pusher); |  | ||||||
|             state.pop_queue->pop(); |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
|         state.UpdateIdleState(); |  | ||||||
| 
 |  | ||||||
|         // Signal that the GPU thread has finished processing commands
 |  | ||||||
|         if (state.is_idle) { |  | ||||||
|             state.idle_condition.notify_one(); |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
|         // Wait for CPU thread to send more GPU commands
 |  | ||||||
|         WaitForWakeup(); |  | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| ThreadManager::ThreadManager(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher) | ThreadManager::ThreadManager(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher) | ||||||
|     : renderer{renderer}, dma_pusher{dma_pusher}, thread{RunThread, std::ref(renderer), |     : renderer{renderer}, dma_pusher{dma_pusher}, thread{RunThread, std::ref(renderer), | ||||||
|                                                          std::ref(dma_pusher), std::ref(state)}, |                                                          std::ref(dma_pusher), std::ref(state)} {} | ||||||
|       thread_id{thread.get_id()} {} |  | ||||||
| 
 | 
 | ||||||
| ThreadManager::~ThreadManager() { | ThreadManager::~ThreadManager() { | ||||||
|     { |  | ||||||
|     // Notify GPU thread that a shutdown is pending
 |     // Notify GPU thread that a shutdown is pending
 | ||||||
|         std::lock_guard<std::mutex> lock{state.signal_mutex}; |     PushCommand(EndProcessingCommand()); | ||||||
|         state.is_running = false; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     state.signal_condition.notify_one(); |  | ||||||
|     thread.join(); |     thread.join(); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void ThreadManager::SubmitList(Tegra::CommandList&& entries) { | void ThreadManager::SubmitList(Tegra::CommandList&& entries) { | ||||||
|     if (entries.empty()) { |     PushCommand(SubmitListCommand(std::move(entries))); | ||||||
|         return; |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     PushCommand(SubmitListCommand(std::move(entries)), false, false); |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void ThreadManager::SwapBuffers( | void ThreadManager::SwapBuffers( | ||||||
|     std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) { |     std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer) { | ||||||
|     PushCommand(SwapBuffersCommand(std::move(framebuffer)), true, false); |     state.IncrementFramesCounter(); | ||||||
|  |     PushCommand(SwapBuffersCommand(std::move(framebuffer))); | ||||||
|  |     state.WaitForFrames(); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void ThreadManager::FlushRegion(VAddr addr, u64 size) { | void ThreadManager::FlushRegion(CacheAddr addr, u64 size) { | ||||||
|     // Block the CPU when using accurate emulation
 |     PushCommand(FlushRegionCommand(addr, size)); | ||||||
|     PushCommand(FlushRegionCommand(addr, size), Settings::values.use_accurate_gpu_emulation, false); |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void ThreadManager::InvalidateRegion(VAddr addr, u64 size) { | void ThreadManager::InvalidateRegion(CacheAddr addr, u64 size) { | ||||||
|     PushCommand(InvalidateRegionCommand(addr, size), true, true); |     if (state.queue.Empty()) { | ||||||
|  |         // It's quicker to invalidate a single region on the CPU if the queue is already empty
 | ||||||
|  |         renderer.Rasterizer().InvalidateRegion(addr, size); | ||||||
|  |     } else { | ||||||
|  |         PushCommand(InvalidateRegionCommand(addr, size)); | ||||||
|  |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) { | void ThreadManager::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { | ||||||
|  |     // Skip flush on asynch mode, as FlushAndInvalidateRegion is not used for anything too important
 | ||||||
|     InvalidateRegion(addr, size); |     InvalidateRegion(addr, size); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void ThreadManager::PushCommand(CommandData&& command_data, bool wait_for_idle, bool allow_on_cpu) { | void ThreadManager::PushCommand(CommandData&& command_data) { | ||||||
|     { |     state.queue.Push(CommandDataContainer(std::move(command_data))); | ||||||
|         std::lock_guard<std::mutex> lock{state.signal_mutex}; |     state.SignalCommands(); | ||||||
| 
 |  | ||||||
|         if ((allow_on_cpu && state.is_idle) || IsGpuThread()) { |  | ||||||
|             // Execute the command synchronously on the current thread
 |  | ||||||
|             ExecuteCommand(&command_data, renderer, dma_pusher); |  | ||||||
|             return; |  | ||||||
|         } |  | ||||||
| 
 |  | ||||||
|         // Push the command to the GPU thread
 |  | ||||||
|         state.UpdateIdleState(); |  | ||||||
|         state.push_queue->emplace(command_data); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     // Signal the GPU thread that commands are pending
 |  | ||||||
|     state.signal_condition.notify_one(); |  | ||||||
| 
 |  | ||||||
|     if (wait_for_idle) { |  | ||||||
|         // Wait for the GPU to be idle (all commands to be executed)
 |  | ||||||
|         std::unique_lock<std::mutex> lock{state.idle_mutex}; |  | ||||||
|         state.idle_condition.wait(lock, [this] { return static_cast<bool>(state.is_idle); }); |  | ||||||
|     } |  | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| } // namespace VideoCommon::GPUThread
 | } // namespace VideoCommon::GPUThread
 | ||||||
|  |  | ||||||
|  | @ -13,6 +13,9 @@ | ||||||
| #include <thread> | #include <thread> | ||||||
| #include <variant> | #include <variant> | ||||||
| 
 | 
 | ||||||
|  | #include "common/threadsafe_queue.h" | ||||||
|  | #include "video_core/gpu.h" | ||||||
|  | 
 | ||||||
| namespace Tegra { | namespace Tegra { | ||||||
| struct FramebufferConfig; | struct FramebufferConfig; | ||||||
| class DmaPusher; | class DmaPusher; | ||||||
|  | @ -24,6 +27,9 @@ class RendererBase; | ||||||
| 
 | 
 | ||||||
| namespace VideoCommon::GPUThread { | namespace VideoCommon::GPUThread { | ||||||
| 
 | 
 | ||||||
|  | /// Command to signal to the GPU thread that processing has ended
 | ||||||
|  | struct EndProcessingCommand final {}; | ||||||
|  | 
 | ||||||
| /// Command to signal to the GPU thread that a command list is ready for processing
 | /// Command to signal to the GPU thread that a command list is ready for processing
 | ||||||
| struct SubmitListCommand final { | struct SubmitListCommand final { | ||||||
|     explicit SubmitListCommand(Tegra::CommandList&& entries) : entries{std::move(entries)} {} |     explicit SubmitListCommand(Tegra::CommandList&& entries) : entries{std::move(entries)} {} | ||||||
|  | @ -36,59 +42,110 @@ struct SwapBuffersCommand final { | ||||||
|     explicit SwapBuffersCommand(std::optional<const Tegra::FramebufferConfig> framebuffer) |     explicit SwapBuffersCommand(std::optional<const Tegra::FramebufferConfig> framebuffer) | ||||||
|         : framebuffer{std::move(framebuffer)} {} |         : framebuffer{std::move(framebuffer)} {} | ||||||
| 
 | 
 | ||||||
|     std::optional<const Tegra::FramebufferConfig> framebuffer; |     std::optional<Tegra::FramebufferConfig> framebuffer; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| /// Command to signal to the GPU thread to flush a region
 | /// Command to signal to the GPU thread to flush a region
 | ||||||
| struct FlushRegionCommand final { | struct FlushRegionCommand final { | ||||||
|     explicit constexpr FlushRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {} |     explicit constexpr FlushRegionCommand(CacheAddr addr, u64 size) : addr{addr}, size{size} {} | ||||||
| 
 | 
 | ||||||
|     const VAddr addr; |     CacheAddr addr; | ||||||
|     const u64 size; |     u64 size; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| /// Command to signal to the GPU thread to invalidate a region
 | /// Command to signal to the GPU thread to invalidate a region
 | ||||||
| struct InvalidateRegionCommand final { | struct InvalidateRegionCommand final { | ||||||
|     explicit constexpr InvalidateRegionCommand(VAddr addr, u64 size) : addr{addr}, size{size} {} |     explicit constexpr InvalidateRegionCommand(CacheAddr addr, u64 size) : addr{addr}, size{size} {} | ||||||
| 
 | 
 | ||||||
|     const VAddr addr; |     CacheAddr addr; | ||||||
|     const u64 size; |     u64 size; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| /// Command to signal to the GPU thread to flush and invalidate a region
 | /// Command to signal to the GPU thread to flush and invalidate a region
 | ||||||
| struct FlushAndInvalidateRegionCommand final { | struct FlushAndInvalidateRegionCommand final { | ||||||
|     explicit constexpr FlushAndInvalidateRegionCommand(VAddr addr, u64 size) |     explicit constexpr FlushAndInvalidateRegionCommand(CacheAddr addr, u64 size) | ||||||
|         : addr{addr}, size{size} {} |         : addr{addr}, size{size} {} | ||||||
| 
 | 
 | ||||||
|     const VAddr addr; |     CacheAddr addr; | ||||||
|     const u64 size; |     u64 size; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| using CommandData = std::variant<SubmitListCommand, SwapBuffersCommand, FlushRegionCommand, | using CommandData = | ||||||
|  |     std::variant<EndProcessingCommand, SubmitListCommand, SwapBuffersCommand, FlushRegionCommand, | ||||||
|                  InvalidateRegionCommand, FlushAndInvalidateRegionCommand>; |                  InvalidateRegionCommand, FlushAndInvalidateRegionCommand>; | ||||||
| 
 | 
 | ||||||
|  | struct CommandDataContainer { | ||||||
|  |     CommandDataContainer() = default; | ||||||
|  | 
 | ||||||
|  |     CommandDataContainer(CommandData&& data) : data{std::move(data)} {} | ||||||
|  | 
 | ||||||
|  |     CommandDataContainer& operator=(const CommandDataContainer& t) { | ||||||
|  |         data = std::move(t.data); | ||||||
|  |         return *this; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     CommandData data; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
| /// Struct used to synchronize the GPU thread
 | /// Struct used to synchronize the GPU thread
 | ||||||
| struct SynchState final { | struct SynchState final { | ||||||
|     std::atomic<bool> is_running{true}; |     std::atomic_bool is_running{true}; | ||||||
|     std::atomic<bool> is_idle{true}; |     std::atomic_int queued_frame_count{}; | ||||||
|     std::condition_variable signal_condition; |     std::mutex frames_mutex; | ||||||
|     std::mutex signal_mutex; |     std::mutex commands_mutex; | ||||||
|     std::condition_variable idle_condition; |     std::condition_variable commands_condition; | ||||||
|     std::mutex idle_mutex; |     std::condition_variable frames_condition; | ||||||
| 
 | 
 | ||||||
|     // We use two queues for sending commands to the GPU thread, one for writing (push_queue) to and
 |     void IncrementFramesCounter() { | ||||||
|     // one for reading from (pop_queue). These are swapped whenever the current pop_queue becomes
 |         std::lock_guard<std::mutex> lock{frames_mutex}; | ||||||
|     // empty. This allows for efficient thread-safe access, as it does not require any copies.
 |         ++queued_frame_count; | ||||||
| 
 |  | ||||||
|     using CommandQueue = std::queue<CommandData>; |  | ||||||
|     std::array<CommandQueue, 2> command_queues; |  | ||||||
|     CommandQueue* push_queue{&command_queues[0]}; |  | ||||||
|     CommandQueue* pop_queue{&command_queues[1]}; |  | ||||||
| 
 |  | ||||||
|     void UpdateIdleState() { |  | ||||||
|         std::lock_guard<std::mutex> lock{idle_mutex}; |  | ||||||
|         is_idle = command_queues[0].empty() && command_queues[1].empty(); |  | ||||||
|     } |     } | ||||||
|  | 
 | ||||||
|  |     void DecrementFramesCounter() { | ||||||
|  |         { | ||||||
|  |             std::lock_guard<std::mutex> lock{frames_mutex}; | ||||||
|  |             --queued_frame_count; | ||||||
|  | 
 | ||||||
|  |             if (queued_frame_count) { | ||||||
|  |                 return; | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |         frames_condition.notify_one(); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     void WaitForFrames() { | ||||||
|  |         { | ||||||
|  |             std::lock_guard<std::mutex> lock{frames_mutex}; | ||||||
|  |             if (!queued_frame_count) { | ||||||
|  |                 return; | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         // Wait for the GPU to be idle (all commands to be executed)
 | ||||||
|  |         { | ||||||
|  |             std::unique_lock<std::mutex> lock{frames_mutex}; | ||||||
|  |             frames_condition.wait(lock, [this] { return !queued_frame_count; }); | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     void SignalCommands() { | ||||||
|  |         { | ||||||
|  |             std::unique_lock<std::mutex> lock{commands_mutex}; | ||||||
|  |             if (queue.Empty()) { | ||||||
|  |                 return; | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         commands_condition.notify_one(); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     void WaitForCommands() { | ||||||
|  |         std::unique_lock<std::mutex> lock{commands_mutex}; | ||||||
|  |         commands_condition.wait(lock, [this] { return !queue.Empty(); }); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     using CommandQueue = Common::SPSCQueue<CommandDataContainer>; | ||||||
|  |     CommandQueue queue; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| /// Class used to manage the GPU thread
 | /// Class used to manage the GPU thread
 | ||||||
|  | @ -105,22 +162,17 @@ public: | ||||||
|         std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer); |         std::optional<std::reference_wrapper<const Tegra::FramebufferConfig>> framebuffer); | ||||||
| 
 | 
 | ||||||
|     /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
 |     /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
 | ||||||
|     void FlushRegion(VAddr addr, u64 size); |     void FlushRegion(CacheAddr addr, u64 size); | ||||||
| 
 | 
 | ||||||
|     /// Notify rasterizer that any caches of the specified region should be invalidated
 |     /// Notify rasterizer that any caches of the specified region should be invalidated
 | ||||||
|     void InvalidateRegion(VAddr addr, u64 size); |     void InvalidateRegion(CacheAddr addr, u64 size); | ||||||
| 
 | 
 | ||||||
|     /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
 |     /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
 | ||||||
|     void FlushAndInvalidateRegion(VAddr addr, u64 size); |     void FlushAndInvalidateRegion(CacheAddr addr, u64 size); | ||||||
| 
 | 
 | ||||||
| private: | private: | ||||||
|     /// Pushes a command to be executed by the GPU thread
 |     /// Pushes a command to be executed by the GPU thread
 | ||||||
|     void PushCommand(CommandData&& command_data, bool wait_for_idle, bool allow_on_cpu); |     void PushCommand(CommandData&& command_data); | ||||||
| 
 |  | ||||||
|     /// Returns true if this is called by the GPU thread
 |  | ||||||
|     bool IsGpuThread() const { |  | ||||||
|         return std::this_thread::get_id() == thread_id; |  | ||||||
|     } |  | ||||||
| 
 | 
 | ||||||
| private: | private: | ||||||
|     SynchState state; |     SynchState state; | ||||||
|  |  | ||||||
|  | @ -4,6 +4,7 @@ | ||||||
| 
 | 
 | ||||||
| #pragma once | #pragma once | ||||||
| 
 | 
 | ||||||
|  | #include <mutex> | ||||||
| #include <set> | #include <set> | ||||||
| #include <unordered_map> | #include <unordered_map> | ||||||
| 
 | 
 | ||||||
|  | @ -12,14 +13,26 @@ | ||||||
| 
 | 
 | ||||||
| #include "common/common_types.h" | #include "common/common_types.h" | ||||||
| #include "core/settings.h" | #include "core/settings.h" | ||||||
|  | #include "video_core/gpu.h" | ||||||
| #include "video_core/rasterizer_interface.h" | #include "video_core/rasterizer_interface.h" | ||||||
| 
 | 
 | ||||||
| class RasterizerCacheObject { | class RasterizerCacheObject { | ||||||
| public: | public: | ||||||
|  |     explicit RasterizerCacheObject(const u8* host_ptr) | ||||||
|  |         : host_ptr{host_ptr}, cache_addr{ToCacheAddr(host_ptr)} {} | ||||||
|  | 
 | ||||||
|     virtual ~RasterizerCacheObject(); |     virtual ~RasterizerCacheObject(); | ||||||
| 
 | 
 | ||||||
|  |     CacheAddr GetCacheAddr() const { | ||||||
|  |         return cache_addr; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     const u8* GetHostPtr() const { | ||||||
|  |         return host_ptr; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|     /// Gets the address of the shader in guest memory, required for cache management
 |     /// Gets the address of the shader in guest memory, required for cache management
 | ||||||
|     virtual VAddr GetAddr() const = 0; |     virtual VAddr GetCpuAddr() const = 0; | ||||||
| 
 | 
 | ||||||
|     /// Gets the size of the shader in guest memory, required for cache management
 |     /// Gets the size of the shader in guest memory, required for cache management
 | ||||||
|     virtual std::size_t GetSizeInBytes() const = 0; |     virtual std::size_t GetSizeInBytes() const = 0; | ||||||
|  | @ -58,6 +71,8 @@ private: | ||||||
|     bool is_registered{};      ///< Whether the object is currently registered with the cache
 |     bool is_registered{};      ///< Whether the object is currently registered with the cache
 | ||||||
|     bool is_dirty{};           ///< Whether the object is dirty (out of sync with guest memory)
 |     bool is_dirty{};           ///< Whether the object is dirty (out of sync with guest memory)
 | ||||||
|     u64 last_modified_ticks{}; ///< When the object was last modified, used for in-order flushing
 |     u64 last_modified_ticks{}; ///< When the object was last modified, used for in-order flushing
 | ||||||
|  |     CacheAddr cache_addr{};    ///< Cache address memory, unique from emulated virtual address space
 | ||||||
|  |     const u8* host_ptr{};      ///< Pointer to the memory backing this cached region
 | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| template <class T> | template <class T> | ||||||
|  | @ -68,7 +83,9 @@ public: | ||||||
|     explicit RasterizerCache(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {} |     explicit RasterizerCache(VideoCore::RasterizerInterface& rasterizer) : rasterizer{rasterizer} {} | ||||||
| 
 | 
 | ||||||
|     /// Write any cached resources overlapping the specified region back to memory
 |     /// Write any cached resources overlapping the specified region back to memory
 | ||||||
|     void FlushRegion(Tegra::GPUVAddr addr, size_t size) { |     void FlushRegion(CacheAddr addr, std::size_t size) { | ||||||
|  |         std::lock_guard<std::recursive_mutex> lock{mutex}; | ||||||
|  | 
 | ||||||
|         const auto& objects{GetSortedObjectsFromRegion(addr, size)}; |         const auto& objects{GetSortedObjectsFromRegion(addr, size)}; | ||||||
|         for (auto& object : objects) { |         for (auto& object : objects) { | ||||||
|             FlushObject(object); |             FlushObject(object); | ||||||
|  | @ -76,7 +93,9 @@ public: | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     /// Mark the specified region as being invalidated
 |     /// Mark the specified region as being invalidated
 | ||||||
|     void InvalidateRegion(VAddr addr, u64 size) { |     void InvalidateRegion(CacheAddr addr, u64 size) { | ||||||
|  |         std::lock_guard<std::recursive_mutex> lock{mutex}; | ||||||
|  | 
 | ||||||
|         const auto& objects{GetSortedObjectsFromRegion(addr, size)}; |         const auto& objects{GetSortedObjectsFromRegion(addr, size)}; | ||||||
|         for (auto& object : objects) { |         for (auto& object : objects) { | ||||||
|             if (!object->IsRegistered()) { |             if (!object->IsRegistered()) { | ||||||
|  | @ -89,48 +108,60 @@ public: | ||||||
| 
 | 
 | ||||||
|     /// Invalidates everything in the cache
 |     /// Invalidates everything in the cache
 | ||||||
|     void InvalidateAll() { |     void InvalidateAll() { | ||||||
|  |         std::lock_guard<std::recursive_mutex> lock{mutex}; | ||||||
|  | 
 | ||||||
|         while (interval_cache.begin() != interval_cache.end()) { |         while (interval_cache.begin() != interval_cache.end()) { | ||||||
|             Unregister(*interval_cache.begin()->second.begin()); |             Unregister(*interval_cache.begin()->second.begin()); | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
| protected: | protected: | ||||||
|     /// Tries to get an object from the cache with the specified address
 |     /// Tries to get an object from the cache with the specified cache address
 | ||||||
|     T TryGet(VAddr addr) const { |     T TryGet(CacheAddr addr) const { | ||||||
|         const auto iter = map_cache.find(addr); |         const auto iter = map_cache.find(addr); | ||||||
|         if (iter != map_cache.end()) |         if (iter != map_cache.end()) | ||||||
|             return iter->second; |             return iter->second; | ||||||
|         return nullptr; |         return nullptr; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  |     T TryGet(const void* addr) const { | ||||||
|  |         const auto iter = map_cache.find(ToCacheAddr(addr)); | ||||||
|  |         if (iter != map_cache.end()) | ||||||
|  |             return iter->second; | ||||||
|  |         return nullptr; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|     /// Register an object into the cache
 |     /// Register an object into the cache
 | ||||||
|     void Register(const T& object) { |     void Register(const T& object) { | ||||||
|  |         std::lock_guard<std::recursive_mutex> lock{mutex}; | ||||||
|  | 
 | ||||||
|         object->SetIsRegistered(true); |         object->SetIsRegistered(true); | ||||||
|         interval_cache.add({GetInterval(object), ObjectSet{object}}); |         interval_cache.add({GetInterval(object), ObjectSet{object}}); | ||||||
|         map_cache.insert({object->GetAddr(), object}); |         map_cache.insert({object->GetCacheAddr(), object}); | ||||||
|         rasterizer.UpdatePagesCachedCount(object->GetAddr(), object->GetSizeInBytes(), 1); |         rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), 1); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     /// Unregisters an object from the cache
 |     /// Unregisters an object from the cache
 | ||||||
|     void Unregister(const T& object) { |     void Unregister(const T& object) { | ||||||
|         object->SetIsRegistered(false); |         std::lock_guard<std::recursive_mutex> lock{mutex}; | ||||||
|         rasterizer.UpdatePagesCachedCount(object->GetAddr(), object->GetSizeInBytes(), -1); |  | ||||||
|         // Only flush if use_accurate_gpu_emulation is enabled, as it incurs a performance hit
 |  | ||||||
|         if (Settings::values.use_accurate_gpu_emulation) { |  | ||||||
|             FlushObject(object); |  | ||||||
|         } |  | ||||||
| 
 | 
 | ||||||
|  |         object->SetIsRegistered(false); | ||||||
|  |         rasterizer.UpdatePagesCachedCount(object->GetCpuAddr(), object->GetSizeInBytes(), -1); | ||||||
|         interval_cache.subtract({GetInterval(object), ObjectSet{object}}); |         interval_cache.subtract({GetInterval(object), ObjectSet{object}}); | ||||||
|         map_cache.erase(object->GetAddr()); |         map_cache.erase(object->GetCacheAddr()); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     /// Returns a ticks counter used for tracking when cached objects were last modified
 |     /// Returns a ticks counter used for tracking when cached objects were last modified
 | ||||||
|     u64 GetModifiedTicks() { |     u64 GetModifiedTicks() { | ||||||
|  |         std::lock_guard<std::recursive_mutex> lock{mutex}; | ||||||
|  | 
 | ||||||
|         return ++modified_ticks; |         return ++modified_ticks; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     /// Flushes the specified object, updating appropriate cache state as needed
 |     /// Flushes the specified object, updating appropriate cache state as needed
 | ||||||
|     void FlushObject(const T& object) { |     void FlushObject(const T& object) { | ||||||
|  |         std::lock_guard<std::recursive_mutex> lock{mutex}; | ||||||
|  | 
 | ||||||
|         if (!object->IsDirty()) { |         if (!object->IsDirty()) { | ||||||
|             return; |             return; | ||||||
|         } |         } | ||||||
|  | @ -140,7 +171,7 @@ protected: | ||||||
| 
 | 
 | ||||||
| private: | private: | ||||||
|     /// Returns a list of cached objects from the specified memory region, ordered by access time
 |     /// Returns a list of cached objects from the specified memory region, ordered by access time
 | ||||||
|     std::vector<T> GetSortedObjectsFromRegion(VAddr addr, u64 size) { |     std::vector<T> GetSortedObjectsFromRegion(CacheAddr addr, u64 size) { | ||||||
|         if (size == 0) { |         if (size == 0) { | ||||||
|             return {}; |             return {}; | ||||||
|         } |         } | ||||||
|  | @ -164,17 +195,18 @@ private: | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     using ObjectSet = std::set<T>; |     using ObjectSet = std::set<T>; | ||||||
|     using ObjectCache = std::unordered_map<VAddr, T>; |     using ObjectCache = std::unordered_map<CacheAddr, T>; | ||||||
|     using IntervalCache = boost::icl::interval_map<VAddr, ObjectSet>; |     using IntervalCache = boost::icl::interval_map<CacheAddr, ObjectSet>; | ||||||
|     using ObjectInterval = typename IntervalCache::interval_type; |     using ObjectInterval = typename IntervalCache::interval_type; | ||||||
| 
 | 
 | ||||||
|     static auto GetInterval(const T& object) { |     static auto GetInterval(const T& object) { | ||||||
|         return ObjectInterval::right_open(object->GetAddr(), |         return ObjectInterval::right_open(object->GetCacheAddr(), | ||||||
|                                           object->GetAddr() + object->GetSizeInBytes()); |                                           object->GetCacheAddr() + object->GetSizeInBytes()); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     ObjectCache map_cache; |     ObjectCache map_cache; | ||||||
|     IntervalCache interval_cache; ///< Cache of objects
 |     IntervalCache interval_cache; ///< Cache of objects
 | ||||||
|     u64 modified_ticks{};         ///< Counter of cache state ticks, used for in-order flushing
 |     u64 modified_ticks{};         ///< Counter of cache state ticks, used for in-order flushing
 | ||||||
|     VideoCore::RasterizerInterface& rasterizer; |     VideoCore::RasterizerInterface& rasterizer; | ||||||
|  |     std::recursive_mutex mutex; | ||||||
| }; | }; | ||||||
|  |  | ||||||
|  | @ -35,14 +35,14 @@ public: | ||||||
|     virtual void FlushAll() = 0; |     virtual void FlushAll() = 0; | ||||||
| 
 | 
 | ||||||
|     /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
 |     /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
 | ||||||
|     virtual void FlushRegion(VAddr addr, u64 size) = 0; |     virtual void FlushRegion(CacheAddr addr, u64 size) = 0; | ||||||
| 
 | 
 | ||||||
|     /// Notify rasterizer that any caches of the specified region should be invalidated
 |     /// Notify rasterizer that any caches of the specified region should be invalidated
 | ||||||
|     virtual void InvalidateRegion(VAddr addr, u64 size) = 0; |     virtual void InvalidateRegion(CacheAddr addr, u64 size) = 0; | ||||||
| 
 | 
 | ||||||
|     /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
 |     /// Notify rasterizer that any caches of the specified region should be flushed to Switch memory
 | ||||||
|     /// and invalidated
 |     /// and invalidated
 | ||||||
|     virtual void FlushAndInvalidateRegion(VAddr addr, u64 size) = 0; |     virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0; | ||||||
| 
 | 
 | ||||||
|     /// Attempt to use a faster method to perform a surface copy
 |     /// Attempt to use a faster method to perform a surface copy
 | ||||||
|     virtual bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, |     virtual bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, | ||||||
|  | @ -63,7 +63,7 @@ public: | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     /// Increase/decrease the number of object in pages touching the specified region
 |     /// Increase/decrease the number of object in pages touching the specified region
 | ||||||
|     virtual void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) {} |     virtual void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {} | ||||||
| 
 | 
 | ||||||
|     /// Initialize disk cached resources for the game being emulated
 |     /// Initialize disk cached resources for the game being emulated
 | ||||||
|     virtual void LoadDiskResources(const std::atomic_bool& stop_loading = false, |     virtual void LoadDiskResources(const std::atomic_bool& stop_loading = false, | ||||||
|  |  | ||||||
|  | @ -13,6 +13,11 @@ | ||||||
| 
 | 
 | ||||||
| namespace OpenGL { | namespace OpenGL { | ||||||
| 
 | 
 | ||||||
|  | CachedBufferEntry::CachedBufferEntry(VAddr cpu_addr, std::size_t size, GLintptr offset, | ||||||
|  |                                      std::size_t alignment, u8* host_ptr) | ||||||
|  |     : cpu_addr{cpu_addr}, size{size}, offset{offset}, alignment{alignment}, RasterizerCacheObject{ | ||||||
|  |                                                                                 host_ptr} {} | ||||||
|  | 
 | ||||||
| OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size) | OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size) | ||||||
|     : RasterizerCache{rasterizer}, stream_buffer(size, true) {} |     : RasterizerCache{rasterizer}, stream_buffer(size, true) {} | ||||||
| 
 | 
 | ||||||
|  | @ -26,11 +31,12 @@ GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size | ||||||
|     // TODO: Figure out which size is the best for given games.
 |     // TODO: Figure out which size is the best for given games.
 | ||||||
|     cache &= size >= 2048; |     cache &= size >= 2048; | ||||||
| 
 | 
 | ||||||
|  |     const auto& host_ptr{Memory::GetPointer(*cpu_addr)}; | ||||||
|     if (cache) { |     if (cache) { | ||||||
|         auto entry = TryGet(*cpu_addr); |         auto entry = TryGet(host_ptr); | ||||||
|         if (entry) { |         if (entry) { | ||||||
|             if (entry->size >= size && entry->alignment == alignment) { |             if (entry->GetSize() >= size && entry->GetAlignment() == alignment) { | ||||||
|                 return entry->offset; |                 return entry->GetOffset(); | ||||||
|             } |             } | ||||||
|             Unregister(entry); |             Unregister(entry); | ||||||
|         } |         } | ||||||
|  | @ -39,17 +45,17 @@ GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size | ||||||
|     AlignBuffer(alignment); |     AlignBuffer(alignment); | ||||||
|     const GLintptr uploaded_offset = buffer_offset; |     const GLintptr uploaded_offset = buffer_offset; | ||||||
| 
 | 
 | ||||||
|     Memory::ReadBlock(*cpu_addr, buffer_ptr, size); |     if (!host_ptr) { | ||||||
|  |         return uploaded_offset; | ||||||
|  |     } | ||||||
| 
 | 
 | ||||||
|  |     std::memcpy(buffer_ptr, host_ptr, size); | ||||||
|     buffer_ptr += size; |     buffer_ptr += size; | ||||||
|     buffer_offset += size; |     buffer_offset += size; | ||||||
| 
 | 
 | ||||||
|     if (cache) { |     if (cache) { | ||||||
|         auto entry = std::make_shared<CachedBufferEntry>(); |         auto entry = std::make_shared<CachedBufferEntry>(*cpu_addr, size, uploaded_offset, | ||||||
|         entry->offset = uploaded_offset; |                                                          alignment, host_ptr); | ||||||
|         entry->size = size; |  | ||||||
|         entry->alignment = alignment; |  | ||||||
|         entry->addr = *cpu_addr; |  | ||||||
|         Register(entry); |         Register(entry); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -17,22 +17,39 @@ namespace OpenGL { | ||||||
| 
 | 
 | ||||||
| class RasterizerOpenGL; | class RasterizerOpenGL; | ||||||
| 
 | 
 | ||||||
| struct CachedBufferEntry final : public RasterizerCacheObject { | class CachedBufferEntry final : public RasterizerCacheObject { | ||||||
|     VAddr GetAddr() const override { | public: | ||||||
|         return addr; |     explicit CachedBufferEntry(VAddr cpu_addr, std::size_t size, GLintptr offset, | ||||||
|  |                                std::size_t alignment, u8* host_ptr); | ||||||
|  | 
 | ||||||
|  |     VAddr GetCpuAddr() const override { | ||||||
|  |         return cpu_addr; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     std::size_t GetSizeInBytes() const override { |     std::size_t GetSizeInBytes() const override { | ||||||
|         return size; |         return size; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  |     std::size_t GetSize() const { | ||||||
|  |         return size; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     GLintptr GetOffset() const { | ||||||
|  |         return offset; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     std::size_t GetAlignment() const { | ||||||
|  |         return alignment; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|     // We do not have to flush this cache as things in it are never modified by us.
 |     // We do not have to flush this cache as things in it are never modified by us.
 | ||||||
|     void Flush() override {} |     void Flush() override {} | ||||||
| 
 | 
 | ||||||
|     VAddr addr; | private: | ||||||
|     std::size_t size; |     VAddr cpu_addr{}; | ||||||
|     GLintptr offset; |     std::size_t size{}; | ||||||
|     std::size_t alignment; |     GLintptr offset{}; | ||||||
|  |     std::size_t alignment{}; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| class OGLBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> { | class OGLBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> { | ||||||
|  |  | ||||||
|  | @ -15,12 +15,13 @@ | ||||||
| 
 | 
 | ||||||
| namespace OpenGL { | namespace OpenGL { | ||||||
| 
 | 
 | ||||||
| CachedGlobalRegion::CachedGlobalRegion(VAddr addr, u32 size) : addr{addr}, size{size} { | CachedGlobalRegion::CachedGlobalRegion(VAddr cpu_addr, u32 size, u8* host_ptr) | ||||||
|  |     : cpu_addr{cpu_addr}, size{size}, RasterizerCacheObject{host_ptr} { | ||||||
|     buffer.Create(); |     buffer.Create(); | ||||||
|     // Bind and unbind the buffer so it gets allocated by the driver
 |     // Bind and unbind the buffer so it gets allocated by the driver
 | ||||||
|     glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle); |     glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle); | ||||||
|     glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0); |     glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0); | ||||||
|     LabelGLObject(GL_BUFFER, buffer.handle, addr, "GlobalMemory"); |     LabelGLObject(GL_BUFFER, buffer.handle, cpu_addr, "GlobalMemory"); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void CachedGlobalRegion::Reload(u32 size_) { | void CachedGlobalRegion::Reload(u32 size_) { | ||||||
|  | @ -35,7 +36,7 @@ void CachedGlobalRegion::Reload(u32 size_) { | ||||||
| 
 | 
 | ||||||
|     // TODO(Rodrigo): Get rid of Memory::GetPointer with a staging buffer
 |     // TODO(Rodrigo): Get rid of Memory::GetPointer with a staging buffer
 | ||||||
|     glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle); |     glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer.handle); | ||||||
|     glBufferData(GL_SHADER_STORAGE_BUFFER, size, Memory::GetPointer(addr), GL_DYNAMIC_DRAW); |     glBufferData(GL_SHADER_STORAGE_BUFFER, size, GetHostPtr(), GL_DYNAMIC_DRAW); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(VAddr addr, u32 size) const { | GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(VAddr addr, u32 size) const { | ||||||
|  | @ -46,11 +47,11 @@ GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(VAddr addr, u32 | ||||||
|     return search->second; |     return search->second; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(VAddr addr, u32 size) { | GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(VAddr addr, u32 size, u8* host_ptr) { | ||||||
|     GlobalRegion region{TryGetReservedGlobalRegion(addr, size)}; |     GlobalRegion region{TryGetReservedGlobalRegion(addr, size)}; | ||||||
|     if (!region) { |     if (!region) { | ||||||
|         // No reserved surface available, create a new one and reserve it
 |         // No reserved surface available, create a new one and reserve it
 | ||||||
|         region = std::make_shared<CachedGlobalRegion>(addr, size); |         region = std::make_shared<CachedGlobalRegion>(addr, size, host_ptr); | ||||||
|         ReserveGlobalRegion(region); |         ReserveGlobalRegion(region); | ||||||
|     } |     } | ||||||
|     region->Reload(size); |     region->Reload(size); | ||||||
|  | @ -58,7 +59,7 @@ GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(VAddr addr, u32 si | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void GlobalRegionCacheOpenGL::ReserveGlobalRegion(GlobalRegion region) { | void GlobalRegionCacheOpenGL::ReserveGlobalRegion(GlobalRegion region) { | ||||||
|     reserve.insert_or_assign(region->GetAddr(), std::move(region)); |     reserve.insert_or_assign(region->GetCpuAddr(), std::move(region)); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| GlobalRegionCacheOpenGL::GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer) | GlobalRegionCacheOpenGL::GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer) | ||||||
|  | @ -80,11 +81,12 @@ GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion( | ||||||
|     ASSERT(actual_addr); |     ASSERT(actual_addr); | ||||||
| 
 | 
 | ||||||
|     // Look up global region in the cache based on address
 |     // Look up global region in the cache based on address
 | ||||||
|     GlobalRegion region = TryGet(*actual_addr); |     const auto& host_ptr{Memory::GetPointer(*actual_addr)}; | ||||||
|  |     GlobalRegion region{TryGet(host_ptr)}; | ||||||
| 
 | 
 | ||||||
|     if (!region) { |     if (!region) { | ||||||
|         // No global region found - create a new one
 |         // No global region found - create a new one
 | ||||||
|         region = GetUncachedGlobalRegion(*actual_addr, size); |         region = GetUncachedGlobalRegion(*actual_addr, size, host_ptr); | ||||||
|         Register(region); |         Register(region); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -27,14 +27,12 @@ using GlobalRegion = std::shared_ptr<CachedGlobalRegion>; | ||||||
| 
 | 
 | ||||||
| class CachedGlobalRegion final : public RasterizerCacheObject { | class CachedGlobalRegion final : public RasterizerCacheObject { | ||||||
| public: | public: | ||||||
|     explicit CachedGlobalRegion(VAddr addr, u32 size); |     explicit CachedGlobalRegion(VAddr cpu_addr, u32 size, u8* host_ptr); | ||||||
| 
 | 
 | ||||||
|     /// Gets the address of the shader in guest memory, required for cache management
 |     VAddr GetCpuAddr() const override { | ||||||
|     VAddr GetAddr() const override { |         return cpu_addr; | ||||||
|         return addr; |  | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     /// Gets the size of the shader in guest memory, required for cache management
 |  | ||||||
|     std::size_t GetSizeInBytes() const override { |     std::size_t GetSizeInBytes() const override { | ||||||
|         return size; |         return size; | ||||||
|     } |     } | ||||||
|  | @ -53,9 +51,8 @@ public: | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
| private: | private: | ||||||
|     VAddr addr{}; |     VAddr cpu_addr{}; | ||||||
|     u32 size{}; |     u32 size{}; | ||||||
| 
 |  | ||||||
|     OGLBuffer buffer; |     OGLBuffer buffer; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
|  | @ -69,7 +66,7 @@ public: | ||||||
| 
 | 
 | ||||||
| private: | private: | ||||||
|     GlobalRegion TryGetReservedGlobalRegion(VAddr addr, u32 size) const; |     GlobalRegion TryGetReservedGlobalRegion(VAddr addr, u32 size) const; | ||||||
|     GlobalRegion GetUncachedGlobalRegion(VAddr addr, u32 size); |     GlobalRegion GetUncachedGlobalRegion(VAddr addr, u32 size, u8* host_ptr); | ||||||
|     void ReserveGlobalRegion(GlobalRegion region); |     void ReserveGlobalRegion(GlobalRegion region); | ||||||
| 
 | 
 | ||||||
|     std::unordered_map<VAddr, GlobalRegion> reserve; |     std::unordered_map<VAddr, GlobalRegion> reserve; | ||||||
|  |  | ||||||
|  | @ -449,7 +449,7 @@ static constexpr auto RangeFromInterval(Map& map, const Interval& interval) { | ||||||
|     return boost::make_iterator_range(map.equal_range(interval)); |     return boost::make_iterator_range(map.equal_range(interval)); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void RasterizerOpenGL::UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) { | void RasterizerOpenGL::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) { | ||||||
|     const u64 page_start{addr >> Memory::PAGE_BITS}; |     const u64 page_start{addr >> Memory::PAGE_BITS}; | ||||||
|     const u64 page_end{(addr + size + Memory::PAGE_SIZE - 1) >> Memory::PAGE_BITS}; |     const u64 page_end{(addr + size + Memory::PAGE_SIZE - 1) >> Memory::PAGE_BITS}; | ||||||
| 
 | 
 | ||||||
|  | @ -747,12 +747,12 @@ void RasterizerOpenGL::DrawArrays() { | ||||||
| 
 | 
 | ||||||
| void RasterizerOpenGL::FlushAll() {} | void RasterizerOpenGL::FlushAll() {} | ||||||
| 
 | 
 | ||||||
| void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) { | void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) { | ||||||
|     MICROPROFILE_SCOPE(OpenGL_CacheManagement); |     MICROPROFILE_SCOPE(OpenGL_CacheManagement); | ||||||
|     res_cache.FlushRegion(addr, size); |     res_cache.FlushRegion(addr, size); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) { | void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) { | ||||||
|     MICROPROFILE_SCOPE(OpenGL_CacheManagement); |     MICROPROFILE_SCOPE(OpenGL_CacheManagement); | ||||||
|     res_cache.InvalidateRegion(addr, size); |     res_cache.InvalidateRegion(addr, size); | ||||||
|     shader_cache.InvalidateRegion(addr, size); |     shader_cache.InvalidateRegion(addr, size); | ||||||
|  | @ -760,7 +760,7 @@ void RasterizerOpenGL::InvalidateRegion(VAddr addr, u64 size) { | ||||||
|     buffer_cache.InvalidateRegion(addr, size); |     buffer_cache.InvalidateRegion(addr, size); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) { | void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { | ||||||
|     FlushRegion(addr, size); |     FlushRegion(addr, size); | ||||||
|     InvalidateRegion(addr, size); |     InvalidateRegion(addr, size); | ||||||
| } | } | ||||||
|  | @ -782,7 +782,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, | ||||||
| 
 | 
 | ||||||
|     MICROPROFILE_SCOPE(OpenGL_CacheManagement); |     MICROPROFILE_SCOPE(OpenGL_CacheManagement); | ||||||
| 
 | 
 | ||||||
|     const auto& surface{res_cache.TryFindFramebufferSurface(framebuffer_addr)}; |     const auto& surface{res_cache.TryFindFramebufferSurface(Memory::GetPointer(framebuffer_addr))}; | ||||||
|     if (!surface) { |     if (!surface) { | ||||||
|         return {}; |         return {}; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  | @ -57,9 +57,9 @@ public: | ||||||
|     void DrawArrays() override; |     void DrawArrays() override; | ||||||
|     void Clear() override; |     void Clear() override; | ||||||
|     void FlushAll() override; |     void FlushAll() override; | ||||||
|     void FlushRegion(VAddr addr, u64 size) override; |     void FlushRegion(CacheAddr addr, u64 size) override; | ||||||
|     void InvalidateRegion(VAddr addr, u64 size) override; |     void InvalidateRegion(CacheAddr addr, u64 size) override; | ||||||
|     void FlushAndInvalidateRegion(VAddr addr, u64 size) override; |     void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; | ||||||
|     bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, |     bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, | ||||||
|                                const Tegra::Engines::Fermi2D::Regs::Surface& dst, |                                const Tegra::Engines::Fermi2D::Regs::Surface& dst, | ||||||
|                                const Common::Rectangle<u32>& src_rect, |                                const Common::Rectangle<u32>& src_rect, | ||||||
|  | @ -67,7 +67,7 @@ public: | ||||||
|     bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, |     bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, | ||||||
|                            u32 pixel_stride) override; |                            u32 pixel_stride) override; | ||||||
|     bool AccelerateDrawBatch(bool is_indexed) override; |     bool AccelerateDrawBatch(bool is_indexed) override; | ||||||
|     void UpdatePagesCachedCount(Tegra::GPUVAddr addr, u64 size, int delta) override; |     void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) override; | ||||||
|     void LoadDiskResources(const std::atomic_bool& stop_loading, |     void LoadDiskResources(const std::atomic_bool& stop_loading, | ||||||
|                            const VideoCore::DiskResourceLoadCallback& callback) override; |                            const VideoCore::DiskResourceLoadCallback& callback) override; | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -61,6 +61,7 @@ void SurfaceParams::InitCacheParameters(Tegra::GPUVAddr gpu_addr_) { | ||||||
| 
 | 
 | ||||||
|     addr = cpu_addr ? *cpu_addr : 0; |     addr = cpu_addr ? *cpu_addr : 0; | ||||||
|     gpu_addr = gpu_addr_; |     gpu_addr = gpu_addr_; | ||||||
|  |     host_ptr = Memory::GetPointer(addr); | ||||||
|     size_in_bytes = SizeInBytesRaw(); |     size_in_bytes = SizeInBytesRaw(); | ||||||
| 
 | 
 | ||||||
|     if (IsPixelFormatASTC(pixel_format)) { |     if (IsPixelFormatASTC(pixel_format)) { | ||||||
|  | @ -563,8 +564,8 @@ void RasterizerCacheOpenGL::CopySurface(const Surface& src_surface, const Surfac | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| CachedSurface::CachedSurface(const SurfaceParams& params) | CachedSurface::CachedSurface(const SurfaceParams& params) | ||||||
|     : params(params), gl_target(SurfaceTargetToGL(params.target)), |     : params{params}, gl_target{SurfaceTargetToGL(params.target)}, | ||||||
|       cached_size_in_bytes(params.size_in_bytes) { |       cached_size_in_bytes{params.size_in_bytes}, RasterizerCacheObject{params.host_ptr} { | ||||||
|     texture.Create(gl_target); |     texture.Create(gl_target); | ||||||
| 
 | 
 | ||||||
|     // TODO(Rodrigo): Using params.GetRect() returns a different size than using its Mip*(0)
 |     // TODO(Rodrigo): Using params.GetRect() returns a different size than using its Mip*(0)
 | ||||||
|  | @ -633,10 +634,9 @@ void CachedSurface::LoadGLBuffer() { | ||||||
|         const u32 bpp = params.GetFormatBpp() / 8; |         const u32 bpp = params.GetFormatBpp() / 8; | ||||||
|         const u32 copy_size = params.width * bpp; |         const u32 copy_size = params.width * bpp; | ||||||
|         if (params.pitch == copy_size) { |         if (params.pitch == copy_size) { | ||||||
|             std::memcpy(gl_buffer[0].data(), Memory::GetPointer(params.addr), |             std::memcpy(gl_buffer[0].data(), params.host_ptr, params.size_in_bytes_gl); | ||||||
|                         params.size_in_bytes_gl); |  | ||||||
|         } else { |         } else { | ||||||
|             const u8* start = Memory::GetPointer(params.addr); |             const u8* start{params.host_ptr}; | ||||||
|             u8* write_to = gl_buffer[0].data(); |             u8* write_to = gl_buffer[0].data(); | ||||||
|             for (u32 h = params.height; h > 0; h--) { |             for (u32 h = params.height; h > 0; h--) { | ||||||
|                 std::memcpy(write_to, start, copy_size); |                 std::memcpy(write_to, start, copy_size); | ||||||
|  | @ -680,8 +680,6 @@ void CachedSurface::FlushGLBuffer() { | ||||||
|     glPixelStorei(GL_PACK_ROW_LENGTH, 0); |     glPixelStorei(GL_PACK_ROW_LENGTH, 0); | ||||||
|     Tegra::Texture::ConvertFromHostToGuest(gl_buffer[0].data(), params.pixel_format, params.width, |     Tegra::Texture::ConvertFromHostToGuest(gl_buffer[0].data(), params.pixel_format, params.width, | ||||||
|                                            params.height, params.depth, true, true); |                                            params.height, params.depth, true, true); | ||||||
|     const u8* const texture_src_data = Memory::GetPointer(params.addr); |  | ||||||
|     ASSERT(texture_src_data); |  | ||||||
|     if (params.is_tiled) { |     if (params.is_tiled) { | ||||||
|         ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}", |         ASSERT_MSG(params.block_width == 1, "Block width is defined as {} on texture type {}", | ||||||
|                    params.block_width, static_cast<u32>(params.target)); |                    params.block_width, static_cast<u32>(params.target)); | ||||||
|  | @ -691,9 +689,9 @@ void CachedSurface::FlushGLBuffer() { | ||||||
|         const u32 bpp = params.GetFormatBpp() / 8; |         const u32 bpp = params.GetFormatBpp() / 8; | ||||||
|         const u32 copy_size = params.width * bpp; |         const u32 copy_size = params.width * bpp; | ||||||
|         if (params.pitch == copy_size) { |         if (params.pitch == copy_size) { | ||||||
|             std::memcpy(Memory::GetPointer(params.addr), gl_buffer[0].data(), GetSizeInBytes()); |             std::memcpy(params.host_ptr, gl_buffer[0].data(), GetSizeInBytes()); | ||||||
|         } else { |         } else { | ||||||
|             u8* start = Memory::GetPointer(params.addr); |             u8* start{params.host_ptr}; | ||||||
|             const u8* read_to = gl_buffer[0].data(); |             const u8* read_to = gl_buffer[0].data(); | ||||||
|             for (u32 h = params.height; h > 0; h--) { |             for (u32 h = params.height; h > 0; h--) { | ||||||
|                 std::memcpy(start, read_to, copy_size); |                 std::memcpy(start, read_to, copy_size); | ||||||
|  | @ -932,7 +930,7 @@ Surface RasterizerCacheOpenGL::GetSurface(const SurfaceParams& params, bool pres | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     // Look up surface in the cache based on address
 |     // Look up surface in the cache based on address
 | ||||||
|     Surface surface{TryGet(params.addr)}; |     Surface surface{TryGet(params.host_ptr)}; | ||||||
|     if (surface) { |     if (surface) { | ||||||
|         if (surface->GetSurfaceParams().IsCompatibleSurface(params)) { |         if (surface->GetSurfaceParams().IsCompatibleSurface(params)) { | ||||||
|             // Use the cached surface as-is unless it's not synced with memory
 |             // Use the cached surface as-is unless it's not synced with memory
 | ||||||
|  | @ -986,7 +984,7 @@ void RasterizerCacheOpenGL::FastLayeredCopySurface(const Surface& src_surface, | ||||||
|     for (u32 layer = 0; layer < dst_params.depth; layer++) { |     for (u32 layer = 0; layer < dst_params.depth; layer++) { | ||||||
|         for (u32 mipmap = 0; mipmap < dst_params.max_mip_level; mipmap++) { |         for (u32 mipmap = 0; mipmap < dst_params.max_mip_level; mipmap++) { | ||||||
|             const VAddr sub_address = address + dst_params.GetMipmapLevelOffset(mipmap); |             const VAddr sub_address = address + dst_params.GetMipmapLevelOffset(mipmap); | ||||||
|             const Surface& copy = TryGet(sub_address); |             const Surface& copy = TryGet(Memory::GetPointer(sub_address)); | ||||||
|             if (!copy) |             if (!copy) | ||||||
|                 continue; |                 continue; | ||||||
|             const auto& src_params{copy->GetSurfaceParams()}; |             const auto& src_params{copy->GetSurfaceParams()}; | ||||||
|  | @ -1163,7 +1161,8 @@ void RasterizerCacheOpenGL::AccurateCopySurface(const Surface& src_surface, | ||||||
|     const auto& dst_params{dst_surface->GetSurfaceParams()}; |     const auto& dst_params{dst_surface->GetSurfaceParams()}; | ||||||
| 
 | 
 | ||||||
|     // Flush enough memory for both the source and destination surface
 |     // Flush enough memory for both the source and destination surface
 | ||||||
|     FlushRegion(src_params.addr, std::max(src_params.MemorySize(), dst_params.MemorySize())); |     FlushRegion(ToCacheAddr(src_params.host_ptr), | ||||||
|  |                 std::max(src_params.MemorySize(), dst_params.MemorySize())); | ||||||
| 
 | 
 | ||||||
|     LoadSurface(dst_surface); |     LoadSurface(dst_surface); | ||||||
| } | } | ||||||
|  | @ -1215,8 +1214,8 @@ Surface RasterizerCacheOpenGL::RecreateSurface(const Surface& old_surface, | ||||||
|     return new_surface; |     return new_surface; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| Surface RasterizerCacheOpenGL::TryFindFramebufferSurface(VAddr addr) const { | Surface RasterizerCacheOpenGL::TryFindFramebufferSurface(const u8* host_ptr) const { | ||||||
|     return TryGet(addr); |     return TryGet(host_ptr); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void RasterizerCacheOpenGL::ReserveSurface(const Surface& surface) { | void RasterizerCacheOpenGL::ReserveSurface(const Surface& surface) { | ||||||
|  | @ -1267,7 +1266,7 @@ static bool LayerFitReinterpretSurface(RasterizerCacheOpenGL& cache, const Surfa | ||||||
|             src_params.height == dst_params.MipHeight(*level) && |             src_params.height == dst_params.MipHeight(*level) && | ||||||
|             src_params.block_height >= dst_params.MipBlockHeight(*level)) { |             src_params.block_height >= dst_params.MipBlockHeight(*level)) { | ||||||
|             const std::optional<u32> slot = |             const std::optional<u32> slot = | ||||||
|                 TryFindBestLayer(render_surface->GetAddr(), dst_params, *level); |                 TryFindBestLayer(render_surface->GetCpuAddr(), dst_params, *level); | ||||||
|             if (slot.has_value()) { |             if (slot.has_value()) { | ||||||
|                 glCopyImageSubData(render_surface->Texture().handle, |                 glCopyImageSubData(render_surface->Texture().handle, | ||||||
|                                    SurfaceTargetToGL(src_params.target), 0, 0, 0, 0, |                                    SurfaceTargetToGL(src_params.target), 0, 0, 0, 0, | ||||||
|  | @ -1283,8 +1282,8 @@ static bool LayerFitReinterpretSurface(RasterizerCacheOpenGL& cache, const Surfa | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static bool IsReinterpretInvalid(const Surface render_surface, const Surface blitted_surface) { | static bool IsReinterpretInvalid(const Surface render_surface, const Surface blitted_surface) { | ||||||
|     const VAddr bound1 = blitted_surface->GetAddr() + blitted_surface->GetMemorySize(); |     const VAddr bound1 = blitted_surface->GetCpuAddr() + blitted_surface->GetMemorySize(); | ||||||
|     const VAddr bound2 = render_surface->GetAddr() + render_surface->GetMemorySize(); |     const VAddr bound2 = render_surface->GetCpuAddr() + render_surface->GetMemorySize(); | ||||||
|     if (bound2 > bound1) |     if (bound2 > bound1) | ||||||
|         return true; |         return true; | ||||||
|     const auto& dst_params = blitted_surface->GetSurfaceParams(); |     const auto& dst_params = blitted_surface->GetSurfaceParams(); | ||||||
|  | @ -1327,7 +1326,8 @@ void RasterizerCacheOpenGL::SignalPreDrawCall() { | ||||||
| void RasterizerCacheOpenGL::SignalPostDrawCall() { | void RasterizerCacheOpenGL::SignalPostDrawCall() { | ||||||
|     for (u32 i = 0; i < Maxwell::NumRenderTargets; i++) { |     for (u32 i = 0; i < Maxwell::NumRenderTargets; i++) { | ||||||
|         if (current_color_buffers[i] != nullptr) { |         if (current_color_buffers[i] != nullptr) { | ||||||
|             Surface intersect = CollideOnReinterpretedSurface(current_color_buffers[i]->GetAddr()); |             Surface intersect = | ||||||
|  |                 CollideOnReinterpretedSurface(current_color_buffers[i]->GetCacheAddr()); | ||||||
|             if (intersect != nullptr) { |             if (intersect != nullptr) { | ||||||
|                 PartialReinterpretSurface(current_color_buffers[i], intersect); |                 PartialReinterpretSurface(current_color_buffers[i], intersect); | ||||||
|                 texception = true; |                 texception = true; | ||||||
|  |  | ||||||
|  | @ -297,6 +297,7 @@ struct SurfaceParams { | ||||||
|     bool srgb_conversion; |     bool srgb_conversion; | ||||||
|     // Parameters used for caching
 |     // Parameters used for caching
 | ||||||
|     VAddr addr; |     VAddr addr; | ||||||
|  |     u8* host_ptr; | ||||||
|     Tegra::GPUVAddr gpu_addr; |     Tegra::GPUVAddr gpu_addr; | ||||||
|     std::size_t size_in_bytes; |     std::size_t size_in_bytes; | ||||||
|     std::size_t size_in_bytes_gl; |     std::size_t size_in_bytes_gl; | ||||||
|  | @ -345,9 +346,9 @@ class RasterizerOpenGL; | ||||||
| 
 | 
 | ||||||
| class CachedSurface final : public RasterizerCacheObject { | class CachedSurface final : public RasterizerCacheObject { | ||||||
| public: | public: | ||||||
|     CachedSurface(const SurfaceParams& params); |     explicit CachedSurface(const SurfaceParams& params); | ||||||
| 
 | 
 | ||||||
|     VAddr GetAddr() const override { |     VAddr GetCpuAddr() const override { | ||||||
|         return params.addr; |         return params.addr; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  | @ -449,7 +450,7 @@ public: | ||||||
|     Surface GetColorBufferSurface(std::size_t index, bool preserve_contents); |     Surface GetColorBufferSurface(std::size_t index, bool preserve_contents); | ||||||
| 
 | 
 | ||||||
|     /// Tries to find a framebuffer using on the provided CPU address
 |     /// Tries to find a framebuffer using on the provided CPU address
 | ||||||
|     Surface TryFindFramebufferSurface(VAddr addr) const; |     Surface TryFindFramebufferSurface(const u8* host_ptr) const; | ||||||
| 
 | 
 | ||||||
|     /// Copies the contents of one surface to another
 |     /// Copies the contents of one surface to another
 | ||||||
|     void FermiCopySurface(const Tegra::Engines::Fermi2D::Regs::Surface& src_config, |     void FermiCopySurface(const Tegra::Engines::Fermi2D::Regs::Surface& src_config, | ||||||
|  | @ -506,12 +507,12 @@ private: | ||||||
|     std::array<Surface, Maxwell::NumRenderTargets> current_color_buffers; |     std::array<Surface, Maxwell::NumRenderTargets> current_color_buffers; | ||||||
|     Surface last_depth_buffer; |     Surface last_depth_buffer; | ||||||
| 
 | 
 | ||||||
|     using SurfaceIntervalCache = boost::icl::interval_map<VAddr, Surface>; |     using SurfaceIntervalCache = boost::icl::interval_map<CacheAddr, Surface>; | ||||||
|     using SurfaceInterval = typename SurfaceIntervalCache::interval_type; |     using SurfaceInterval = typename SurfaceIntervalCache::interval_type; | ||||||
| 
 | 
 | ||||||
|     static auto GetReinterpretInterval(const Surface& object) { |     static auto GetReinterpretInterval(const Surface& object) { | ||||||
|         return SurfaceInterval::right_open(object->GetAddr() + 1, |         return SurfaceInterval::right_open(object->GetCacheAddr() + 1, | ||||||
|                                            object->GetAddr() + object->GetMemorySize() - 1); |                                            object->GetCacheAddr() + object->GetMemorySize() - 1); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     // Reinterpreted surfaces are very fragil as the game may keep rendering into them.
 |     // Reinterpreted surfaces are very fragil as the game may keep rendering into them.
 | ||||||
|  | @ -523,7 +524,7 @@ private: | ||||||
|         reinterpret_surface->MarkReinterpreted(); |         reinterpret_surface->MarkReinterpreted(); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     Surface CollideOnReinterpretedSurface(VAddr addr) const { |     Surface CollideOnReinterpretedSurface(CacheAddr addr) const { | ||||||
|         const SurfaceInterval interval{addr}; |         const SurfaceInterval interval{addr}; | ||||||
|         for (auto& pair : |         for (auto& pair : | ||||||
|              boost::make_iterator_range(reinterpreted_surfaces.equal_range(interval))) { |              boost::make_iterator_range(reinterpreted_surfaces.equal_range(interval))) { | ||||||
|  |  | ||||||
|  | @ -42,9 +42,9 @@ VAddr GetShaderAddress(Maxwell::ShaderProgram program) { | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| /// Gets the shader program code from memory for the specified address
 | /// Gets the shader program code from memory for the specified address
 | ||||||
| ProgramCode GetShaderCode(VAddr addr) { | ProgramCode GetShaderCode(const u8* host_ptr) { | ||||||
|     ProgramCode program_code(VideoCommon::Shader::MAX_PROGRAM_LENGTH); |     ProgramCode program_code(VideoCommon::Shader::MAX_PROGRAM_LENGTH); | ||||||
|     Memory::ReadBlock(addr, program_code.data(), program_code.size() * sizeof(u64)); |     std::memcpy(program_code.data(), host_ptr, program_code.size() * sizeof(u64)); | ||||||
|     return program_code; |     return program_code; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | @ -214,12 +214,13 @@ std::set<GLenum> GetSupportedFormats() { | ||||||
| 
 | 
 | ||||||
| } // namespace
 | } // namespace
 | ||||||
| 
 | 
 | ||||||
| CachedShader::CachedShader(VAddr addr, u64 unique_identifier, Maxwell::ShaderProgram program_type, | CachedShader::CachedShader(VAddr guest_addr, u64 unique_identifier, | ||||||
|                            ShaderDiskCacheOpenGL& disk_cache, |                            Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache, | ||||||
|                            const PrecompiledPrograms& precompiled_programs, |                            const PrecompiledPrograms& precompiled_programs, | ||||||
|                            ProgramCode&& program_code, ProgramCode&& program_code_b) |                            ProgramCode&& program_code, ProgramCode&& program_code_b, u8* host_ptr) | ||||||
|     : addr{addr}, unique_identifier{unique_identifier}, program_type{program_type}, |     : host_ptr{host_ptr}, guest_addr{guest_addr}, unique_identifier{unique_identifier}, | ||||||
|       disk_cache{disk_cache}, precompiled_programs{precompiled_programs} { |       program_type{program_type}, disk_cache{disk_cache}, | ||||||
|  |       precompiled_programs{precompiled_programs}, RasterizerCacheObject{host_ptr} { | ||||||
| 
 | 
 | ||||||
|     const std::size_t code_size = CalculateProgramSize(program_code); |     const std::size_t code_size = CalculateProgramSize(program_code); | ||||||
|     const std::size_t code_size_b = |     const std::size_t code_size_b = | ||||||
|  | @ -243,12 +244,13 @@ CachedShader::CachedShader(VAddr addr, u64 unique_identifier, Maxwell::ShaderPro | ||||||
|     disk_cache.SaveRaw(raw); |     disk_cache.SaveRaw(raw); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| CachedShader::CachedShader(VAddr addr, u64 unique_identifier, Maxwell::ShaderProgram program_type, | CachedShader::CachedShader(VAddr guest_addr, u64 unique_identifier, | ||||||
|                            ShaderDiskCacheOpenGL& disk_cache, |                            Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache, | ||||||
|                            const PrecompiledPrograms& precompiled_programs, |                            const PrecompiledPrograms& precompiled_programs, | ||||||
|                            GLShader::ProgramResult result) |                            GLShader::ProgramResult result, u8* host_ptr) | ||||||
|     : addr{addr}, unique_identifier{unique_identifier}, program_type{program_type}, |     : guest_addr{guest_addr}, unique_identifier{unique_identifier}, program_type{program_type}, | ||||||
|       disk_cache{disk_cache}, precompiled_programs{precompiled_programs} { |       disk_cache{disk_cache}, precompiled_programs{precompiled_programs}, RasterizerCacheObject{ | ||||||
|  |                                                                               host_ptr} { | ||||||
| 
 | 
 | ||||||
|     code = std::move(result.first); |     code = std::move(result.first); | ||||||
|     entries = result.second; |     entries = result.second; | ||||||
|  | @ -271,7 +273,7 @@ std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(GLenum primitive | ||||||
|                 disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings)); |                 disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings)); | ||||||
|             } |             } | ||||||
| 
 | 
 | ||||||
|             LabelGLObject(GL_PROGRAM, program->handle, addr); |             LabelGLObject(GL_PROGRAM, program->handle, guest_addr); | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|         handle = program->handle; |         handle = program->handle; | ||||||
|  | @ -323,7 +325,7 @@ GLuint CachedShader::LazyGeometryProgram(CachedProgram& target_program, BaseBind | ||||||
|         disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings)); |         disk_cache.SaveUsage(GetUsage(primitive_mode, base_bindings)); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     LabelGLObject(GL_PROGRAM, target_program->handle, addr, debug_name); |     LabelGLObject(GL_PROGRAM, target_program->handle, guest_addr, debug_name); | ||||||
| 
 | 
 | ||||||
|     return target_program->handle; |     return target_program->handle; | ||||||
| }; | }; | ||||||
|  | @ -489,14 +491,17 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { | ||||||
|     const VAddr program_addr{GetShaderAddress(program)}; |     const VAddr program_addr{GetShaderAddress(program)}; | ||||||
| 
 | 
 | ||||||
|     // Look up shader in the cache based on address
 |     // Look up shader in the cache based on address
 | ||||||
|     Shader shader{TryGet(program_addr)}; |     const auto& host_ptr{Memory::GetPointer(program_addr)}; | ||||||
|  |     Shader shader{TryGet(host_ptr)}; | ||||||
| 
 | 
 | ||||||
|     if (!shader) { |     if (!shader) { | ||||||
|         // No shader found - create a new one
 |         // No shader found - create a new one
 | ||||||
|         ProgramCode program_code = GetShaderCode(program_addr); |         const auto& host_ptr{Memory::GetPointer(program_addr)}; | ||||||
|  |         ProgramCode program_code{GetShaderCode(host_ptr)}; | ||||||
|         ProgramCode program_code_b; |         ProgramCode program_code_b; | ||||||
|         if (program == Maxwell::ShaderProgram::VertexA) { |         if (program == Maxwell::ShaderProgram::VertexA) { | ||||||
|             program_code_b = GetShaderCode(GetShaderAddress(Maxwell::ShaderProgram::VertexB)); |             program_code_b = GetShaderCode( | ||||||
|  |                 Memory::GetPointer(GetShaderAddress(Maxwell::ShaderProgram::VertexB))); | ||||||
|         } |         } | ||||||
|         const u64 unique_identifier = GetUniqueIdentifier(program, program_code, program_code_b); |         const u64 unique_identifier = GetUniqueIdentifier(program, program_code, program_code_b); | ||||||
| 
 | 
 | ||||||
|  | @ -504,11 +509,11 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { | ||||||
|         if (found != precompiled_shaders.end()) { |         if (found != precompiled_shaders.end()) { | ||||||
|             shader = |             shader = | ||||||
|                 std::make_shared<CachedShader>(program_addr, unique_identifier, program, disk_cache, |                 std::make_shared<CachedShader>(program_addr, unique_identifier, program, disk_cache, | ||||||
|                                                precompiled_programs, found->second); |                                                precompiled_programs, found->second, host_ptr); | ||||||
|         } else { |         } else { | ||||||
|             shader = std::make_shared<CachedShader>( |             shader = std::make_shared<CachedShader>( | ||||||
|                 program_addr, unique_identifier, program, disk_cache, precompiled_programs, |                 program_addr, unique_identifier, program, disk_cache, precompiled_programs, | ||||||
|                 std::move(program_code), std::move(program_code_b)); |                 std::move(program_code), std::move(program_code_b), host_ptr); | ||||||
|         } |         } | ||||||
|         Register(shader); |         Register(shader); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  | @ -39,18 +39,18 @@ using PrecompiledShaders = std::unordered_map<u64, GLShader::ProgramResult>; | ||||||
| 
 | 
 | ||||||
| class CachedShader final : public RasterizerCacheObject { | class CachedShader final : public RasterizerCacheObject { | ||||||
| public: | public: | ||||||
|     explicit CachedShader(VAddr addr, u64 unique_identifier, Maxwell::ShaderProgram program_type, |     explicit CachedShader(VAddr guest_addr, u64 unique_identifier, | ||||||
|                           ShaderDiskCacheOpenGL& disk_cache, |                           Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache, | ||||||
|                           const PrecompiledPrograms& precompiled_programs, |                           const PrecompiledPrograms& precompiled_programs, | ||||||
|                           ProgramCode&& program_code, ProgramCode&& program_code_b); |                           ProgramCode&& program_code, ProgramCode&& program_code_b, u8* host_ptr); | ||||||
| 
 | 
 | ||||||
|     explicit CachedShader(VAddr addr, u64 unique_identifier, Maxwell::ShaderProgram program_type, |     explicit CachedShader(VAddr guest_addr, u64 unique_identifier, | ||||||
|                           ShaderDiskCacheOpenGL& disk_cache, |                           Maxwell::ShaderProgram program_type, ShaderDiskCacheOpenGL& disk_cache, | ||||||
|                           const PrecompiledPrograms& precompiled_programs, |                           const PrecompiledPrograms& precompiled_programs, | ||||||
|                           GLShader::ProgramResult result); |                           GLShader::ProgramResult result, u8* host_ptr); | ||||||
| 
 | 
 | ||||||
|     VAddr GetAddr() const override { |     VAddr GetCpuAddr() const override { | ||||||
|         return addr; |         return guest_addr; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     std::size_t GetSizeInBytes() const override { |     std::size_t GetSizeInBytes() const override { | ||||||
|  | @ -91,7 +91,8 @@ private: | ||||||
| 
 | 
 | ||||||
|     ShaderDiskCacheUsage GetUsage(GLenum primitive_mode, BaseBindings base_bindings) const; |     ShaderDiskCacheUsage GetUsage(GLenum primitive_mode, BaseBindings base_bindings) const; | ||||||
| 
 | 
 | ||||||
|     VAddr addr{}; |     u8* host_ptr{}; | ||||||
|  |     VAddr guest_addr{}; | ||||||
|     u64 unique_identifier{}; |     u64 unique_identifier{}; | ||||||
|     Maxwell::ShaderProgram program_type{}; |     Maxwell::ShaderProgram program_type{}; | ||||||
|     ShaderDiskCacheOpenGL& disk_cache; |     ShaderDiskCacheOpenGL& disk_cache; | ||||||
|  |  | ||||||
|  | @ -17,6 +17,11 @@ | ||||||
| 
 | 
 | ||||||
| namespace Vulkan { | namespace Vulkan { | ||||||
| 
 | 
 | ||||||
|  | CachedBufferEntry::CachedBufferEntry(VAddr cpu_addr, std::size_t size, u64 offset, | ||||||
|  |                                      std::size_t alignment, u8* host_ptr) | ||||||
|  |     : cpu_addr{cpu_addr}, size{size}, offset{offset}, alignment{alignment}, RasterizerCacheObject{ | ||||||
|  |                                                                                 host_ptr} {} | ||||||
|  | 
 | ||||||
| VKBufferCache::VKBufferCache(Tegra::MemoryManager& tegra_memory_manager, | VKBufferCache::VKBufferCache(Tegra::MemoryManager& tegra_memory_manager, | ||||||
|                              VideoCore::RasterizerInterface& rasterizer, const VKDevice& device, |                              VideoCore::RasterizerInterface& rasterizer, const VKDevice& device, | ||||||
|                              VKMemoryManager& memory_manager, VKScheduler& scheduler, u64 size) |                              VKMemoryManager& memory_manager, VKScheduler& scheduler, u64 size) | ||||||
|  | @ -37,16 +42,18 @@ VKBufferCache::~VKBufferCache() = default; | ||||||
| u64 VKBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, u64 alignment, | u64 VKBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, u64 alignment, | ||||||
|                                 bool cache) { |                                 bool cache) { | ||||||
|     const auto cpu_addr{tegra_memory_manager.GpuToCpuAddress(gpu_addr)}; |     const auto cpu_addr{tegra_memory_manager.GpuToCpuAddress(gpu_addr)}; | ||||||
|     ASSERT(cpu_addr); |     ASSERT_MSG(cpu_addr, "Invalid GPU address"); | ||||||
| 
 | 
 | ||||||
|     // Cache management is a big overhead, so only cache entries with a given size.
 |     // Cache management is a big overhead, so only cache entries with a given size.
 | ||||||
|     // TODO: Figure out which size is the best for given games.
 |     // TODO: Figure out which size is the best for given games.
 | ||||||
|     cache &= size >= 2048; |     cache &= size >= 2048; | ||||||
| 
 | 
 | ||||||
|  |     const auto& host_ptr{Memory::GetPointer(*cpu_addr)}; | ||||||
|     if (cache) { |     if (cache) { | ||||||
|         if (auto entry = TryGet(*cpu_addr); entry) { |         auto entry = TryGet(host_ptr); | ||||||
|             if (entry->size >= size && entry->alignment == alignment) { |         if (entry) { | ||||||
|                 return entry->offset; |             if (entry->GetSize() >= size && entry->GetAlignment() == alignment) { | ||||||
|  |                 return entry->GetOffset(); | ||||||
|             } |             } | ||||||
|             Unregister(entry); |             Unregister(entry); | ||||||
|         } |         } | ||||||
|  | @ -55,17 +62,17 @@ u64 VKBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, u64 | ||||||
|     AlignBuffer(alignment); |     AlignBuffer(alignment); | ||||||
|     const u64 uploaded_offset = buffer_offset; |     const u64 uploaded_offset = buffer_offset; | ||||||
| 
 | 
 | ||||||
|     Memory::ReadBlock(*cpu_addr, buffer_ptr, size); |     if (!host_ptr) { | ||||||
|  |         return uploaded_offset; | ||||||
|  |     } | ||||||
| 
 | 
 | ||||||
|  |     std::memcpy(buffer_ptr, host_ptr, size); | ||||||
|     buffer_ptr += size; |     buffer_ptr += size; | ||||||
|     buffer_offset += size; |     buffer_offset += size; | ||||||
| 
 | 
 | ||||||
|     if (cache) { |     if (cache) { | ||||||
|         auto entry = std::make_shared<CachedBufferEntry>(); |         auto entry = std::make_shared<CachedBufferEntry>(*cpu_addr, size, uploaded_offset, | ||||||
|         entry->offset = uploaded_offset; |                                                          alignment, host_ptr); | ||||||
|         entry->size = size; |  | ||||||
|         entry->alignment = alignment; |  | ||||||
|         entry->addr = *cpu_addr; |  | ||||||
|         Register(entry); |         Register(entry); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -24,22 +24,39 @@ class VKFence; | ||||||
| class VKMemoryManager; | class VKMemoryManager; | ||||||
| class VKStreamBuffer; | class VKStreamBuffer; | ||||||
| 
 | 
 | ||||||
| struct CachedBufferEntry final : public RasterizerCacheObject { | class CachedBufferEntry final : public RasterizerCacheObject { | ||||||
|     VAddr GetAddr() const override { | public: | ||||||
|         return addr; |     explicit CachedBufferEntry(VAddr cpu_addr, std::size_t size, u64 offset, std::size_t alignment, | ||||||
|  |                                u8* host_ptr); | ||||||
|  | 
 | ||||||
|  |     VAddr GetCpuAddr() const override { | ||||||
|  |         return cpu_addr; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     std::size_t GetSizeInBytes() const override { |     std::size_t GetSizeInBytes() const override { | ||||||
|         return size; |         return size; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  |     std::size_t GetSize() const { | ||||||
|  |         return size; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     u64 GetOffset() const { | ||||||
|  |         return offset; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     std::size_t GetAlignment() const { | ||||||
|  |         return alignment; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|     // We do not have to flush this cache as things in it are never modified by us.
 |     // We do not have to flush this cache as things in it are never modified by us.
 | ||||||
|     void Flush() override {} |     void Flush() override {} | ||||||
| 
 | 
 | ||||||
|     VAddr addr; | private: | ||||||
|     std::size_t size; |     VAddr cpu_addr{}; | ||||||
|     u64 offset; |     std::size_t size{}; | ||||||
|     std::size_t alignment; |     u64 offset{}; | ||||||
|  |     std::size_t alignment{}; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| class VKBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> { | class VKBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> { | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 bunnei
						bunnei