forked from eden-emu/eden
		
	accelerateDMA: Accelerate Buffer Copies.
This commit is contained in:
		
							parent
							
								
									907b2324d3
								
							
						
					
					
						commit
						be1a3f7a0f
					
				
					 9 changed files with 176 additions and 13 deletions
				
			
		|  | @ -164,6 +164,8 @@ public: | ||||||
|     /// Pop asynchronous downloads
 |     /// Pop asynchronous downloads
 | ||||||
|     void PopAsyncFlushes(); |     void PopAsyncFlushes(); | ||||||
| 
 | 
 | ||||||
|  |     [[nodiscard]] bool DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount); | ||||||
|  | 
 | ||||||
|     /// Return true when a CPU region is modified from the GPU
 |     /// Return true when a CPU region is modified from the GPU
 | ||||||
|     [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); |     [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); | ||||||
| 
 | 
 | ||||||
|  | @ -430,6 +432,83 @@ void BufferCache<P>::DownloadMemory(VAddr cpu_addr, u64 size) { | ||||||
|     }); |     }); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
/// Tries to perform a buffer-to-buffer copy of `amount` bytes between two GPU virtual addresses
/// through the buffer cache.
/// @param src_address  GPU virtual address of the source range.
/// @param dest_address GPU virtual address of the destination range.
/// @param amount       Number of bytes to copy.
/// @return false (nothing copied) when either address has no CPU mapping or when neither range is
///         reported as GPU-modified; true once the copy has been issued.
|  | template <class P> | ||||||
|  | bool BufferCache<P>::DMACopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) { | ||||||
|  |     const std::optional<VAddr> cpu_src_address = gpu_memory.GpuToCpuAddress(src_address); | ||||||
|  |     const std::optional<VAddr> cpu_dest_address = gpu_memory.GpuToCpuAddress(dest_address); | ||||||
|  |     if (!cpu_src_address || !cpu_dest_address) { | ||||||
|  |         return false; | ||||||
|  |     } | ||||||
         // Only take this path when at least one side has GPU-modified data tracked by the cache.
|  |     const bool source_dirty = IsRegionGpuModified(*cpu_src_address, amount); | ||||||
|  |     const bool dest_dirty = IsRegionGpuModified(*cpu_dest_address, amount); | ||||||
|  |     if (!(source_dirty || dest_dirty)) { | ||||||
|  |         return false; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
         // The destination is about to be overwritten: drop its range from common_ranges. The parts
         // that are tracked on the source side are re-added by the mirroring loop below.
|  |     const IntervalType subtract_interval{*cpu_dest_address, *cpu_dest_address + amount}; | ||||||
|  |     common_ranges.subtract(subtract_interval); | ||||||
|  | 
 | ||||||
         // Look both buffers up again until a full pass leaves has_deleted_buffers false.
         // NOTE(review): presumably FindBuffer can delete buffers and thereby invalidate an id
         // obtained earlier in the same pass - confirm against FindBuffer.
         // Note that `amount` is narrowed from u64 to u32 for the lookup.
|  |     BufferId buffer_a; | ||||||
|  |     BufferId buffer_b; | ||||||
|  |     do { | ||||||
|  |         has_deleted_buffers = false; | ||||||
|  |         buffer_a = FindBuffer(*cpu_src_address, static_cast<u32>(amount)); | ||||||
|  |         buffer_b = FindBuffer(*cpu_dest_address, static_cast<u32>(amount)); | ||||||
|  |     } while (has_deleted_buffers); | ||||||
|  |     auto& src_buffer = slot_buffers[buffer_a]; | ||||||
|  |     auto& dest_buffer = slot_buffers[buffer_b]; | ||||||
|  |     SynchronizeBuffer(src_buffer, *cpu_src_address, amount); | ||||||
|  |     SynchronizeBuffer(dest_buffer, *cpu_dest_address, amount); | ||||||
         // A single copy region, expressed as offsets inside each cached buffer.
|  |     std::array copies{BufferCopy{ | ||||||
|  |         .src_offset = src_buffer.Offset(*cpu_src_address), | ||||||
|  |         .dst_offset = dest_buffer.Offset(*cpu_dest_address), | ||||||
|  |         .size = amount, | ||||||
|  |     }}; | ||||||
|  | 
 | ||||||
         // Re-bases a sub-range of the source onto the destination (same offset from the start of
         // the copy) and records it in common_ranges.
|  |     auto mirror = [&](VAddr base_address, u64 size) { | ||||||
|  |         VAddr diff = base_address - *cpu_src_address; | ||||||
|  |         VAddr new_base_address = *cpu_dest_address + diff; | ||||||
|  |         const IntervalType add_interval{new_base_address, new_base_address + size}; | ||||||
|  |         common_ranges.add(add_interval); | ||||||
|  |     }; | ||||||
|  | 
 | ||||||
         // Walk the intervals of common_ranges that overlap [start_address, end_address), clamp each
         // one to that window and mirror it onto the destination.
         // NOTE(review): search_interval is built with lower = start_address - amount and upper = 1,
         // which is an unusual interval and wraps when amount > start_address - confirm intent.
         // NOTE(review): mirror() adds to common_ranges while `it` is iterating over it - confirm
         // the container keeps `it` valid across add(), e.g. when the ranges are adjacent.
|  |     const VAddr start_address = *cpu_src_address; | ||||||
|  |     const VAddr end_address = start_address + amount; | ||||||
|  |     const IntervalType search_interval{start_address - amount, 1}; | ||||||
|  |     auto it = common_ranges.lower_bound(search_interval); | ||||||
|  |     if (it == common_ranges.end()) { | ||||||
|  |         it = common_ranges.begin(); | ||||||
|  |     } | ||||||
|  |     while (it != common_ranges.end()) { | ||||||
|  |         VAddr inter_addr_end = it->upper(); | ||||||
|  |         VAddr inter_addr = it->lower(); | ||||||
             // Intervals starting at or past the end of the source window end the walk.
|  |         if (inter_addr >= end_address) { | ||||||
|  |             break; | ||||||
|  |         } | ||||||
             // Intervals ending at or before the start of the source window are skipped.
|  |         if (inter_addr_end <= start_address) { | ||||||
|  |             it++; | ||||||
|  |             continue; | ||||||
|  |         } | ||||||
|  |         if (inter_addr_end > end_address) { | ||||||
|  |             inter_addr_end = end_address; | ||||||
|  |         } | ||||||
|  |         if (inter_addr < start_address) { | ||||||
|  |             inter_addr = start_address; | ||||||
|  |         } | ||||||
|  |         mirror(inter_addr, inter_addr_end - inter_addr); | ||||||
|  |         it++; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     runtime.CopyBuffer(dest_buffer, src_buffer, copies); | ||||||
         // The destination is marked GPU-modified only when the source was GPU-modified.
|  |     if (source_dirty) { | ||||||
|  |         dest_buffer.MarkRegionAsGpuModified(*cpu_dest_address, amount); | ||||||
|  |     } | ||||||
         // The same bytes are also copied through CPU memory via a temporary buffer.
|  |     std::vector<u8> tmp_buffer(amount); | ||||||
|  |     cpu_memory.ReadBlockUnsafe(*cpu_src_address, tmp_buffer.data(), amount); | ||||||
|  |     cpu_memory.WriteBlockUnsafe(*cpu_dest_address, tmp_buffer.data(), amount); | ||||||
|  |     return true; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| template <class P> | template <class P> | ||||||
| void BufferCache<P>::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, | void BufferCache<P>::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, | ||||||
|                                                u32 size) { |                                                u32 size) { | ||||||
|  | @ -951,7 +1030,7 @@ void BufferCache<P>::UpdateIndexBuffer() { | ||||||
|     const GPUVAddr gpu_addr_end = index_array.EndAddress(); |     const GPUVAddr gpu_addr_end = index_array.EndAddress(); | ||||||
|     const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr_begin); |     const std::optional<VAddr> cpu_addr = gpu_memory.GpuToCpuAddress(gpu_addr_begin); | ||||||
|     const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin); |     const u32 address_size = static_cast<u32>(gpu_addr_end - gpu_addr_begin); | ||||||
|     const u32 draw_size = index_array.count * index_array.FormatSizeInBytes(); |     const u32 draw_size = (index_array.count + index_array.first) * index_array.FormatSizeInBytes(); | ||||||
|     const u32 size = std::min(address_size, draw_size); |     const u32 size = std::min(address_size, draw_size); | ||||||
|     if (size == 0 || !cpu_addr) { |     if (size == 0 || !cpu_addr) { | ||||||
|         index_buffer = NULL_BINDING; |         index_buffer = NULL_BINDING; | ||||||
|  |  | ||||||
|  | @ -21,6 +21,10 @@ MaxwellDMA::MaxwellDMA(Core::System& system_, MemoryManager& memory_manager_) | ||||||
| 
 | 
 | ||||||
| MaxwellDMA::~MaxwellDMA() = default; | MaxwellDMA::~MaxwellDMA() = default; | ||||||
| 
 | 
 | ||||||
/// Stores the rasterizer pointer. CopyPitchToPitch dereferences it (without a null check) to reach
/// AccessAccelerateDMA().
|  | void MaxwellDMA::BindRasterizer(VideoCore::RasterizerInterface* rasterizer_) { | ||||||
|  |     rasterizer = rasterizer_; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| void MaxwellDMA::CallMethod(u32 method, u32 method_argument, bool is_last_call) { | void MaxwellDMA::CallMethod(u32 method, u32 method_argument, bool is_last_call) { | ||||||
|     ASSERT_MSG(method < NUM_REGS, "Invalid MaxwellDMA register"); |     ASSERT_MSG(method < NUM_REGS, "Invalid MaxwellDMA register"); | ||||||
| 
 | 
 | ||||||
|  | @ -44,7 +48,6 @@ void MaxwellDMA::Launch() { | ||||||
| 
 | 
 | ||||||
|     // TODO(Subv): Perform more research and implement all features of this engine.
 |     // TODO(Subv): Perform more research and implement all features of this engine.
 | ||||||
|     const LaunchDMA& launch = regs.launch_dma; |     const LaunchDMA& launch = regs.launch_dma; | ||||||
|     ASSERT(launch.remap_enable == 0); |  | ||||||
|     ASSERT(launch.semaphore_type == LaunchDMA::SemaphoreType::NONE); |     ASSERT(launch.semaphore_type == LaunchDMA::SemaphoreType::NONE); | ||||||
|     ASSERT(launch.interrupt_type == LaunchDMA::InterruptType::NONE); |     ASSERT(launch.interrupt_type == LaunchDMA::InterruptType::NONE); | ||||||
|     ASSERT(launch.data_transfer_type == LaunchDMA::DataTransferType::NON_PIPELINED); |     ASSERT(launch.data_transfer_type == LaunchDMA::DataTransferType::NON_PIPELINED); | ||||||
|  | @ -77,10 +80,28 @@ void MaxwellDMA::CopyPitchToPitch() { | ||||||
|     // When `multi_line_enable` bit is disabled the copy is performed as if we were copying a 1D
 |     // When `multi_line_enable` bit is disabled the copy is performed as if we were copying a 1D
 | ||||||
|     // buffer of length `line_length_in`.
 |     // buffer of length `line_length_in`.
 | ||||||
|     // Otherwise we copy a 2D image of dimensions (line_length_in, line_count).
 |     // Otherwise we copy a 2D image of dimensions (line_length_in, line_count).
 | ||||||
|  |     auto& accelerate = rasterizer->AccessAccelerateDMA(); | ||||||
|     if (!regs.launch_dma.multi_line_enable) { |     if (!regs.launch_dma.multi_line_enable) { | ||||||
|         memory_manager.CopyBlock(regs.offset_out, regs.offset_in, regs.line_length_in); |         const bool is_buffer_clear = regs.launch_dma.remap_enable != 0 && | ||||||
|  |                                      regs.remap_const.dst_x == RemapConst::Swizzle::CONST_A; | ||||||
|  |         // TODO: allow multisized components.
 | ||||||
|  |         if (is_buffer_clear) { | ||||||
|  |             ASSERT(regs.remap_const.component_size_minus_one == 3); | ||||||
|  |             std::vector<u32> tmp_buffer(regs.line_length_in, regs.remap_consta_value); | ||||||
|  |             memory_manager.WriteBlock(regs.offset_out, reinterpret_cast<u8*>(tmp_buffer.data()), | ||||||
|  |                                       regs.line_length_in * sizeof(u32)); | ||||||
|             return; |             return; | ||||||
|         } |         } | ||||||
|  |         UNIMPLEMENTED_IF(regs.launch_dma.remap_enable != 0); | ||||||
|  |         if (!accelerate.BufferCopy(regs.offset_in, regs.offset_out, regs.line_length_in)) { | ||||||
|  |             std::vector<u8> tmp_buffer(regs.line_length_in); | ||||||
|  |             memory_manager.ReadBlockUnsafe(regs.offset_in, tmp_buffer.data(), regs.line_length_in); | ||||||
|  |             memory_manager.WriteBlock(regs.offset_out, tmp_buffer.data(), regs.line_length_in); | ||||||
|  |         } | ||||||
|  |         return; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     UNIMPLEMENTED_IF(regs.launch_dma.remap_enable != 0); | ||||||
| 
 | 
 | ||||||
|     // Perform a line-by-line copy.
 |     // Perform a line-by-line copy.
 | ||||||
|     // We're going to take a subrect of size (line_length_in, line_count) from the source rectangle.
 |     // We're going to take a subrect of size (line_length_in, line_count) from the source rectangle.
 | ||||||
|  | @ -105,6 +126,7 @@ void MaxwellDMA::CopyBlockLinearToPitch() { | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     // Deswizzle the input and copy it over.
 |     // Deswizzle the input and copy it over.
 | ||||||
|  |     UNIMPLEMENTED_IF(regs.launch_dma.remap_enable != 0); | ||||||
|     const u32 bytes_per_pixel = regs.pitch_out / regs.line_length_in; |     const u32 bytes_per_pixel = regs.pitch_out / regs.line_length_in; | ||||||
|     const Parameters& src_params = regs.src_params; |     const Parameters& src_params = regs.src_params; | ||||||
|     const u32 width = src_params.width; |     const u32 width = src_params.width; | ||||||
|  | @ -134,6 +156,7 @@ void MaxwellDMA::CopyBlockLinearToPitch() { | ||||||
| 
 | 
 | ||||||
| void MaxwellDMA::CopyPitchToBlockLinear() { | void MaxwellDMA::CopyPitchToBlockLinear() { | ||||||
|     UNIMPLEMENTED_IF_MSG(regs.dst_params.block_size.width != 0, "Block width is not one"); |     UNIMPLEMENTED_IF_MSG(regs.dst_params.block_size.width != 0, "Block width is not one"); | ||||||
|  |     UNIMPLEMENTED_IF(regs.launch_dma.remap_enable != 0); | ||||||
| 
 | 
 | ||||||
|     const auto& dst_params = regs.dst_params; |     const auto& dst_params = regs.dst_params; | ||||||
|     const u32 bytes_per_pixel = regs.pitch_in / regs.line_length_in; |     const u32 bytes_per_pixel = regs.pitch_in / regs.line_length_in; | ||||||
|  | @ -156,13 +179,8 @@ void MaxwellDMA::CopyPitchToBlockLinear() { | ||||||
|         write_buffer.resize(dst_size); |         write_buffer.resize(dst_size); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     if (Settings::IsGPULevelExtreme()) { |  | ||||||
|     memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size); |     memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size); | ||||||
|     memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size); |     memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size); | ||||||
|     } else { |  | ||||||
|         memory_manager.ReadBlockUnsafe(regs.offset_in, read_buffer.data(), src_size); |  | ||||||
|         memory_manager.ReadBlockUnsafe(regs.offset_out, write_buffer.data(), dst_size); |  | ||||||
|     } |  | ||||||
| 
 | 
 | ||||||
|     // If the input is linear and the output is tiled, swizzle the input and copy it over.
 |     // If the input is linear and the output is tiled, swizzle the input and copy it over.
 | ||||||
|     if (regs.dst_params.block_size.depth > 0) { |     if (regs.dst_params.block_size.depth > 0) { | ||||||
|  |  | ||||||
|  | @ -21,8 +21,18 @@ namespace Tegra { | ||||||
| class MemoryManager; | class MemoryManager; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | namespace VideoCore { | ||||||
|  | class RasterizerInterface; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| namespace Tegra::Engines { | namespace Tegra::Engines { | ||||||
| 
 | 
 | ||||||
/// Interface the DMA engine uses to ask the renderer to perform a buffer copy on its behalf.
|  | class AccelerateDMAInterface { | ||||||
|  | public: | ||||||
|  |     /// Try to copy `amount` bytes from src_address to dest_address; returns true if the copy was performed.
 | ||||||
|  |     virtual bool BufferCopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) = 0; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
| /**
 | /**
 | ||||||
|  * This engine is known as gk104_copy. Documentation can be found in: |  * This engine is known as gk104_copy. Documentation can be found in: | ||||||
|  * https://github.com/NVIDIA/open-gpu-doc/blob/master/classes/dma-copy/clb0b5.h
 |  * https://github.com/NVIDIA/open-gpu-doc/blob/master/classes/dma-copy/clb0b5.h
 | ||||||
|  | @ -187,6 +197,8 @@ public: | ||||||
|     }; |     }; | ||||||
|     static_assert(sizeof(RemapConst) == 12); |     static_assert(sizeof(RemapConst) == 12); | ||||||
| 
 | 
 | ||||||
|  |     void BindRasterizer(VideoCore::RasterizerInterface* rasterizer); | ||||||
|  | 
 | ||||||
|     explicit MaxwellDMA(Core::System& system_, MemoryManager& memory_manager_); |     explicit MaxwellDMA(Core::System& system_, MemoryManager& memory_manager_); | ||||||
|     ~MaxwellDMA() override; |     ~MaxwellDMA() override; | ||||||
| 
 | 
 | ||||||
|  | @ -213,6 +225,7 @@ private: | ||||||
|     Core::System& system; |     Core::System& system; | ||||||
| 
 | 
 | ||||||
|     MemoryManager& memory_manager; |     MemoryManager& memory_manager; | ||||||
|  |     VideoCore::RasterizerInterface* rasterizer; | ||||||
| 
 | 
 | ||||||
|     std::vector<u8> read_buffer; |     std::vector<u8> read_buffer; | ||||||
|     std::vector<u8> write_buffer; |     std::vector<u8> write_buffer; | ||||||
|  | @ -240,7 +253,9 @@ private: | ||||||
|                 u32 pitch_out; |                 u32 pitch_out; | ||||||
|                 u32 line_length_in; |                 u32 line_length_in; | ||||||
|                 u32 line_count; |                 u32 line_count; | ||||||
|                 u32 reserved06[0xb8]; |                 u32 reserved06[0xb6]; | ||||||
|  |                 u32 remap_consta_value; | ||||||
|  |                 u32 remap_constb_value; | ||||||
|                 RemapConst remap_const; |                 RemapConst remap_const; | ||||||
|                 Parameters dst_params; |                 Parameters dst_params; | ||||||
|                 u32 reserved07[0x1]; |                 u32 reserved07[0x1]; | ||||||
|  |  | ||||||
|  | @ -50,6 +50,7 @@ void GPU::BindRenderer(std::unique_ptr<VideoCore::RendererBase> renderer_) { | ||||||
|     maxwell_3d->BindRasterizer(rasterizer); |     maxwell_3d->BindRasterizer(rasterizer); | ||||||
|     fermi_2d->BindRasterizer(rasterizer); |     fermi_2d->BindRasterizer(rasterizer); | ||||||
|     kepler_compute->BindRasterizer(rasterizer); |     kepler_compute->BindRasterizer(rasterizer); | ||||||
|  |     maxwell_dma->BindRasterizer(rasterizer); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| Engines::Maxwell3D& GPU::Maxwell3D() { | Engines::Maxwell3D& GPU::Maxwell3D() { | ||||||
|  |  | ||||||
|  | @ -10,6 +10,7 @@ | ||||||
| #include <stop_token> | #include <stop_token> | ||||||
| #include "common/common_types.h" | #include "common/common_types.h" | ||||||
| #include "video_core/engines/fermi_2d.h" | #include "video_core/engines/fermi_2d.h" | ||||||
|  | #include "video_core/engines/maxwell_dma.h" | ||||||
| #include "video_core/gpu.h" | #include "video_core/gpu.h" | ||||||
| #include "video_core/guest_driver.h" | #include "video_core/guest_driver.h" | ||||||
| 
 | 
 | ||||||
|  | @ -119,6 +120,8 @@ public: | ||||||
|         return false; |         return false; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  |     [[nodiscard]] virtual Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() = 0; | ||||||
|  | 
 | ||||||
|     /// Attempt to use a faster method to display the framebuffer to screen
 |     /// Attempt to use a faster method to display the framebuffer to screen
 | ||||||
|     [[nodiscard]] virtual bool AccelerateDisplay(const Tegra::FramebufferConfig& config, |     [[nodiscard]] virtual bool AccelerateDisplay(const Tegra::FramebufferConfig& config, | ||||||
|                                                  VAddr framebuffer_addr, u32 pixel_stride) { |                                                  VAddr framebuffer_addr, u32 pixel_stride) { | ||||||
|  |  | ||||||
|  | @ -171,7 +171,7 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra | ||||||
|       buffer_cache_runtime(device), |       buffer_cache_runtime(device), | ||||||
|       buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime), |       buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime), | ||||||
|       shader_cache(*this, emu_window_, gpu, maxwell3d, kepler_compute, gpu_memory, device), |       shader_cache(*this, emu_window_, gpu, maxwell3d, kepler_compute, gpu_memory, device), | ||||||
|       query_cache(*this, maxwell3d, gpu_memory), |       query_cache(*this, maxwell3d, gpu_memory), accelerate_dma(buffer_cache), | ||||||
|       fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache), |       fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache), | ||||||
|       async_shaders(emu_window_) { |       async_shaders(emu_window_) { | ||||||
|     if (device.UseAsynchronousShaders()) { |     if (device.UseAsynchronousShaders()) { | ||||||
|  | @ -701,6 +701,10 @@ bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surf | ||||||
|     return true; |     return true; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
/// Returns this rasterizer's accelerate_dma member through the generic DMA acceleration interface.
|  | Tegra::Engines::AccelerateDMAInterface& RasterizerOpenGL::AccessAccelerateDMA() { | ||||||
|  |     return accelerate_dma; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, | bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, | ||||||
|                                          VAddr framebuffer_addr, u32 pixel_stride) { |                                          VAddr framebuffer_addr, u32 pixel_stride) { | ||||||
|     if (framebuffer_addr == 0) { |     if (framebuffer_addr == 0) { | ||||||
|  | @ -1396,4 +1400,11 @@ void RasterizerOpenGL::EndTransformFeedback() { | ||||||
|     glEndTransformFeedback(); |     glEndTransformFeedback(); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
/// Binds the accelerator to the buffer cache it forwards copies to (held by reference).
|  | AccelerateDMA::AccelerateDMA(BufferCache& buffer_cache_) : buffer_cache{buffer_cache_} {} | ||||||
|  | 
 | ||||||
/// Forwards the copy to BufferCache::DMACopy while holding buffer_cache.mutex; the result of
/// DMACopy is returned unchanged.
|  | bool AccelerateDMA::BufferCopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) { | ||||||
|  |     std::scoped_lock lock{buffer_cache.mutex}; | ||||||
|  |     return buffer_cache.DMACopy(src_address, dest_address, amount); | ||||||
|  | } | ||||||
|  | 
 | ||||||
| } // namespace OpenGL
 | } // namespace OpenGL
 | ||||||
|  |  | ||||||
|  | @ -19,6 +19,7 @@ | ||||||
| #include "common/common_types.h" | #include "common/common_types.h" | ||||||
| #include "video_core/engines/const_buffer_info.h" | #include "video_core/engines/const_buffer_info.h" | ||||||
| #include "video_core/engines/maxwell_3d.h" | #include "video_core/engines/maxwell_3d.h" | ||||||
|  | #include "video_core/engines/maxwell_dma.h" | ||||||
| #include "video_core/rasterizer_accelerated.h" | #include "video_core/rasterizer_accelerated.h" | ||||||
| #include "video_core/rasterizer_interface.h" | #include "video_core/rasterizer_interface.h" | ||||||
| #include "video_core/renderer_opengl/gl_buffer_cache.h" | #include "video_core/renderer_opengl/gl_buffer_cache.h" | ||||||
|  | @ -58,6 +59,16 @@ struct BindlessSSBO { | ||||||
| }; | }; | ||||||
| static_assert(sizeof(BindlessSSBO) * CHAR_BIT == 128); | static_assert(sizeof(BindlessSSBO) * CHAR_BIT == 128); | ||||||
| 
 | 
 | ||||||
/// OpenGL implementation of Tegra::Engines::AccelerateDMAInterface; it holds a reference to the
/// rasterizer's BufferCache.
|  | class AccelerateDMA : public Tegra::Engines::AccelerateDMAInterface { | ||||||
|  | public: | ||||||
|  |     explicit AccelerateDMA(BufferCache& buffer_cache); | ||||||
|  | 
 | ||||||
|  |     bool BufferCopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) override; | ||||||
|  | 
 | ||||||
|  | private: | ||||||
|  |     BufferCache& buffer_cache; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
| class RasterizerOpenGL : public VideoCore::RasterizerAccelerated { | class RasterizerOpenGL : public VideoCore::RasterizerAccelerated { | ||||||
| public: | public: | ||||||
|     explicit RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, |     explicit RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, | ||||||
|  | @ -94,6 +105,7 @@ public: | ||||||
|     bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src, |     bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src, | ||||||
|                                const Tegra::Engines::Fermi2D::Surface& dst, |                                const Tegra::Engines::Fermi2D::Surface& dst, | ||||||
|                                const Tegra::Engines::Fermi2D::Config& copy_config) override; |                                const Tegra::Engines::Fermi2D::Config& copy_config) override; | ||||||
|  |     Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override; | ||||||
|     bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, |     bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, | ||||||
|                            u32 pixel_stride) override; |                            u32 pixel_stride) override; | ||||||
|     void LoadDiskResources(u64 title_id, std::stop_token stop_loading, |     void LoadDiskResources(u64 title_id, std::stop_token stop_loading, | ||||||
|  | @ -234,6 +246,7 @@ private: | ||||||
|     BufferCache buffer_cache; |     BufferCache buffer_cache; | ||||||
|     ShaderCacheOpenGL shader_cache; |     ShaderCacheOpenGL shader_cache; | ||||||
|     QueryCache query_cache; |     QueryCache query_cache; | ||||||
|  |     AccelerateDMA accelerate_dma; | ||||||
|     FenceManagerOpenGL fence_manager; |     FenceManagerOpenGL fence_manager; | ||||||
| 
 | 
 | ||||||
|     VideoCommon::Shader::AsyncShaders async_shaders; |     VideoCommon::Shader::AsyncShaders async_shaders; | ||||||
|  |  | ||||||
|  | @ -251,7 +251,7 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra | ||||||
|       buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime), |       buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime), | ||||||
|       pipeline_cache(*this, gpu, maxwell3d, kepler_compute, gpu_memory, device, scheduler, |       pipeline_cache(*this, gpu, maxwell3d, kepler_compute, gpu_memory, device, scheduler, | ||||||
|                      descriptor_pool, update_descriptor_queue), |                      descriptor_pool, update_descriptor_queue), | ||||||
|       query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, |       query_cache{*this, maxwell3d, gpu_memory, device, scheduler}, accelerate_dma{buffer_cache}, | ||||||
|       fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler), |       fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler), | ||||||
|       wfi_event(device.GetLogical().CreateEvent()), async_shaders(emu_window_) { |       wfi_event(device.GetLogical().CreateEvent()), async_shaders(emu_window_) { | ||||||
|     scheduler.SetQueryCache(query_cache); |     scheduler.SetQueryCache(query_cache); | ||||||
|  | @ -660,6 +660,10 @@ bool RasterizerVulkan::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surf | ||||||
|     return true; |     return true; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
/// Returns this rasterizer's accelerate_dma member through the generic DMA acceleration interface.
|  | Tegra::Engines::AccelerateDMAInterface& RasterizerVulkan::AccessAccelerateDMA() { | ||||||
|  |     return accelerate_dma; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config, | bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config, | ||||||
|                                          VAddr framebuffer_addr, u32 pixel_stride) { |                                          VAddr framebuffer_addr, u32 pixel_stride) { | ||||||
|     if (!framebuffer_addr) { |     if (!framebuffer_addr) { | ||||||
|  | @ -698,6 +702,13 @@ void RasterizerVulkan::FlushWork() { | ||||||
|     draw_counter = 0; |     draw_counter = 0; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
/// Binds the accelerator to the buffer cache it forwards copies to (held by reference).
|  | AccelerateDMA::AccelerateDMA(BufferCache& buffer_cache_) : buffer_cache{buffer_cache_} {} | ||||||
|  | 
 | ||||||
/// Forwards the copy to BufferCache::DMACopy while holding buffer_cache.mutex; the result of
/// DMACopy is returned unchanged.
|  | bool AccelerateDMA::BufferCopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) { | ||||||
|  |     std::scoped_lock lock{buffer_cache.mutex}; | ||||||
|  |     return buffer_cache.DMACopy(src_address, dest_address, amount); | ||||||
|  | } | ||||||
|  | 
 | ||||||
| void RasterizerVulkan::SetupShaderDescriptors( | void RasterizerVulkan::SetupShaderDescriptors( | ||||||
|     const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders, bool is_indexed) { |     const std::array<Shader*, Maxwell::MaxShaderProgram>& shaders, bool is_indexed) { | ||||||
|     image_view_indices.clear(); |     image_view_indices.clear(); | ||||||
|  |  | ||||||
|  | @ -49,6 +49,16 @@ struct VKScreenInfo; | ||||||
| 
 | 
 | ||||||
| class StateTracker; | class StateTracker; | ||||||
| 
 | 
 | ||||||
/// Vulkan implementation of Tegra::Engines::AccelerateDMAInterface; it holds a reference to the
/// rasterizer's BufferCache. BufferCopy takes a source and a destination GPU address plus a byte
/// count, matching the interface and the out-of-line definition.
|  | class AccelerateDMA : public Tegra::Engines::AccelerateDMAInterface { | ||||||
|  | public: | ||||||
|  |     explicit AccelerateDMA(BufferCache& buffer_cache); | ||||||
|  | 
 | ||||||
|  |     bool BufferCopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) override; | ||||||
|  | 
 | ||||||
|  | private: | ||||||
|  |     BufferCache& buffer_cache; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
| class RasterizerVulkan final : public VideoCore::RasterizerAccelerated { | class RasterizerVulkan final : public VideoCore::RasterizerAccelerated { | ||||||
| public: | public: | ||||||
|     explicit RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, |     explicit RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra::GPU& gpu_, | ||||||
|  | @ -86,6 +96,7 @@ public: | ||||||
|     bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src, |     bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Surface& src, | ||||||
|                                const Tegra::Engines::Fermi2D::Surface& dst, |                                const Tegra::Engines::Fermi2D::Surface& dst, | ||||||
|                                const Tegra::Engines::Fermi2D::Config& copy_config) override; |                                const Tegra::Engines::Fermi2D::Config& copy_config) override; | ||||||
|  |     Tegra::Engines::AccelerateDMAInterface& AccessAccelerateDMA() override; | ||||||
|     bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, |     bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, | ||||||
|                            u32 pixel_stride) override; |                            u32 pixel_stride) override; | ||||||
| 
 | 
 | ||||||
|  | @ -186,6 +197,7 @@ private: | ||||||
|     BufferCache buffer_cache; |     BufferCache buffer_cache; | ||||||
|     VKPipelineCache pipeline_cache; |     VKPipelineCache pipeline_cache; | ||||||
|     VKQueryCache query_cache; |     VKQueryCache query_cache; | ||||||
|  |     AccelerateDMA accelerate_dma; | ||||||
|     VKFenceManager fence_manager; |     VKFenceManager fence_manager; | ||||||
| 
 | 
 | ||||||
|     vk::Event wfi_event; |     vk::Event wfi_event; | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Fernando Sahmkow
						Fernando Sahmkow