Engines: Implement Accelerate DMA Texture.
This commit is contained in:
		
					parent
					
						
							
								ce8f4da638
							
						
					
				
			
			
				commit
				
					
						8a3411b417
					
				
			
		
					 15 changed files with 656 additions and 95 deletions
				
			
		|  | @ -55,6 +55,19 @@ constexpr u32 NUM_STORAGE_BUFFERS = 16; | ||||||
| constexpr u32 NUM_TEXTURE_BUFFERS = 16; | constexpr u32 NUM_TEXTURE_BUFFERS = 16; | ||||||
| constexpr u32 NUM_STAGES = 5; | constexpr u32 NUM_STAGES = 5; | ||||||
| 
 | 
 | ||||||
|  | enum class ObtainBufferSynchronize : u32 { | ||||||
|  |     NoSynchronize = 0, | ||||||
|  |     FullSynchronize = 1, | ||||||
|  |     SynchronizeNoDirty = 2, | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | enum class ObtainBufferOperation : u32 { | ||||||
|  |     DoNothing = 0, | ||||||
|  |     MarkAsWritten = 1, | ||||||
|  |     DiscardWrite = 2, | ||||||
|  |     MarkQuery = 3, | ||||||
|  | }; | ||||||
|  | 
 | ||||||
| using UniformBufferSizes = std::array<std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES>; | using UniformBufferSizes = std::array<std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES>; | ||||||
| using ComputeUniformBufferSizes = std::array<u32, NUM_COMPUTE_UNIFORM_BUFFERS>; | using ComputeUniformBufferSizes = std::array<u32, NUM_COMPUTE_UNIFORM_BUFFERS>; | ||||||
| 
 | 
 | ||||||
|  | @ -191,6 +204,10 @@ public: | ||||||
| 
 | 
 | ||||||
|     bool DMAClear(GPUVAddr src_address, u64 amount, u32 value); |     bool DMAClear(GPUVAddr src_address, u64 amount, u32 value); | ||||||
| 
 | 
 | ||||||
|  |     [[nodiscard]] std::pair<Buffer*, u32> ObtainBuffer(GPUVAddr gpu_addr, u32 size, | ||||||
|  |                                                        ObtainBufferSynchronize sync_info, | ||||||
|  |                                                        ObtainBufferOperation post_op); | ||||||
|  | 
 | ||||||
|     /// Return true when a CPU region is modified from the GPU
 |     /// Return true when a CPU region is modified from the GPU
 | ||||||
|     [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); |     [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); | ||||||
| 
 | 
 | ||||||
|  | @ -641,6 +658,42 @@ bool BufferCache<P>::DMAClear(GPUVAddr dst_address, u64 amount, u32 value) { | ||||||
|     return true; |     return true; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | template <class P> | ||||||
|  | std::pair<typename P::Buffer*, u32> BufferCache<P>::ObtainBuffer(GPUVAddr gpu_addr, u32 size, | ||||||
|  |                                                                  ObtainBufferSynchronize sync_info, | ||||||
|  |                                                                  ObtainBufferOperation post_op) { | ||||||
|  |     const std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); | ||||||
|  |     if (!cpu_addr) { | ||||||
|  |         return {&slot_buffers[NULL_BUFFER_ID], 0}; | ||||||
|  |     } | ||||||
|  |     const BufferId buffer_id = FindBuffer(*cpu_addr, size); | ||||||
|  |     Buffer& buffer = slot_buffers[buffer_id]; | ||||||
|  | 
 | ||||||
|  |     // synchronize op
 | ||||||
|  |     switch (sync_info) { | ||||||
|  |     case ObtainBufferSynchronize::FullSynchronize: | ||||||
|  |         SynchronizeBuffer(buffer, *cpu_addr, size); | ||||||
|  |         break; | ||||||
|  |     default: | ||||||
|  |         break; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     switch (post_op) { | ||||||
|  |     case ObtainBufferOperation::MarkAsWritten: | ||||||
|  |         MarkWrittenBuffer(buffer_id, *cpu_addr, size); | ||||||
|  |         break; | ||||||
|  |     case ObtainBufferOperation::DiscardWrite: { | ||||||
|  |         IntervalType interval{*cpu_addr, size}; | ||||||
|  |         ClearDownload(interval); | ||||||
|  |         break; | ||||||
|  |     } | ||||||
|  |     default: | ||||||
|  |         break; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     return {&buffer, buffer.Offset(*cpu_addr)}; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| template <class P> | template <class P> | ||||||
| void BufferCache<P>::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, | void BufferCache<P>::BindGraphicsUniformBuffer(size_t stage, u32 index, GPUVAddr gpu_addr, | ||||||
|                                                u32 size) { |                                                u32 size) { | ||||||
|  |  | ||||||
|  | @ -14,7 +14,13 @@ | ||||||
| #include "video_core/textures/decoders.h" | #include "video_core/textures/decoders.h" | ||||||
| 
 | 
 | ||||||
| MICROPROFILE_DECLARE(GPU_DMAEngine); | MICROPROFILE_DECLARE(GPU_DMAEngine); | ||||||
|  | MICROPROFILE_DECLARE(GPU_DMAEngineBL); | ||||||
|  | MICROPROFILE_DECLARE(GPU_DMAEngineLB); | ||||||
|  | MICROPROFILE_DECLARE(GPU_DMAEngineBB); | ||||||
| MICROPROFILE_DEFINE(GPU_DMAEngine, "GPU", "DMA Engine", MP_RGB(224, 224, 128)); | MICROPROFILE_DEFINE(GPU_DMAEngine, "GPU", "DMA Engine", MP_RGB(224, 224, 128)); | ||||||
|  | MICROPROFILE_DEFINE(GPU_DMAEngineBL, "GPU", "DMA Engine Block - Linear", MP_RGB(224, 224, 128)); | ||||||
|  | MICROPROFILE_DEFINE(GPU_DMAEngineLB, "GPU", "DMA Engine Linear - Block", MP_RGB(224, 224, 128)); | ||||||
|  | MICROPROFILE_DEFINE(GPU_DMAEngineBB, "GPU", "DMA Engine Block - Block", MP_RGB(224, 224, 128)); | ||||||
| 
 | 
 | ||||||
| namespace Tegra::Engines { | namespace Tegra::Engines { | ||||||
| 
 | 
 | ||||||
|  | @ -72,6 +78,7 @@ void MaxwellDMA::Launch() { | ||||||
|         memory_manager.FlushCaching(); |         memory_manager.FlushCaching(); | ||||||
|         if (!is_src_pitch && !is_dst_pitch) { |         if (!is_src_pitch && !is_dst_pitch) { | ||||||
|             // If both the source and the destination are in block layout, assert.
 |             // If both the source and the destination are in block layout, assert.
 | ||||||
|  |             MICROPROFILE_SCOPE(GPU_DMAEngineBB); | ||||||
|             CopyBlockLinearToBlockLinear(); |             CopyBlockLinearToBlockLinear(); | ||||||
|             ReleaseSemaphore(); |             ReleaseSemaphore(); | ||||||
|             return; |             return; | ||||||
|  | @ -87,8 +94,10 @@ void MaxwellDMA::Launch() { | ||||||
|             } |             } | ||||||
|         } else { |         } else { | ||||||
|             if (!is_src_pitch && is_dst_pitch) { |             if (!is_src_pitch && is_dst_pitch) { | ||||||
|  |                 MICROPROFILE_SCOPE(GPU_DMAEngineBL); | ||||||
|                 CopyBlockLinearToPitch(); |                 CopyBlockLinearToPitch(); | ||||||
|             } else { |             } else { | ||||||
|  |                 MICROPROFILE_SCOPE(GPU_DMAEngineLB); | ||||||
|                 CopyPitchToBlockLinear(); |                 CopyPitchToBlockLinear(); | ||||||
|             } |             } | ||||||
|         } |         } | ||||||
|  | @ -153,21 +162,35 @@ void MaxwellDMA::Launch() { | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void MaxwellDMA::CopyBlockLinearToPitch() { | void MaxwellDMA::CopyBlockLinearToPitch() { | ||||||
|     UNIMPLEMENTED_IF(regs.src_params.block_size.width != 0); |     UNIMPLEMENTED_IF(regs.launch_dma.remap_enable != 0); | ||||||
|     UNIMPLEMENTED_IF(regs.src_params.layer != 0); |  | ||||||
| 
 | 
 | ||||||
|     const bool is_remapping = regs.launch_dma.remap_enable != 0; |     u32 bytes_per_pixel = 1; | ||||||
|  |     DMA::ImageOperand src_operand; | ||||||
|  |     src_operand.bytes_per_pixel = bytes_per_pixel; | ||||||
|  |     src_operand.params = regs.src_params; | ||||||
|  |     src_operand.address = regs.offset_in; | ||||||
| 
 | 
 | ||||||
|     // Optimized path for micro copies.
 |     DMA::BufferOperand dst_operand; | ||||||
|     const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count; |     dst_operand.pitch = regs.pitch_out; | ||||||
|     if (!is_remapping && dst_size < GOB_SIZE && regs.pitch_out <= GOB_SIZE_X && |     dst_operand.width = regs.line_length_in; | ||||||
|         regs.src_params.height > GOB_SIZE_Y) { |     dst_operand.height = regs.line_count; | ||||||
|         FastCopyBlockLinearToPitch(); |     dst_operand.address = regs.offset_out; | ||||||
|  |     DMA::ImageCopy copy_info{}; | ||||||
|  |     copy_info.length_x = regs.line_length_in; | ||||||
|  |     copy_info.length_y = regs.line_count; | ||||||
|  |     auto& accelerate = rasterizer->AccessAccelerateDMA(); | ||||||
|  |     if (accelerate.ImageToBuffer(copy_info, src_operand, dst_operand)) { | ||||||
|         return; |         return; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  |     UNIMPLEMENTED_IF(regs.src_params.block_size.width != 0); | ||||||
|  |     UNIMPLEMENTED_IF(regs.src_params.block_size.depth != 0); | ||||||
|  |     UNIMPLEMENTED_IF(regs.src_params.block_size.depth == 0 && regs.src_params.depth != 1); | ||||||
|  | 
 | ||||||
|     // Deswizzle the input and copy it over.
 |     // Deswizzle the input and copy it over.
 | ||||||
|     const Parameters& src_params = regs.src_params; |     const DMA::Parameters& src_params = regs.src_params; | ||||||
|  | 
 | ||||||
|  |     const bool is_remapping = regs.launch_dma.remap_enable != 0; | ||||||
| 
 | 
 | ||||||
|     const u32 num_remap_components = regs.remap_const.num_dst_components_minus_one + 1; |     const u32 num_remap_components = regs.remap_const.num_dst_components_minus_one + 1; | ||||||
|     const u32 remap_components_size = regs.remap_const.component_size_minus_one + 1; |     const u32 remap_components_size = regs.remap_const.component_size_minus_one + 1; | ||||||
|  | @ -187,7 +210,7 @@ void MaxwellDMA::CopyBlockLinearToPitch() { | ||||||
|         x_offset >>= bpp_shift; |         x_offset >>= bpp_shift; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     const u32 bytes_per_pixel = base_bpp << bpp_shift; |     bytes_per_pixel = base_bpp << bpp_shift; | ||||||
|     const u32 height = src_params.height; |     const u32 height = src_params.height; | ||||||
|     const u32 depth = src_params.depth; |     const u32 depth = src_params.depth; | ||||||
|     const u32 block_height = src_params.block_size.height; |     const u32 block_height = src_params.block_size.height; | ||||||
|  | @ -195,11 +218,12 @@ void MaxwellDMA::CopyBlockLinearToPitch() { | ||||||
|     const size_t src_size = |     const size_t src_size = | ||||||
|         CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth); |         CalculateSize(true, bytes_per_pixel, width, height, depth, block_height, block_depth); | ||||||
| 
 | 
 | ||||||
|  |     const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count; | ||||||
|     read_buffer.resize_destructive(src_size); |     read_buffer.resize_destructive(src_size); | ||||||
|     write_buffer.resize_destructive(dst_size); |     write_buffer.resize_destructive(dst_size); | ||||||
| 
 | 
 | ||||||
|     memory_manager.ReadBlock(regs.offset_in, read_buffer.data(), src_size); |     memory_manager.ReadBlock(src_operand.address, read_buffer.data(), src_size); | ||||||
|     memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size); |     memory_manager.ReadBlockUnsafe(dst_operand.address, write_buffer.data(), dst_size); | ||||||
| 
 | 
 | ||||||
|     UnswizzleSubrect(write_buffer, read_buffer, bytes_per_pixel, width, height, depth, x_offset, |     UnswizzleSubrect(write_buffer, read_buffer, bytes_per_pixel, width, height, depth, x_offset, | ||||||
|                      src_params.origin.y, x_elements, regs.line_count, block_height, block_depth, |                      src_params.origin.y, x_elements, regs.line_count, block_height, block_depth, | ||||||
|  | @ -216,6 +240,24 @@ void MaxwellDMA::CopyPitchToBlockLinear() { | ||||||
|     const u32 num_remap_components = regs.remap_const.num_dst_components_minus_one + 1; |     const u32 num_remap_components = regs.remap_const.num_dst_components_minus_one + 1; | ||||||
|     const u32 remap_components_size = regs.remap_const.component_size_minus_one + 1; |     const u32 remap_components_size = regs.remap_const.component_size_minus_one + 1; | ||||||
| 
 | 
 | ||||||
|  |     u32 bytes_per_pixel = 1; | ||||||
|  |     DMA::ImageOperand dst_operand; | ||||||
|  |     dst_operand.bytes_per_pixel = bytes_per_pixel; | ||||||
|  |     dst_operand.params = regs.dst_params; | ||||||
|  |     dst_operand.address = regs.offset_out; | ||||||
|  |     DMA::BufferOperand src_operand; | ||||||
|  |     src_operand.pitch = regs.pitch_in; | ||||||
|  |     src_operand.width = regs.line_length_in; | ||||||
|  |     src_operand.height = regs.line_count; | ||||||
|  |     src_operand.address = regs.offset_in; | ||||||
|  |     DMA::ImageCopy copy_info{}; | ||||||
|  |     copy_info.length_x = regs.line_length_in; | ||||||
|  |     copy_info.length_y = regs.line_count; | ||||||
|  |     auto& accelerate = rasterizer->AccessAccelerateDMA(); | ||||||
|  |     if (accelerate.BufferToImage(copy_info, src_operand, dst_operand)) { | ||||||
|  |         return; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|     const auto& dst_params = regs.dst_params; |     const auto& dst_params = regs.dst_params; | ||||||
| 
 | 
 | ||||||
|     const u32 base_bpp = !is_remapping ? 1U : num_remap_components * remap_components_size; |     const u32 base_bpp = !is_remapping ? 1U : num_remap_components * remap_components_size; | ||||||
|  | @ -233,7 +275,7 @@ void MaxwellDMA::CopyPitchToBlockLinear() { | ||||||
|         x_offset >>= bpp_shift; |         x_offset >>= bpp_shift; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     const u32 bytes_per_pixel = base_bpp << bpp_shift; |     bytes_per_pixel = base_bpp << bpp_shift; | ||||||
|     const u32 height = dst_params.height; |     const u32 height = dst_params.height; | ||||||
|     const u32 depth = dst_params.depth; |     const u32 depth = dst_params.depth; | ||||||
|     const u32 block_height = dst_params.block_size.height; |     const u32 block_height = dst_params.block_size.height; | ||||||
|  | @ -260,45 +302,14 @@ void MaxwellDMA::CopyPitchToBlockLinear() { | ||||||
|     memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size); |     memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void MaxwellDMA::FastCopyBlockLinearToPitch() { |  | ||||||
|     const u32 bytes_per_pixel = 1U; |  | ||||||
|     const size_t src_size = GOB_SIZE; |  | ||||||
|     const size_t dst_size = static_cast<size_t>(regs.pitch_out) * regs.line_count; |  | ||||||
|     u32 pos_x = regs.src_params.origin.x; |  | ||||||
|     u32 pos_y = regs.src_params.origin.y; |  | ||||||
|     const u64 offset = GetGOBOffset(regs.src_params.width, regs.src_params.height, pos_x, pos_y, |  | ||||||
|                                     regs.src_params.block_size.height, bytes_per_pixel); |  | ||||||
|     const u32 x_in_gob = 64 / bytes_per_pixel; |  | ||||||
|     pos_x = pos_x % x_in_gob; |  | ||||||
|     pos_y = pos_y % 8; |  | ||||||
| 
 |  | ||||||
|     read_buffer.resize_destructive(src_size); |  | ||||||
|     write_buffer.resize_destructive(dst_size); |  | ||||||
| 
 |  | ||||||
|     if (Settings::IsGPULevelExtreme()) { |  | ||||||
|         memory_manager.ReadBlock(regs.offset_in + offset, read_buffer.data(), src_size); |  | ||||||
|         memory_manager.ReadBlock(regs.offset_out, write_buffer.data(), dst_size); |  | ||||||
|     } else { |  | ||||||
|         memory_manager.ReadBlockUnsafe(regs.offset_in + offset, read_buffer.data(), src_size); |  | ||||||
|         memory_manager.ReadBlockUnsafe(regs.offset_out, write_buffer.data(), dst_size); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     UnswizzleSubrect(write_buffer, read_buffer, bytes_per_pixel, regs.src_params.width, |  | ||||||
|                      regs.src_params.height, 1, pos_x, pos_y, regs.line_length_in, regs.line_count, |  | ||||||
|                      regs.src_params.block_size.height, regs.src_params.block_size.depth, |  | ||||||
|                      regs.pitch_out); |  | ||||||
| 
 |  | ||||||
|     memory_manager.WriteBlockCached(regs.offset_out, write_buffer.data(), dst_size); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void MaxwellDMA::CopyBlockLinearToBlockLinear() { | void MaxwellDMA::CopyBlockLinearToBlockLinear() { | ||||||
|     UNIMPLEMENTED_IF(regs.src_params.block_size.width != 0); |     UNIMPLEMENTED_IF(regs.src_params.block_size.width != 0); | ||||||
| 
 | 
 | ||||||
|     const bool is_remapping = regs.launch_dma.remap_enable != 0; |     const bool is_remapping = regs.launch_dma.remap_enable != 0; | ||||||
| 
 | 
 | ||||||
|     // Deswizzle the input and copy it over.
 |     // Deswizzle the input and copy it over.
 | ||||||
|     const Parameters& src = regs.src_params; |     const DMA::Parameters& src = regs.src_params; | ||||||
|     const Parameters& dst = regs.dst_params; |     const DMA::Parameters& dst = regs.dst_params; | ||||||
| 
 | 
 | ||||||
|     const u32 num_remap_components = regs.remap_const.num_dst_components_minus_one + 1; |     const u32 num_remap_components = regs.remap_const.num_dst_components_minus_one + 1; | ||||||
|     const u32 remap_components_size = regs.remap_const.component_size_minus_one + 1; |     const u32 remap_components_size = regs.remap_const.component_size_minus_one + 1; | ||||||
|  |  | ||||||
|  | @ -24,6 +24,54 @@ namespace VideoCore { | ||||||
| class RasterizerInterface; | class RasterizerInterface; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | namespace Tegra { | ||||||
|  | namespace DMA { | ||||||
|  | 
 | ||||||
|  | union Origin { | ||||||
|  |     BitField<0, 16, u32> x; | ||||||
|  |     BitField<16, 16, u32> y; | ||||||
|  | }; | ||||||
|  | static_assert(sizeof(Origin) == 4); | ||||||
|  | 
 | ||||||
|  | struct ImageCopy { | ||||||
|  |     u32 length_x{}; | ||||||
|  |     u32 length_y{}; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | union BlockSize { | ||||||
|  |     BitField<0, 4, u32> width; | ||||||
|  |     BitField<4, 4, u32> height; | ||||||
|  |     BitField<8, 4, u32> depth; | ||||||
|  |     BitField<12, 4, u32> gob_height; | ||||||
|  | }; | ||||||
|  | static_assert(sizeof(BlockSize) == 4); | ||||||
|  | 
 | ||||||
|  | struct Parameters { | ||||||
|  |     BlockSize block_size; | ||||||
|  |     u32 width; | ||||||
|  |     u32 height; | ||||||
|  |     u32 depth; | ||||||
|  |     u32 layer; | ||||||
|  |     Origin origin; | ||||||
|  | }; | ||||||
|  | static_assert(sizeof(Parameters) == 24); | ||||||
|  | 
 | ||||||
|  | struct ImageOperand { | ||||||
|  |     u32 bytes_per_pixel; | ||||||
|  |     Parameters params; | ||||||
|  |     GPUVAddr address; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | struct BufferOperand { | ||||||
|  |     u32 pitch; | ||||||
|  |     u32 width; | ||||||
|  |     u32 height; | ||||||
|  |     GPUVAddr address; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | } // namespace DMA
 | ||||||
|  | } // namespace Tegra
 | ||||||
|  | 
 | ||||||
| namespace Tegra::Engines { | namespace Tegra::Engines { | ||||||
| 
 | 
 | ||||||
| class AccelerateDMAInterface { | class AccelerateDMAInterface { | ||||||
|  | @ -32,6 +80,12 @@ public: | ||||||
|     virtual bool BufferCopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) = 0; |     virtual bool BufferCopy(GPUVAddr src_address, GPUVAddr dest_address, u64 amount) = 0; | ||||||
| 
 | 
 | ||||||
|     virtual bool BufferClear(GPUVAddr src_address, u64 amount, u32 value) = 0; |     virtual bool BufferClear(GPUVAddr src_address, u64 amount, u32 value) = 0; | ||||||
|  | 
 | ||||||
|  |     virtual bool ImageToBuffer(const DMA::ImageCopy& copy_info, const DMA::ImageOperand& src, | ||||||
|  |                                const DMA::BufferOperand& dst) = 0; | ||||||
|  | 
 | ||||||
|  |     virtual bool BufferToImage(const DMA::ImageCopy& copy_info, const DMA::BufferOperand& src, | ||||||
|  |                                const DMA::ImageOperand& dst) = 0; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| /**
 | /**
 | ||||||
|  | @ -51,30 +105,6 @@ public: | ||||||
|         } |         } | ||||||
|     }; |     }; | ||||||
| 
 | 
 | ||||||
|     union BlockSize { |  | ||||||
|         BitField<0, 4, u32> width; |  | ||||||
|         BitField<4, 4, u32> height; |  | ||||||
|         BitField<8, 4, u32> depth; |  | ||||||
|         BitField<12, 4, u32> gob_height; |  | ||||||
|     }; |  | ||||||
|     static_assert(sizeof(BlockSize) == 4); |  | ||||||
| 
 |  | ||||||
|     union Origin { |  | ||||||
|         BitField<0, 16, u32> x; |  | ||||||
|         BitField<16, 16, u32> y; |  | ||||||
|     }; |  | ||||||
|     static_assert(sizeof(Origin) == 4); |  | ||||||
| 
 |  | ||||||
|     struct Parameters { |  | ||||||
|         BlockSize block_size; |  | ||||||
|         u32 width; |  | ||||||
|         u32 height; |  | ||||||
|         u32 depth; |  | ||||||
|         u32 layer; |  | ||||||
|         Origin origin; |  | ||||||
|     }; |  | ||||||
|     static_assert(sizeof(Parameters) == 24); |  | ||||||
| 
 |  | ||||||
|     struct Semaphore { |     struct Semaphore { | ||||||
|         PackedGPUVAddr address; |         PackedGPUVAddr address; | ||||||
|         u32 payload; |         u32 payload; | ||||||
|  | @ -227,8 +257,6 @@ private: | ||||||
| 
 | 
 | ||||||
|     void CopyBlockLinearToBlockLinear(); |     void CopyBlockLinearToBlockLinear(); | ||||||
| 
 | 
 | ||||||
|     void FastCopyBlockLinearToPitch(); |  | ||||||
| 
 |  | ||||||
|     void ReleaseSemaphore(); |     void ReleaseSemaphore(); | ||||||
| 
 | 
 | ||||||
|     void ConsumeSinkImpl() override; |     void ConsumeSinkImpl() override; | ||||||
|  | @ -261,17 +289,17 @@ private: | ||||||
|                 u32 reserved05[0x3f]; |                 u32 reserved05[0x3f]; | ||||||
|                 PackedGPUVAddr offset_in; |                 PackedGPUVAddr offset_in; | ||||||
|                 PackedGPUVAddr offset_out; |                 PackedGPUVAddr offset_out; | ||||||
|                 u32 pitch_in; |                 s32 pitch_in; | ||||||
|                 u32 pitch_out; |                 s32 pitch_out; | ||||||
|                 u32 line_length_in; |                 u32 line_length_in; | ||||||
|                 u32 line_count; |                 u32 line_count; | ||||||
|                 u32 reserved06[0xb6]; |                 u32 reserved06[0xb6]; | ||||||
|                 u32 remap_consta_value; |                 u32 remap_consta_value; | ||||||
|                 u32 remap_constb_value; |                 u32 remap_constb_value; | ||||||
|                 RemapConst remap_const; |                 RemapConst remap_const; | ||||||
|                 Parameters dst_params; |                 DMA::Parameters dst_params; | ||||||
|                 u32 reserved07[0x1]; |                 u32 reserved07[0x1]; | ||||||
|                 Parameters src_params; |                 DMA::Parameters src_params; | ||||||
|                 u32 reserved08[0x275]; |                 u32 reserved08[0x275]; | ||||||
|                 u32 pm_trigger_end; |                 u32 pm_trigger_end; | ||||||
|                 u32 reserved09[0x3ba]; |                 u32 reserved09[0x3ba]; | ||||||
|  |  | ||||||
|  | @ -22,6 +22,14 @@ public: | ||||||
|     explicit AccelerateDMA(); |     explicit AccelerateDMA(); | ||||||
|     bool BufferCopy(GPUVAddr start_address, GPUVAddr end_address, u64 amount) override; |     bool BufferCopy(GPUVAddr start_address, GPUVAddr end_address, u64 amount) override; | ||||||
|     bool BufferClear(GPUVAddr src_address, u64 amount, u32 value) override; |     bool BufferClear(GPUVAddr src_address, u64 amount, u32 value) override; | ||||||
|  |     bool ImageToBuffer(const Tegra::DMA::ImageCopy& copy_info, const Tegra::DMA::ImageOperand& src, | ||||||
|  |                        const Tegra::DMA::BufferOperand& dst) override { | ||||||
|  |         return false; | ||||||
|  |     } | ||||||
|  |     bool BufferToImage(const Tegra::DMA::ImageCopy& copy_info, const Tegra::DMA::BufferOperand& src, | ||||||
|  |                        const Tegra::DMA::ImageOperand& dst) override { | ||||||
|  |         return false; | ||||||
|  |     } | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| class RasterizerNull final : public VideoCore::RasterizerAccelerated, | class RasterizerNull final : public VideoCore::RasterizerAccelerated, | ||||||
|  |  | ||||||
|  | @ -56,6 +56,16 @@ public: | ||||||
| 
 | 
 | ||||||
|     bool BufferClear(GPUVAddr src_address, u64 amount, u32 value) override; |     bool BufferClear(GPUVAddr src_address, u64 amount, u32 value) override; | ||||||
| 
 | 
 | ||||||
|  |     bool ImageToBuffer(const Tegra::DMA::ImageCopy& copy_info, const Tegra::DMA::ImageOperand& src, | ||||||
|  |                        const Tegra::DMA::BufferOperand& dst) override { | ||||||
|  |         return false; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     bool BufferToImage(const Tegra::DMA::ImageCopy& copy_info, const Tegra::DMA::BufferOperand& src, | ||||||
|  |                        const Tegra::DMA::ImageOperand& dst) override { | ||||||
|  |         return false; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
| private: | private: | ||||||
|     BufferCache& buffer_cache; |     BufferCache& buffer_cache; | ||||||
| }; | }; | ||||||
|  |  | ||||||
|  | @ -172,7 +172,7 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra | ||||||
|       buffer_cache(*this, cpu_memory_, buffer_cache_runtime), |       buffer_cache(*this, cpu_memory_, buffer_cache_runtime), | ||||||
|       pipeline_cache(*this, device, scheduler, descriptor_pool, update_descriptor_queue, |       pipeline_cache(*this, device, scheduler, descriptor_pool, update_descriptor_queue, | ||||||
|                      render_pass_cache, buffer_cache, texture_cache, gpu.ShaderNotify()), |                      render_pass_cache, buffer_cache, texture_cache, gpu.ShaderNotify()), | ||||||
|       query_cache{*this, device, scheduler}, accelerate_dma{buffer_cache}, |       query_cache{*this, device, scheduler}, accelerate_dma(buffer_cache, texture_cache, scheduler), | ||||||
|       fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler), |       fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache, device, scheduler), | ||||||
|       wfi_event(device.GetLogical().CreateEvent()) { |       wfi_event(device.GetLogical().CreateEvent()) { | ||||||
|     scheduler.SetQueryCache(query_cache); |     scheduler.SetQueryCache(query_cache); | ||||||
|  | @ -756,7 +756,9 @@ void RasterizerVulkan::FlushWork() { | ||||||
|     draw_counter = 0; |     draw_counter = 0; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| AccelerateDMA::AccelerateDMA(BufferCache& buffer_cache_) : buffer_cache{buffer_cache_} {} | AccelerateDMA::AccelerateDMA(BufferCache& buffer_cache_, TextureCache& texture_cache_, | ||||||
|  |                              Scheduler& scheduler_) | ||||||
|  |     : buffer_cache{buffer_cache_}, texture_cache{texture_cache_}, scheduler{scheduler_} {} | ||||||
| 
 | 
 | ||||||
| bool AccelerateDMA::BufferClear(GPUVAddr src_address, u64 amount, u32 value) { | bool AccelerateDMA::BufferClear(GPUVAddr src_address, u64 amount, u32 value) { | ||||||
|     std::scoped_lock lock{buffer_cache.mutex}; |     std::scoped_lock lock{buffer_cache.mutex}; | ||||||
|  | @ -768,6 +770,234 @@ bool AccelerateDMA::BufferCopy(GPUVAddr src_address, GPUVAddr dest_address, u64 | ||||||
|     return buffer_cache.DMACopy(src_address, dest_address, amount); |     return buffer_cache.DMACopy(src_address, dest_address, amount); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | bool AccelerateDMA::ImageToBuffer(const Tegra::DMA::ImageCopy& copy_info, | ||||||
|  |                                   const Tegra::DMA::ImageOperand& src, | ||||||
|  |                                   const Tegra::DMA::BufferOperand& dst) { | ||||||
|  |     std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; | ||||||
|  |     auto query_image = texture_cache.ObtainImage(src, false); | ||||||
|  |     if (!query_image) { | ||||||
|  |         return false; | ||||||
|  |     } | ||||||
|  |     auto* image = query_image->first; | ||||||
|  |     auto [level, base] = query_image->second; | ||||||
|  |     const u32 buffer_size = static_cast<u32>(dst.pitch * dst.height); | ||||||
|  |     const auto [buffer, offset] = buffer_cache.ObtainBuffer( | ||||||
|  |         dst.address, buffer_size, VideoCommon::ObtainBufferSynchronize::FullSynchronize, | ||||||
|  |         VideoCommon::ObtainBufferOperation::MarkAsWritten); | ||||||
|  | 
 | ||||||
|  |     const bool is_rescaled = image->IsRescaled(); | ||||||
|  |     if (is_rescaled) { | ||||||
|  |         image->ScaleDown(); | ||||||
|  |     } | ||||||
|  |     VkImageSubresourceLayers subresources{ | ||||||
|  |         .aspectMask = image->AspectMask(), | ||||||
|  |         .mipLevel = level, | ||||||
|  |         .baseArrayLayer = base, | ||||||
|  |         .layerCount = 1, | ||||||
|  |     }; | ||||||
|  |     const u32 bpp = VideoCore::Surface::BytesPerBlock(image->info.format); | ||||||
|  |     const auto convert = [old_bpp = src.bytes_per_pixel, bpp](u32 value) { | ||||||
|  |         return (old_bpp * value) / bpp; | ||||||
|  |     }; | ||||||
|  |     const u32 base_x = convert(src.params.origin.x.Value()); | ||||||
|  |     const u32 base_y = src.params.origin.y.Value(); | ||||||
|  |     const u32 length_x = convert(copy_info.length_x); | ||||||
|  |     const u32 length_y = copy_info.length_y; | ||||||
|  |     VkOffset3D image_offset{ | ||||||
|  |         .x = static_cast<s32>(base_x), | ||||||
|  |         .y = static_cast<s32>(base_y), | ||||||
|  |         .z = 0, | ||||||
|  |     }; | ||||||
|  |     VkExtent3D image_extent{ | ||||||
|  |         .width = length_x, | ||||||
|  |         .height = length_y, | ||||||
|  |         .depth = 1, | ||||||
|  |     }; | ||||||
|  |     auto buff_info(dst); | ||||||
|  |     buff_info.pitch = convert(dst.pitch); | ||||||
|  |     scheduler.RequestOutsideRenderPassOperationContext(); | ||||||
|  |     scheduler.Record([src_image = image->Handle(), dst_buffer = buffer->Handle(), | ||||||
|  |                       buffer_offset = offset, subresources, image_offset, image_extent, | ||||||
|  |                       buff_info](vk::CommandBuffer cmdbuf) { | ||||||
|  |         const std::array buffer_copy_info{ | ||||||
|  |             VkBufferImageCopy{ | ||||||
|  |                 .bufferOffset = buffer_offset, | ||||||
|  |                 .bufferRowLength = buff_info.pitch, | ||||||
|  |                 .bufferImageHeight = buff_info.height, | ||||||
|  |                 .imageSubresource = subresources, | ||||||
|  |                 .imageOffset = image_offset, | ||||||
|  |                 .imageExtent = image_extent, | ||||||
|  |             }, | ||||||
|  |         }; | ||||||
|  |         const VkImageSubresourceRange range{ | ||||||
|  |             .aspectMask = subresources.aspectMask, | ||||||
|  |             .baseMipLevel = subresources.mipLevel, | ||||||
|  |             .levelCount = 1, | ||||||
|  |             .baseArrayLayer = subresources.baseArrayLayer, | ||||||
|  |             .layerCount = 1, | ||||||
|  |         }; | ||||||
|  |         static constexpr VkMemoryBarrier WRITE_BARRIER{ | ||||||
|  |             .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, | ||||||
|  |             .pNext = nullptr, | ||||||
|  |             .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, | ||||||
|  |             .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, | ||||||
|  |         }; | ||||||
|  |         const std::array pre_barriers{ | ||||||
|  |             VkImageMemoryBarrier{ | ||||||
|  |                 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, | ||||||
|  |                 .pNext = nullptr, | ||||||
|  |                 .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | | ||||||
|  |                                  VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | | ||||||
|  |                                  VK_ACCESS_TRANSFER_WRITE_BIT, | ||||||
|  |                 .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, | ||||||
|  |                 .oldLayout = VK_IMAGE_LAYOUT_GENERAL, | ||||||
|  |                 .newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, | ||||||
|  |                 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||||||
|  |                 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||||||
|  |                 .image = src_image, | ||||||
|  |                 .subresourceRange = range, | ||||||
|  |             }, | ||||||
|  |         }; | ||||||
|  |         const std::array post_barriers{ | ||||||
|  |             VkImageMemoryBarrier{ | ||||||
|  |                 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, | ||||||
|  |                 .pNext = nullptr, | ||||||
|  |                 .srcAccessMask = 0, | ||||||
|  |                 .dstAccessMask = 0, | ||||||
|  |                 .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, | ||||||
|  |                 .newLayout = VK_IMAGE_LAYOUT_GENERAL, | ||||||
|  |                 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||||||
|  |                 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||||||
|  |                 .image = src_image, | ||||||
|  |                 .subresourceRange = range, | ||||||
|  |             }, | ||||||
|  |         }; | ||||||
|  |         cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, | ||||||
|  |                                0, {}, {}, pre_barriers); | ||||||
|  |         cmdbuf.CopyImageToBuffer(src_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst_buffer, | ||||||
|  |                                  buffer_copy_info); | ||||||
|  |         cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, | ||||||
|  |                                0, WRITE_BARRIER, nullptr, post_barriers); | ||||||
|  |     }); | ||||||
|  |     if (is_rescaled) { | ||||||
|  |         image->ScaleUp(true); | ||||||
|  |     } | ||||||
|  |     return true; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | bool AccelerateDMA::BufferToImage(const Tegra::DMA::ImageCopy& copy_info, | ||||||
|  |                                   const Tegra::DMA::BufferOperand& src, | ||||||
|  |                                   const Tegra::DMA::ImageOperand& dst) { | ||||||
|  |     std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; | ||||||
|  |     auto query_image = texture_cache.ObtainImage(dst, true); | ||||||
|  |     if (!query_image) { | ||||||
|  |         return false; | ||||||
|  |     } | ||||||
|  |     auto* image = query_image->first; | ||||||
|  |     auto [level, base] = query_image->second; | ||||||
|  |     const u32 buffer_size = static_cast<u32>(src.pitch * src.height); | ||||||
|  |     const auto [buffer, offset] = buffer_cache.ObtainBuffer( | ||||||
|  |         src.address, buffer_size, VideoCommon::ObtainBufferSynchronize::FullSynchronize, | ||||||
|  |         VideoCommon::ObtainBufferOperation::DoNothing); | ||||||
|  |     const bool is_rescaled = image->IsRescaled(); | ||||||
|  |     if (is_rescaled) { | ||||||
|  |         image->ScaleDown(true); | ||||||
|  |     } | ||||||
|  |     VkImageSubresourceLayers subresources{ | ||||||
|  |         .aspectMask = image->AspectMask(), | ||||||
|  |         .mipLevel = level, | ||||||
|  |         .baseArrayLayer = base, | ||||||
|  |         .layerCount = 1, | ||||||
|  |     }; | ||||||
|  |     const u32 bpp = VideoCore::Surface::BytesPerBlock(image->info.format); | ||||||
|  |     const auto convert = [old_bpp = dst.bytes_per_pixel, bpp](u32 value) { | ||||||
|  |         return (old_bpp * value) / bpp; | ||||||
|  |     }; | ||||||
|  |     const u32 base_x = convert(dst.params.origin.x.Value()); | ||||||
|  |     const u32 base_y = dst.params.origin.y.Value(); | ||||||
|  |     const u32 length_x = convert(copy_info.length_x); | ||||||
|  |     const u32 length_y = copy_info.length_y; | ||||||
|  |     VkOffset3D image_offset{ | ||||||
|  |         .x = static_cast<s32>(base_x), | ||||||
|  |         .y = static_cast<s32>(base_y), | ||||||
|  |         .z = 0, | ||||||
|  |     }; | ||||||
|  |     VkExtent3D image_extent{ | ||||||
|  |         .width = length_x, | ||||||
|  |         .height = length_y, | ||||||
|  |         .depth = 1, | ||||||
|  |     }; | ||||||
|  |     auto buff_info(src); | ||||||
|  |     buff_info.pitch = convert(src.pitch); | ||||||
|  |     scheduler.RequestOutsideRenderPassOperationContext(); | ||||||
|  |     scheduler.Record([dst_image = image->Handle(), src_buffer = buffer->Handle(), | ||||||
|  |                       buffer_offset = offset, subresources, image_offset, image_extent, | ||||||
|  |                       buff_info](vk::CommandBuffer cmdbuf) { | ||||||
|  |         const std::array buffer_copy_info{ | ||||||
|  |             VkBufferImageCopy{ | ||||||
|  |                 .bufferOffset = buffer_offset, | ||||||
|  |                 .bufferRowLength = buff_info.pitch, | ||||||
|  |                 .bufferImageHeight = buff_info.height, | ||||||
|  |                 .imageSubresource = subresources, | ||||||
|  |                 .imageOffset = image_offset, | ||||||
|  |                 .imageExtent = image_extent, | ||||||
|  |             }, | ||||||
|  |         }; | ||||||
|  |         const VkImageSubresourceRange range{ | ||||||
|  |             .aspectMask = subresources.aspectMask, | ||||||
|  |             .baseMipLevel = subresources.mipLevel, | ||||||
|  |             .levelCount = 1, | ||||||
|  |             .baseArrayLayer = subresources.baseArrayLayer, | ||||||
|  |             .layerCount = 1, | ||||||
|  |         }; | ||||||
|  |         static constexpr VkMemoryBarrier READ_BARRIER{ | ||||||
|  |             .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, | ||||||
|  |             .pNext = nullptr, | ||||||
|  |             .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT, | ||||||
|  |             .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT, | ||||||
|  |         }; | ||||||
|  |         const std::array pre_barriers{ | ||||||
|  |             VkImageMemoryBarrier{ | ||||||
|  |                 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, | ||||||
|  |                 .pNext = nullptr, | ||||||
|  |                 .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | | ||||||
|  |                                  VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | | ||||||
|  |                                  VK_ACCESS_TRANSFER_WRITE_BIT, | ||||||
|  |                 .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, | ||||||
|  |                 .oldLayout = VK_IMAGE_LAYOUT_GENERAL, | ||||||
|  |                 .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, | ||||||
|  |                 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||||||
|  |                 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||||||
|  |                 .image = dst_image, | ||||||
|  |                 .subresourceRange = range, | ||||||
|  |             }, | ||||||
|  |         }; | ||||||
|  |         const std::array post_barriers{ | ||||||
|  |             VkImageMemoryBarrier{ | ||||||
|  |                 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, | ||||||
|  |                 .pNext = nullptr, | ||||||
|  |                 .srcAccessMask = 0, | ||||||
|  |                 .dstAccessMask = 0, | ||||||
|  |                 .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, | ||||||
|  |                 .newLayout = VK_IMAGE_LAYOUT_GENERAL, | ||||||
|  |                 .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||||||
|  |                 .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, | ||||||
|  |                 .image = dst_image, | ||||||
|  |                 .subresourceRange = range, | ||||||
|  |             }, | ||||||
|  |         }; | ||||||
|  |         cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, | ||||||
|  |                                0, READ_BARRIER, {}, pre_barriers); | ||||||
|  |         cmdbuf.CopyBufferToImage(src_buffer, dst_image, VK_IMAGE_LAYOUT_GENERAL, buffer_copy_info); | ||||||
|  |         cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, | ||||||
|  |                                0, nullptr, nullptr, post_barriers); | ||||||
|  |     }); | ||||||
|  |     if (is_rescaled) { | ||||||
|  |         image->ScaleUp(); | ||||||
|  |     } | ||||||
|  |     return true; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| void RasterizerVulkan::UpdateDynamicStates() { | void RasterizerVulkan::UpdateDynamicStates() { | ||||||
|     auto& regs = maxwell3d->regs; |     auto& regs = maxwell3d->regs; | ||||||
|     UpdateViewportsState(regs); |     UpdateViewportsState(regs); | ||||||
|  |  | ||||||
|  | @ -45,14 +45,23 @@ class StateTracker; | ||||||
| 
 | 
 | ||||||
| class AccelerateDMA : public Tegra::Engines::AccelerateDMAInterface { | class AccelerateDMA : public Tegra::Engines::AccelerateDMAInterface { | ||||||
| public: | public: | ||||||
|     explicit AccelerateDMA(BufferCache& buffer_cache); |     explicit AccelerateDMA(BufferCache& buffer_cache, TextureCache& texture_cache, | ||||||
|  |                            Scheduler& scheduler); | ||||||
| 
 | 
 | ||||||
|     bool BufferCopy(GPUVAddr start_address, GPUVAddr end_address, u64 amount) override; |     bool BufferCopy(GPUVAddr start_address, GPUVAddr end_address, u64 amount) override; | ||||||
| 
 | 
 | ||||||
|     bool BufferClear(GPUVAddr src_address, u64 amount, u32 value) override; |     bool BufferClear(GPUVAddr src_address, u64 amount, u32 value) override; | ||||||
| 
 | 
 | ||||||
|  |     bool ImageToBuffer(const Tegra::DMA::ImageCopy& copy_info, const Tegra::DMA::ImageOperand& src, | ||||||
|  |                        const Tegra::DMA::BufferOperand& dst) override; | ||||||
|  | 
 | ||||||
|  |     bool BufferToImage(const Tegra::DMA::ImageCopy& copy_info, const Tegra::DMA::BufferOperand& src, | ||||||
|  |                        const Tegra::DMA::ImageOperand& dst) override; | ||||||
|  | 
 | ||||||
| private: | private: | ||||||
|     BufferCache& buffer_cache; |     BufferCache& buffer_cache; | ||||||
|  |     TextureCache& texture_cache; | ||||||
|  |     Scheduler& scheduler; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| class RasterizerVulkan final : public VideoCore::RasterizerAccelerated, | class RasterizerVulkan final : public VideoCore::RasterizerAccelerated, | ||||||
|  |  | ||||||
|  | @ -864,13 +864,19 @@ void TextureCacheRuntime::ReinterpretImage(Image& dst, Image& src, | ||||||
|     const VkImageAspectFlags src_aspect_mask = src.AspectMask(); |     const VkImageAspectFlags src_aspect_mask = src.AspectMask(); | ||||||
|     const VkImageAspectFlags dst_aspect_mask = dst.AspectMask(); |     const VkImageAspectFlags dst_aspect_mask = dst.AspectMask(); | ||||||
| 
 | 
 | ||||||
|     std::ranges::transform(copies, vk_in_copies.begin(), [src_aspect_mask](const auto& copy) { |     const auto bpp_in = BytesPerBlock(src.info.format) / DefaultBlockWidth(src.info.format); | ||||||
|         return MakeBufferImageCopy(copy, true, src_aspect_mask); |     const auto bpp_out = BytesPerBlock(dst.info.format) / DefaultBlockWidth(dst.info.format); | ||||||
|  |     std::ranges::transform(copies, vk_in_copies.begin(), | ||||||
|  |                            [src_aspect_mask, bpp_in, bpp_out](const auto& copy) { | ||||||
|  |                                auto copy2 = copy; | ||||||
|  |                                copy2.src_offset.x = (bpp_out * copy.src_offset.x) / bpp_in; | ||||||
|  |                                copy2.extent.width = (bpp_out * copy.extent.width) / bpp_in; | ||||||
|  |                                return MakeBufferImageCopy(copy2, true, src_aspect_mask); | ||||||
|                            }); |                            }); | ||||||
|     std::ranges::transform(copies, vk_out_copies.begin(), [dst_aspect_mask](const auto& copy) { |     std::ranges::transform(copies, vk_out_copies.begin(), [dst_aspect_mask](const auto& copy) { | ||||||
|         return MakeBufferImageCopy(copy, false, dst_aspect_mask); |         return MakeBufferImageCopy(copy, false, dst_aspect_mask); | ||||||
|     }); |     }); | ||||||
|     const u32 img_bpp = BytesPerBlock(src.info.format); |     const u32 img_bpp = BytesPerBlock(dst.info.format); | ||||||
|     size_t total_size = 0; |     size_t total_size = 0; | ||||||
|     for (const auto& copy : copies) { |     for (const auto& copy : copies) { | ||||||
|         total_size += copy.extent.width * copy.extent.height * copy.extent.depth * img_bpp; |         total_size += copy.extent.width * copy.extent.height * copy.extent.depth * img_bpp; | ||||||
|  |  | ||||||
|  | @ -216,10 +216,51 @@ ImageInfo::ImageInfo(const Tegra::Engines::Fermi2D::Surface& config) noexcept { | ||||||
|             .height = config.height, |             .height = config.height, | ||||||
|             .depth = 1, |             .depth = 1, | ||||||
|         }; |         }; | ||||||
|         rescaleable = block.depth == 0; |         rescaleable = block.depth == 0 && size.height > 256; | ||||||
|         rescaleable &= size.height > 256; |  | ||||||
|         downscaleable = size.height > 512; |         downscaleable = size.height > 512; | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | static PixelFormat ByteSizeToFormat(u32 bytes_per_pixel) { | ||||||
|  |     switch (bytes_per_pixel) { | ||||||
|  |     case 1: | ||||||
|  |         return PixelFormat::R8_UINT; | ||||||
|  |     case 2: | ||||||
|  |         return PixelFormat::R8G8_UINT; | ||||||
|  |     case 4: | ||||||
|  |         return PixelFormat::A8B8G8R8_UINT; | ||||||
|  |     case 8: | ||||||
|  |         return PixelFormat::R16G16B16A16_UINT; | ||||||
|  |     case 16: | ||||||
|  |         return PixelFormat::R32G32B32A32_UINT; | ||||||
|  |     default: | ||||||
|  |         UNIMPLEMENTED(); | ||||||
|  |         return PixelFormat::Invalid; | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | ImageInfo::ImageInfo(const Tegra::DMA::ImageOperand& config) noexcept { | ||||||
|  |     const u32 bytes_per_pixel = config.bytes_per_pixel; | ||||||
|  |     format = ByteSizeToFormat(bytes_per_pixel); | ||||||
|  |     type = config.params.block_size.depth > 0 ? ImageType::e3D : ImageType::e2D; | ||||||
|  |     num_samples = 1; | ||||||
|  |     block = Extent3D{ | ||||||
|  |         .width = config.params.block_size.width, | ||||||
|  |         .height = config.params.block_size.height, | ||||||
|  |         .depth = config.params.block_size.depth, | ||||||
|  |     }; | ||||||
|  |     size = Extent3D{ | ||||||
|  |         .width = config.params.width, | ||||||
|  |         .height = config.params.height, | ||||||
|  |         .depth = config.params.depth, | ||||||
|  |     }; | ||||||
|  |     tile_width_spacing = 0; | ||||||
|  |     resources.levels = 1; | ||||||
|  |     resources.layers = 1; | ||||||
|  |     layer_stride = CalculateLayerStride(*this); | ||||||
|  |     maybe_unaligned_layer_stride = CalculateLayerSize(*this); | ||||||
|  |     rescaleable = block.depth == 0 && size.height > 256; | ||||||
|  |     downscaleable = size.height > 512; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| } // namespace VideoCommon
 | } // namespace VideoCommon
 | ||||||
|  |  | ||||||
|  | @ -5,6 +5,7 @@ | ||||||
| 
 | 
 | ||||||
| #include "video_core/engines/fermi_2d.h" | #include "video_core/engines/fermi_2d.h" | ||||||
| #include "video_core/engines/maxwell_3d.h" | #include "video_core/engines/maxwell_3d.h" | ||||||
|  | #include "video_core/engines/maxwell_dma.h" | ||||||
| #include "video_core/surface.h" | #include "video_core/surface.h" | ||||||
| #include "video_core/texture_cache/types.h" | #include "video_core/texture_cache/types.h" | ||||||
| 
 | 
 | ||||||
|  | @ -19,6 +20,7 @@ struct ImageInfo { | ||||||
|     explicit ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs, size_t index) noexcept; |     explicit ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs, size_t index) noexcept; | ||||||
|     explicit ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs) noexcept; |     explicit ImageInfo(const Tegra::Engines::Maxwell3D::Regs& regs) noexcept; | ||||||
|     explicit ImageInfo(const Tegra::Engines::Fermi2D::Surface& config) noexcept; |     explicit ImageInfo(const Tegra::Engines::Fermi2D::Surface& config) noexcept; | ||||||
|  |     explicit ImageInfo(const Tegra::DMA::ImageOperand& config) noexcept; | ||||||
| 
 | 
 | ||||||
|     PixelFormat format = PixelFormat::Invalid; |     PixelFormat format = PixelFormat::Invalid; | ||||||
|     ImageType type = ImageType::e1D; |     ImageType type = ImageType::e1D; | ||||||
|  |  | ||||||
|  | @ -1358,6 +1358,75 @@ std::optional<typename TextureCache<P>::BlitImages> TextureCache<P>::GetBlitImag | ||||||
|     }}; |     }}; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | template <class P> | ||||||
|  | ImageId TextureCache<P>::FindDMAImage(const ImageInfo& info, GPUVAddr gpu_addr) { | ||||||
|  |     std::optional<VAddr> cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr); | ||||||
|  |     if (!cpu_addr) { | ||||||
|  |         cpu_addr = gpu_memory->GpuToCpuAddress(gpu_addr, CalculateGuestSizeInBytes(info)); | ||||||
|  |         if (!cpu_addr) { | ||||||
|  |             return ImageId{}; | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     ImageId image_id{}; | ||||||
|  |     boost::container::small_vector<ImageId, 1> image_ids; | ||||||
|  |     const auto lambda = [&](ImageId existing_image_id, ImageBase& existing_image) { | ||||||
|  |         if (True(existing_image.flags & ImageFlagBits::Remapped)) { | ||||||
|  |             return false; | ||||||
|  |         } | ||||||
|  |         if (info.type == ImageType::Linear || existing_image.info.type == ImageType::Linear) | ||||||
|  |             [[unlikely]] { | ||||||
|  |             const bool strict_size = True(existing_image.flags & ImageFlagBits::Strong); | ||||||
|  |             const ImageInfo& existing = existing_image.info; | ||||||
|  |             if (existing_image.gpu_addr == gpu_addr && existing.type == info.type && | ||||||
|  |                 existing.pitch == info.pitch && | ||||||
|  |                 IsPitchLinearSameSize(existing, info, strict_size) && | ||||||
|  |                 IsViewCompatible(existing.format, info.format, false, true)) { | ||||||
|  |                 image_id = existing_image_id; | ||||||
|  |                 image_ids.push_back(existing_image_id); | ||||||
|  |                 return true; | ||||||
|  |             } | ||||||
|  |         } else if (IsSubCopy(info, existing_image, gpu_addr)) { | ||||||
|  |             image_id = existing_image_id; | ||||||
|  |             image_ids.push_back(existing_image_id); | ||||||
|  |             return true; | ||||||
|  |         } | ||||||
|  |         return false; | ||||||
|  |     }; | ||||||
|  |     ForEachImageInRegion(*cpu_addr, CalculateGuestSizeInBytes(info), lambda); | ||||||
|  |     if (image_ids.size() <= 1) [[likely]] { | ||||||
|  |         return image_id; | ||||||
|  |     } | ||||||
|  |     auto image_ids_compare = [this](ImageId a, ImageId b) { | ||||||
|  |         auto& image_a = slot_images[a]; | ||||||
|  |         auto& image_b = slot_images[b]; | ||||||
|  |         return image_a.modification_tick < image_b.modification_tick; | ||||||
|  |     }; | ||||||
|  |     return *std::ranges::max_element(image_ids, image_ids_compare); | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | template <class P> | ||||||
|  | std::optional<std::pair<typename TextureCache<P>::Image*, std::pair<u32, u32>>> | ||||||
|  | TextureCache<P>::ObtainImage(const Tegra::DMA::ImageOperand& operand, bool mark_as_modified) { | ||||||
|  |     ImageInfo dst_info(operand); | ||||||
|  |     ImageId dst_id = FindDMAImage(dst_info, operand.address); | ||||||
|  |     if (!dst_id) { | ||||||
|  |         return std::nullopt; | ||||||
|  |     } | ||||||
|  |     auto& image = slot_images[dst_id]; | ||||||
|  |     auto base = image.TryFindBase(operand.address); | ||||||
|  |     if (!base) { | ||||||
|  |         return std::nullopt; | ||||||
|  |     } | ||||||
|  |     if (False(image.flags & ImageFlagBits::GpuModified)) { | ||||||
|  |         // No need to waste time on an image that's synced with guest
 | ||||||
|  |         return std::nullopt; | ||||||
|  |     } | ||||||
|  |     PrepareImage(dst_id, mark_as_modified, false); | ||||||
|  |     auto& new_image = slot_images[dst_id]; | ||||||
|  |     lru_cache.Touch(new_image.lru_index, frame_tick); | ||||||
|  |     return std::make_pair(&new_image, std::make_pair(base->level, base->layer)); | ||||||
|  | } | ||||||
|  | 
 | ||||||
| template <class P> | template <class P> | ||||||
| SamplerId TextureCache<P>::FindSampler(const TSCEntry& config) { | SamplerId TextureCache<P>::FindSampler(const TSCEntry& config) { | ||||||
|     if (std::ranges::all_of(config.raw, [](u64 value) { return value == 0; })) { |     if (std::ranges::all_of(config.raw, [](u64 value) { return value == 0; })) { | ||||||
|  |  | ||||||
|  | @ -209,6 +209,9 @@ public: | ||||||
|     /// Pop asynchronous downloads
 |     /// Pop asynchronous downloads
 | ||||||
|     void PopAsyncFlushes(); |     void PopAsyncFlushes(); | ||||||
| 
 | 
 | ||||||
|  |     [[nodiscard]] std::optional<std::pair<Image*, std::pair<u32, u32>>> ObtainImage( | ||||||
|  |         const Tegra::DMA::ImageOperand& operand, bool mark_as_modified); | ||||||
|  | 
 | ||||||
|     /// Return true when a CPU region is modified from the GPU
 |     /// Return true when a CPU region is modified from the GPU
 | ||||||
|     [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); |     [[nodiscard]] bool IsRegionGpuModified(VAddr addr, size_t size); | ||||||
| 
 | 
 | ||||||
|  | @ -300,6 +303,8 @@ private: | ||||||
|     /// Remove joined images from the cache
 |     /// Remove joined images from the cache
 | ||||||
|     [[nodiscard]] ImageId JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr); |     [[nodiscard]] ImageId JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr); | ||||||
| 
 | 
 | ||||||
|  |     [[nodiscard]] ImageId FindDMAImage(const ImageInfo& info, GPUVAddr gpu_addr); | ||||||
|  | 
 | ||||||
|     /// Return a blit image pair from the given guest blit parameters
 |     /// Return a blit image pair from the given guest blit parameters
 | ||||||
|     [[nodiscard]] std::optional<BlitImages> GetBlitImages( |     [[nodiscard]] std::optional<BlitImages> GetBlitImages( | ||||||
|         const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src, |         const Tegra::Engines::Fermi2D::Surface& dst, const Tegra::Engines::Fermi2D::Surface& src, | ||||||
|  |  | ||||||
|  | @ -54,6 +54,7 @@ enum class RelaxedOptions : u32 { | ||||||
|     Format = 1 << 1, |     Format = 1 << 1, | ||||||
|     Samples = 1 << 2, |     Samples = 1 << 2, | ||||||
|     ForceBrokenViews = 1 << 3, |     ForceBrokenViews = 1 << 3, | ||||||
|  |     FormatBpp = 1 << 4, | ||||||
| }; | }; | ||||||
| DECLARE_ENUM_FLAG_OPERATORS(RelaxedOptions) | DECLARE_ENUM_FLAG_OPERATORS(RelaxedOptions) | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -743,6 +743,44 @@ std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageIn | ||||||
|     return copies; |     return copies; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | std::vector<ImageCopy> MakeReinterpretImageCopies(const ImageInfo& src, u32 up_scale, | ||||||
|  |                                                   u32 down_shift) { | ||||||
|  |     std::vector<ImageCopy> copies; | ||||||
|  |     copies.reserve(src.resources.levels); | ||||||
|  |     const bool is_3d = src.type == ImageType::e3D; | ||||||
|  |     for (s32 level = 0; level < src.resources.levels; ++level) { | ||||||
|  |         ImageCopy& copy = copies.emplace_back(); | ||||||
|  |         copy.src_subresource = SubresourceLayers{ | ||||||
|  |             .base_level = level, | ||||||
|  |             .base_layer = 0, | ||||||
|  |             .num_layers = src.resources.layers, | ||||||
|  |         }; | ||||||
|  |         copy.dst_subresource = SubresourceLayers{ | ||||||
|  |             .base_level = level, | ||||||
|  |             .base_layer = 0, | ||||||
|  |             .num_layers = src.resources.layers, | ||||||
|  |         }; | ||||||
|  |         copy.src_offset = Offset3D{ | ||||||
|  |             .x = 0, | ||||||
|  |             .y = 0, | ||||||
|  |             .z = 0, | ||||||
|  |         }; | ||||||
|  |         copy.dst_offset = Offset3D{ | ||||||
|  |             .x = 0, | ||||||
|  |             .y = 0, | ||||||
|  |             .z = 0, | ||||||
|  |         }; | ||||||
|  |         const Extent3D mip_size = AdjustMipSize(src.size, level); | ||||||
|  |         copy.extent = AdjustSamplesSize(mip_size, src.num_samples); | ||||||
|  |         if (is_3d) { | ||||||
|  |             copy.extent.depth = src.size.depth; | ||||||
|  |         } | ||||||
|  |         copy.extent.width = std::max<u32>((copy.extent.width * up_scale) >> down_shift, 1); | ||||||
|  |         copy.extent.height = std::max<u32>((copy.extent.height * up_scale) >> down_shift, 1); | ||||||
|  |     } | ||||||
|  |     return copies; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config) { | bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config) { | ||||||
|     const GPUVAddr address = config.Address(); |     const GPUVAddr address = config.Address(); | ||||||
|     if (address == 0) { |     if (address == 0) { | ||||||
|  | @ -999,6 +1037,20 @@ bool IsBlockLinearSizeCompatible(const ImageInfo& lhs, const ImageInfo& rhs, u32 | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | bool IsBlockLinearSizeCompatibleBPPRelaxed(const ImageInfo& lhs, const ImageInfo& rhs, | ||||||
|  |                                            u32 lhs_level, u32 rhs_level) noexcept { | ||||||
|  |     ASSERT(lhs.type != ImageType::Linear); | ||||||
|  |     ASSERT(rhs.type != ImageType::Linear); | ||||||
|  |     const auto lhs_bpp = BytesPerBlock(lhs.format); | ||||||
|  |     const auto rhs_bpp = BytesPerBlock(rhs.format); | ||||||
|  |     const Extent3D lhs_size = AdjustMipSize(lhs.size, lhs_level); | ||||||
|  |     const Extent3D rhs_size = AdjustMipSize(rhs.size, rhs_level); | ||||||
|  |     return Common::AlignUpLog2(lhs_size.width * lhs_bpp, GOB_SIZE_X_SHIFT) == | ||||||
|  |                Common::AlignUpLog2(rhs_size.width * rhs_bpp, GOB_SIZE_X_SHIFT) && | ||||||
|  |            Common::AlignUpLog2(lhs_size.height, GOB_SIZE_Y_SHIFT) == | ||||||
|  |                Common::AlignUpLog2(rhs_size.height, GOB_SIZE_Y_SHIFT); | ||||||
|  | } | ||||||
|  | 
 | ||||||
| bool IsPitchLinearSameSize(const ImageInfo& lhs, const ImageInfo& rhs, bool strict_size) noexcept { | bool IsPitchLinearSameSize(const ImageInfo& lhs, const ImageInfo& rhs, bool strict_size) noexcept { | ||||||
|     ASSERT(lhs.type == ImageType::Linear); |     ASSERT(lhs.type == ImageType::Linear); | ||||||
|     ASSERT(rhs.type == ImageType::Linear); |     ASSERT(rhs.type == ImageType::Linear); | ||||||
|  | @ -1073,7 +1125,8 @@ std::optional<SubresourceBase> FindSubresource(const ImageInfo& candidate, const | ||||||
|         // Format checking is relaxed, but we still have to check for matching bytes per block.
 |         // Format checking is relaxed, but we still have to check for matching bytes per block.
 | ||||||
|         // This avoids creating a view for blits on UE4 titles where formats with different bytes
 |         // This avoids creating a view for blits on UE4 titles where formats with different bytes
 | ||||||
|         // per block are aliased.
 |         // per block are aliased.
 | ||||||
|         if (BytesPerBlock(existing.format) != BytesPerBlock(candidate.format)) { |         if (BytesPerBlock(existing.format) != BytesPerBlock(candidate.format) && | ||||||
|  |             False(options & RelaxedOptions::FormatBpp)) { | ||||||
|             return std::nullopt; |             return std::nullopt; | ||||||
|         } |         } | ||||||
|     } else { |     } else { | ||||||
|  | @ -1088,11 +1141,9 @@ std::optional<SubresourceBase> FindSubresource(const ImageInfo& candidate, const | ||||||
|     if (existing.type != candidate.type) { |     if (existing.type != candidate.type) { | ||||||
|         return std::nullopt; |         return std::nullopt; | ||||||
|     } |     } | ||||||
|     if (False(options & RelaxedOptions::Samples)) { |     if (False(options & RelaxedOptions::Samples) && existing.num_samples != candidate.num_samples) { | ||||||
|         if (existing.num_samples != candidate.num_samples) { |  | ||||||
|         return std::nullopt; |         return std::nullopt; | ||||||
|     } |     } | ||||||
|     } |  | ||||||
|     if (existing.resources.levels < candidate.resources.levels + base->level) { |     if (existing.resources.levels < candidate.resources.levels + base->level) { | ||||||
|         return std::nullopt; |         return std::nullopt; | ||||||
|     } |     } | ||||||
|  | @ -1101,14 +1152,16 @@ std::optional<SubresourceBase> FindSubresource(const ImageInfo& candidate, const | ||||||
|         if (mip_depth < candidate.size.depth + base->layer) { |         if (mip_depth < candidate.size.depth + base->layer) { | ||||||
|             return std::nullopt; |             return std::nullopt; | ||||||
|         } |         } | ||||||
|     } else { |     } else if (existing.resources.layers < candidate.resources.layers + base->layer) { | ||||||
|         if (existing.resources.layers < candidate.resources.layers + base->layer) { |  | ||||||
|         return std::nullopt; |         return std::nullopt; | ||||||
|     } |     } | ||||||
|     } |  | ||||||
|     const bool strict_size = False(options & RelaxedOptions::Size); |     const bool strict_size = False(options & RelaxedOptions::Size); | ||||||
|     if (!IsBlockLinearSizeCompatible(existing, candidate, base->level, 0, strict_size)) { |     if (!IsBlockLinearSizeCompatible(existing, candidate, base->level, 0, strict_size)) { | ||||||
|  |         if (False(options & RelaxedOptions::FormatBpp)) { | ||||||
|             return std::nullopt; |             return std::nullopt; | ||||||
|  |         } else if (!IsBlockLinearSizeCompatibleBPPRelaxed(existing, candidate, base->level, 0)) { | ||||||
|  |             return std::nullopt; | ||||||
|  |         } | ||||||
|     } |     } | ||||||
|     // TODO: compare block sizes
 |     // TODO: compare block sizes
 | ||||||
|     return base; |     return base; | ||||||
|  | @ -1120,6 +1173,31 @@ bool IsSubresource(const ImageInfo& candidate, const ImageBase& image, GPUVAddr | ||||||
|         .has_value(); |         .has_value(); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
/// Returns true when `candidate`, placed at `candidate_addr`, fits inside `image`.
/// The address must resolve to a subresource base of `image`; the candidate's levels and its
/// layers (or depth, for 3D images) must fit after that base; and the block-linear sizes must
/// pass the BPP-relaxed compatibility check. Any failed check yields false.
|  | bool IsSubCopy(const ImageInfo& candidate, const ImageBase& image, GPUVAddr candidate_addr) { | ||||||
         // Map the candidate address to a (level, layer) base inside the image, if one exists.
|  |     const std::optional<SubresourceBase> base = image.TryFindBase(candidate_addr); | ||||||
|  |     if (!base) { | ||||||
|  |         return false; | ||||||
|  |     } | ||||||
|  |     const ImageInfo& existing = image.info; | ||||||
         // The candidate's levels, counted from the base level, must not exceed the image's levels.
|  |     if (existing.resources.levels < candidate.resources.levels + base->level) { | ||||||
|  |         return false; | ||||||
|  |     } | ||||||
|  |     if (existing.type == ImageType::e3D) { | ||||||
             // For 3D images, base->layer is compared against depth instead of array layers.
             // NOTE(review): the left shift makes this value grow with the level, so the
             // std::max(1U, ...) clamp only matters for a zero depth; a right shift may have
             // been intended - confirm before relying on this bound.
|  |         const u32 mip_depth = std::max(1U, existing.size.depth << base->level); | ||||||
|  |         if (mip_depth < candidate.size.depth + base->layer) { | ||||||
|  |             return false; | ||||||
|  |         } | ||||||
|  |     } else { | ||||||
             // Non-3D images: the candidate's layers, counted from the base layer, must fit.
|  |         if (existing.resources.layers < candidate.resources.layers + base->layer) { | ||||||
|  |             return false; | ||||||
|  |         } | ||||||
|  |     } | ||||||
         // Compare the image at the base level against the candidate at level 0.
|  |     if (!IsBlockLinearSizeCompatibleBPPRelaxed(existing, candidate, base->level, 0)) { | ||||||
|  |         return false; | ||||||
|  |     } | ||||||
|  |     return true; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| void DeduceBlitImages(ImageInfo& dst_info, ImageInfo& src_info, const ImageBase* dst, | void DeduceBlitImages(ImageInfo& dst_info, ImageInfo& src_info, const ImageBase* dst, | ||||||
|                       const ImageBase* src) { |                       const ImageBase* src) { | ||||||
|     const auto original_dst_format = dst_info.format; |     const auto original_dst_format = dst_info.format; | ||||||
|  |  | ||||||
|  | @ -56,6 +56,10 @@ struct OverlapResult { | ||||||
|                                                            SubresourceBase base, u32 up_scale = 1, |                                                            SubresourceBase base, u32 up_scale = 1, | ||||||
|                                                            u32 down_shift = 0); |                                                            u32 down_shift = 0); | ||||||
| 
 | 
 | ||||||
|  | [[nodiscard]] std::vector<ImageCopy> MakeReinterpretImageCopies(const ImageInfo& src, | ||||||
|  |                                                                 u32 up_scale = 1, | ||||||
|  |                                                                 u32 down_shift = 0); | ||||||
|  | 
 | ||||||
| [[nodiscard]] bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config); | [[nodiscard]] bool IsValidEntry(const Tegra::MemoryManager& gpu_memory, const TICEntry& config); | ||||||
| 
 | 
 | ||||||
| [[nodiscard]] std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, | [[nodiscard]] std::vector<BufferImageCopy> UnswizzleImage(Tegra::MemoryManager& gpu_memory, | ||||||
|  | @ -88,6 +92,9 @@ void SwizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const Ima | ||||||
| [[nodiscard]] bool IsPitchLinearSameSize(const ImageInfo& lhs, const ImageInfo& rhs, | [[nodiscard]] bool IsPitchLinearSameSize(const ImageInfo& lhs, const ImageInfo& rhs, | ||||||
|                                          bool strict_size) noexcept; |                                          bool strict_size) noexcept; | ||||||
| 
 | 
 | ||||||
|  | [[nodiscard]] bool IsBlockLinearSizeCompatibleBPPRelaxed(const ImageInfo& lhs, const ImageInfo& rhs, | ||||||
|  |                                                          u32 lhs_level, u32 rhs_level) noexcept; | ||||||
|  | 
 | ||||||
| [[nodiscard]] std::optional<OverlapResult> ResolveOverlap(const ImageInfo& new_info, | [[nodiscard]] std::optional<OverlapResult> ResolveOverlap(const ImageInfo& new_info, | ||||||
|                                                           GPUVAddr gpu_addr, VAddr cpu_addr, |                                                           GPUVAddr gpu_addr, VAddr cpu_addr, | ||||||
|                                                           const ImageBase& overlap, |                                                           const ImageBase& overlap, | ||||||
|  | @ -106,6 +113,9 @@ void SwizzleImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr, const Ima | ||||||
|                                  GPUVAddr candidate_addr, RelaxedOptions options, bool broken_views, |                                  GPUVAddr candidate_addr, RelaxedOptions options, bool broken_views, | ||||||
|                                  bool native_bgr); |                                  bool native_bgr); | ||||||
| 
 | 
 | ||||||
/// Returns true when `candidate` at `candidate_addr` resolves to a subresource base of `image`
/// and its levels, layers (or depth for 3D images) and block-linear size fit within `image`
/// under the BPP-relaxed size check.
|  | [[nodiscard]] bool IsSubCopy(const ImageInfo& candidate, const ImageBase& image, | ||||||
|  |                              GPUVAddr candidate_addr); | ||||||
|  | 
 | ||||||
| void DeduceBlitImages(ImageInfo& dst_info, ImageInfo& src_info, const ImageBase* dst, | void DeduceBlitImages(ImageInfo& dst_info, ImageInfo& src_info, const ImageBase* dst, | ||||||
|                       const ImageBase* src); |                       const ImageBase* src); | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Fernando Sahmkow
				Fernando Sahmkow