forked from eden-emu/eden
		
	MaxwellDMA: Optimize micro copies.
This commit is contained in:
		
							parent
							
								
									b87422a86f
								
							
						
					
					
						commit
						37c690576f
					
				
					 3 changed files with 57 additions and 0 deletions
				
			
		|  | @ -90,7 +90,47 @@ void MaxwellDMA::HandleCopy() { | ||||||
|     ASSERT(regs.exec.enable_2d == 1); |     ASSERT(regs.exec.enable_2d == 1); | ||||||
| 
 | 
 | ||||||
|     if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) { |     if (regs.exec.is_dst_linear && !regs.exec.is_src_linear) { | ||||||
|  | 
 | ||||||
|         ASSERT(regs.src_params.BlockDepth() == 0); |         ASSERT(regs.src_params.BlockDepth() == 0); | ||||||
|  |         // Optimized path for micro copies.
 | ||||||
|  |         if (regs.dst_pitch * regs.y_count < Texture::GetGOBSize() && regs.dst_pitch <= 64) { | ||||||
|  |             const u32 bytes_per_pixel = regs.dst_pitch / regs.x_count; | ||||||
|  |             const std::size_t src_size = Texture::GetGOBSize(); | ||||||
|  |             const std::size_t dst_size = regs.dst_pitch * regs.y_count; | ||||||
|  |             u32 pos_x = regs.src_params.pos_x; | ||||||
|  |             u32 pos_y = regs.src_params.pos_y; | ||||||
|  |             const u64 offset = | ||||||
|  |                 Texture::GetGOBOffset(regs.src_params.size_x, regs.src_params.size_y, pos_x, pos_y, | ||||||
|  |                                       regs.src_params.BlockDepth(), bytes_per_pixel); | ||||||
|  |             const u32 x_in_gob = 64 / bytes_per_pixel; | ||||||
|  |             pos_x = pos_x % x_in_gob; | ||||||
|  |             pos_y = pos_y % 8; | ||||||
|  | 
 | ||||||
|  |             if (read_buffer.size() < src_size) { | ||||||
|  |                 read_buffer.resize(src_size); | ||||||
|  |             } | ||||||
|  | 
 | ||||||
|  |             if (write_buffer.size() < dst_size) { | ||||||
|  |                 write_buffer.resize(dst_size); | ||||||
|  |             } | ||||||
|  | 
 | ||||||
|  |             if (Settings::IsGPULevelExtreme()) { | ||||||
|  |                 memory_manager.ReadBlock(source + offset, read_buffer.data(), src_size); | ||||||
|  |                 memory_manager.ReadBlock(dest, write_buffer.data(), dst_size); | ||||||
|  |             } else { | ||||||
|  |                 memory_manager.ReadBlockUnsafe(source + offset, read_buffer.data(), src_size); | ||||||
|  |                 memory_manager.ReadBlockUnsafe(dest, write_buffer.data(), dst_size); | ||||||
|  |             } | ||||||
|  | 
 | ||||||
|  |             Texture::UnswizzleSubrect(regs.x_count, regs.y_count, regs.dst_pitch, | ||||||
|  |                                       regs.src_params.size_x, bytes_per_pixel, read_buffer.data(), | ||||||
|  |                                       write_buffer.data(), regs.src_params.BlockHeight(), pos_x, | ||||||
|  |                                       pos_y); | ||||||
|  | 
 | ||||||
|  |             memory_manager.WriteBlock(dest, write_buffer.data(), dst_size); | ||||||
|  | 
 | ||||||
|  |             return; | ||||||
|  |         } | ||||||
|         // If the input is tiled and the output is linear, deswizzle the input and copy it over.
 |         // If the input is tiled and the output is linear, deswizzle the input and copy it over.
 | ||||||
|         const u32 bytes_per_pixel = regs.dst_pitch / regs.x_count; |         const u32 bytes_per_pixel = regs.dst_pitch / regs.x_count; | ||||||
|         const std::size_t src_size = Texture::CalculateSize( |         const std::size_t src_size = Texture::CalculateSize( | ||||||
|  |  | ||||||
|  | @ -382,4 +382,18 @@ std::size_t CalculateSize(bool tiled, u32 bytes_per_pixel, u32 width, u32 height | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | u64 GetGOBOffset(u32 width, u32 height, u32 dst_x, u32 dst_y, u32 block_height, | ||||||
|  |                  u32 bytes_per_pixel) { | ||||||
|  |     auto div_ceil = [](const u32 x, const u32 y) { return ((x + y - 1) / y); }; | ||||||
|  |     const u32 gobs_in_block = 1 << block_height; | ||||||
|  |     const u32 y_blocks = gob_size_y << block_height; | ||||||
|  |     const u32 x_per_gob = gob_size_x / bytes_per_pixel; | ||||||
|  |     const u32 x_blocks = div_ceil(width, x_per_gob); | ||||||
|  |     const u32 block_size = gob_size * gobs_in_block; | ||||||
|  |     const u32 stride = block_size * x_blocks; | ||||||
|  |     const u32 base = (dst_y / y_blocks) * stride + (dst_x / x_per_gob) * block_size; | ||||||
|  |     const u32 relative_y = dst_y % y_blocks; | ||||||
|  |     return base + (relative_y / gob_size_y) * gob_size; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| } // namespace Tegra::Texture
 | } // namespace Tegra::Texture
 | ||||||
|  |  | ||||||
|  | @ -59,4 +59,7 @@ void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 | ||||||
| void SwizzleKepler(u32 width, u32 height, u32 dst_x, u32 dst_y, u32 block_height, | void SwizzleKepler(u32 width, u32 height, u32 dst_x, u32 dst_y, u32 block_height, | ||||||
|                    std::size_t copy_size, const u8* source_data, u8* swizzle_data); |                    std::size_t copy_size, const u8* source_data, u8* swizzle_data); | ||||||
| 
 | 
 | ||||||
/// Returns the offset of the GOB containing (dst_x, dst_y) for a surface of the given width,
/// block height (log2 of GOBs per block) and bytes per pixel. The height argument is accepted
/// but not used by the implementation.
|  | u64 GetGOBOffset(u32 width, u32 height, u32 dst_x, u32 dst_y, u32 block_height, | ||||||
|  |                  u32 bytes_per_pixel); | ||||||
|  | 
 | ||||||
| } // namespace Tegra::Texture
 | } // namespace Tegra::Texture
 | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Fernando Sahmkow
						Fernando Sahmkow