forked from eden-emu/eden
		
	Merge pull request #2400 from FernandoS27/corret-kepler-mem
Implement Kepler Memory on both Linear and BlockLinear.
This commit is contained in:
		
						commit
						1bf201c307
					
				
					 4 changed files with 81 additions and 17 deletions
				
			
		|  | @ -10,6 +10,7 @@ | |||
| #include "video_core/memory_manager.h" | ||||
| #include "video_core/rasterizer_interface.h" | ||||
| #include "video_core/renderer_base.h" | ||||
| #include "video_core/textures/decoders.h" | ||||
| 
 | ||||
| namespace Tegra::Engines { | ||||
| 
 | ||||
|  | @ -27,30 +28,46 @@ void KeplerMemory::CallMethod(const GPU::MethodCall& method_call) { | |||
| 
 | ||||
|     switch (method_call.method) { | ||||
|     case KEPLERMEMORY_REG_INDEX(exec): { | ||||
|         state.write_offset = 0; | ||||
|         ProcessExec(); | ||||
|         break; | ||||
|     } | ||||
|     case KEPLERMEMORY_REG_INDEX(data): { | ||||
|         ProcessData(method_call.argument); | ||||
|         ProcessData(method_call.argument, method_call.IsLastCall()); | ||||
|         break; | ||||
|     } | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void KeplerMemory::ProcessData(u32 data) { | ||||
|     ASSERT_MSG(regs.exec.linear, "Non-linear uploads are not supported"); | ||||
|     ASSERT(regs.dest.x == 0 && regs.dest.y == 0 && regs.dest.z == 0); | ||||
| 
 | ||||
|     // We have to invalidate the destination region to evict any outdated surfaces from the cache.
 | ||||
|     // We do this before actually writing the new data because the destination address might
 | ||||
|     // contain a dirty surface that will have to be written back to memory.
 | ||||
|     const GPUVAddr address{regs.dest.Address() + state.write_offset * sizeof(u32)}; | ||||
|     rasterizer.InvalidateRegion(ToCacheAddr(memory_manager.GetPointer(address)), sizeof(u32)); | ||||
|     memory_manager.Write<u32>(address, data); | ||||
| void KeplerMemory::ProcessExec() { | ||||
|     state.write_offset = 0; | ||||
|     state.copy_size = regs.line_length_in * regs.line_count; | ||||
|     state.inner_buffer.resize(state.copy_size); | ||||
| } | ||||
| 
 | ||||
| void KeplerMemory::ProcessData(u32 data, bool is_last_call) { | ||||
|     const u32 sub_copy_size = std::min(4U, state.copy_size - state.write_offset); | ||||
|     std::memcpy(&state.inner_buffer[state.write_offset], &regs.data, sub_copy_size); | ||||
|     state.write_offset += sub_copy_size; | ||||
|     if (is_last_call) { | ||||
|         const GPUVAddr address{regs.dest.Address()}; | ||||
|         if (regs.exec.linear != 0) { | ||||
|             memory_manager.WriteBlock(address, state.inner_buffer.data(), state.copy_size); | ||||
|         } else { | ||||
|             UNIMPLEMENTED_IF(regs.dest.z != 0); | ||||
|             UNIMPLEMENTED_IF(regs.dest.depth != 1); | ||||
|             UNIMPLEMENTED_IF(regs.dest.BlockWidth() != 1); | ||||
|             UNIMPLEMENTED_IF(regs.dest.BlockDepth() != 1); | ||||
|             const std::size_t dst_size = Tegra::Texture::CalculateSize( | ||||
|                 true, 1, regs.dest.width, regs.dest.height, 1, regs.dest.BlockHeight(), 1); | ||||
|             std::vector<u8> tmp_buffer(dst_size); | ||||
|             memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size); | ||||
|             Tegra::Texture::SwizzleKepler(regs.dest.width, regs.dest.height, regs.dest.x, | ||||
|                                           regs.dest.y, regs.dest.BlockHeight(), state.copy_size, | ||||
|                                           state.inner_buffer.data(), tmp_buffer.data()); | ||||
|             memory_manager.WriteBlock(address, tmp_buffer.data(), dst_size); | ||||
|         } | ||||
|         system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); | ||||
| 
 | ||||
|     state.write_offset++; | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| } // namespace Tegra::Engines
 | ||||
|  |  | |||
|  | @ -6,6 +6,7 @@ | |||
| 
 | ||||
| #include <array> | ||||
| #include <cstddef> | ||||
| #include <vector> | ||||
| #include "common/bit_field.h" | ||||
| #include "common/common_funcs.h" | ||||
| #include "common/common_types.h" | ||||
|  | @ -51,7 +52,11 @@ public: | |||
|                     u32 address_high; | ||||
|                     u32 address_low; | ||||
|                     u32 pitch; | ||||
|                     u32 block_dimensions; | ||||
|                     union { | ||||
|                         BitField<0, 4, u32> block_width; | ||||
|                         BitField<4, 4, u32> block_height; | ||||
|                         BitField<8, 4, u32> block_depth; | ||||
|                     }; | ||||
|                     u32 width; | ||||
|                     u32 height; | ||||
|                     u32 depth; | ||||
|  | @ -63,6 +68,18 @@ public: | |||
|                         return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | | ||||
|                                                      address_low); | ||||
|                     } | ||||
| 
 | ||||
|                     u32 BlockWidth() const { | ||||
|                         return 1U << block_width.Value(); | ||||
|                     } | ||||
| 
 | ||||
|                     u32 BlockHeight() const { | ||||
|                         return 1U << block_height.Value(); | ||||
|                     } | ||||
| 
 | ||||
|                     u32 BlockDepth() const { | ||||
|                         return 1U << block_depth.Value(); | ||||
|                     } | ||||
|                 } dest; | ||||
| 
 | ||||
|                 struct { | ||||
|  | @ -81,6 +98,8 @@ public: | |||
| 
 | ||||
|     struct { | ||||
|         u32 write_offset = 0; | ||||
|         u32 copy_size = 0; | ||||
|         std::vector<u8> inner_buffer; | ||||
|     } state{}; | ||||
| 
 | ||||
| private: | ||||
|  | @ -88,7 +107,8 @@ private: | |||
|     VideoCore::RasterizerInterface& rasterizer; | ||||
|     MemoryManager& memory_manager; | ||||
| 
 | ||||
|     void ProcessData(u32 data); | ||||
|     void ProcessExec(); | ||||
|     void ProcessData(u32 data, bool is_last_call); | ||||
| }; | ||||
| 
 | ||||
| #define ASSERT_REG_POSITION(field_name, position)                                                  \ | ||||
|  |  | |||
|  | @ -288,6 +288,29 @@ void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 | |||
|     } | ||||
| } | ||||
| 
 | ||||
| void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32 dst_y, | ||||
|                    const u32 block_height, const std::size_t copy_size, const u8* source_data, | ||||
|                    u8* swizzle_data) { | ||||
|     const u32 image_width_in_gobs{(width + gob_size_x - 1) / gob_size_x}; | ||||
|     std::size_t count = 0; | ||||
|     for (std::size_t y = dst_y; y < height && count < copy_size; ++y) { | ||||
|         const std::size_t gob_address_y = | ||||
|             (y / (gob_size_y * block_height)) * gob_size * block_height * image_width_in_gobs + | ||||
|             ((y % (gob_size_y * block_height)) / gob_size_y) * gob_size; | ||||
|         const auto& table = legacy_swizzle_table[y % gob_size_y]; | ||||
|         for (std::size_t x = dst_x; x < width && count < copy_size; ++x) { | ||||
|             const std::size_t gob_address = | ||||
|                 gob_address_y + (x / gob_size_x) * gob_size * block_height; | ||||
|             const std::size_t swizzled_offset = gob_address + table[x % gob_size_x]; | ||||
|             const u8* source_line = source_data + count; | ||||
|             u8* dest_addr = swizzle_data + swizzled_offset; | ||||
|             count++; | ||||
| 
 | ||||
|             std::memcpy(dest_addr, source_line, 1); | ||||
|         } | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| std::vector<u8> DecodeTexture(const std::vector<u8>& texture_data, TextureFormat format, u32 width, | ||||
|                               u32 height) { | ||||
|     std::vector<u8> rgba_data; | ||||
|  |  | |||
|  | @ -51,4 +51,8 @@ void UnswizzleSubrect(u32 subrect_width, u32 subrect_height, u32 dest_pitch, u32 | |||
|                       u32 bytes_per_pixel, u8* swizzled_data, u8* unswizzled_data, u32 block_height, | ||||
|                       u32 offset_x, u32 offset_y); | ||||
| 
 | ||||
| void SwizzleKepler(const u32 width, const u32 height, const u32 dst_x, const u32 dst_y, | ||||
|                    const u32 block_height, const std::size_t copy_size, const u8* source_data, | ||||
|                    u8* swizzle_data); | ||||
| 
 | ||||
| } // namespace Tegra::Texture
 | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 bunnei
						bunnei