forked from eden-emu/eden
		
	Revamp Kepler Memory to use a subengine to manage uploads
This commit is contained in:
		
							parent
							
								
									b5889cbd6f
								
							
						
					
					
						commit
						a91d3fc639
					
				
					 6 changed files with 134 additions and 93 deletions
				
			
		|  | @ -3,6 +3,8 @@ add_library(video_core STATIC | |||
|     dma_pusher.h | ||||
|     debug_utils/debug_utils.cpp | ||||
|     debug_utils/debug_utils.h | ||||
|     engines/engine_upload.cpp | ||||
|     engines/engine_upload.h | ||||
|     engines/fermi_2d.cpp | ||||
|     engines/fermi_2d.h | ||||
|     engines/kepler_compute.cpp | ||||
|  |  | |||
							
								
								
									
										44
									
								
								src/video_core/engines/engine_upload.cpp
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										44
									
								
								src/video_core/engines/engine_upload.cpp
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,44 @@ | |||
| // Copyright 2019 yuzu Emulator Project
 | ||||
| // Licensed under GPLv2 or any later version
 | ||||
| // Refer to the license.txt file included.
 | ||||
| 
 | ||||
| #include "common/assert.h" | ||||
| #include "video_core/engines/engine_upload.h" | ||||
| #include "video_core/memory_manager.h" | ||||
| #include "video_core/textures/decoders.h" | ||||
| 
 | ||||
| namespace Tegra::Engines::Upload { | ||||
| 
 | ||||
| void State::ProcessExec(const bool is_linear) { | ||||
|     write_offset = 0; | ||||
|     copy_size = regs.line_length_in * regs.line_count; | ||||
|     inner_buffer.resize(copy_size); | ||||
|     linear = is_linear; | ||||
| } | ||||
| 
 | ||||
| void State::ProcessData(const u32 data, const bool is_last_call) { | ||||
|     const u32 sub_copy_size = std::min(4U, copy_size - write_offset); | ||||
|     std::memcpy(&inner_buffer[write_offset], &data, sub_copy_size); | ||||
|     write_offset += sub_copy_size; | ||||
|     if (is_last_call) { | ||||
|         const GPUVAddr address{regs.dest.Address()}; | ||||
|         if (linear) { | ||||
|             memory_manager.WriteBlock(address, inner_buffer.data(), copy_size); | ||||
|         } else { | ||||
|             UNIMPLEMENTED_IF(regs.dest.z != 0); | ||||
|             UNIMPLEMENTED_IF(regs.dest.depth != 1); | ||||
|             UNIMPLEMENTED_IF(regs.dest.BlockWidth() != 1); | ||||
|             UNIMPLEMENTED_IF(regs.dest.BlockDepth() != 1); | ||||
|             const std::size_t dst_size = Tegra::Texture::CalculateSize( | ||||
|                 true, 1, regs.dest.width, regs.dest.height, 1, regs.dest.BlockHeight(), 1); | ||||
|             std::vector<u8> tmp_buffer(dst_size); | ||||
|             memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size); | ||||
|             Tegra::Texture::SwizzleKepler(regs.dest.width, regs.dest.height, regs.dest.x, | ||||
|                                           regs.dest.y, regs.dest.BlockHeight(), copy_size, | ||||
|                                           inner_buffer.data(), tmp_buffer.data()); | ||||
|             memory_manager.WriteBlock(address, tmp_buffer.data(), dst_size); | ||||
|         } | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| } // namespace Tegra::Engines::Upload
 | ||||
							
								
								
									
										74
									
								
								src/video_core/engines/engine_upload.h
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										74
									
								
								src/video_core/engines/engine_upload.h
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,74 @@ | |||
| // Copyright 2019 yuzu Emulator Project
 | ||||
| // Licensed under GPLv2 or any later version
 | ||||
| // Refer to the license.txt file included.
 | ||||
| 
 | ||||
| #pragma once | ||||
| 
 | ||||
| #include <cstddef> | ||||
| #include <vector> | ||||
| #include "common/bit_field.h" | ||||
| #include "common/common_funcs.h" | ||||
| #include "common/common_types.h" | ||||
| 
 | ||||
| namespace Tegra { | ||||
| class MemoryManager; | ||||
| } | ||||
| 
 | ||||
| namespace Tegra::Engines::Upload { | ||||
| 
 | ||||
| struct Data { | ||||
|     u32 line_length_in; | ||||
|     u32 line_count; | ||||
| 
 | ||||
|     struct { | ||||
|         u32 address_high; | ||||
|         u32 address_low; | ||||
|         u32 pitch; | ||||
|         union { | ||||
|             BitField<0, 4, u32> block_width; | ||||
|             BitField<4, 4, u32> block_height; | ||||
|             BitField<8, 4, u32> block_depth; | ||||
|         }; | ||||
|         u32 width; | ||||
|         u32 height; | ||||
|         u32 depth; | ||||
|         u32 z; | ||||
|         u32 x; | ||||
|         u32 y; | ||||
| 
 | ||||
|         GPUVAddr Address() const { | ||||
|             return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | address_low); | ||||
|         } | ||||
| 
 | ||||
|         u32 BlockWidth() const { | ||||
|             return 1U << block_width.Value(); | ||||
|         } | ||||
| 
 | ||||
|         u32 BlockHeight() const { | ||||
|             return 1U << block_height.Value(); | ||||
|         } | ||||
| 
 | ||||
|         u32 BlockDepth() const { | ||||
|             return 1U << block_depth.Value(); | ||||
|         } | ||||
|     } dest; | ||||
| }; | ||||
| 
 | ||||
| class State { | ||||
| public: | ||||
|     State(MemoryManager& memory_manager, Data& regs) : memory_manager(memory_manager), regs(regs) {} | ||||
|     ~State() = default; | ||||
| 
 | ||||
|     void ProcessExec(const bool is_linear); | ||||
|     void ProcessData(const u32 data, const bool is_last_call); | ||||
| 
 | ||||
| private: | ||||
|     u32 write_offset = 0; | ||||
|     u32 copy_size = 0; | ||||
|     std::vector<u8> inner_buffer; | ||||
|     bool linear; | ||||
|     Data& regs; | ||||
|     MemoryManager& memory_manager; | ||||
| }; | ||||
| 
 | ||||
| } // namespace Tegra::Engines::Upload
 | ||||
|  | @ -14,9 +14,8 @@ | |||
| 
 | ||||
| namespace Tegra::Engines { | ||||
| 
 | ||||
| KeplerMemory::KeplerMemory(Core::System& system, VideoCore::RasterizerInterface& rasterizer, | ||||
|                            MemoryManager& memory_manager) | ||||
|     : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager} {} | ||||
| KeplerMemory::KeplerMemory(Core::System& system, MemoryManager& memory_manager) | ||||
|     : system{system}, memory_manager{memory_manager}, upload_state{memory_manager, regs.upload} {} | ||||
| 
 | ||||
| KeplerMemory::~KeplerMemory() = default; | ||||
| 
 | ||||
|  | @ -28,46 +27,18 @@ void KeplerMemory::CallMethod(const GPU::MethodCall& method_call) { | |||
| 
 | ||||
|     switch (method_call.method) { | ||||
|     case KEPLERMEMORY_REG_INDEX(exec): { | ||||
|         ProcessExec(); | ||||
|         upload_state.ProcessExec(regs.exec.linear != 0); | ||||
|         break; | ||||
|     } | ||||
|     case KEPLERMEMORY_REG_INDEX(data): { | ||||
|         ProcessData(method_call.argument, method_call.IsLastCall()); | ||||
|         bool is_last_call = method_call.IsLastCall(); | ||||
|         upload_state.ProcessData(method_call.argument, is_last_call); | ||||
|         if (is_last_call) { | ||||
|             system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); | ||||
|         } | ||||
|         break; | ||||
|     } | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void KeplerMemory::ProcessExec() { | ||||
|     state.write_offset = 0; | ||||
|     state.copy_size = regs.line_length_in * regs.line_count; | ||||
|     state.inner_buffer.resize(state.copy_size); | ||||
| } | ||||
| 
 | ||||
| void KeplerMemory::ProcessData(u32 data, bool is_last_call) { | ||||
|     const u32 sub_copy_size = std::min(4U, state.copy_size - state.write_offset); | ||||
|     std::memcpy(&state.inner_buffer[state.write_offset], &regs.data, sub_copy_size); | ||||
|     state.write_offset += sub_copy_size; | ||||
|     if (is_last_call) { | ||||
|         const GPUVAddr address{regs.dest.Address()}; | ||||
|         if (regs.exec.linear != 0) { | ||||
|             memory_manager.WriteBlock(address, state.inner_buffer.data(), state.copy_size); | ||||
|         } else { | ||||
|             UNIMPLEMENTED_IF(regs.dest.z != 0); | ||||
|             UNIMPLEMENTED_IF(regs.dest.depth != 1); | ||||
|             UNIMPLEMENTED_IF(regs.dest.BlockWidth() != 1); | ||||
|             UNIMPLEMENTED_IF(regs.dest.BlockDepth() != 1); | ||||
|             const std::size_t dst_size = Tegra::Texture::CalculateSize( | ||||
|                 true, 1, regs.dest.width, regs.dest.height, 1, regs.dest.BlockHeight(), 1); | ||||
|             std::vector<u8> tmp_buffer(dst_size); | ||||
|             memory_manager.ReadBlock(address, tmp_buffer.data(), dst_size); | ||||
|             Tegra::Texture::SwizzleKepler(regs.dest.width, regs.dest.height, regs.dest.x, | ||||
|                                           regs.dest.y, regs.dest.BlockHeight(), state.copy_size, | ||||
|                                           state.inner_buffer.data(), tmp_buffer.data()); | ||||
|             memory_manager.WriteBlock(address, tmp_buffer.data(), dst_size); | ||||
|         } | ||||
|         system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite(); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| } // namespace Tegra::Engines
 | ||||
|  |  | |||
|  | @ -10,6 +10,7 @@ | |||
| #include "common/bit_field.h" | ||||
| #include "common/common_funcs.h" | ||||
| #include "common/common_types.h" | ||||
| #include "video_core/engines/engine_upload.h" | ||||
| #include "video_core/gpu.h" | ||||
| 
 | ||||
| namespace Core { | ||||
|  | @ -20,10 +21,6 @@ namespace Tegra { | |||
| class MemoryManager; | ||||
| } | ||||
| 
 | ||||
| namespace VideoCore { | ||||
| class RasterizerInterface; | ||||
| } | ||||
| 
 | ||||
| namespace Tegra::Engines { | ||||
| 
 | ||||
| #define KEPLERMEMORY_REG_INDEX(field_name)                                                         \ | ||||
|  | @ -31,8 +28,7 @@ namespace Tegra::Engines { | |||
| 
 | ||||
| class KeplerMemory final { | ||||
| public: | ||||
|     KeplerMemory(Core::System& system, VideoCore::RasterizerInterface& rasterizer, | ||||
|                  MemoryManager& memory_manager); | ||||
|     KeplerMemory(Core::System& system, MemoryManager& memory_manager); | ||||
|     ~KeplerMemory(); | ||||
| 
 | ||||
|     /// Write the value to the register identified by method.
 | ||||
|  | @ -45,42 +41,7 @@ public: | |||
|             struct { | ||||
|                 INSERT_PADDING_WORDS(0x60); | ||||
| 
 | ||||
|                 u32 line_length_in; | ||||
|                 u32 line_count; | ||||
| 
 | ||||
|                 struct { | ||||
|                     u32 address_high; | ||||
|                     u32 address_low; | ||||
|                     u32 pitch; | ||||
|                     union { | ||||
|                         BitField<0, 4, u32> block_width; | ||||
|                         BitField<4, 4, u32> block_height; | ||||
|                         BitField<8, 4, u32> block_depth; | ||||
|                     }; | ||||
|                     u32 width; | ||||
|                     u32 height; | ||||
|                     u32 depth; | ||||
|                     u32 z; | ||||
|                     u32 x; | ||||
|                     u32 y; | ||||
| 
 | ||||
|                     GPUVAddr Address() const { | ||||
|                         return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) | | ||||
|                                                      address_low); | ||||
|                     } | ||||
| 
 | ||||
|                     u32 BlockWidth() const { | ||||
|                         return 1U << block_width.Value(); | ||||
|                     } | ||||
| 
 | ||||
|                     u32 BlockHeight() const { | ||||
|                         return 1U << block_height.Value(); | ||||
|                     } | ||||
| 
 | ||||
|                     u32 BlockDepth() const { | ||||
|                         return 1U << block_depth.Value(); | ||||
|                     } | ||||
|                 } dest; | ||||
|                 Upload::Data upload; | ||||
| 
 | ||||
|                 struct { | ||||
|                     union { | ||||
|  | @ -96,28 +57,17 @@ public: | |||
|         }; | ||||
|     } regs{}; | ||||
| 
 | ||||
|     struct { | ||||
|         u32 write_offset = 0; | ||||
|         u32 copy_size = 0; | ||||
|         std::vector<u8> inner_buffer; | ||||
|     } state{}; | ||||
| 
 | ||||
| private: | ||||
|     Core::System& system; | ||||
|     VideoCore::RasterizerInterface& rasterizer; | ||||
|     MemoryManager& memory_manager; | ||||
| 
 | ||||
|     void ProcessExec(); | ||||
|     void ProcessData(u32 data, bool is_last_call); | ||||
|     Upload::State upload_state; | ||||
| }; | ||||
| 
 | ||||
| #define ASSERT_REG_POSITION(field_name, position)                                                  \ | ||||
|     static_assert(offsetof(KeplerMemory::Regs, field_name) == position * 4,                        \ | ||||
|                   "Field " #field_name " has invalid position") | ||||
| 
 | ||||
| ASSERT_REG_POSITION(line_length_in, 0x60); | ||||
| ASSERT_REG_POSITION(line_count, 0x61); | ||||
| ASSERT_REG_POSITION(dest, 0x62); | ||||
| ASSERT_REG_POSITION(upload, 0x60); | ||||
| ASSERT_REG_POSITION(exec, 0x6C); | ||||
| ASSERT_REG_POSITION(data, 0x6D); | ||||
| #undef ASSERT_REG_POSITION | ||||
|  |  | |||
|  | @ -37,7 +37,7 @@ GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer) : renderer{ren | |||
|     fermi_2d = std::make_unique<Engines::Fermi2D>(rasterizer, *memory_manager); | ||||
|     kepler_compute = std::make_unique<Engines::KeplerCompute>(*memory_manager); | ||||
|     maxwell_dma = std::make_unique<Engines::MaxwellDMA>(system, rasterizer, *memory_manager); | ||||
|     kepler_memory = std::make_unique<Engines::KeplerMemory>(system, rasterizer, *memory_manager); | ||||
|     kepler_memory = std::make_unique<Engines::KeplerMemory>(system, *memory_manager); | ||||
| } | ||||
| 
 | ||||
| GPU::~GPU() = default; | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Fernando Sahmkow
						Fernando Sahmkow