Refactor: Extract VertexLoader from command_processor.cpp.
Preparation for a similar concept to Dolphin or PPSSPP. These can be JIT-ed and cached.
This commit is contained in:
		
							parent
							
								
									03f90e7dfd
								
							
						
					
					
						commit
						c1ee661492
					
				
					 5 changed files with 185 additions and 125 deletions
				
			
		|  | @ -16,6 +16,7 @@ set(SRCS | |||
|             shader/shader_interpreter.cpp | ||||
|             swrasterizer.cpp | ||||
|             utils.cpp | ||||
|             vertex_loader.cpp | ||||
|             video_core.cpp | ||||
|             ) | ||||
| 
 | ||||
|  | @ -43,6 +44,7 @@ set(HEADERS | |||
|             shader/shader_interpreter.h | ||||
|             swrasterizer.h | ||||
|             utils.h | ||||
|             vertex_loader.h | ||||
|             video_core.h | ||||
|             ) | ||||
| 
 | ||||
|  |  | |||
|  | @ -22,6 +22,7 @@ | |||
| #include "video_core/video_core.h" | ||||
| #include "video_core/debug_utils/debug_utils.h" | ||||
| #include "video_core/shader/shader_interpreter.h" | ||||
| #include "video_core/vertex_loader.h" | ||||
| 
 | ||||
| namespace Pica { | ||||
| 
 | ||||
|  | @ -192,62 +193,19 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { | |||
| #if PICA_LOG_TEV | ||||
|             DebugUtils::DumpTevStageConfig(regs.GetTevStages()); | ||||
| #endif | ||||
| 
 | ||||
|             if (g_debug_context) | ||||
|                 g_debug_context->OnEvent(DebugContext::Event::IncomingPrimitiveBatch, nullptr); | ||||
| 
 | ||||
|             const auto& attribute_config = regs.vertex_attributes; | ||||
|             const u32 base_address = attribute_config.GetPhysicalBaseAddress(); | ||||
|             int num_total_attributes = attribute_config.GetNumTotalAttributes(); | ||||
| 
 | ||||
|             // Information about internal vertex attributes
 | ||||
|             u32 vertex_attribute_sources[16]; | ||||
|             boost::fill(vertex_attribute_sources, 0xdeadbeef); | ||||
|             u32 vertex_attribute_strides[16] = {}; | ||||
|             Regs::VertexAttributeFormat vertex_attribute_formats[16] = {}; | ||||
| 
 | ||||
|             u32 vertex_attribute_elements[16] = {}; | ||||
|             u32 vertex_attribute_element_size[16] = {}; | ||||
|             bool vertex_attribute_default[16] = {}; | ||||
|             // Setup attribute data from loaders
 | ||||
|             for (int loader = 0; loader < 12; ++loader) { | ||||
|                 const auto& loader_config = attribute_config.attribute_loaders[loader]; | ||||
| 
 | ||||
|                 u32 offset = 0; | ||||
| 
 | ||||
|                 // TODO: What happens if a loader overwrites a previous one's data?
 | ||||
|                 for (unsigned component = 0; component < loader_config.component_count; ++component) { | ||||
|                     if (component >= 12) { | ||||
|                         LOG_ERROR(HW_GPU, "Overflow in the vertex attribute loader %u trying to load component %u", loader, component); | ||||
|                         continue; | ||||
|                     } | ||||
| 
 | ||||
|                     u32 attribute_index = loader_config.GetComponent(component); | ||||
|                     if (attribute_index < 12) { | ||||
|                         int element_size = attribute_config.GetElementSizeInBytes(attribute_index); | ||||
|                         offset = Common::AlignUp(offset, element_size); | ||||
|                         vertex_attribute_sources[attribute_index] = base_address + loader_config.data_offset + offset; | ||||
|                         vertex_attribute_strides[attribute_index] = static_cast<u32>(loader_config.byte_count); | ||||
|                         vertex_attribute_formats[attribute_index] = attribute_config.GetFormat(attribute_index); | ||||
|                         vertex_attribute_elements[attribute_index] = attribute_config.GetNumElements(attribute_index); | ||||
|                         vertex_attribute_element_size[attribute_index] = element_size; | ||||
|                         vertex_attribute_default[attribute_index] = attribute_config.IsDefaultAttribute(attribute_index); | ||||
|                         offset += attribute_config.GetStride(attribute_index); | ||||
|                     } else if (attribute_index < 16) { | ||||
|                         // Attribute ids 12, 13, 14 and 15 signify 4, 8, 12 and 16-byte paddings, respectively
 | ||||
|                         offset = Common::AlignUp(offset, 4); | ||||
|                         offset += (attribute_index - 11) * 4; | ||||
|                     } else { | ||||
|                         UNREACHABLE(); // This is truly unreachable due to the number of bits for each component
 | ||||
|                     } | ||||
|                 } | ||||
|             } | ||||
|             // Processes information about internal vertex attributes to figure out how a vertex is loaded.
 | ||||
|             // Later, these can be compiled and cached.
 | ||||
|             VertexLoader loader; | ||||
|             loader.Setup(regs); | ||||
| 
 | ||||
|             // Load vertices
 | ||||
|             bool is_indexed = (id == PICA_REG_INDEX(trigger_draw_indexed)); | ||||
| 
 | ||||
|             const auto& index_info = regs.index_array; | ||||
|             const u8* index_address_8 = Memory::GetPhysicalPointer(base_address + index_info.offset); | ||||
|             const u8* index_address_8 = Memory::GetPhysicalPointer(loader.GetPhysicalBaseAddress() + index_info.offset); | ||||
|             const u16* index_address_16 = reinterpret_cast<const u16*>(index_address_8); | ||||
|             bool index_u16 = index_info.format != 0; | ||||
| 
 | ||||
|  | @ -265,32 +223,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { | |||
|                 } | ||||
|             } | ||||
| 
 | ||||
|             class { | ||||
|                 /// Combine overlapping and close ranges
 | ||||
|                 void SimplifyRanges() { | ||||
|                     for (auto it = ranges.begin(); it != ranges.end(); ++it) { | ||||
|                         // NOTE: We add 32 to the range end address to make sure "close" ranges are combined, too
 | ||||
|                         auto it2 = std::next(it); | ||||
|                         while (it2 != ranges.end() && it->first + it->second + 32 >= it2->first) { | ||||
|                             it->second = std::max(it->second, it2->first + it2->second - it->first); | ||||
|                             it2 = ranges.erase(it2); | ||||
|                         } | ||||
|                     } | ||||
|                 } | ||||
| 
 | ||||
|             public: | ||||
|                 /// Record a particular memory access in the list
 | ||||
|                 void AddAccess(u32 paddr, u32 size) { | ||||
|                     // Create new range or extend existing one
 | ||||
|                     ranges[paddr] = std::max(ranges[paddr], size); | ||||
| 
 | ||||
|                     // Simplify ranges...
 | ||||
|                     SimplifyRanges(); | ||||
|                 } | ||||
| 
 | ||||
|                 /// Map of accessed ranges (mapping start address to range size)
 | ||||
|                 std::map<u32, u32> ranges; | ||||
|             } memory_accesses; | ||||
|             MemoryAccesses memory_accesses; | ||||
| 
 | ||||
|             // Simple circular-replacement vertex cache
 | ||||
|             // The size has been tuned for optimal balance between hit-rate and the cost of lookup
 | ||||
|  | @ -319,7 +252,7 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { | |||
|                 if (is_indexed) { | ||||
|                     if (g_debug_context && Pica::g_debug_context->recorder) { | ||||
|                         int size = index_u16 ? 2 : 1; | ||||
|                         memory_accesses.AddAccess(base_address + index_info.offset + size * index, size); | ||||
|                         memory_accesses.AddAccess(loader.GetPhysicalBaseAddress() + index_info.offset + size * index, size); | ||||
|                     } | ||||
| 
 | ||||
|                     for (unsigned int i = 0; i < VERTEX_CACHE_SIZE; ++i) { | ||||
|  | @ -334,60 +267,13 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { | |||
|                 if (!vertex_cache_hit) { | ||||
|                     // Initialize data for the current vertex
 | ||||
|                     Shader::InputVertex input; | ||||
| 
 | ||||
|                     for (int i = 0; i < num_total_attributes; ++i) { | ||||
|                         if (vertex_attribute_elements[i] != 0) { | ||||
|                             // Default attribute values set if array elements have < 4 components. This
 | ||||
|                             // is *not* carried over from the default attribute settings even if they're
 | ||||
|                             // enabled for this attribute.
 | ||||
|                             static const float24 zero = float24::FromFloat32(0.0f); | ||||
|                             static const float24 one = float24::FromFloat32(1.0f); | ||||
|                             input.attr[i] = Math::Vec4<float24>(zero, zero, zero, one); | ||||
| 
 | ||||
|                             // Load per-vertex data from the loader arrays
 | ||||
|                             for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { | ||||
|                                 u32 source_addr = vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i]; | ||||
|                                 const u8* srcdata = Memory::GetPhysicalPointer(source_addr); | ||||
| 
 | ||||
|                                 if (g_debug_context && Pica::g_debug_context->recorder) { | ||||
|                                     memory_accesses.AddAccess(source_addr, | ||||
|                                         (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::FLOAT) ? 4 | ||||
|                                         : (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? 2 : 1); | ||||
|                                 } | ||||
| 
 | ||||
|                                 const float srcval = | ||||
|                                     (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::BYTE)  ? *reinterpret_cast<const s8*>(srcdata) : | ||||
|                                     (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::UBYTE) ? *reinterpret_cast<const u8*>(srcdata) : | ||||
|                                     (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? *reinterpret_cast<const s16*>(srcdata) : | ||||
|                                     *reinterpret_cast<const float*>(srcdata); | ||||
| 
 | ||||
|                                 input.attr[i][comp] = float24::FromFloat32(srcval); | ||||
|                                 LOG_TRACE(HW_GPU, "Loaded component %x of attribute %x for vertex %x (index %x) from 0x%08x + 0x%08x + 0x%04x: %f", | ||||
|                                     comp, i, vertex, index, | ||||
|                                     base_address, | ||||
|                                     vertex_attribute_sources[i] - base_address, | ||||
|                                     vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i], | ||||
|                                     input.attr[i][comp].ToFloat32()); | ||||
|                             } | ||||
|                         } else if (vertex_attribute_default[i]) { | ||||
|                             // Load the default attribute if we're configured to do so
 | ||||
|                             input.attr[i] = g_state.vs.default_attributes[i]; | ||||
|                             LOG_TRACE(HW_GPU, "Loaded default attribute %x for vertex %x (index %x): (%f, %f, %f, %f)", | ||||
|                                       i, vertex, index, | ||||
|                                       input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(), | ||||
|                                       input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32()); | ||||
|                         } else { | ||||
|                             // TODO(yuriks): In this case, no data gets loaded and the vertex
 | ||||
|                             // remains with the last value it had. This isn't currently maintained
 | ||||
|                             // as global state, however, and so won't work in Citra yet.
 | ||||
|                         } | ||||
|                     } | ||||
|                     loader.LoadVertex(index, vertex, input, memory_accesses); | ||||
| 
 | ||||
|                     if (g_debug_context) | ||||
|                         g_debug_context->OnEvent(DebugContext::Event::VertexLoaded, (void*)&input); | ||||
| 
 | ||||
|                     // Send to vertex shader
 | ||||
|                     output = Shader::Run(shader_unit, input, num_total_attributes); | ||||
|                     output = Shader::Run(shader_unit, input, loader.GetNumTotalAttributes()); | ||||
| 
 | ||||
|                     if (is_indexed) { | ||||
|                         vertex_cache[vertex_cache_pos] = output; | ||||
|  |  | |||
|  | @ -25,7 +25,7 @@ namespace Pica { | |||
| namespace Shader { | ||||
| 
 | ||||
| struct InputVertex { | ||||
|     Math::Vec4<float24> attr[16]; | ||||
|     alignas(16) Math::Vec4<float24> attr[16]; | ||||
| }; | ||||
| 
 | ||||
| struct OutputVertex { | ||||
|  |  | |||
							
								
								
									
										119
									
								
								src/video_core/vertex_loader.cpp
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										119
									
								
								src/video_core/vertex_loader.cpp
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,119 @@ | |||
#include <cmath>
#include <cstring>
#include <string>
| 
 | ||||
| #include "boost/range/algorithm/fill.hpp" | ||||
| 
 | ||||
| #include "common/assert.h" | ||||
| #include "common/alignment.h" | ||||
| #include "common/bit_field.h" | ||||
| #include "common/common_funcs.h" | ||||
| #include "common/common_types.h" | ||||
| #include "common/logging/log.h" | ||||
| 
 | ||||
| #include "core/memory.h" | ||||
| 
 | ||||
| #include "debug_utils/debug_utils.h" | ||||
| 
 | ||||
| #include "pica.h" | ||||
| #include "pica_state.h" | ||||
| #include "pica_types.h" | ||||
| #include "vertex_loader.h" | ||||
| 
 | ||||
| namespace Pica { | ||||
| 
 | ||||
| void VertexLoader::Setup(const Pica::Regs ®s) { | ||||
|     const auto& attribute_config = regs.vertex_attributes; | ||||
|     base_address = attribute_config.GetPhysicalBaseAddress(); | ||||
|     num_total_attributes = attribute_config.GetNumTotalAttributes(); | ||||
| 
 | ||||
|     boost::fill(vertex_attribute_sources, 0xdeadbeef); | ||||
| 
 | ||||
|     for (int i = 0; i < 16; i++) { | ||||
|         vertex_attribute_is_default[i] = attribute_config.IsDefaultAttribute(i); | ||||
|     } | ||||
| 
 | ||||
|     // Setup attribute data from loaders
 | ||||
|     for (int loader = 0; loader < 12; ++loader) { | ||||
|         const auto& loader_config = attribute_config.attribute_loaders[loader]; | ||||
| 
 | ||||
|         u32 offset = 0; | ||||
| 
 | ||||
|         // TODO: What happens if a loader overwrites a previous one's data?
 | ||||
|         for (unsigned component = 0; component < loader_config.component_count; ++component) { | ||||
|             if (component >= 12) { | ||||
|                 LOG_ERROR(HW_GPU, "Overflow in the vertex attribute loader %u trying to load component %u", loader, component); | ||||
|                 continue; | ||||
|             } | ||||
| 
 | ||||
|             u32 attribute_index = loader_config.GetComponent(component); | ||||
|             if (attribute_index < 12) { | ||||
|                 int element_size = attribute_config.GetElementSizeInBytes(attribute_index); | ||||
|                 offset = Common::AlignUp(offset, element_size); | ||||
|                 vertex_attribute_sources[attribute_index] = base_address + loader_config.data_offset + offset; | ||||
|                 vertex_attribute_strides[attribute_index] = static_cast<u32>(loader_config.byte_count); | ||||
|                 vertex_attribute_formats[attribute_index] = attribute_config.GetFormat(attribute_index); | ||||
|                 vertex_attribute_elements[attribute_index] = attribute_config.GetNumElements(attribute_index); | ||||
|                 vertex_attribute_element_size[attribute_index] = element_size; | ||||
|                 offset += attribute_config.GetStride(attribute_index); | ||||
|             } else if (attribute_index < 16) { | ||||
|                 // Attribute ids 12, 13, 14 and 15 signify 4, 8, 12 and 16-byte paddings, respectively
 | ||||
|                 offset = Common::AlignUp(offset, 4); | ||||
|                 offset += (attribute_index - 11) * 4; | ||||
|             } else { | ||||
|                 UNREACHABLE(); // This is truly unreachable due to the number of bits for each component
 | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void VertexLoader::LoadVertex(int index, int vertex, Shader::InputVertex &input, MemoryAccesses &memory_accesses) { | ||||
|     for (int i = 0; i < num_total_attributes; ++i) { | ||||
|         if (vertex_attribute_elements[i] != 0) { | ||||
|             // Default attribute values set if array elements have < 4 components. This
 | ||||
|             // is *not* carried over from the default attribute settings even if they're
 | ||||
|             // enabled for this attribute.
 | ||||
|             static const float24 zero = float24::FromFloat32(0.0f); | ||||
|             static const float24 one = float24::FromFloat32(1.0f); | ||||
|             input.attr[i] = Math::Vec4<float24>(zero, zero, zero, one); | ||||
| 
 | ||||
|             // Load per-vertex data from the loader arrays
 | ||||
|             for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { | ||||
|                 u32 source_addr = vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i]; | ||||
|                 const u8* srcdata = Memory::GetPhysicalPointer(source_addr); | ||||
| 
 | ||||
|                 if (g_debug_context && Pica::g_debug_context->recorder) { | ||||
|                     memory_accesses.AddAccess(source_addr, | ||||
|                         (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::FLOAT) ? 4 | ||||
|                         : (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? 2 : 1); | ||||
|                 } | ||||
| 
 | ||||
|                 const float srcval = | ||||
|                     (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::BYTE) ? *reinterpret_cast<const s8*>(srcdata) : | ||||
|                     (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::UBYTE) ? *reinterpret_cast<const u8*>(srcdata) : | ||||
|                     (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? *reinterpret_cast<const s16*>(srcdata) : | ||||
|                     *reinterpret_cast<const float*>(srcdata); | ||||
| 
 | ||||
|                 input.attr[i][comp] = float24::FromFloat32(srcval); | ||||
|                 LOG_TRACE(HW_GPU, "Loaded component %x of attribute %x for vertex %x (index %x) from 0x%08x + 0x%08x + 0x%04x: %f", | ||||
|                     comp, i, vertex, index, | ||||
|                     base_address, | ||||
|                     vertex_attribute_sources[i] - base_address, | ||||
|                     vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i], | ||||
|                     input.attr[i][comp].ToFloat32()); | ||||
|             } | ||||
|         } else if (vertex_attribute_is_default[i]) { | ||||
|             // Load the default attribute if we're configured to do so
 | ||||
|             input.attr[i] = g_state.vs.default_attributes[i]; | ||||
|             LOG_TRACE(HW_GPU, "Loaded default attribute %x for vertex %x (index %x): (%f, %f, %f, %f)", | ||||
|                 i, vertex, index, | ||||
|                 input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(), | ||||
|                 input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32()); | ||||
|         } else { | ||||
|             // TODO(yuriks): In this case, no data gets loaded and the vertex
 | ||||
|             // remains with the last value it had. This isn't currently maintained
 | ||||
|             // as global state, however, and so won't work in Citra yet.
 | ||||
|         } | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| }  // namespace Pica
 | ||||
							
								
								
									
										53
									
								
								src/video_core/vertex_loader.h
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										53
									
								
								src/video_core/vertex_loader.h
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,53 @@ | |||
| #pragma once | ||||
| 
 | ||||
| #include "video_core/pica.h" | ||||
| #include "video_core/shader/shader.h" | ||||
| 
 | ||||
| namespace Pica { | ||||
| 
 | ||||
/**
 * Collects the physical memory ranges touched while loading vertices and keeps
 * them merged, so that overlapping or nearby accesses end up as a single range.
 *
 * NOTE(review): this class uses std::map, std::max and std::next, but this header
 * does not include <map>, <algorithm> or <iterator> itself; it relies on the
 * headers included above to provide them.
 */
class MemoryAccesses {
    /// Two ranges are merged when the gap between them is at most this many bytes
    static constexpr u32 MERGE_SLACK = 32;

    /// Combine overlapping and close ranges
    void SimplifyRanges() {
        for (auto it = ranges.begin(); it != ranges.end(); ++it) {
            // NOTE: We add MERGE_SLACK to the range end address to make sure "close" ranges are
            // combined, too. The sum is formed in 64 bits so that a range ending near the top of
            // the 32-bit address space cannot wrap around and be mistaken for a distant one.
            auto it2 = std::next(it);
            while (it2 != ranges.end() &&
                   static_cast<unsigned long long>(it->first) + it->second + MERGE_SLACK >= it2->first) {
                // Grow the current range so it also covers the absorbed one, then drop the latter
                it->second = std::max(it->second, it2->first + it2->second - it->first);
                it2 = ranges.erase(it2);
            }
        }
    }

public:
    /// Record a particular memory access in the list
    /// @param paddr Start address of the access
    /// @param size  Number of bytes accessed
    void AddAccess(u32 paddr, u32 size) {
        // Create new range or extend existing one (a new entry starts out with size 0)
        u32& recorded_size = ranges[paddr];
        recorded_size = std::max(recorded_size, size);

        // Simplify ranges...
        SimplifyRanges();
    }

    /// Map of accessed ranges (mapping start address to range size)
    std::map<u32, u32> ranges;
};
| 
 | ||||
| class VertexLoader { | ||||
| public: | ||||
|     void Setup(const Pica::Regs ®s); | ||||
|     void LoadVertex(int index, int vertex, Shader::InputVertex &input, MemoryAccesses &memory_accesses); | ||||
| 
 | ||||
|     u32 GetPhysicalBaseAddress() const { return base_address; } | ||||
|     int GetNumTotalAttributes() const { return num_total_attributes; } | ||||
| private: | ||||
|     u32 vertex_attribute_sources[16]; | ||||
|     u32 vertex_attribute_strides[16] = {}; | ||||
|     Regs::VertexAttributeFormat vertex_attribute_formats[16] = {}; | ||||
|     u32 vertex_attribute_elements[16] = {}; | ||||
|     u32 vertex_attribute_element_size[16] = {}; | ||||
|     bool vertex_attribute_is_default[16]; | ||||
|     u32 base_address; | ||||
|     int num_total_attributes; | ||||
| }; | ||||
| 
 | ||||
| }  // namespace Pica
 | ||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Henrik Rydgard
						Henrik Rydgard