forked from eden-emu/eden
		
	Optimize the vertex loader, nearly doubling its speed.
This commit is contained in:
		
							parent
							
								
									2403e86cbb
								
							
						
					
					
						commit
						251f29dd7f
					
				
					 2 changed files with 54 additions and 32 deletions
				
			
		|  | @ -46,13 +46,11 @@ void VertexLoader::Setup(const Pica::Regs& regs) { | ||||||
| 
 | 
 | ||||||
|             u32 attribute_index = loader_config.GetComponent(component); |             u32 attribute_index = loader_config.GetComponent(component); | ||||||
|             if (attribute_index < 12) { |             if (attribute_index < 12) { | ||||||
|                 int element_size = attribute_config.GetElementSizeInBytes(attribute_index); |                 offset = Common::AlignUp(offset, attribute_config.GetElementSizeInBytes(attribute_index)); | ||||||
|                 offset = Common::AlignUp(offset, element_size); |  | ||||||
|                 vertex_attribute_sources[attribute_index] = loader_config.data_offset + offset; |                 vertex_attribute_sources[attribute_index] = loader_config.data_offset + offset; | ||||||
|                 vertex_attribute_strides[attribute_index] = static_cast<u32>(loader_config.byte_count); |                 vertex_attribute_strides[attribute_index] = static_cast<u32>(loader_config.byte_count); | ||||||
|                 vertex_attribute_formats[attribute_index] = attribute_config.GetFormat(attribute_index); |                 vertex_attribute_formats[attribute_index] = attribute_config.GetFormat(attribute_index); | ||||||
|                 vertex_attribute_elements[attribute_index] = attribute_config.GetNumElements(attribute_index); |                 vertex_attribute_elements[attribute_index] = attribute_config.GetNumElements(attribute_index); | ||||||
|                 vertex_attribute_element_size[attribute_index] = element_size; |  | ||||||
|                 offset += attribute_config.GetStride(attribute_index); |                 offset += attribute_config.GetStride(attribute_index); | ||||||
|             } else if (attribute_index < 16) { |             } else if (attribute_index < 16) { | ||||||
|                 // Attribute ids 12, 13, 14 and 15 signify 4, 8, 12 and 16-byte paddings, respectively
 |                 // Attribute ids 12, 13, 14 and 15 signify 4, 8, 12 and 16-byte paddings, respectively
 | ||||||
|  | @ -68,38 +66,63 @@ void VertexLoader::Setup(const Pica::Regs& regs) { | ||||||
| void VertexLoader::LoadVertex(u32 base_address, int index, int vertex, Shader::InputVertex& input, MemoryAccesses& memory_accesses) { | void VertexLoader::LoadVertex(u32 base_address, int index, int vertex, Shader::InputVertex& input, MemoryAccesses& memory_accesses) { | ||||||
|     for (int i = 0; i < num_total_attributes; ++i) { |     for (int i = 0; i < num_total_attributes; ++i) { | ||||||
|         if (vertex_attribute_elements[i] != 0) { |         if (vertex_attribute_elements[i] != 0) { | ||||||
|  |             // Load per-vertex data from the loader arrays
 | ||||||
|  |             u32 source_addr = base_address + vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex; | ||||||
|  | 
 | ||||||
|  |             if (g_debug_context && Pica::g_debug_context->recorder) { | ||||||
|  |                 memory_accesses.AddAccess(source_addr, | ||||||
|  |                     (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::FLOAT) ? 4 | ||||||
|  |                     : (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? 2 : 1); | ||||||
|  |             } | ||||||
|  | 
 | ||||||
|  |             switch (vertex_attribute_formats[i]) { | ||||||
|  |             case Regs::VertexAttributeFormat::BYTE: | ||||||
|  |             { | ||||||
|  |                 const s8* srcdata = reinterpret_cast<const s8*>(Memory::GetPhysicalPointer(source_addr)); | ||||||
|  |                 for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { | ||||||
|  |                     input.attr[i][comp] = float24::FromFloat32(srcdata[comp]); | ||||||
|  |                 } | ||||||
|  |                 break; | ||||||
|  |             } | ||||||
|  |             case Regs::VertexAttributeFormat::UBYTE: | ||||||
|  |             { | ||||||
|  |                 const u8* srcdata = reinterpret_cast<const u8*>(Memory::GetPhysicalPointer(source_addr)); | ||||||
|  |                 for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { | ||||||
|  |                     input.attr[i][comp] = float24::FromFloat32(srcdata[comp]); | ||||||
|  |                 } | ||||||
|  |                 break; | ||||||
|  |             } | ||||||
|  |             case Regs::VertexAttributeFormat::SHORT: | ||||||
|  |             { | ||||||
|  |                 const s16* srcdata = reinterpret_cast<const s16*>(Memory::GetPhysicalPointer(source_addr)); | ||||||
|  |                 for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { | ||||||
|  |                     input.attr[i][comp] = float24::FromFloat32(srcdata[comp]); | ||||||
|  |                 } | ||||||
|  |                 break; | ||||||
|  |             } | ||||||
|  |             case Regs::VertexAttributeFormat::FLOAT: | ||||||
|  |             { | ||||||
|  |                 const float* srcdata = reinterpret_cast<const float*>(Memory::GetPhysicalPointer(source_addr)); | ||||||
|  |                 for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { | ||||||
|  |                     input.attr[i][comp] = float24::FromFloat32(srcdata[comp]); | ||||||
|  |                 } | ||||||
|  |                 break; | ||||||
|  |             } | ||||||
|  |             } | ||||||
|  | 
 | ||||||
|             // Default attribute values set if array elements have < 4 components. This
 |             // Default attribute values set if array elements have < 4 components. This
 | ||||||
|             // is *not* carried over from the default attribute settings even if they're
 |             // is *not* carried over from the default attribute settings even if they're
 | ||||||
|             // enabled for this attribute.
 |             // enabled for this attribute.
 | ||||||
|             static const float24 zero = float24::FromFloat32(0.0f); |             for (unsigned int comp = vertex_attribute_elements[i]; comp < 4; ++comp) { | ||||||
|             static const float24 one = float24::FromFloat32(1.0f); |                 input.attr[i][comp] = comp == 3 ? float24::FromFloat32(1.0f) : float24::FromFloat32(0.0f); | ||||||
|             input.attr[i] = Math::Vec4<float24>(zero, zero, zero, one); |  | ||||||
| 
 |  | ||||||
|             // Load per-vertex data from the loader arrays
 |  | ||||||
|             for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { |  | ||||||
|                 u32 source_addr = base_address + vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i]; |  | ||||||
|                 const u8* srcdata = Memory::GetPhysicalPointer(source_addr); |  | ||||||
| 
 |  | ||||||
|                 if (g_debug_context && Pica::g_debug_context->recorder) { |  | ||||||
|                     memory_accesses.AddAccess(source_addr, |  | ||||||
|                         (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::FLOAT) ? 4 |  | ||||||
|                         : (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? 2 : 1); |  | ||||||
|                 } |  | ||||||
| 
 |  | ||||||
|                 const float srcval = |  | ||||||
|                     (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::BYTE) ? *reinterpret_cast<const s8*>(srcdata) : |  | ||||||
|                     (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::UBYTE) ? *reinterpret_cast<const u8*>(srcdata) : |  | ||||||
|                     (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? *reinterpret_cast<const s16*>(srcdata) : |  | ||||||
|                     *reinterpret_cast<const float*>(srcdata); |  | ||||||
| 
 |  | ||||||
|                 input.attr[i][comp] = float24::FromFloat32(srcval); |  | ||||||
|                 LOG_TRACE(HW_GPU, "Loaded component %x of attribute %x for vertex %x (index %x) from 0x%08x + 0x%08x + 0x%04x: %f", |  | ||||||
|                     comp, i, vertex, index, |  | ||||||
|                     base_address, |  | ||||||
|                     vertex_attribute_sources[i], |  | ||||||
|                     vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i], |  | ||||||
|                     input.attr[i][comp].ToFloat32()); |  | ||||||
|             } |             } | ||||||
|  | 
 | ||||||
|  |             LOG_TRACE(HW_GPU, "Loaded %d components of attribute %x for vertex %x (index %x) from 0x%08x + 0x%08x + 0x%04x: %f %f %f %f", | ||||||
|  |                 vertex_attribute_elements[i], i, vertex, index, | ||||||
|  |                 base_address, | ||||||
|  |                 vertex_attribute_sources[i], | ||||||
|  |                 vertex_attribute_strides[i] * vertex, | ||||||
|  |                 input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(), input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32()); | ||||||
|         } else if (vertex_attribute_is_default[i]) { |         } else if (vertex_attribute_is_default[i]) { | ||||||
|             // Load the default attribute if we're configured to do so
 |             // Load the default attribute if we're configured to do so
 | ||||||
|             input.attr[i] = g_state.vs.default_attributes[i]; |             input.attr[i] = g_state.vs.default_attributes[i]; | ||||||
|  |  | ||||||
|  | @ -47,7 +47,6 @@ private: | ||||||
|     u32 vertex_attribute_strides[16] = {}; |     u32 vertex_attribute_strides[16] = {}; | ||||||
|     Regs::VertexAttributeFormat vertex_attribute_formats[16] = {}; |     Regs::VertexAttributeFormat vertex_attribute_formats[16] = {}; | ||||||
|     u32 vertex_attribute_elements[16] = {}; |     u32 vertex_attribute_elements[16] = {}; | ||||||
|     u32 vertex_attribute_element_size[16] = {}; |  | ||||||
|     bool vertex_attribute_is_default[16]; |     bool vertex_attribute_is_default[16]; | ||||||
|     int num_total_attributes; |     int num_total_attributes; | ||||||
| }; | }; | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Henrik Rydgard
						Henrik Rydgard