forked from eden-emu/eden
		
	vk_compute_pass: Implement indexed quads
Implement indexed quads (GL_QUADS used with glDrawElements*) with a compute pass conversion. The compute shader converts from uint8/uint16/uint32 indices to uint32. The format is passed through push constants to avoid having different variants of the same shader. - Used by Fast RMX - Used by Xenoblade Chronicles 2 (it still has graphical glitches due to synchronization issues on Vulkan)
This commit is contained in:
		
							parent
							
								
									7a9b83b658
								
							
						
					
					
						commit
						14ab5c4b65
					
				
					 5 changed files with 280 additions and 12 deletions
				
			
		
							
								
								
									
										50
									
								
								src/video_core/renderer_vulkan/shaders/quad_indexed.comp
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										50
									
								
								src/video_core/renderer_vulkan/shaders/quad_indexed.comp
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,50 @@ | |||
| // Copyright 2020 yuzu Emulator Project | ||||
| // Licensed under GPLv2 or any later version | ||||
| // Refer to the license.txt file included. | ||||
| 
 | ||||
| /* | ||||
|  * Build instructions: | ||||
|  * $ glslangValidator -V quad_indexed.comp -o output.spv | ||||
|  * $ spirv-opt -O --strip-debug output.spv -o optimized.spv | ||||
|  * $ xxd -i optimized.spv | ||||
|  * | ||||
|  * Then copy that bytecode to the C++ file | ||||
|  */ | ||||
| 
 | ||||
#version 460 core

// One invocation converts one quad; a workgroup covers 1024 quads.
layout (local_size_x = 1024) in;

// Source index buffer, read as packed 32-bit words whatever the index width is.
layout (std430, set = 0, binding = 0) readonly buffer InputBuffer {
    uint input_indexes[];
};

// Destination buffer of 32-bit indices, six per converted quad.
layout (std430, set = 0, binding = 1) writeonly buffer OutputBuffer {
    uint output_indexes[];
};

layout (push_constant) uniform PushConstants {
    uint base_vertex; // Added to every index written to the output
    int index_shift;  // 0: uint8, 1: uint16, 2: uint32
};

void main() {
    // Corner order that splits quad (0, 1, 2, 3) into triangles (0, 1, 2) and (0, 2, 3).
    const int quad_swizzle[6] = int[](0, 1, 2, 0, 2, 3);

    int quad = int(gl_GlobalInvocationID.x);
    int first_output = quad * 6;
    if (first_output >= output_indexes.length()) {
        // This invocation lies past the end of the output buffer; nothing to write.
        return;
    }

    int bits_per_index = 8 << index_shift; // Width of one source index in bits
    int word_shift = 2 - index_shift;      // log2 of the source indices held by one 32-bit word
    int lane_mask = (1 << word_shift) - 1; // Selects the position of an index inside its word

    int first_input = quad * 4;
    for (int corner = 0; corner < 6; ++corner) {
        int source = first_input + quad_swizzle[corner];
        uint word = input_indexes[source >> word_shift];
        int bit_start = (source & lane_mask) * bits_per_index;
        uint index = bitfieldExtract(word, bit_start, bits_per_index);
        output_indexes[first_output + corner] = index + base_vertex;
    }
}
|  | @ -135,11 +135,11 @@ VkDescriptorUpdateTemplateEntryKHR BuildQuadArrayPassDescriptorUpdateTemplateEnt | |||
|     return entry; | ||||
| } | ||||
| 
 | ||||
| VkPushConstantRange BuildQuadArrayPassPushConstantRange() { | ||||
| VkPushConstantRange BuildComputePushConstantRange(std::size_t size) { | ||||
|     VkPushConstantRange range; | ||||
|     range.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; | ||||
|     range.offset = 0; | ||||
|     range.size = sizeof(u32); | ||||
|     range.size = static_cast<u32>(size); | ||||
|     return range; | ||||
| } | ||||
| 
 | ||||
|  | @ -220,7 +220,130 @@ constexpr u8 uint8_pass[] = { | |||
|     0xf9, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, | ||||
|     0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00}; | ||||
| 
 | ||||
| std::array<VkDescriptorSetLayoutBinding, 2> BuildUint8PassDescriptorSetBindings() { | ||||
| // Quad indexed SPIR-V module. Generated from "shaders/quad_indexed.comp"; see the build instructions at the top of that file.
 | ||||
| constexpr u8 QUAD_INDEXED_SPV[] = { | ||||
|     0x03, 0x02, 0x23, 0x07, 0x00, 0x00, 0x01, 0x00, 0x07, 0x00, 0x08, 0x00, 0x7c, 0x00, 0x00, 0x00, | ||||
|     0x00, 0x00, 0x00, 0x00, 0x11, 0x00, 0x02, 0x00, 0x01, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x06, 0x00, | ||||
|     0x01, 0x00, 0x00, 0x00, 0x47, 0x4c, 0x53, 0x4c, 0x2e, 0x73, 0x74, 0x64, 0x2e, 0x34, 0x35, 0x30, | ||||
|     0x00, 0x00, 0x00, 0x00, 0x0e, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, | ||||
|     0x0f, 0x00, 0x06, 0x00, 0x05, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x6d, 0x61, 0x69, 0x6e, | ||||
|     0x00, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x10, 0x00, 0x06, 0x00, 0x04, 0x00, 0x00, 0x00, | ||||
|     0x11, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, | ||||
|     0x47, 0x00, 0x04, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, | ||||
|     0x47, 0x00, 0x04, 0x00, 0x15, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, | ||||
|     0x48, 0x00, 0x04, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, | ||||
|     0x48, 0x00, 0x05, 0x00, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, | ||||
|     0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x16, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, | ||||
|     0x47, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | ||||
|     0x47, 0x00, 0x04, 0x00, 0x18, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, | ||||
|     0x48, 0x00, 0x05, 0x00, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x23, 0x00, 0x00, 0x00, | ||||
|     0x00, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x22, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, | ||||
|     0x23, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x22, 0x00, 0x00, 0x00, | ||||
|     0x02, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x56, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, | ||||
|     0x04, 0x00, 0x00, 0x00, 0x48, 0x00, 0x04, 0x00, 0x57, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | ||||
|     0x18, 0x00, 0x00, 0x00, 0x48, 0x00, 0x05, 0x00, 0x57, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | ||||
|     0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x03, 0x00, 0x57, 0x00, 0x00, 0x00, | ||||
|     0x03, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x59, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, | ||||
|     0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x59, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, | ||||
|     0x00, 0x00, 0x00, 0x00, 0x47, 0x00, 0x04, 0x00, 0x72, 0x00, 0x00, 0x00, 0x0b, 0x00, 0x00, 0x00, | ||||
|     0x19, 0x00, 0x00, 0x00, 0x13, 0x00, 0x02, 0x00, 0x02, 0x00, 0x00, 0x00, 0x21, 0x00, 0x03, 0x00, | ||||
|     0x03, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, | ||||
|     0x20, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x07, 0x00, 0x00, 0x00, | ||||
|     0x07, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x15, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, | ||||
|     0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x17, 0x00, 0x04, 0x00, 0x0a, 0x00, 0x00, 0x00, | ||||
|     0x09, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00, | ||||
|     0x01, 0x00, 0x00, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x0b, 0x00, 0x00, 0x00, | ||||
|     0x0c, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, | ||||
|     0x0d, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x0e, 0x00, 0x00, 0x00, | ||||
|     0x01, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, | ||||
|     0x13, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00, 0x15, 0x00, 0x00, 0x00, | ||||
|     0x09, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x16, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, | ||||
|     0x20, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, | ||||
|     0x3b, 0x00, 0x04, 0x00, 0x17, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, | ||||
|     0x14, 0x00, 0x02, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, | ||||
|     0x21, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x04, 0x00, 0x22, 0x00, 0x00, 0x00, | ||||
|     0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00, | ||||
|     0x09, 0x00, 0x00, 0x00, 0x22, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x23, 0x00, 0x00, 0x00, | ||||
|     0x24, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, | ||||
|     0x25, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x26, 0x00, 0x00, 0x00, | ||||
|     0x09, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, | ||||
|     0x2b, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, | ||||
|     0x3b, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, | ||||
|     0x3f, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x04, 0x00, 0x41, 0x00, 0x00, 0x00, | ||||
|     0x06, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, | ||||
|     0x42, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, | ||||
|     0x43, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x09, 0x00, 0x41, 0x00, 0x00, 0x00, | ||||
|     0x44, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, | ||||
|     0x42, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x00, 0x00, 0x43, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, | ||||
|     0x46, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0x41, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x03, 0x00, | ||||
|     0x56, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, 0x1e, 0x00, 0x03, 0x00, 0x57, 0x00, 0x00, 0x00, | ||||
|     0x56, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x58, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, | ||||
|     0x57, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x58, 0x00, 0x00, 0x00, 0x59, 0x00, 0x00, 0x00, | ||||
|     0x02, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x5b, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, | ||||
|     0x09, 0x00, 0x00, 0x00, 0x20, 0x00, 0x04, 0x00, 0x69, 0x00, 0x00, 0x00, 0x09, 0x00, 0x00, 0x00, | ||||
|     0x09, 0x00, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x70, 0x00, 0x00, 0x00, | ||||
|     0x00, 0x04, 0x00, 0x00, 0x2b, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00, | ||||
|     0x01, 0x00, 0x00, 0x00, 0x2c, 0x00, 0x06, 0x00, 0x0a, 0x00, 0x00, 0x00, 0x72, 0x00, 0x00, 0x00, | ||||
|     0x70, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00, 0x71, 0x00, 0x00, 0x00, 0x36, 0x00, 0x05, 0x00, | ||||
|     0x02, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, | ||||
|     0xf8, 0x00, 0x02, 0x00, 0x05, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x04, 0x00, 0x46, 0x00, 0x00, 0x00, | ||||
|     0x47, 0x00, 0x00, 0x00, 0x07, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, 0x74, 0x00, 0x00, 0x00, | ||||
|     0xf8, 0x00, 0x02, 0x00, 0x74, 0x00, 0x00, 0x00, 0xf6, 0x00, 0x04, 0x00, 0x73, 0x00, 0x00, 0x00, | ||||
|     0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, 0x75, 0x00, 0x00, 0x00, | ||||
|     0xf8, 0x00, 0x02, 0x00, 0x75, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x0e, 0x00, 0x00, 0x00, | ||||
|     0x0f, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, | ||||
|     0x09, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, | ||||
|     0x06, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00, | ||||
|     0x06, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x13, 0x00, 0x00, 0x00, | ||||
|     0x44, 0x00, 0x05, 0x00, 0x09, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, | ||||
|     0x00, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, | ||||
|     0x19, 0x00, 0x00, 0x00, 0xaf, 0x00, 0x05, 0x00, 0x1b, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, | ||||
|     0x14, 0x00, 0x00, 0x00, 0x1a, 0x00, 0x00, 0x00, 0xf7, 0x00, 0x03, 0x00, 0x1e, 0x00, 0x00, 0x00, | ||||
|     0x00, 0x00, 0x00, 0x00, 0xfa, 0x00, 0x04, 0x00, 0x1c, 0x00, 0x00, 0x00, 0x1d, 0x00, 0x00, 0x00, | ||||
|     0x1e, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1d, 0x00, 0x00, 0x00, 0xf9, 0x00, 0x02, 0x00, | ||||
|     0x73, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x1e, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, | ||||
|     0x26, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, | ||||
|     0x3d, 0x00, 0x04, 0x00, 0x06, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, 0x00, | ||||
|     0xc4, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x21, 0x00, 0x00, 0x00, | ||||
|     0x28, 0x00, 0x00, 0x00, 0x82, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00, | ||||
|     0x2b, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, 0xc4, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, | ||||
|     0x31, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, 0x2e, 0x00, 0x00, 0x00, 0x82, 0x00, 0x05, 0x00, | ||||
|     0x06, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00, 0x31, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, | ||||
|     0xf9, 0x00, 0x02, 0x00, 0x35, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x35, 0x00, 0x00, 0x00, | ||||
|     0xf5, 0x00, 0x07, 0x00, 0x09, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, 0x0d, 0x00, 0x00, 0x00, | ||||
|     0x1e, 0x00, 0x00, 0x00, 0x6f, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0xb0, 0x00, 0x05, 0x00, | ||||
|     0x1b, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, 0x3b, 0x00, 0x00, 0x00, | ||||
|     0xf6, 0x00, 0x04, 0x00, 0x37, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, | ||||
|     0xfa, 0x00, 0x04, 0x00, 0x3c, 0x00, 0x00, 0x00, 0x36, 0x00, 0x00, 0x00, 0x37, 0x00, 0x00, 0x00, | ||||
|     0xf8, 0x00, 0x02, 0x00, 0x36, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, | ||||
|     0x40, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x3f, 0x00, 0x00, 0x00, 0x3e, 0x00, 0x03, 0x00, | ||||
|     0x47, 0x00, 0x00, 0x00, 0x44, 0x00, 0x00, 0x00, 0x41, 0x00, 0x05, 0x00, 0x07, 0x00, 0x00, 0x00, | ||||
|     0x48, 0x00, 0x00, 0x00, 0x47, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, | ||||
|     0x06, 0x00, 0x00, 0x00, 0x49, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, | ||||
|     0x06, 0x00, 0x00, 0x00, 0x4a, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x49, 0x00, 0x00, 0x00, | ||||
|     0xc3, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x4e, 0x00, 0x00, 0x00, 0x4a, 0x00, 0x00, 0x00, | ||||
|     0x2e, 0x00, 0x00, 0x00, 0xc7, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, 0x52, 0x00, 0x00, 0x00, | ||||
|     0x4a, 0x00, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00, 0x84, 0x00, 0x05, 0x00, 0x06, 0x00, 0x00, 0x00, | ||||
|     0x54, 0x00, 0x00, 0x00, 0x52, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, | ||||
|     0x5b, 0x00, 0x00, 0x00, 0x5c, 0x00, 0x00, 0x00, 0x59, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, | ||||
|     0x4e, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x5d, 0x00, 0x00, 0x00, | ||||
|     0x5c, 0x00, 0x00, 0x00, 0xcb, 0x00, 0x06, 0x00, 0x09, 0x00, 0x00, 0x00, 0x62, 0x00, 0x00, 0x00, | ||||
|     0x5d, 0x00, 0x00, 0x00, 0x54, 0x00, 0x00, 0x00, 0x29, 0x00, 0x00, 0x00, 0x7c, 0x00, 0x04, 0x00, | ||||
|     0x09, 0x00, 0x00, 0x00, 0x65, 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, | ||||
|     0x09, 0x00, 0x00, 0x00, 0x67, 0x00, 0x00, 0x00, 0x65, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, | ||||
|     0x41, 0x00, 0x05, 0x00, 0x69, 0x00, 0x00, 0x00, 0x6a, 0x00, 0x00, 0x00, 0x24, 0x00, 0x00, 0x00, | ||||
|     0x42, 0x00, 0x00, 0x00, 0x3d, 0x00, 0x04, 0x00, 0x09, 0x00, 0x00, 0x00, 0x6b, 0x00, 0x00, 0x00, | ||||
|     0x6a, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, 0x09, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00, | ||||
|     0x62, 0x00, 0x00, 0x00, 0x6b, 0x00, 0x00, 0x00, 0x41, 0x00, 0x06, 0x00, 0x5b, 0x00, 0x00, 0x00, | ||||
|     0x6d, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x42, 0x00, 0x00, 0x00, 0x67, 0x00, 0x00, 0x00, | ||||
|     0x3e, 0x00, 0x03, 0x00, 0x6d, 0x00, 0x00, 0x00, 0x6c, 0x00, 0x00, 0x00, 0x80, 0x00, 0x05, 0x00, | ||||
|     0x09, 0x00, 0x00, 0x00, 0x6f, 0x00, 0x00, 0x00, 0x7b, 0x00, 0x00, 0x00, 0x25, 0x00, 0x00, 0x00, | ||||
|     0xf9, 0x00, 0x02, 0x00, 0x35, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x37, 0x00, 0x00, 0x00, | ||||
|     0xf9, 0x00, 0x02, 0x00, 0x73, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x76, 0x00, 0x00, 0x00, | ||||
|     0xf9, 0x00, 0x02, 0x00, 0x74, 0x00, 0x00, 0x00, 0xf8, 0x00, 0x02, 0x00, 0x73, 0x00, 0x00, 0x00, | ||||
|     0xfd, 0x00, 0x01, 0x00, 0x38, 0x00, 0x01, 0x00}; | ||||
| 
 | ||||
| std::array<VkDescriptorSetLayoutBinding, 2> BuildInputOutputDescriptorSetBindings() { | ||||
|     std::array<VkDescriptorSetLayoutBinding, 2> bindings; | ||||
|     bindings[0].binding = 0; | ||||
|     bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; | ||||
|  | @ -235,7 +358,7 @@ std::array<VkDescriptorSetLayoutBinding, 2> BuildUint8PassDescriptorSetBindings( | |||
|     return bindings; | ||||
| } | ||||
| 
 | ||||
| VkDescriptorUpdateTemplateEntryKHR BuildUint8PassDescriptorUpdateTemplateEntry() { | ||||
| VkDescriptorUpdateTemplateEntryKHR BuildInputOutputDescriptorUpdateTemplate() { | ||||
|     VkDescriptorUpdateTemplateEntryKHR entry; | ||||
|     entry.dstBinding = 0; | ||||
|     entry.dstArrayElement = 0; | ||||
|  | @ -337,14 +460,14 @@ QuadArrayPass::QuadArrayPass(const VKDevice& device, VKScheduler& scheduler, | |||
|                              VKUpdateDescriptorQueue& update_descriptor_queue) | ||||
|     : VKComputePass(device, descriptor_pool, BuildQuadArrayPassDescriptorSetLayoutBinding(), | ||||
|                     BuildQuadArrayPassDescriptorUpdateTemplateEntry(), | ||||
|                     BuildQuadArrayPassPushConstantRange(), std::size(quad_array), quad_array), | ||||
|                     BuildComputePushConstantRange(sizeof(u32)), std::size(quad_array), quad_array), | ||||
|       scheduler{scheduler}, staging_buffer_pool{staging_buffer_pool}, | ||||
|       update_descriptor_queue{update_descriptor_queue} {} | ||||
| 
 | ||||
| QuadArrayPass::~QuadArrayPass() = default; | ||||
| 
 | ||||
| std::pair<VkBuffer, VkDeviceSize> QuadArrayPass::Assemble(u32 num_vertices, u32 first) { | ||||
|     const u32 num_triangle_vertices = num_vertices * 6 / 4; | ||||
|     const u32 num_triangle_vertices = (num_vertices / 4) * 6; | ||||
|     const std::size_t staging_size = num_triangle_vertices * sizeof(u32); | ||||
|     auto& buffer = staging_buffer_pool.GetUnusedBuffer(staging_size, false); | ||||
| 
 | ||||
|  | @ -383,8 +506,8 @@ std::pair<VkBuffer, VkDeviceSize> QuadArrayPass::Assemble(u32 num_vertices, u32 | |||
| Uint8Pass::Uint8Pass(const VKDevice& device, VKScheduler& scheduler, | ||||
|                      VKDescriptorPool& descriptor_pool, VKStagingBufferPool& staging_buffer_pool, | ||||
|                      VKUpdateDescriptorQueue& update_descriptor_queue) | ||||
|     : VKComputePass(device, descriptor_pool, BuildUint8PassDescriptorSetBindings(), | ||||
|                     BuildUint8PassDescriptorUpdateTemplateEntry(), {}, std::size(uint8_pass), | ||||
|     : VKComputePass(device, descriptor_pool, BuildInputOutputDescriptorSetBindings(), | ||||
|                     BuildInputOutputDescriptorUpdateTemplate(), {}, std::size(uint8_pass), | ||||
|                     uint8_pass), | ||||
|       scheduler{scheduler}, staging_buffer_pool{staging_buffer_pool}, | ||||
|       update_descriptor_queue{update_descriptor_queue} {} | ||||
|  | @ -425,4 +548,70 @@ std::pair<VkBuffer, u64> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buff | |||
|     return {*buffer.handle, 0}; | ||||
| } | ||||
| 
 | ||||
| QuadIndexedPass::QuadIndexedPass(const VKDevice& device, VKScheduler& scheduler, | ||||
|                                  VKDescriptorPool& descriptor_pool, | ||||
|                                  VKStagingBufferPool& staging_buffer_pool, | ||||
|                                  VKUpdateDescriptorQueue& update_descriptor_queue) | ||||
|     : VKComputePass(device, descriptor_pool, BuildInputOutputDescriptorSetBindings(), | ||||
|                     BuildInputOutputDescriptorUpdateTemplate(), | ||||
|                     BuildComputePushConstantRange(sizeof(u32) * 2), std::size(QUAD_INDEXED_SPV), | ||||
|                     QUAD_INDEXED_SPV), | ||||
|       scheduler{scheduler}, staging_buffer_pool{staging_buffer_pool}, | ||||
|       update_descriptor_queue{update_descriptor_queue} {} | ||||
| 
 | ||||
| QuadIndexedPass::~QuadIndexedPass() = default; | ||||
| 
 | ||||
| std::pair<VkBuffer, u64> QuadIndexedPass::Assemble( | ||||
|     Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, u32 num_vertices, u32 base_vertex, | ||||
|     VkBuffer src_buffer, u64 src_offset) { | ||||
|     const u32 index_shift = [index_format] { | ||||
|         switch (index_format) { | ||||
|         case Tegra::Engines::Maxwell3D::Regs::IndexFormat::UnsignedByte: | ||||
|             return 0; | ||||
|         case Tegra::Engines::Maxwell3D::Regs::IndexFormat::UnsignedShort: | ||||
|             return 1; | ||||
|         case Tegra::Engines::Maxwell3D::Regs::IndexFormat::UnsignedInt: | ||||
|             return 2; | ||||
|         } | ||||
|         UNREACHABLE(); | ||||
|         return 2; | ||||
|     }(); | ||||
|     const u32 input_size = num_vertices << index_shift; | ||||
|     const u32 num_tri_vertices = (num_vertices / 4) * 6; | ||||
| 
 | ||||
|     const std::size_t staging_size = num_tri_vertices * sizeof(u32); | ||||
|     auto& buffer = staging_buffer_pool.GetUnusedBuffer(staging_size, false); | ||||
| 
 | ||||
|     update_descriptor_queue.Acquire(); | ||||
|     update_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size); | ||||
|     update_descriptor_queue.AddBuffer(*buffer.handle, 0, staging_size); | ||||
|     const auto set = CommitDescriptorSet(update_descriptor_queue, scheduler.GetFence()); | ||||
| 
 | ||||
|     scheduler.RequestOutsideRenderPassOperationContext(); | ||||
|     scheduler.Record([layout = *layout, pipeline = *pipeline, buffer = *buffer.handle, set, | ||||
|                       num_tri_vertices, base_vertex, index_shift](vk::CommandBuffer cmdbuf) { | ||||
|         static constexpr u32 dispatch_size = 1024; | ||||
|         const std::array push_constants = {base_vertex, index_shift}; | ||||
|         cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_COMPUTE, pipeline); | ||||
|         cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, set, {}); | ||||
|         cmdbuf.PushConstants(layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants), | ||||
|                              &push_constants); | ||||
|         cmdbuf.Dispatch(Common::AlignUp(num_tri_vertices, dispatch_size) / dispatch_size, 1, 1); | ||||
| 
 | ||||
|         VkBufferMemoryBarrier barrier; | ||||
|         barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; | ||||
|         barrier.pNext = nullptr; | ||||
|         barrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT; | ||||
|         barrier.dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT; | ||||
|         barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; | ||||
|         barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; | ||||
|         barrier.buffer = buffer; | ||||
|         barrier.offset = 0; | ||||
|         barrier.size = static_cast<VkDeviceSize>(num_tri_vertices * sizeof(u32)); | ||||
|         cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, | ||||
|                                VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, barrier, {}); | ||||
|     }); | ||||
|     return {*buffer.handle, 0}; | ||||
| } | ||||
| 
 | ||||
| } // namespace Vulkan
 | ||||
|  |  | |||
|  | @ -8,6 +8,7 @@ | |||
| #include <utility> | ||||
| #include <vector> | ||||
| #include "common/common_types.h" | ||||
| #include "video_core/engines/maxwell_3d.h" | ||||
| #include "video_core/renderer_vulkan/vk_descriptor_pool.h" | ||||
| #include "video_core/renderer_vulkan/wrapper.h" | ||||
| 
 | ||||
|  | @ -73,4 +74,22 @@ private: | |||
|     VKUpdateDescriptorQueue& update_descriptor_queue; | ||||
| }; | ||||
| 
 | ||||
| class QuadIndexedPass final : public VKComputePass { | ||||
| public: | ||||
|     explicit QuadIndexedPass(const VKDevice& device, VKScheduler& scheduler, | ||||
|                              VKDescriptorPool& descriptor_pool, | ||||
|                              VKStagingBufferPool& staging_buffer_pool, | ||||
|                              VKUpdateDescriptorQueue& update_descriptor_queue); | ||||
|     ~QuadIndexedPass(); | ||||
| 
 | ||||
|     std::pair<VkBuffer, u64> Assemble(Tegra::Engines::Maxwell3D::Regs::IndexFormat index_format, | ||||
|                                       u32 num_vertices, u32 base_vertex, VkBuffer src_buffer, | ||||
|                                       u64 src_offset); | ||||
| 
 | ||||
| private: | ||||
|     VKScheduler& scheduler; | ||||
|     VKStagingBufferPool& staging_buffer_pool; | ||||
|     VKUpdateDescriptorQueue& update_descriptor_queue; | ||||
| }; | ||||
| 
 | ||||
| } // namespace Vulkan
 | ||||
|  |  | |||
|  | @ -293,6 +293,7 @@ RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWind | |||
|       update_descriptor_queue(device, scheduler), renderpass_cache(device), | ||||
|       quad_array_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), | ||||
|       uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), | ||||
|       quad_indexed_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), | ||||
|       texture_cache(system, *this, device, resource_manager, memory_manager, scheduler, | ||||
|                     staging_pool), | ||||
|       pipeline_cache(system, *this, device, scheduler, descriptor_pool, update_descriptor_queue, | ||||
|  | @ -844,18 +845,26 @@ void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawPar | |||
|                                         bool is_indexed) { | ||||
|     const auto& regs = system.GPU().Maxwell3D().regs; | ||||
|     switch (regs.draw.topology) { | ||||
|     case Maxwell::PrimitiveTopology::Quads: | ||||
|         if (params.is_indexed) { | ||||
|             UNIMPLEMENTED(); | ||||
|         } else { | ||||
|     case Maxwell::PrimitiveTopology::Quads: { | ||||
|         if (!params.is_indexed) { | ||||
|             const auto [buffer, offset] = | ||||
|                 quad_array_pass.Assemble(params.num_vertices, params.base_vertex); | ||||
|             buffer_bindings.SetIndexBinding(buffer, offset, VK_INDEX_TYPE_UINT32); | ||||
|             params.base_vertex = 0; | ||||
|             params.num_vertices = params.num_vertices * 6 / 4; | ||||
|             params.is_indexed = true; | ||||
|             break; | ||||
|         } | ||||
|         const GPUVAddr gpu_addr = regs.index_array.IndexStart(); | ||||
|         auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize()); | ||||
|         std::tie(buffer, offset) = quad_indexed_pass.Assemble( | ||||
|             regs.index_array.format, params.num_vertices, params.base_vertex, buffer, offset); | ||||
| 
 | ||||
|         buffer_bindings.SetIndexBinding(buffer, offset, VK_INDEX_TYPE_UINT32); | ||||
|         params.num_vertices = (params.num_vertices / 4) * 6; | ||||
|         params.base_vertex = 0; | ||||
|         break; | ||||
|     } | ||||
|     default: { | ||||
|         if (!is_indexed) { | ||||
|             break; | ||||
|  |  | |||
|  | @ -254,6 +254,7 @@ private: | |||
|     VKUpdateDescriptorQueue update_descriptor_queue; | ||||
|     VKRenderPassCache renderpass_cache; | ||||
|     QuadArrayPass quad_array_pass; | ||||
|     QuadIndexedPass quad_indexed_pass; | ||||
|     Uint8Pass uint8_pass; | ||||
| 
 | ||||
|     VKTextureCache texture_cache; | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 ReinUsesLisp
						ReinUsesLisp