forked from eden-emu/eden
		
	Merge pull request #3489 from namkazt/patch-2
shader: implement SULD.D bits32/64
This commit is contained in:
		
						commit
						487f9ba525
					
				
					 2 changed files with 353 additions and 11 deletions
				
			
		|  | @ -13,13 +13,247 @@ | |||
| #include "video_core/engines/shader_bytecode.h" | ||||
| #include "video_core/shader/node_helper.h" | ||||
| #include "video_core/shader/shader_ir.h" | ||||
| #include "video_core/textures/texture.h" | ||||
| 
 | ||||
| namespace VideoCommon::Shader { | ||||
| 
 | ||||
| using Tegra::Shader::Instruction; | ||||
| using Tegra::Shader::OpCode; | ||||
| using Tegra::Shader::PredCondition; | ||||
| using Tegra::Shader::StoreType; | ||||
| using Tegra::Texture::ComponentType; | ||||
| using Tegra::Texture::TextureFormat; | ||||
| using Tegra::Texture::TICEntry; | ||||
| 
 | ||||
| namespace { | ||||
| 
 | ||||
| ComponentType GetComponentType(Tegra::Engines::SamplerDescriptor descriptor, | ||||
|                                std::size_t component) { | ||||
|     const TextureFormat format{descriptor.format}; | ||||
|     switch (format) { | ||||
|     case TextureFormat::R16_G16_B16_A16: | ||||
|     case TextureFormat::R32_G32_B32_A32: | ||||
|     case TextureFormat::R32_G32_B32: | ||||
|     case TextureFormat::R32_G32: | ||||
|     case TextureFormat::R16_G16: | ||||
|     case TextureFormat::R32: | ||||
|     case TextureFormat::R16: | ||||
|     case TextureFormat::R8: | ||||
|     case TextureFormat::R1: | ||||
|         if (component == 0) { | ||||
|             return descriptor.r_type; | ||||
|         } | ||||
|         if (component == 1) { | ||||
|             return descriptor.g_type; | ||||
|         } | ||||
|         if (component == 2) { | ||||
|             return descriptor.b_type; | ||||
|         } | ||||
|         if (component == 3) { | ||||
|             return descriptor.a_type; | ||||
|         } | ||||
|         break; | ||||
|     case TextureFormat::A8R8G8B8: | ||||
|         if (component == 0) { | ||||
|             return descriptor.a_type; | ||||
|         } | ||||
|         if (component == 1) { | ||||
|             return descriptor.r_type; | ||||
|         } | ||||
|         if (component == 2) { | ||||
|             return descriptor.g_type; | ||||
|         } | ||||
|         if (component == 3) { | ||||
|             return descriptor.b_type; | ||||
|         } | ||||
|         break; | ||||
|     case TextureFormat::A2B10G10R10: | ||||
|     case TextureFormat::A4B4G4R4: | ||||
|     case TextureFormat::A5B5G5R1: | ||||
|     case TextureFormat::A1B5G5R5: | ||||
|         if (component == 0) { | ||||
|             return descriptor.a_type; | ||||
|         } | ||||
|         if (component == 1) { | ||||
|             return descriptor.b_type; | ||||
|         } | ||||
|         if (component == 2) { | ||||
|             return descriptor.g_type; | ||||
|         } | ||||
|         if (component == 3) { | ||||
|             return descriptor.r_type; | ||||
|         } | ||||
|         break; | ||||
|     case TextureFormat::R32_B24G8: | ||||
|         if (component == 0) { | ||||
|             return descriptor.r_type; | ||||
|         } | ||||
|         if (component == 1) { | ||||
|             return descriptor.b_type; | ||||
|         } | ||||
|         if (component == 2) { | ||||
|             return descriptor.g_type; | ||||
|         } | ||||
|         break; | ||||
|     case TextureFormat::B5G6R5: | ||||
|     case TextureFormat::B6G5R5: | ||||
|         if (component == 0) { | ||||
|             return descriptor.b_type; | ||||
|         } | ||||
|         if (component == 1) { | ||||
|             return descriptor.g_type; | ||||
|         } | ||||
|         if (component == 2) { | ||||
|             return descriptor.r_type; | ||||
|         } | ||||
|         break; | ||||
|     case TextureFormat::G8R24: | ||||
|     case TextureFormat::G24R8: | ||||
|     case TextureFormat::G8R8: | ||||
|     case TextureFormat::G4R4: | ||||
|         if (component == 0) { | ||||
|             return descriptor.g_type; | ||||
|         } | ||||
|         if (component == 1) { | ||||
|             return descriptor.r_type; | ||||
|         } | ||||
|         break; | ||||
|     } | ||||
|     UNIMPLEMENTED_MSG("texture format not implement={}", format); | ||||
|     return ComponentType::FLOAT; | ||||
| } | ||||
| 
 | ||||
/// Reports whether channel `component` (bit 0 = R, 1 = G, 2 = B, 3 = A) is set in
/// `component_mask`. Both arguments go through checked accessors, so a mask above 15 or a
/// channel above 3 raises std::out_of_range.
bool IsComponentEnabled(std::size_t component_mask, std::size_t component) {
    constexpr u8 red = 0b0001;
    constexpr u8 green = 0b0010;
    constexpr u8 blue = 0b0100;
    constexpr u8 alpha = 0b1000;
    // Indexed by the mask value; each entry holds the channel bits enabled for that mask.
    constexpr std::array<u8, 16> channel_sets{
        0,
        red,
        green,
        red | green,
        blue,
        red | blue,
        green | blue,
        red | green | blue,
        alpha,
        red | alpha,
        green | alpha,
        red | green | alpha,
        blue | alpha,
        red | blue | alpha,
        green | blue | alpha,
        red | green | blue | alpha,
    };
    const std::bitset<4> enabled_channels{channel_sets.at(component_mask)};
    return enabled_channels.test(component);
}
| 
 | ||||
| u32 GetComponentSize(TextureFormat format, std::size_t component) { | ||||
|     switch (format) { | ||||
|     case TextureFormat::R32_G32_B32_A32: | ||||
|         return 32; | ||||
|     case TextureFormat::R16_G16_B16_A16: | ||||
|         return 16; | ||||
|     case TextureFormat::R32_G32_B32: | ||||
|         return component <= 2 ? 32 : 0; | ||||
|     case TextureFormat::R32_G32: | ||||
|         return component <= 1 ? 32 : 0; | ||||
|     case TextureFormat::R16_G16: | ||||
|         return component <= 1 ? 16 : 0; | ||||
|     case TextureFormat::R32: | ||||
|         return component == 0 ? 32 : 0; | ||||
|     case TextureFormat::R16: | ||||
|         return component == 0 ? 16 : 0; | ||||
|     case TextureFormat::R8: | ||||
|         return component == 0 ? 8 : 0; | ||||
|     case TextureFormat::R1: | ||||
|         return component == 0 ? 1 : 0; | ||||
|     case TextureFormat::A8R8G8B8: | ||||
|         return 8; | ||||
|     case TextureFormat::A2B10G10R10: | ||||
|         return (component == 3 || component == 2 || component == 1) ? 10 : 2; | ||||
|     case TextureFormat::A4B4G4R4: | ||||
|         return 4; | ||||
|     case TextureFormat::A5B5G5R1: | ||||
|         return (component == 0 || component == 1 || component == 2) ? 5 : 1; | ||||
|     case TextureFormat::A1B5G5R5: | ||||
|         return (component == 1 || component == 2 || component == 3) ? 5 : 1; | ||||
|     case TextureFormat::R32_B24G8: | ||||
|         if (component == 0) { | ||||
|             return 32; | ||||
|         } | ||||
|         if (component == 1) { | ||||
|             return 24; | ||||
|         } | ||||
|         if (component == 2) { | ||||
|             return 8; | ||||
|         } | ||||
|         return 0; | ||||
|     case TextureFormat::B5G6R5: | ||||
|         if (component == 0 || component == 2) { | ||||
|             return 5; | ||||
|         } | ||||
|         if (component == 1) { | ||||
|             return 6; | ||||
|         } | ||||
|         return 0; | ||||
|     case TextureFormat::B6G5R5: | ||||
|         if (component == 1 || component == 2) { | ||||
|             return 5; | ||||
|         } | ||||
|         if (component == 0) { | ||||
|             return 6; | ||||
|         } | ||||
|         return 0; | ||||
|     case TextureFormat::G8R24: | ||||
|         if (component == 0) { | ||||
|             return 8; | ||||
|         } | ||||
|         if (component == 1) { | ||||
|             return 24; | ||||
|         } | ||||
|         return 0; | ||||
|     case TextureFormat::G24R8: | ||||
|         if (component == 0) { | ||||
|             return 8; | ||||
|         } | ||||
|         if (component == 1) { | ||||
|             return 24; | ||||
|         } | ||||
|         return 0; | ||||
|     case TextureFormat::G8R8: | ||||
|         return (component == 0 || component == 1) ? 8 : 0; | ||||
|     case TextureFormat::G4R4: | ||||
|         return (component == 0 || component == 1) ? 4 : 0; | ||||
|     default: | ||||
|         UNIMPLEMENTED_MSG("texture format not implement={}", format); | ||||
|         return 0; | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| std::size_t GetImageComponentMask(TextureFormat format) { | ||||
|     constexpr u8 R = 0b0001; | ||||
|     constexpr u8 G = 0b0010; | ||||
|     constexpr u8 B = 0b0100; | ||||
|     constexpr u8 A = 0b1000; | ||||
|     switch (format) { | ||||
|     case TextureFormat::R32_G32_B32_A32: | ||||
|     case TextureFormat::R16_G16_B16_A16: | ||||
|     case TextureFormat::A8R8G8B8: | ||||
|     case TextureFormat::A2B10G10R10: | ||||
|     case TextureFormat::A4B4G4R4: | ||||
|     case TextureFormat::A5B5G5R1: | ||||
|     case TextureFormat::A1B5G5R5: | ||||
|         return std::size_t{R | G | B | A}; | ||||
|     case TextureFormat::R32_G32_B32: | ||||
|     case TextureFormat::R32_B24G8: | ||||
|     case TextureFormat::B5G6R5: | ||||
|     case TextureFormat::B6G5R5: | ||||
|         return std::size_t{R | G | B}; | ||||
|     case TextureFormat::R32_G32: | ||||
|     case TextureFormat::R16_G16: | ||||
|     case TextureFormat::G8R24: | ||||
|     case TextureFormat::G24R8: | ||||
|     case TextureFormat::G8R8: | ||||
|     case TextureFormat::G4R4: | ||||
|         return std::size_t{R | G}; | ||||
|     case TextureFormat::R32: | ||||
|     case TextureFormat::R16: | ||||
|     case TextureFormat::R8: | ||||
|     case TextureFormat::R1: | ||||
|         return std::size_t{R}; | ||||
|     default: | ||||
|         UNIMPLEMENTED_MSG("texture format not implement={}", format); | ||||
|         return std::size_t{R | G | B | A}; | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) { | ||||
|     switch (image_type) { | ||||
|     case Tegra::Shader::ImageType::Texture1D: | ||||
|  | @ -37,6 +271,39 @@ std::size_t GetImageTypeNumCoordinates(Tegra::Shader::ImageType image_type) { | |||
| } | ||||
| } // Anonymous namespace
 | ||||
| 
 | ||||
| std::pair<Node, bool> ShaderIR::GetComponentValue(ComponentType component_type, u32 component_size, | ||||
|                                                   Node original_value) { | ||||
|     switch (component_type) { | ||||
|     case ComponentType::SNORM: { | ||||
|         // range [-1.0, 1.0]
 | ||||
|         auto cnv_value = Operation(OperationCode::FMul, original_value, | ||||
|                                    Immediate(static_cast<float>(1 << component_size) / 2.f - 1.f)); | ||||
|         cnv_value = Operation(OperationCode::ICastFloat, std::move(cnv_value)); | ||||
|         return {BitfieldExtract(std::move(cnv_value), 0, component_size), true}; | ||||
|     } | ||||
|     case ComponentType::SINT: | ||||
|     case ComponentType::UNORM: { | ||||
|         bool is_signed = component_type == ComponentType::SINT; | ||||
|         // range [0.0, 1.0]
 | ||||
|         auto cnv_value = Operation(OperationCode::FMul, original_value, | ||||
|                                    Immediate(static_cast<float>(1 << component_size) - 1.f)); | ||||
|         return {SignedOperation(OperationCode::ICastFloat, is_signed, std::move(cnv_value)), | ||||
|                 is_signed}; | ||||
|     } | ||||
|     case ComponentType::UINT: // range [0, (1 << component_size) - 1]
 | ||||
|         return {std::move(original_value), false}; | ||||
|     case ComponentType::FLOAT: | ||||
|         if (component_size == 16) { | ||||
|             return {Operation(OperationCode::HCastFloat, original_value), true}; | ||||
|         } else { | ||||
|             return {std::move(original_value), true}; | ||||
|         } | ||||
|     default: | ||||
|         UNIMPLEMENTED_MSG("Unimplement component type={}", component_type); | ||||
|         return {std::move(original_value), true}; | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { | ||||
|     const Instruction instr = {program_code[pc]}; | ||||
|     const auto opcode = OpCode::Decode(instr); | ||||
|  | @ -53,7 +320,6 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { | |||
| 
 | ||||
|     switch (opcode->get().GetId()) { | ||||
|     case OpCode::Id::SULD: { | ||||
|         UNIMPLEMENTED_IF(instr.suldst.mode != Tegra::Shader::SurfaceDataMode::P); | ||||
|         UNIMPLEMENTED_IF(instr.suldst.out_of_bounds_store != | ||||
|                          Tegra::Shader::OutOfBoundsStore::Ignore); | ||||
| 
 | ||||
|  | @ -62,6 +328,7 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { | |||
|                                               : GetBindlessImage(instr.gpr39, type)}; | ||||
|         image.MarkRead(); | ||||
| 
 | ||||
|         if (instr.suldst.mode == Tegra::Shader::SurfaceDataMode::P) { | ||||
|             u32 indexer = 0; | ||||
|             for (u32 element = 0; element < 4; ++element) { | ||||
|                 if (!instr.suldst.IsComponentEnabled(element)) { | ||||
|  | @ -74,6 +341,77 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { | |||
|             for (u32 i = 0; i < indexer; ++i) { | ||||
|                 SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); | ||||
|             } | ||||
|         } else if (instr.suldst.mode == Tegra::Shader::SurfaceDataMode::D_BA) { | ||||
|             UNIMPLEMENTED_IF(instr.suldst.GetStoreDataLayout() != StoreType::Bits32 && | ||||
|                              instr.suldst.GetStoreDataLayout() != StoreType::Bits64); | ||||
| 
 | ||||
|             auto descriptor = [this, instr] { | ||||
|                 std::optional<Tegra::Engines::SamplerDescriptor> descriptor; | ||||
|                 if (instr.suldst.is_immediate) { | ||||
|                     descriptor = | ||||
|                         registry.ObtainBoundSampler(static_cast<u32>(instr.image.index.Value())); | ||||
|                 } else { | ||||
|                     const Node image_register = GetRegister(instr.gpr39); | ||||
|                     const auto [base_image, buffer, offset] = TrackCbuf( | ||||
|                         image_register, global_code, static_cast<s64>(global_code.size())); | ||||
|                     descriptor = registry.ObtainBindlessSampler(buffer, offset); | ||||
|                 } | ||||
|                 if (!descriptor) { | ||||
|                     UNREACHABLE_MSG("Failed to obtain image descriptor"); | ||||
|                 } | ||||
|                 return *descriptor; | ||||
|             }(); | ||||
| 
 | ||||
|             const auto comp_mask = GetImageComponentMask(descriptor.format); | ||||
| 
 | ||||
|             switch (instr.suldst.GetStoreDataLayout()) { | ||||
|             case StoreType::Bits32: | ||||
|             case StoreType::Bits64: { | ||||
|                 u32 indexer = 0; | ||||
|                 u32 shifted_counter = 0; | ||||
|                 Node value = Immediate(0); | ||||
|                 for (u32 element = 0; element < 4; ++element) { | ||||
|                     if (!IsComponentEnabled(comp_mask, element)) { | ||||
|                         continue; | ||||
|                     } | ||||
|                     const auto component_type = GetComponentType(descriptor, element); | ||||
|                     const auto component_size = GetComponentSize(descriptor.format, element); | ||||
|                     MetaImage meta{image, {}, element}; | ||||
| 
 | ||||
|                     auto [converted_value, is_signed] = GetComponentValue( | ||||
|                         component_type, component_size, | ||||
|                         Operation(OperationCode::ImageLoad, meta, GetCoordinates(type))); | ||||
| 
 | ||||
|                     // shift element to correct position
 | ||||
|                     const auto shifted = shifted_counter; | ||||
|                     if (shifted > 0) { | ||||
|                         converted_value = | ||||
|                             SignedOperation(OperationCode::ILogicalShiftLeft, is_signed, | ||||
|                                             std::move(converted_value), Immediate(shifted)); | ||||
|                     } | ||||
|                     shifted_counter += component_size; | ||||
| 
 | ||||
|                     // add value into result
 | ||||
|                     value = Operation(OperationCode::UBitwiseOr, value, std::move(converted_value)); | ||||
| 
 | ||||
|                     // once 32 bits have been packed, store the word into a temporary
 | ||||
|                     if (shifted_counter >= 32) { | ||||
|                         SetTemporary(bb, indexer++, std::move(value)); | ||||
|                         // reset counter and value to start packing the next 32-bit word
 | ||||
|                         value = Immediate(0); | ||||
|                         shifted_counter = 0; | ||||
|                     } | ||||
|                 } | ||||
|                 for (u32 i = 0; i < indexer; ++i) { | ||||
|                     SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); | ||||
|                 } | ||||
|                 break; | ||||
|             } | ||||
|             default: | ||||
|                 UNREACHABLE(); | ||||
|                 break; | ||||
|             } | ||||
|         } | ||||
|         break; | ||||
|     } | ||||
|     case OpCode::Id::SUST: { | ||||
|  |  | |||
|  | @ -312,6 +312,10 @@ private: | |||
|     /// Conditionally saturates a half float pair
 | ||||
|     Node GetSaturatedHalfFloat(Node value, bool saturate = true); | ||||
| 
 | ||||
|     /// Get image component value by type and size
 | ||||
|     std::pair<Node, bool> GetComponentValue(Tegra::Texture::ComponentType component_type, | ||||
|                                             u32 component_size, Node original_value); | ||||
| 
 | ||||
|     /// Returns a predicate comparing two floats
 | ||||
|     Node GetPredicateComparisonFloat(Tegra::Shader::PredCondition condition, Node op_a, Node op_b); | ||||
|     /// Returns a predicate comparing two integers
 | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Rodrigo Locatti
						Rodrigo Locatti