forked from eden-emu/eden
		
	Shader_Ir: Implement F16 Variants of F2F, F2I, I2F.
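This commit implements the F16 variants of the conversion instructions (F2F, F2I, I2F): half-precision sources are extracted to a 32-bit float before conversion, and half-precision destinations are packed from the 32-bit float result.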
This commit is contained in:
		
							parent
							
								
									0a67416971
								
							
						
					
					
						commit
						11f4e739bd
					
				
					 5 changed files with 75 additions and 18 deletions
				
			
		|  | @ -1018,8 +1018,6 @@ union Instruction { | ||||||
|         } f2i; |         } f2i; | ||||||
| 
 | 
 | ||||||
|         union { |         union { | ||||||
|             BitField<8, 2, Register::Size> src_size; |  | ||||||
|             BitField<10, 2, Register::Size> dst_size; |  | ||||||
|             BitField<39, 4, u64> rounding; |             BitField<39, 4, u64> rounding; | ||||||
|             // H0, H1 extract for F16 missing
 |             // H0, H1 extract for F16 missing
 | ||||||
|             BitField<41, 1, u64> selector; // Guessed as some games set it, TODO: reverse this value
 |             BitField<41, 1, u64> selector; // Guessed as some games set it, TODO: reverse this value
 | ||||||
|  |  | ||||||
|  | @ -1122,6 +1122,16 @@ private: | ||||||
|                                Type::Float); |                                Type::Float); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  |     std::string FCastHalf0(Operation operation) { | ||||||
|  |         const std::string op_a = VisitOperand(operation, 0, Type::HalfFloat); | ||||||
|  |         return fmt::format("({})[0]", op_a); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     std::string FCastHalf1(Operation operation) { | ||||||
|  |         const std::string op_a = VisitOperand(operation, 0, Type::HalfFloat); | ||||||
|  |         return fmt::format("({})[1]", op_a); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|     template <Type type> |     template <Type type> | ||||||
|     std::string Min(Operation operation) { |     std::string Min(Operation operation) { | ||||||
|         return GenerateBinaryCall(operation, "min", type, type, type); |         return GenerateBinaryCall(operation, "min", type, type, type); | ||||||
|  | @ -1278,6 +1288,11 @@ private: | ||||||
|         return ApplyPrecise(operation, BitwiseCastResult(clamped, Type::HalfFloat)); |         return ApplyPrecise(operation, BitwiseCastResult(clamped, Type::HalfFloat)); | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  |     std::string HCastFloat(Operation operation) { | ||||||
|  |         const std::string op_a = VisitOperand(operation, 0, Type::Float); | ||||||
|  |         return fmt::format("fromHalf2(vec2({}, 0.0f))", op_a); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|     std::string HUnpack(Operation operation) { |     std::string HUnpack(Operation operation) { | ||||||
|         const std::string operand{VisitOperand(operation, 0, Type::HalfFloat)}; |         const std::string operand{VisitOperand(operation, 0, Type::HalfFloat)}; | ||||||
|         const auto value = [&]() -> std::string { |         const auto value = [&]() -> std::string { | ||||||
|  | @ -1718,6 +1733,8 @@ private: | ||||||
|         &GLSLDecompiler::Negate<Type::Float>, |         &GLSLDecompiler::Negate<Type::Float>, | ||||||
|         &GLSLDecompiler::Absolute<Type::Float>, |         &GLSLDecompiler::Absolute<Type::Float>, | ||||||
|         &GLSLDecompiler::FClamp, |         &GLSLDecompiler::FClamp, | ||||||
|  |         &GLSLDecompiler::FCastHalf0, | ||||||
|  |         &GLSLDecompiler::FCastHalf1, | ||||||
|         &GLSLDecompiler::Min<Type::Float>, |         &GLSLDecompiler::Min<Type::Float>, | ||||||
|         &GLSLDecompiler::Max<Type::Float>, |         &GLSLDecompiler::Max<Type::Float>, | ||||||
|         &GLSLDecompiler::FCos, |         &GLSLDecompiler::FCos, | ||||||
|  | @ -1778,6 +1795,7 @@ private: | ||||||
|         &GLSLDecompiler::Absolute<Type::HalfFloat>, |         &GLSLDecompiler::Absolute<Type::HalfFloat>, | ||||||
|         &GLSLDecompiler::HNegate, |         &GLSLDecompiler::HNegate, | ||||||
|         &GLSLDecompiler::HClamp, |         &GLSLDecompiler::HClamp, | ||||||
|  |         &GLSLDecompiler::HCastFloat, | ||||||
|         &GLSLDecompiler::HUnpack, |         &GLSLDecompiler::HUnpack, | ||||||
|         &GLSLDecompiler::HMergeF32, |         &GLSLDecompiler::HMergeF32, | ||||||
|         &GLSLDecompiler::HMergeH0, |         &GLSLDecompiler::HMergeH0, | ||||||
|  |  | ||||||
|  | @ -735,6 +735,16 @@ private: | ||||||
|         return {}; |         return {}; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  |     Id FCastHalf0(Operation operation) { | ||||||
|  |         UNIMPLEMENTED(); | ||||||
|  |         return {}; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     Id FCastHalf1(Operation operation) { | ||||||
|  |         UNIMPLEMENTED(); | ||||||
|  |         return {}; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|     Id HNegate(Operation operation) { |     Id HNegate(Operation operation) { | ||||||
|         UNIMPLEMENTED(); |         UNIMPLEMENTED(); | ||||||
|         return {}; |         return {}; | ||||||
|  | @ -745,6 +755,11 @@ private: | ||||||
|         return {}; |         return {}; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  |     Id HCastFloat(Operation operation) { | ||||||
|  |         UNIMPLEMENTED(); | ||||||
|  |         return {}; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|     Id HUnpack(Operation operation) { |     Id HUnpack(Operation operation) { | ||||||
|         UNIMPLEMENTED(); |         UNIMPLEMENTED(); | ||||||
|         return {}; |         return {}; | ||||||
|  | @ -1210,6 +1225,8 @@ private: | ||||||
|         &SPIRVDecompiler::Unary<&Module::OpFNegate, Type::Float>, |         &SPIRVDecompiler::Unary<&Module::OpFNegate, Type::Float>, | ||||||
|         &SPIRVDecompiler::Unary<&Module::OpFAbs, Type::Float>, |         &SPIRVDecompiler::Unary<&Module::OpFAbs, Type::Float>, | ||||||
|         &SPIRVDecompiler::Ternary<&Module::OpFClamp, Type::Float>, |         &SPIRVDecompiler::Ternary<&Module::OpFClamp, Type::Float>, | ||||||
|  |         &SPIRVDecompiler::FCastHalf0, | ||||||
|  |         &SPIRVDecompiler::FCastHalf1, | ||||||
|         &SPIRVDecompiler::Binary<&Module::OpFMin, Type::Float>, |         &SPIRVDecompiler::Binary<&Module::OpFMin, Type::Float>, | ||||||
|         &SPIRVDecompiler::Binary<&Module::OpFMax, Type::Float>, |         &SPIRVDecompiler::Binary<&Module::OpFMax, Type::Float>, | ||||||
|         &SPIRVDecompiler::Unary<&Module::OpCos, Type::Float>, |         &SPIRVDecompiler::Unary<&Module::OpCos, Type::Float>, | ||||||
|  | @ -1270,6 +1287,7 @@ private: | ||||||
|         &SPIRVDecompiler::Unary<&Module::OpFAbs, Type::HalfFloat>, |         &SPIRVDecompiler::Unary<&Module::OpFAbs, Type::HalfFloat>, | ||||||
|         &SPIRVDecompiler::HNegate, |         &SPIRVDecompiler::HNegate, | ||||||
|         &SPIRVDecompiler::HClamp, |         &SPIRVDecompiler::HClamp, | ||||||
|  |         &SPIRVDecompiler::HCastFloat, | ||||||
|         &SPIRVDecompiler::HUnpack, |         &SPIRVDecompiler::HUnpack, | ||||||
|         &SPIRVDecompiler::HMergeF32, |         &SPIRVDecompiler::HMergeF32, | ||||||
|         &SPIRVDecompiler::HMergeH0, |         &SPIRVDecompiler::HMergeH0, | ||||||
|  |  | ||||||
|  | @ -57,7 +57,7 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { | ||||||
|     case OpCode::Id::I2F_R: |     case OpCode::Id::I2F_R: | ||||||
|     case OpCode::Id::I2F_C: |     case OpCode::Id::I2F_C: | ||||||
|     case OpCode::Id::I2F_IMM: { |     case OpCode::Id::I2F_IMM: { | ||||||
|         UNIMPLEMENTED_IF(instr.conversion.dst_size != Register::Size::Word); |         UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long); | ||||||
|         UNIMPLEMENTED_IF(instr.conversion.selector); |         UNIMPLEMENTED_IF(instr.conversion.selector); | ||||||
|         UNIMPLEMENTED_IF_MSG(instr.generates_cc, |         UNIMPLEMENTED_IF_MSG(instr.generates_cc, | ||||||
|                              "Condition codes generation in I2F is not implemented"); |                              "Condition codes generation in I2F is not implemented"); | ||||||
|  | @ -82,14 +82,19 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { | ||||||
|         value = GetOperandAbsNegFloat(value, false, instr.conversion.negate_a); |         value = GetOperandAbsNegFloat(value, false, instr.conversion.negate_a); | ||||||
| 
 | 
 | ||||||
|         SetInternalFlagsFromFloat(bb, value, instr.generates_cc); |         SetInternalFlagsFromFloat(bb, value, instr.generates_cc); | ||||||
|  | 
 | ||||||
|  |         if (instr.conversion.dst_size == Register::Size::Short) { | ||||||
|  |             value = Operation(OperationCode::HCastFloat, PRECISE, value); | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|         SetRegister(bb, instr.gpr0, value); |         SetRegister(bb, instr.gpr0, value); | ||||||
|         break; |         break; | ||||||
|     } |     } | ||||||
|     case OpCode::Id::F2F_R: |     case OpCode::Id::F2F_R: | ||||||
|     case OpCode::Id::F2F_C: |     case OpCode::Id::F2F_C: | ||||||
|     case OpCode::Id::F2F_IMM: { |     case OpCode::Id::F2F_IMM: { | ||||||
|         UNIMPLEMENTED_IF(instr.conversion.f2f.dst_size != Register::Size::Word); |         UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long); | ||||||
|         UNIMPLEMENTED_IF(instr.conversion.f2f.src_size != Register::Size::Word); |         UNIMPLEMENTED_IF(instr.conversion.src_size == Register::Size::Long); | ||||||
|         UNIMPLEMENTED_IF_MSG(instr.generates_cc, |         UNIMPLEMENTED_IF_MSG(instr.generates_cc, | ||||||
|                              "Condition codes generation in F2F is not implemented"); |                              "Condition codes generation in F2F is not implemented"); | ||||||
| 
 | 
 | ||||||
|  | @ -107,6 +112,11 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { | ||||||
|             } |             } | ||||||
|         }(); |         }(); | ||||||
| 
 | 
 | ||||||
|  |         if (instr.conversion.src_size == Register::Size::Short) { | ||||||
|  |             // TODO: figure where extract is set in the encoding
 | ||||||
|  |             value = Operation(OperationCode::FCastHalf0, PRECISE, value); | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|         value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a); |         value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a); | ||||||
| 
 | 
 | ||||||
|         value = [&]() { |         value = [&]() { | ||||||
|  | @ -124,19 +134,24 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { | ||||||
|             default: |             default: | ||||||
|                 UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}", |                 UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}", | ||||||
|                                   static_cast<u32>(instr.conversion.f2f.rounding.Value())); |                                   static_cast<u32>(instr.conversion.f2f.rounding.Value())); | ||||||
|                 return Immediate(0); |                 return value; | ||||||
|             } |             } | ||||||
|         }(); |         }(); | ||||||
|         value = GetSaturatedFloat(value, instr.alu.saturate_d); |         value = GetSaturatedFloat(value, instr.alu.saturate_d); | ||||||
| 
 | 
 | ||||||
|         SetInternalFlagsFromFloat(bb, value, instr.generates_cc); |         SetInternalFlagsFromFloat(bb, value, instr.generates_cc); | ||||||
|  | 
 | ||||||
|  |         if (instr.conversion.dst_size == Register::Size::Short) { | ||||||
|  |             value = Operation(OperationCode::HCastFloat, PRECISE, value); | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|         SetRegister(bb, instr.gpr0, value); |         SetRegister(bb, instr.gpr0, value); | ||||||
|         break; |         break; | ||||||
|     } |     } | ||||||
|     case OpCode::Id::F2I_R: |     case OpCode::Id::F2I_R: | ||||||
|     case OpCode::Id::F2I_C: |     case OpCode::Id::F2I_C: | ||||||
|     case OpCode::Id::F2I_IMM: { |     case OpCode::Id::F2I_IMM: { | ||||||
|         UNIMPLEMENTED_IF(instr.conversion.src_size != Register::Size::Word); |         UNIMPLEMENTED_IF(instr.conversion.src_size == Register::Size::Long); | ||||||
|         UNIMPLEMENTED_IF_MSG(instr.generates_cc, |         UNIMPLEMENTED_IF_MSG(instr.generates_cc, | ||||||
|                              "Condition codes generation in F2I is not implemented"); |                              "Condition codes generation in F2I is not implemented"); | ||||||
|         Node value = [&]() { |         Node value = [&]() { | ||||||
|  | @ -153,6 +168,11 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { | ||||||
|             } |             } | ||||||
|         }(); |         }(); | ||||||
| 
 | 
 | ||||||
|  |         if (instr.conversion.src_size == Register::Size::Short) { | ||||||
|  |             // TODO: figure where extract is set in the encoding
 | ||||||
|  |             value = Operation(OperationCode::FCastHalf0, PRECISE, value); | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|         value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a); |         value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a); | ||||||
| 
 | 
 | ||||||
|         value = [&]() { |         value = [&]() { | ||||||
|  |  | ||||||
|  | @ -30,6 +30,8 @@ enum class OperationCode { | ||||||
|     FNegate,       /// (MetaArithmetic, float a) -> float
 |     FNegate,       /// (MetaArithmetic, float a) -> float
 | ||||||
|     FAbsolute,     /// (MetaArithmetic, float a) -> float
 |     FAbsolute,     /// (MetaArithmetic, float a) -> float
 | ||||||
|     FClamp,        /// (MetaArithmetic, float value, float min, float max) -> float
 |     FClamp,        /// (MetaArithmetic, float value, float min, float max) -> float
 | ||||||
|  |     FCastHalf0,    /// (MetaArithmetic, f16vec2 a) -> float
 | ||||||
|  |     FCastHalf1,    /// (MetaArithmetic, f16vec2 a) -> float
 | ||||||
|     FMin,          /// (MetaArithmetic, float a, float b) -> float
 |     FMin,          /// (MetaArithmetic, float a, float b) -> float
 | ||||||
|     FMax,          /// (MetaArithmetic, float a, float b) -> float
 |     FMax,          /// (MetaArithmetic, float a, float b) -> float
 | ||||||
|     FCos,          /// (MetaArithmetic, float a) -> float
 |     FCos,          /// (MetaArithmetic, float a) -> float
 | ||||||
|  | @ -83,17 +85,18 @@ enum class OperationCode { | ||||||
|     UBitfieldExtract, /// (MetaArithmetic, uint value, int offset, int offset) -> uint
 |     UBitfieldExtract, /// (MetaArithmetic, uint value, int offset, int offset) -> uint
 | ||||||
|     UBitCount,        /// (MetaArithmetic, uint) -> uint
 |     UBitCount,        /// (MetaArithmetic, uint) -> uint
 | ||||||
| 
 | 
 | ||||||
|     HAdd,      /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
 |     HAdd,       /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
 | ||||||
|     HMul,      /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
 |     HMul,       /// (MetaArithmetic, f16vec2 a, f16vec2 b) -> f16vec2
 | ||||||
|     HFma,      /// (MetaArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2
 |     HFma,       /// (MetaArithmetic, f16vec2 a, f16vec2 b, f16vec2 c) -> f16vec2
 | ||||||
|     HAbsolute, /// (f16vec2 a) -> f16vec2
 |     HAbsolute,  /// (f16vec2 a) -> f16vec2
 | ||||||
|     HNegate,   /// (f16vec2 a, bool first, bool second) -> f16vec2
 |     HNegate,    /// (f16vec2 a, bool first, bool second) -> f16vec2
 | ||||||
|     HClamp,    /// (f16vec2 src, float min, float max) -> f16vec2
 |     HClamp,     /// (f16vec2 src, float min, float max) -> f16vec2
 | ||||||
|     HUnpack,   /// (Tegra::Shader::HalfType, T value) -> f16vec2
 |     HCastFloat, /// (MetaArithmetic, float a) -> f16vec2
 | ||||||
|     HMergeF32, /// (f16vec2 src) -> float
 |     HUnpack,    /// (Tegra::Shader::HalfType, T value) -> f16vec2
 | ||||||
|     HMergeH0,  /// (f16vec2 dest, f16vec2 src) -> f16vec2
 |     HMergeF32,  /// (f16vec2 src) -> float
 | ||||||
|     HMergeH1,  /// (f16vec2 dest, f16vec2 src) -> f16vec2
 |     HMergeH0,   /// (f16vec2 dest, f16vec2 src) -> f16vec2
 | ||||||
|     HPack2,    /// (float a, float b) -> f16vec2
 |     HMergeH1,   /// (f16vec2 dest, f16vec2 src) -> f16vec2
 | ||||||
|  |     HPack2,     /// (float a, float b) -> f16vec2
 | ||||||
| 
 | 
 | ||||||
|     LogicalAssign, /// (bool& dst, bool src) -> void
 |     LogicalAssign, /// (bool& dst, bool src) -> void
 | ||||||
|     LogicalAnd,    /// (bool a, bool b) -> bool
 |     LogicalAnd,    /// (bool a, bool b) -> bool
 | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Fernando Sahmkow
						Fernando Sahmkow