forked from eden-emu/eden
		
	Shader_Ir: Implement F16 Variants of F2F, F2I, I2F.
This commit implements the F16 variants of the conversion instructions and makes sure the half-float conversions are performed.
This commit is contained in:
		
							parent
							
								
									f96208f686
								
							
						
					
					
						commit
						9a0fa90be2
					
				
					 5 changed files with 75 additions and 18 deletions
				
			
		|  | @ -1018,8 +1018,6 @@ union Instruction { | |||
|         } f2i; | ||||
| 
 | ||||
|         union { | ||||
|             BitField<8, 2, Register::Size> src_size; | ||||
|             BitField<10, 2, Register::Size> dst_size; | ||||
|             BitField<39, 4, u64> rounding; | ||||
|             // H0, H1 extract for F16 missing
 | ||||
|             BitField<41, 1, u64> selector; // Guessed as some games set it, TODO: reverse this value
 | ||||
|  |  | |||
|  | @ -1122,6 +1122,16 @@ private: | |||
|                                Type::Float); | ||||
|     } | ||||
| 
 | ||||
|     std::string FCastHalf0(Operation operation) { | ||||
|         const std::string op_a = VisitOperand(operation, 0, Type::HalfFloat); | ||||
|         return fmt::format("({})[0]", op_a); | ||||
|     } | ||||
| 
 | ||||
|     std::string FCastHalf1(Operation operation) { | ||||
|         const std::string op_a = VisitOperand(operation, 0, Type::HalfFloat); | ||||
|         return fmt::format("({})[1]", op_a); | ||||
|     } | ||||
| 
 | ||||
|     template <Type type> | ||||
|     std::string Min(Operation operation) { | ||||
|         return GenerateBinaryCall(operation, "min", type, type, type); | ||||
|  | @ -1278,6 +1288,11 @@ private: | |||
|         return ApplyPrecise(operation, BitwiseCastResult(clamped, Type::HalfFloat)); | ||||
|     } | ||||
| 
 | ||||
|     std::string HCastFloat(Operation operation) { | ||||
|         const std::string op_a = VisitOperand(operation, 0, Type::Float); | ||||
|         return fmt::format("fromHalf2(vec2({}, 0.0f))", op_a); | ||||
|     } | ||||
| 
 | ||||
|     std::string HUnpack(Operation operation) { | ||||
|         const std::string operand{VisitOperand(operation, 0, Type::HalfFloat)}; | ||||
|         const auto value = [&]() -> std::string { | ||||
|  | @ -1718,6 +1733,8 @@ private: | |||
|         &GLSLDecompiler::Negate<Type::Float>, | ||||
|         &GLSLDecompiler::Absolute<Type::Float>, | ||||
|         &GLSLDecompiler::FClamp, | ||||
|         &GLSLDecompiler::FCastHalf0, | ||||
|         &GLSLDecompiler::FCastHalf1, | ||||
|         &GLSLDecompiler::Min<Type::Float>, | ||||
|         &GLSLDecompiler::Max<Type::Float>, | ||||
|         &GLSLDecompiler::FCos, | ||||
|  | @ -1778,6 +1795,7 @@ private: | |||
|         &GLSLDecompiler::Absolute<Type::HalfFloat>, | ||||
|         &GLSLDecompiler::HNegate, | ||||
|         &GLSLDecompiler::HClamp, | ||||
|         &GLSLDecompiler::HCastFloat, | ||||
|         &GLSLDecompiler::HUnpack, | ||||
|         &GLSLDecompiler::HMergeF32, | ||||
|         &GLSLDecompiler::HMergeH0, | ||||
|  |  | |||
|  | @ -735,6 +735,16 @@ private: | |||
|         return {}; | ||||
|     } | ||||
| 
 | ||||
|     Id FCastHalf0(Operation operation) { | ||||
|         UNIMPLEMENTED(); | ||||
|         return {}; | ||||
|     } | ||||
| 
 | ||||
|     Id FCastHalf1(Operation operation) { | ||||
|         UNIMPLEMENTED(); | ||||
|         return {}; | ||||
|     } | ||||
| 
 | ||||
|     Id HNegate(Operation operation) { | ||||
|         UNIMPLEMENTED(); | ||||
|         return {}; | ||||
|  | @ -745,6 +755,11 @@ private: | |||
|         return {}; | ||||
|     } | ||||
| 
 | ||||
|     Id HCastFloat(Operation operation) { | ||||
|         UNIMPLEMENTED(); | ||||
|         return {}; | ||||
|     } | ||||
| 
 | ||||
|     Id HUnpack(Operation operation) { | ||||
|         UNIMPLEMENTED(); | ||||
|         return {}; | ||||
|  | @ -1210,6 +1225,8 @@ private: | |||
|         &SPIRVDecompiler::Unary<&Module::OpFNegate, Type::Float>, | ||||
|         &SPIRVDecompiler::Unary<&Module::OpFAbs, Type::Float>, | ||||
|         &SPIRVDecompiler::Ternary<&Module::OpFClamp, Type::Float>, | ||||
|         &SPIRVDecompiler::FCastHalf0, | ||||
|         &SPIRVDecompiler::FCastHalf1, | ||||
|         &SPIRVDecompiler::Binary<&Module::OpFMin, Type::Float>, | ||||
|         &SPIRVDecompiler::Binary<&Module::OpFMax, Type::Float>, | ||||
|         &SPIRVDecompiler::Unary<&Module::OpCos, Type::Float>, | ||||
|  | @ -1270,6 +1287,7 @@ private: | |||
|         &SPIRVDecompiler::Unary<&Module::OpFAbs, Type::HalfFloat>, | ||||
|         &SPIRVDecompiler::HNegate, | ||||
|         &SPIRVDecompiler::HClamp, | ||||
|         &SPIRVDecompiler::HCastFloat, | ||||
|         &SPIRVDecompiler::HUnpack, | ||||
|         &SPIRVDecompiler::HMergeF32, | ||||
|         &SPIRVDecompiler::HMergeH0, | ||||
|  |  | |||
|  | @ -57,7 +57,7 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { | |||
|     case OpCode::Id::I2F_R: | ||||
|     case OpCode::Id::I2F_C: | ||||
|     case OpCode::Id::I2F_IMM: { | ||||
|         UNIMPLEMENTED_IF(instr.conversion.dst_size != Register::Size::Word); | ||||
|         UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long); | ||||
|         UNIMPLEMENTED_IF(instr.conversion.selector); | ||||
|         UNIMPLEMENTED_IF_MSG(instr.generates_cc, | ||||
|                              "Condition codes generation in I2F is not implemented"); | ||||
|  | @ -82,14 +82,19 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { | |||
|         value = GetOperandAbsNegFloat(value, false, instr.conversion.negate_a); | ||||
| 
 | ||||
|         SetInternalFlagsFromFloat(bb, value, instr.generates_cc); | ||||
| 
 | ||||
|         if (instr.conversion.dst_size == Register::Size::Short) { | ||||
|             value = Operation(OperationCode::HCastFloat, PRECISE, value); | ||||
|         } | ||||
| 
 | ||||
|         SetRegister(bb, instr.gpr0, value); | ||||
|         break; | ||||
|     } | ||||
|     case OpCode::Id::F2F_R: | ||||
|     case OpCode::Id::F2F_C: | ||||
|     case OpCode::Id::F2F_IMM: { | ||||
|         UNIMPLEMENTED_IF(instr.conversion.f2f.dst_size != Register::Size::Word); | ||||
|         UNIMPLEMENTED_IF(instr.conversion.f2f.src_size != Register::Size::Word); | ||||
|         UNIMPLEMENTED_IF(instr.conversion.dst_size == Register::Size::Long); | ||||
|         UNIMPLEMENTED_IF(instr.conversion.src_size == Register::Size::Long); | ||||
|         UNIMPLEMENTED_IF_MSG(instr.generates_cc, | ||||
|                              "Condition codes generation in F2F is not implemented"); | ||||
| 
 | ||||
|  | @ -107,6 +112,11 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { | |||
|             } | ||||
|         }(); | ||||
| 
 | ||||
|         if (instr.conversion.src_size == Register::Size::Short) { | ||||
|             // TODO: figure out where extract is set in the encoding
 | ||||
|             value = Operation(OperationCode::FCastHalf0, PRECISE, value); | ||||
|         } | ||||
| 
 | ||||
|         value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a); | ||||
| 
 | ||||
|         value = [&]() { | ||||
|  | @ -124,19 +134,24 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { | |||
|             default: | ||||
|                 UNIMPLEMENTED_MSG("Unimplemented F2F rounding mode {}", | ||||
|                                   static_cast<u32>(instr.conversion.f2f.rounding.Value())); | ||||
|                 return Immediate(0); | ||||
|                 return value; | ||||
|             } | ||||
|         }(); | ||||
|         value = GetSaturatedFloat(value, instr.alu.saturate_d); | ||||
| 
 | ||||
|         SetInternalFlagsFromFloat(bb, value, instr.generates_cc); | ||||
| 
 | ||||
|         if (instr.conversion.dst_size == Register::Size::Short) { | ||||
|             value = Operation(OperationCode::HCastFloat, PRECISE, value); | ||||
|         } | ||||
| 
 | ||||
|         SetRegister(bb, instr.gpr0, value); | ||||
|         break; | ||||
|     } | ||||
|     case OpCode::Id::F2I_R: | ||||
|     case OpCode::Id::F2I_C: | ||||
|     case OpCode::Id::F2I_IMM: { | ||||
|         UNIMPLEMENTED_IF(instr.conversion.src_size != Register::Size::Word); | ||||
|         UNIMPLEMENTED_IF(instr.conversion.src_size == Register::Size::Long); | ||||
|         UNIMPLEMENTED_IF_MSG(instr.generates_cc, | ||||
|                              "Condition codes generation in F2I is not implemented"); | ||||
|         Node value = [&]() { | ||||
|  | @ -153,6 +168,11 @@ u32 ShaderIR::DecodeConversion(NodeBlock& bb, u32 pc) { | |||
|             } | ||||
|         }(); | ||||
| 
 | ||||
|         if (instr.conversion.src_size == Register::Size::Short) { | ||||
|             // TODO: figure out where extract is set in the encoding
 | ||||
|             value = Operation(OperationCode::FCastHalf0, PRECISE, value); | ||||
|         } | ||||
| 
 | ||||
|         value = GetOperandAbsNegFloat(value, instr.conversion.abs_a, instr.conversion.negate_a); | ||||
| 
 | ||||
|         value = [&]() { | ||||
|  |  | |||
|  | @ -30,6 +30,8 @@ enum class OperationCode { | |||
|     FNegate,       /// (MetaArithmetic, float a) -> float
 | ||||
|     FAbsolute,     /// (MetaArithmetic, float a) -> float
 | ||||
|     FClamp,        /// (MetaArithmetic, float value, float min, float max) -> float
 | ||||
|     FCastHalf0,    /// (MetaArithmetic, f16vec2 a) -> float
 | ||||
|     FCastHalf1,    /// (MetaArithmetic, f16vec2 a) -> float
 | ||||
|     FMin,          /// (MetaArithmetic, float a, float b) -> float
 | ||||
|     FMax,          /// (MetaArithmetic, float a, float b) -> float
 | ||||
|     FCos,          /// (MetaArithmetic, float a) -> float
 | ||||
|  | @ -89,6 +91,7 @@ enum class OperationCode { | |||
|     HAbsolute,  /// (f16vec2 a) -> f16vec2
 | ||||
|     HNegate,    /// (f16vec2 a, bool first, bool second) -> f16vec2
 | ||||
|     HClamp,     /// (f16vec2 src, float min, float max) -> f16vec2
 | ||||
|     HCastFloat, /// (MetaArithmetic, float a) -> f16vec2
 | ||||
|     HUnpack,    /// (Tegra::Shader::HalfType, T value) -> f16vec2
 | ||||
|     HMergeF32,  /// (f16vec2 src) -> float
 | ||||
|     HMergeH0,   /// (f16vec2 dest, f16vec2 src) -> f16vec2
 | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Fernando Sahmkow
						Fernando Sahmkow