forked from eden-emu/eden
		
	shader: Implement FSET and FSETP
Also fix an oversight when adding the SignedZeroInfNanPreserve execution mode.
This commit is contained in:
		
							parent
							
								
									17a82b56d7
								
							
						
					
					
						commit
						fa2f6e38f4
					
				
					 9 changed files with 204 additions and 94 deletions
				
			
		|  | @ -66,12 +66,14 @@ add_library(shader_recompiler STATIC | |||
|     frontend/maxwell/translate/impl/find_leading_one.cpp | ||||
|     frontend/maxwell/translate/impl/floating_point_add.cpp | ||||
|     frontend/maxwell/translate/impl/floating_point_compare.cpp | ||||
|     frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp | ||||
|     frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp | ||||
|     frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp | ||||
|     frontend/maxwell/translate/impl/floating_point_min_max.cpp | ||||
|     frontend/maxwell/translate/impl/floating_point_multi_function.cpp | ||||
|     frontend/maxwell/translate/impl/floating_point_multiply.cpp | ||||
|     frontend/maxwell/translate/impl/floating_point_range_reduction.cpp | ||||
|     frontend/maxwell/translate/impl/floating_point_set_predicate.cpp | ||||
|     frontend/maxwell/translate/impl/half_floating_point_add.cpp | ||||
|     frontend/maxwell/translate/impl/impl.cpp | ||||
|     frontend/maxwell/translate/impl/impl.h | ||||
|  |  | |||
|  | @ -124,10 +124,12 @@ void SetupDenormControl(const Profile& profile, const IR::Program& program, Emit | |||
|     ctx.AddExtension("SPV_KHR_float_controls"); | ||||
| 
 | ||||
|     if (info.uses_fp16 && profile.support_fp16_signed_zero_nan_preserve) { | ||||
|         ctx.AddExecutionMode(main_func, spv::ExecutionMode::SignedZeroInfNanPreserve); | ||||
|         ctx.AddCapability(spv::Capability::SignedZeroInfNanPreserve); | ||||
|         ctx.AddExecutionMode(main_func, spv::ExecutionMode::SignedZeroInfNanPreserve, 16U); | ||||
|     } | ||||
|     if (profile.support_fp32_signed_zero_nan_preserve) { | ||||
|         ctx.AddExecutionMode(main_func, spv::ExecutionMode::SignedZeroInfNanPreserve); | ||||
|         ctx.AddCapability(spv::Capability::SignedZeroInfNanPreserve); | ||||
|         ctx.AddExecutionMode(main_func, spv::ExecutionMode::SignedZeroInfNanPreserve, 32U); | ||||
|     } | ||||
|     if (info.uses_fp32_denorms_flush && info.uses_fp32_denorms_preserve) { | ||||
|         // LOG_ERROR(HW_GPU, "Fp32 denorm flush and preserve on the same shader");
 | ||||
|  |  | |||
|  | @ -58,4 +58,52 @@ IR::U1 PredicateOperation(IR::IREmitter& ir, const IR::U32& result, PredicateOp | |||
|     } | ||||
| } | ||||
| 
 | ||||
| bool IsCompareOpOrdered(FPCompareOp op) { | ||||
|     switch (op) { | ||||
|     case FPCompareOp::LTU: | ||||
|     case FPCompareOp::EQU: | ||||
|     case FPCompareOp::LEU: | ||||
|     case FPCompareOp::GTU: | ||||
|     case FPCompareOp::NEU: | ||||
|     case FPCompareOp::GEU: | ||||
|         return false; | ||||
|     default: | ||||
|         return true; | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F32& operand_1, const IR::F32& operand_2, | ||||
|                             FPCompareOp compare_op, IR::FpControl control) { | ||||
|     const bool ordered{IsCompareOpOrdered(compare_op)}; | ||||
|     switch (compare_op) { | ||||
|     case FPCompareOp::F: | ||||
|         return ir.Imm1(false); | ||||
|     case FPCompareOp::LT: | ||||
|     case FPCompareOp::LTU: | ||||
|         return ir.FPLessThan(operand_1, operand_2, control, ordered); | ||||
|     case FPCompareOp::EQ: | ||||
|     case FPCompareOp::EQU: | ||||
|         return ir.FPEqual(operand_1, operand_2, control, ordered); | ||||
|     case FPCompareOp::LE: | ||||
|     case FPCompareOp::LEU: | ||||
|         return ir.FPLessThanEqual(operand_1, operand_2, control, ordered); | ||||
|     case FPCompareOp::GT: | ||||
|     case FPCompareOp::GTU: | ||||
|         return ir.FPGreaterThan(operand_1, operand_2, control, ordered); | ||||
|     case FPCompareOp::NE: | ||||
|     case FPCompareOp::NEU: | ||||
|         return ir.FPNotEqual(operand_1, operand_2, control, ordered); | ||||
|     case FPCompareOp::GE: | ||||
|     case FPCompareOp::GEU: | ||||
|         return ir.FPGreaterThanEqual(operand_1, operand_2, control, ordered); | ||||
|     case FPCompareOp::NUM: | ||||
|         return ir.FPOrdered(operand_1, operand_2); | ||||
|     case FPCompareOp::Nan: | ||||
|         return ir.FPUnordered(operand_1, operand_2); | ||||
|     case FPCompareOp::T: | ||||
|         return ir.Imm1(true); | ||||
|     default: | ||||
|         throw NotImplementedException("Invalid FP compare op {}", compare_op); | ||||
|     } | ||||
| } | ||||
| } // namespace Shader::Maxwell
 | ||||
|  |  | |||
|  | @ -15,4 +15,10 @@ namespace Shader::Maxwell { | |||
|                                       const IR::U1& predicate_2, BooleanOp bop); | ||||
| 
 | ||||
| [[nodiscard]] IR::U1 PredicateOperation(IR::IREmitter& ir, const IR::U32& result, PredicateOp op); | ||||
| 
 | ||||
| [[nodiscard]] bool IsCompareOpOrdered(FPCompareOp op); | ||||
| 
 | ||||
| [[nodiscard]] IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F32& operand_1, | ||||
|                                           const IR::F32& operand_2, FPCompareOp compare_op, | ||||
|                                           IR::FpControl control = {}); | ||||
| } // namespace Shader::Maxwell
 | ||||
|  |  | |||
|  | @ -9,74 +9,6 @@ | |||
| 
 | ||||
| namespace Shader::Maxwell { | ||||
| namespace { | ||||
| enum class FPCompareOp : u64 { | ||||
|     F, | ||||
|     LT, | ||||
|     EQ, | ||||
|     LE, | ||||
|     GT, | ||||
|     NE, | ||||
|     GE, | ||||
|     NUM, | ||||
|     Nan, | ||||
|     LTU, | ||||
|     EQU, | ||||
|     LEU, | ||||
|     GTU, | ||||
|     NEU, | ||||
|     GEU, | ||||
|     T, | ||||
| }; | ||||
| 
 | ||||
| bool IsCompareOpOrdered(FPCompareOp op) { | ||||
|     switch (op) { | ||||
|     case FPCompareOp::LTU: | ||||
|     case FPCompareOp::EQU: | ||||
|     case FPCompareOp::LEU: | ||||
|     case FPCompareOp::GTU: | ||||
|     case FPCompareOp::NEU: | ||||
|     case FPCompareOp::GEU: | ||||
|         return false; | ||||
|     default: | ||||
|         return true; | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F32& operand_1, const IR::F32& operand_2, | ||||
|                             FPCompareOp compare_op, IR::FpControl control) { | ||||
|     const bool ordered{IsCompareOpOrdered(compare_op)}; | ||||
|     switch (compare_op) { | ||||
|     case FPCompareOp::F: | ||||
|         return ir.Imm1(false); | ||||
|     case FPCompareOp::LT: | ||||
|     case FPCompareOp::LTU: | ||||
|         return ir.FPLessThan(operand_1, operand_2, control, ordered); | ||||
|     case FPCompareOp::EQ: | ||||
|     case FPCompareOp::EQU: | ||||
|         return ir.FPEqual(operand_1, operand_2, control, ordered); | ||||
|     case FPCompareOp::LE: | ||||
|     case FPCompareOp::LEU: | ||||
|         return ir.FPLessThanEqual(operand_1, operand_2, control, ordered); | ||||
|     case FPCompareOp::GT: | ||||
|     case FPCompareOp::GTU: | ||||
|         return ir.FPGreaterThan(operand_1, operand_2, control, ordered); | ||||
|     case FPCompareOp::NE: | ||||
|     case FPCompareOp::NEU: | ||||
|         return ir.FPNotEqual(operand_1, operand_2, control, ordered); | ||||
|     case FPCompareOp::GE: | ||||
|     case FPCompareOp::GEU: | ||||
|         return ir.FPGreaterThanEqual(operand_1, operand_2, control, ordered); | ||||
|     case FPCompareOp::NUM: | ||||
|         return ir.FPOrdered(operand_1, operand_2); | ||||
|     case FPCompareOp::Nan: | ||||
|         return ir.FPUnordered(operand_1, operand_2); | ||||
|     case FPCompareOp::T: | ||||
|         return ir.Imm1(true); | ||||
|     default: | ||||
|         throw NotImplementedException("Invalid compare op {}", compare_op); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void FCMP(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::F32& operand) { | ||||
|     union { | ||||
|         u64 insn; | ||||
|  |  | |||
|  | @ -0,0 +1,65 @@ | |||
| // Copyright 2021 yuzu Emulator Project
 | ||||
| // Licensed under GPLv2 or any later version
 | ||||
| // Refer to the license.txt file included.
 | ||||
| 
 | ||||
| #include "common/bit_field.h" | ||||
| #include "common/common_types.h" | ||||
| #include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" | ||||
| #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||||
| 
 | ||||
| namespace Shader::Maxwell { | ||||
| namespace { | ||||
| void FSET(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { | ||||
|     union { | ||||
|         u64 insn; | ||||
|         BitField<0, 8, IR::Reg> dest_reg; | ||||
|         BitField<8, 8, IR::Reg> src_a_reg; | ||||
|         BitField<39, 3, IR::Pred> pred; | ||||
|         BitField<42, 1, u64> neg_pred; | ||||
|         BitField<43, 1, u64> negate_a; | ||||
|         BitField<44, 1, u64> abs_b; | ||||
|         BitField<45, 2, BooleanOp> bop; | ||||
|         BitField<48, 4, FPCompareOp> compare_op; | ||||
|         BitField<52, 1, u64> bf; | ||||
|         BitField<53, 1, u64> negate_b; | ||||
|         BitField<54, 1, u64> abs_a; | ||||
|         BitField<55, 1, u64> ftz; | ||||
|     } const fset{insn}; | ||||
| 
 | ||||
|     const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fset.src_a_reg), fset.abs_a != 0, fset.negate_a != 0)}; | ||||
|     const IR::F32 op_b = v.ir.FPAbsNeg(src_b, fset.abs_b != 0, fset.negate_b != 0); | ||||
|     const IR::FpControl control{ | ||||
|         .no_contraction{false}, | ||||
|         .rounding{IR::FpRounding::DontCare}, | ||||
|         .fmz_mode{fset.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None}, | ||||
|     }; | ||||
| 
 | ||||
|     IR::U1 pred{v.ir.GetPred(fset.pred)}; | ||||
|     if (fset.neg_pred != 0) { | ||||
|         pred = v.ir.LogicalNot(pred); | ||||
|     } | ||||
|     const IR::U1 cmp_result{FloatingPointCompare(v.ir, op_a, op_b, fset.compare_op, control)}; | ||||
|     const IR::U1 bop_result{PredicateCombine(v.ir, cmp_result, pred, fset.bop)}; | ||||
| 
 | ||||
|     const IR::U32 one_mask{v.ir.Imm32(-1)}; | ||||
|     const IR::U32 fp_one{v.ir.Imm32(0x3f800000)}; | ||||
|     const IR::U32 fail_result{v.ir.Imm32(0)}; | ||||
|     const IR::U32 pass_result{fset.bf == 0 ? one_mask : fp_one}; | ||||
| 
 | ||||
|     v.X(fset.dest_reg, IR::U32{v.ir.Select(bop_result, pass_result, fail_result)}); | ||||
| } | ||||
| } // Anonymous namespace
 | ||||
| 
 | ||||
| void TranslatorVisitor::FSET_reg(u64 insn) { | ||||
|     FSET(*this, insn, GetFloatReg20(insn)); | ||||
| } | ||||
| 
 | ||||
| void TranslatorVisitor::FSET_cbuf(u64 insn) { | ||||
|     FSET(*this, insn, GetFloatCbuf(insn)); | ||||
| } | ||||
| 
 | ||||
| void TranslatorVisitor::FSET_imm(u64 insn) { | ||||
|     FSET(*this, insn, GetFloatImm20(insn)); | ||||
| } | ||||
| 
 | ||||
| } // namespace Shader::Maxwell
 | ||||
|  | @ -0,0 +1,60 @@ | |||
| // Copyright 2021 yuzu Emulator Project
 | ||||
| // Licensed under GPLv2 or any later version
 | ||||
| // Refer to the license.txt file included.
 | ||||
| 
 | ||||
| #include "common/bit_field.h" | ||||
| #include "common/common_types.h" | ||||
| #include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h" | ||||
| #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||||
| 
 | ||||
| namespace Shader::Maxwell { | ||||
| namespace { | ||||
| void FSETP(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) { | ||||
|     union { | ||||
|         u64 insn; | ||||
|         BitField<0, 3, IR::Pred> dest_pred_b; | ||||
|         BitField<3, 3, IR::Pred> dest_pred_a; | ||||
|         BitField<6, 1, u64> negate_b; | ||||
|         BitField<7, 1, u64> abs_a; | ||||
|         BitField<8, 8, IR::Reg> src_a_reg; | ||||
|         BitField<39, 3, IR::Pred> bop_pred; | ||||
|         BitField<42, 1, u64> neg_bop_pred; | ||||
|         BitField<43, 1, u64> negate_a; | ||||
|         BitField<44, 1, u64> abs_b; | ||||
|         BitField<45, 2, BooleanOp> bop; | ||||
|         BitField<47, 1, u64> ftz; | ||||
|         BitField<48, 4, FPCompareOp> compare_op; | ||||
|     } const fsetp{insn}; | ||||
| 
 | ||||
|     const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fsetp.src_a_reg), fsetp.abs_a != 0, fsetp.negate_a != 0)}; | ||||
|     const IR::F32 op_b = v.ir.FPAbsNeg(src_b, fsetp.abs_b != 0, fsetp.negate_b != 0); | ||||
|     const IR::FpControl control{ | ||||
|         .no_contraction{false}, | ||||
|         .rounding{IR::FpRounding::DontCare}, | ||||
|         .fmz_mode{fsetp.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None}, | ||||
|     }; | ||||
| 
 | ||||
|     const BooleanOp bop{fsetp.bop}; | ||||
|     const FPCompareOp compare_op{fsetp.compare_op}; | ||||
|     const IR::U1 comparison{FloatingPointCompare(v.ir, op_a, op_b, compare_op, control)}; | ||||
|     const IR::U1 bop_pred{v.ir.GetPred(fsetp.bop_pred, fsetp.neg_bop_pred != 0)}; | ||||
|     const IR::U1 result_a{PredicateCombine(v.ir, comparison, bop_pred, bop)}; | ||||
|     const IR::U1 result_b{PredicateCombine(v.ir, v.ir.LogicalNot(comparison), bop_pred, bop)}; | ||||
|     v.ir.SetPred(fsetp.dest_pred_a, result_a); | ||||
|     v.ir.SetPred(fsetp.dest_pred_b, result_b); | ||||
| } | ||||
| } // Anonymous namespace
 | ||||
| 
 | ||||
| void TranslatorVisitor::FSETP_reg(u64 insn) { | ||||
|     FSETP(*this, insn, GetFloatReg20(insn)); | ||||
| } | ||||
| 
 | ||||
| void TranslatorVisitor::FSETP_cbuf(u64 insn) { | ||||
|     FSETP(*this, insn, GetFloatCbuf(insn)); | ||||
| } | ||||
| 
 | ||||
| void TranslatorVisitor::FSETP_imm(u64 insn) { | ||||
|     FSETP(*this, insn, GetFloatImm20(insn)); | ||||
| } | ||||
| 
 | ||||
| } // namespace Shader::Maxwell
 | ||||
|  | @ -35,6 +35,25 @@ enum class PredicateOp : u64 { | |||
|     NonZero, | ||||
| }; | ||||
| 
 | ||||
| enum class FPCompareOp : u64 { | ||||
|     F, | ||||
|     LT, | ||||
|     EQ, | ||||
|     LE, | ||||
|     GT, | ||||
|     NE, | ||||
|     GE, | ||||
|     NUM, | ||||
|     Nan, | ||||
|     LTU, | ||||
|     EQU, | ||||
|     LEU, | ||||
|     GTU, | ||||
|     NEU, | ||||
|     GEU, | ||||
|     T, | ||||
| }; | ||||
| 
 | ||||
| class TranslatorVisitor { | ||||
| public: | ||||
|     explicit TranslatorVisitor(Environment& env_, IR::Block& block) : env{env_}, ir(block) {} | ||||
|  |  | |||
|  | @ -201,30 +201,6 @@ void TranslatorVisitor::FCHK_imm(u64) { | |||
|     ThrowNotImplemented(Opcode::FCHK_imm); | ||||
| } | ||||
| 
 | ||||
| void TranslatorVisitor::FSET_reg(u64) { | ||||
|     ThrowNotImplemented(Opcode::FSET_reg); | ||||
| } | ||||
| 
 | ||||
| void TranslatorVisitor::FSET_cbuf(u64) { | ||||
|     ThrowNotImplemented(Opcode::FSET_cbuf); | ||||
| } | ||||
| 
 | ||||
| void TranslatorVisitor::FSET_imm(u64) { | ||||
|     ThrowNotImplemented(Opcode::FSET_imm); | ||||
| } | ||||
| 
 | ||||
| void TranslatorVisitor::FSETP_reg(u64) { | ||||
|     ThrowNotImplemented(Opcode::FSETP_reg); | ||||
| } | ||||
| 
 | ||||
| void TranslatorVisitor::FSETP_cbuf(u64) { | ||||
|     ThrowNotImplemented(Opcode::FSETP_cbuf); | ||||
| } | ||||
| 
 | ||||
| void TranslatorVisitor::FSETP_imm(u64) { | ||||
|     ThrowNotImplemented(Opcode::FSETP_imm); | ||||
| } | ||||
| 
 | ||||
| void TranslatorVisitor::FSWZADD(u64) { | ||||
|     ThrowNotImplemented(Opcode::FSWZADD); | ||||
| } | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 ameerj
						ameerj