forked from eden-emu/eden
		
	shader: Implement SHFL
This commit is contained in:
		
							parent
							
								
									49e87ea8ab
								
							
						
					
					
						commit
						32c5483beb
					
				
					 16 changed files with 284 additions and 69 deletions
				
			
		|  | @ -374,6 +374,10 @@ U1 IREmitter::GetSparseFromOp(const Value& op) { | |||
|     return Inst<U1>(Opcode::GetSparseFromOp, op); | ||||
| } | ||||
| 
 | ||||
| U1 IREmitter::GetInBoundsFromOp(const Value& op) { | ||||
|     return Inst<U1>(Opcode::GetInBoundsFromOp, op); | ||||
| } | ||||
| 
 | ||||
| F16F32F64 IREmitter::FPAdd(const F16F32F64& a, const F16F32F64& b, FpControl control) { | ||||
|     if (a.Type() != b.Type()) { | ||||
|         throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type()); | ||||
|  | @ -1486,4 +1490,23 @@ U32 IREmitter::SubgroupBallot(const U1& value) { | |||
|     return Inst<U32>(Opcode::SubgroupBallot, value); | ||||
| } | ||||
| 
 | ||||
| U32 IREmitter::ShuffleIndex(const IR::U32& value, const IR::U32& index, const IR::U32& clamp, | ||||
|                             const IR::U32& seg_mask) { | ||||
|     return Inst<U32>(Opcode::ShuffleIndex, value, index, clamp, seg_mask); | ||||
| } | ||||
| 
 | ||||
| U32 IREmitter::ShuffleUp(const IR::U32& value, const IR::U32& index, const IR::U32& clamp, | ||||
|                          const IR::U32& seg_mask) { | ||||
|     return Inst<U32>(Opcode::ShuffleUp, value, index, clamp, seg_mask); | ||||
| } | ||||
| 
 | ||||
| U32 IREmitter::ShuffleDown(const IR::U32& value, const IR::U32& index, const IR::U32& clamp, | ||||
|                            const IR::U32& seg_mask) { | ||||
|     return Inst<U32>(Opcode::ShuffleDown, value, index, clamp, seg_mask); | ||||
| } | ||||
| 
 | ||||
| U32 IREmitter::ShuffleButterfly(const IR::U32& value, const IR::U32& index, const IR::U32& clamp, | ||||
|                                 const IR::U32& seg_mask) { | ||||
|     return Inst<U32>(Opcode::ShuffleButterfly, value, index, clamp, seg_mask); | ||||
| } | ||||
| } // namespace Shader::IR
 | ||||
|  |  | |||
|  | @ -104,6 +104,7 @@ public: | |||
|     [[nodiscard]] U1 GetCarryFromOp(const Value& op); | ||||
|     [[nodiscard]] U1 GetOverflowFromOp(const Value& op); | ||||
|     [[nodiscard]] U1 GetSparseFromOp(const Value& op); | ||||
|     [[nodiscard]] U1 GetInBoundsFromOp(const Value& op); | ||||
| 
 | ||||
|     [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2); | ||||
|     [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3); | ||||
|  | @ -147,7 +148,8 @@ public: | |||
|     [[nodiscard]] F32F64 FPRecipSqrt(const F32F64& value); | ||||
|     [[nodiscard]] F32 FPSqrt(const F32& value); | ||||
|     [[nodiscard]] F16F32F64 FPSaturate(const F16F32F64& value); | ||||
|     [[nodiscard]] F16F32F64 FPClamp(const F16F32F64& value, const F16F32F64& min_value, const F16F32F64& max_value); | ||||
|     [[nodiscard]] F16F32F64 FPClamp(const F16F32F64& value, const F16F32F64& min_value, | ||||
|                                     const F16F32F64& max_value); | ||||
|     [[nodiscard]] F16F32F64 FPRoundEven(const F16F32F64& value, FpControl control = {}); | ||||
|     [[nodiscard]] F16F32F64 FPFloor(const F16F32F64& value, FpControl control = {}); | ||||
|     [[nodiscard]] F16F32F64 FPCeil(const F16F32F64& value, FpControl control = {}); | ||||
|  | @ -242,6 +244,14 @@ public: | |||
|     [[nodiscard]] U1 VoteAny(const U1& value); | ||||
|     [[nodiscard]] U1 VoteEqual(const U1& value); | ||||
|     [[nodiscard]] U32 SubgroupBallot(const U1& value); | ||||
|     [[nodiscard]] U32 ShuffleIndex(const IR::U32& value, const IR::U32& index, const IR::U32& clamp, | ||||
|                                    const IR::U32& seg_mask); | ||||
|     [[nodiscard]] U32 ShuffleUp(const IR::U32& value, const IR::U32& index, const IR::U32& clamp, | ||||
|                                 const IR::U32& seg_mask); | ||||
|     [[nodiscard]] U32 ShuffleDown(const IR::U32& value, const IR::U32& index, const IR::U32& clamp, | ||||
|                                   const IR::U32& seg_mask); | ||||
|     [[nodiscard]] U32 ShuffleButterfly(const IR::U32& value, const IR::U32& index, | ||||
|                                        const IR::U32& clamp, const IR::U32& seg_mask); | ||||
| 
 | ||||
| private: | ||||
|     IR::Block::iterator insertion_point; | ||||
|  |  | |||
|  | @ -89,6 +89,7 @@ bool Inst::IsPseudoInstruction() const noexcept { | |||
|     case Opcode::GetCarryFromOp: | ||||
|     case Opcode::GetOverflowFromOp: | ||||
|     case Opcode::GetSparseFromOp: | ||||
|     case Opcode::GetInBoundsFromOp: | ||||
|         return true; | ||||
|     default: | ||||
|         return false; | ||||
|  | @ -123,6 +124,9 @@ Inst* Inst::GetAssociatedPseudoOperation(IR::Opcode opcode) { | |||
|     case Opcode::GetSparseFromOp: | ||||
|         CheckPseudoInstruction(associated_insts->sparse_inst, Opcode::GetSparseFromOp); | ||||
|         return associated_insts->sparse_inst; | ||||
|     case Opcode::GetInBoundsFromOp: | ||||
|         CheckPseudoInstruction(associated_insts->in_bounds_inst, Opcode::GetInBoundsFromOp); | ||||
|         return associated_insts->in_bounds_inst; | ||||
|     default: | ||||
|         throw InvalidArgument("{} is not a pseudo-instruction", opcode); | ||||
|     } | ||||
|  | @ -262,6 +266,10 @@ void Inst::Use(const Value& value) { | |||
|         AllocAssociatedInsts(assoc_inst); | ||||
|         SetPseudoInstruction(assoc_inst->sparse_inst, this); | ||||
|         break; | ||||
|     case Opcode::GetInBoundsFromOp: | ||||
|         AllocAssociatedInsts(assoc_inst); | ||||
|         SetPseudoInstruction(assoc_inst->in_bounds_inst, this); | ||||
|         break; | ||||
|     default: | ||||
|         break; | ||||
|     } | ||||
|  | @ -289,6 +297,10 @@ void Inst::UndoUse(const Value& value) { | |||
|         AllocAssociatedInsts(assoc_inst); | ||||
|         RemovePseudoInstruction(assoc_inst->overflow_inst, Opcode::GetOverflowFromOp); | ||||
|         break; | ||||
|     case Opcode::GetInBoundsFromOp: | ||||
|         AllocAssociatedInsts(assoc_inst); | ||||
|         RemovePseudoInstruction(assoc_inst->in_bounds_inst, Opcode::GetInBoundsFromOp); | ||||
|         break; | ||||
|     default: | ||||
|         break; | ||||
|     } | ||||
|  |  | |||
|  | @ -134,6 +134,7 @@ static_assert(sizeof(Inst) <= 128, "Inst size unintentionally increased"); | |||
| 
 | ||||
| struct AssociatedInsts { | ||||
|     union { | ||||
|         Inst* in_bounds_inst; | ||||
|         Inst* sparse_inst; | ||||
|         Inst* zero_inst{}; | ||||
|     }; | ||||
|  |  | |||
|  | @ -159,6 +159,7 @@ OPCODE(GetSignFromOp,                                       U1,             Opaq | |||
| OPCODE(GetCarryFromOp,                                      U1,             Opaque,                                                                         ) | ||||
| OPCODE(GetOverflowFromOp,                                   U1,             Opaque,                                                                         ) | ||||
| OPCODE(GetSparseFromOp,                                     U1,             Opaque,                                                                         ) | ||||
| OPCODE(GetInBoundsFromOp,                                   U1,             Opaque,                                                                         ) | ||||
| 
 | ||||
| // Floating-point operations
 | ||||
| OPCODE(FPAbs16,                                             F16,            F16,                                                                            ) | ||||
|  | @ -363,8 +364,12 @@ OPCODE(ImageSampleExplicitLod,                              F32x4,          U32, | |||
| OPCODE(ImageSampleDrefImplicitLod,                          F32,            U32,            Opaque,         F32,            Opaque,         Opaque,         ) | ||||
| OPCODE(ImageSampleDrefExplicitLod,                          F32,            U32,            Opaque,         F32,            Opaque,         Opaque,         ) | ||||
| 
 | ||||
| // Vote operations
 | ||||
| // Warp operations
 | ||||
| OPCODE(VoteAll,                                             U1,             U1,                                                                             ) | ||||
| OPCODE(VoteAny,                                             U1,             U1,                                                                             ) | ||||
| OPCODE(VoteEqual,                                           U1,             U1,                                                                             ) | ||||
| OPCODE(SubgroupBallot,                                      U32,            U1,                                                                             ) | ||||
| OPCODE(ShuffleIndex,                                        U32,            U32,            U32,            U32,            U32,                            ) | ||||
| OPCODE(ShuffleUp,                                           U32,            U32,            U32,            U32,            U32,                            ) | ||||
| OPCODE(ShuffleDown,                                         U32,            U32,            U32,            U32,            U32,                            ) | ||||
| OPCODE(ShuffleButterfly,                                    U32,            U32,            U32,            U32,            U32,                            ) | ||||
|  |  | |||
|  | @ -53,8 +53,8 @@ void TranslatorVisitor::ISCADD_reg(u64 insn) { | |||
|     ISCADD(*this, insn, GetReg20(insn)); | ||||
| } | ||||
| 
 | ||||
| void TranslatorVisitor::ISCADD_cbuf(u64) { | ||||
|     throw NotImplementedException("ISCADD (cbuf)"); | ||||
| void TranslatorVisitor::ISCADD_cbuf(u64 insn) { | ||||
|     ISCADD(*this, insn, GetCbuf(insn)); | ||||
| } | ||||
| 
 | ||||
| void TranslatorVisitor::ISCADD_imm(u64 insn) { | ||||
|  |  | |||
|  | @ -301,10 +301,6 @@ void TranslatorVisitor::SETLMEMBASE(u64) { | |||
|     ThrowNotImplemented(Opcode::SETLMEMBASE); | ||||
| } | ||||
| 
 | ||||
| void TranslatorVisitor::SHFL(u64) { | ||||
|     ThrowNotImplemented(Opcode::SHFL); | ||||
| } | ||||
| 
 | ||||
| void TranslatorVisitor::SSY() { | ||||
|     // SSY is a no-op
 | ||||
| } | ||||
|  |  | |||
|  | @ -0,0 +1,69 @@ | |||
| // Copyright 2021 yuzu Emulator Project
 | ||||
| // Licensed under GPLv2 or any later version
 | ||||
| // Refer to the license.txt file included.
 | ||||
| 
 | ||||
| #include <optional> | ||||
| 
 | ||||
| #include "common/bit_field.h" | ||||
| #include "common/common_types.h" | ||||
| #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" | ||||
| 
 | ||||
| namespace Shader::Maxwell { | ||||
| namespace { | ||||
| enum class ShuffleMode : u64 { | ||||
|     IDX, | ||||
|     UP, | ||||
|     DOWN, | ||||
|     BFLY, | ||||
| }; | ||||
| 
 | ||||
| [[nodiscard]] IR::U32 ShuffleOperation(IR::IREmitter& ir, const IR::U32& value, | ||||
|                                        const IR::U32& index, const IR::U32& mask, | ||||
|                                        ShuffleMode shfl_op) { | ||||
|     const IR::U32 clamp{ir.BitFieldExtract(mask, ir.Imm32(0), ir.Imm32(5))}; | ||||
|     const IR::U32 seg_mask{ir.BitFieldExtract(mask, ir.Imm32(8), ir.Imm32(5))}; | ||||
|     switch (shfl_op) { | ||||
|     case ShuffleMode::IDX: | ||||
|         return ir.ShuffleIndex(value, index, clamp, seg_mask); | ||||
|     case ShuffleMode::UP: | ||||
|         return ir.ShuffleUp(value, index, clamp, seg_mask); | ||||
|     case ShuffleMode::DOWN: | ||||
|         return ir.ShuffleDown(value, index, clamp, seg_mask); | ||||
|     case ShuffleMode::BFLY: | ||||
|         return ir.ShuffleButterfly(value, index, clamp, seg_mask); | ||||
|     default: | ||||
|         throw NotImplementedException("Invalid SHFL op {}", shfl_op); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void Shuffle(TranslatorVisitor& v, u64 insn, const IR::U32& index, const IR::U32& mask) { | ||||
|     union { | ||||
|         u64 insn; | ||||
|         BitField<0, 8, IR::Reg> dest_reg; | ||||
|         BitField<8, 8, IR::Reg> src_reg; | ||||
|         BitField<30, 2, ShuffleMode> mode; | ||||
|         BitField<48, 3, IR::Pred> pred; | ||||
|     } const shfl{insn}; | ||||
| 
 | ||||
|     const IR::U32 result{ShuffleOperation(v.ir, v.X(shfl.src_reg), index, mask, shfl.mode)}; | ||||
|     v.ir.SetPred(shfl.pred, v.ir.GetInBoundsFromOp(result)); | ||||
|     v.X(shfl.dest_reg, result); | ||||
| } | ||||
| } // Anonymous namespace
 | ||||
| 
 | ||||
| void TranslatorVisitor::SHFL(u64 insn) { | ||||
|     union { | ||||
|         u64 insn; | ||||
|         BitField<20, 5, u64> src_a_imm; | ||||
|         BitField<28, 1, u64> src_a_flag; | ||||
|         BitField<29, 1, u64> src_b_flag; | ||||
|         BitField<34, 13, u64> src_b_imm; | ||||
|     } const flags{insn}; | ||||
|     const IR::U32 src_a{flags.src_a_flag != 0 ? ir.Imm32(static_cast<u32>(flags.src_a_imm)) | ||||
|                                               : GetReg20(insn)}; | ||||
|     const IR::U32 src_b{flags.src_b_flag != 0 ? ir.Imm32(static_cast<u32>(flags.src_b_imm)) | ||||
|                                               : GetReg39(insn)}; | ||||
|     Shuffle(*this, insn, src_a, src_b); | ||||
| } | ||||
| 
 | ||||
| } // namespace Shader::Maxwell
 | ||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 ameerj
						ameerj