forked from eden-emu/eden
		
	Merge pull request #3612 from ReinUsesLisp/red
shader/memory: Implement RED.E.ADD and minor changes to ATOM
This commit is contained in:
		
						commit
						e33196d4e7
					
				
					 5 changed files with 140 additions and 69 deletions
				
			
		|  | @ -1005,6 +1005,12 @@ union Instruction { | |||
|         BitField<46, 2, u64> cache_mode; | ||||
|     } stg; | ||||
| 
 | ||||
|     union { // Operand fields of the RED (reduction) instruction.
 | ||||
|         BitField<23, 3, AtomicOp> operation; // Which AtomicOp the reduction applies.
 | ||||
|         BitField<48, 1, u64> extended; // NOTE(review): presumably the ".E" extended-address flag - confirm.
 | ||||
|         BitField<20, 3, GlobalAtomicType> type; // Operand type, expressed as a GlobalAtomicType.
 | ||||
|     } red; | ||||
| 
 | ||||
|     union { | ||||
|         BitField<52, 4, AtomicOp> operation; | ||||
|         BitField<49, 3, GlobalAtomicType> type; | ||||
|  | @ -1787,6 +1793,7 @@ public: | |||
|         ST_S, | ||||
|         ST,    // Store in generic memory
 | ||||
|         STG,   // Store in global memory
 | ||||
|         RED,   // Reduction operation
 | ||||
|         ATOM,  // Atomic operation on global memory
 | ||||
|         ATOMS, // Atomic operation on shared memory
 | ||||
|         AL2P,  // Transforms attribute memory into physical memory
 | ||||
|  | @ -2097,6 +2104,7 @@ private: | |||
|             INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"), | ||||
|             INST("101-------------", Id::ST, Type::Memory, "ST"), | ||||
|             INST("1110111011011---", Id::STG, Type::Memory, "STG"), | ||||
|             INST("1110101111111---", Id::RED, Type::Memory, "RED"), | ||||
|             INST("11101101--------", Id::ATOM, Type::Memory, "ATOM"), | ||||
|             INST("11101100--------", Id::ATOMS, Type::Memory, "ATOMS"), | ||||
|             INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"), | ||||
|  |  | |||
|  | @ -2119,8 +2119,14 @@ private: | |||
|             return {}; | ||||
|         } | ||||
|         return {fmt::format("atomic{}({}, {})", opname, Visit(operation[0]).GetCode(), | ||||
|                             Visit(operation[1]).As(type)), | ||||
|                 type}; | ||||
|                             Visit(operation[1]).AsUint()), | ||||
|                 Type::Uint}; | ||||
|     } | ||||
| 
 | ||||
|     template <const std::string_view& opname, Type type> // Same template parameters as Atomic, which builds the call.
 | ||||
|     Expression Reduce(Operation operation) { | ||||
|         code.AddLine("{};", Atomic<opname, type>(operation).GetCode()); // Emit the atomic call as a standalone statement.
 | ||||
|         return {}; // The atomic's result is not handed back; callers get an empty Expression.
 | ||||
|     } | ||||
| 
 | ||||
|     Expression Branch(Operation operation) { | ||||
|  | @ -2479,6 +2485,20 @@ private: | |||
|         &GLSLDecompiler::Atomic<Func::Or, Type::Int>, | ||||
|         &GLSLDecompiler::Atomic<Func::Xor, Type::Int>, | ||||
| 
 | ||||
|         &GLSLDecompiler::Reduce<Func::Add, Type::Uint>, | ||||
|         &GLSLDecompiler::Reduce<Func::Min, Type::Uint>, | ||||
|         &GLSLDecompiler::Reduce<Func::Max, Type::Uint>, | ||||
|         &GLSLDecompiler::Reduce<Func::And, Type::Uint>, | ||||
|         &GLSLDecompiler::Reduce<Func::Or, Type::Uint>, | ||||
|         &GLSLDecompiler::Reduce<Func::Xor, Type::Uint>, | ||||
| 
 | ||||
|         &GLSLDecompiler::Reduce<Func::Add, Type::Int>, | ||||
|         &GLSLDecompiler::Reduce<Func::Min, Type::Int>, | ||||
|         &GLSLDecompiler::Reduce<Func::Max, Type::Int>, | ||||
|         &GLSLDecompiler::Reduce<Func::And, Type::Int>, | ||||
|         &GLSLDecompiler::Reduce<Func::Or, Type::Int>, | ||||
|         &GLSLDecompiler::Reduce<Func::Xor, Type::Int>, | ||||
| 
 | ||||
|         &GLSLDecompiler::Branch, | ||||
|         &GLSLDecompiler::BranchIndirect, | ||||
|         &GLSLDecompiler::PushFlowStack, | ||||
|  |  | |||
|  | @ -1938,11 +1938,8 @@ private: | |||
|         return {}; | ||||
|     } | ||||
| 
 | ||||
|     template <Id (Module::*func)(Id, Id, Id, Id, Id), Type result_type, | ||||
|               Type value_type = result_type> | ||||
|     template <Id (Module::*func)(Id, Id, Id, Id, Id)> | ||||
|     Expression Atomic(Operation operation) { | ||||
|         const Id type_def = GetTypeDefinition(result_type); | ||||
| 
 | ||||
|         Id pointer; | ||||
|         if (const auto smem = std::get_if<SmemNode>(&*operation[0])) { | ||||
|             pointer = GetSharedMemoryPointer(*smem); | ||||
|  | @ -1950,15 +1947,19 @@ private: | |||
|             pointer = GetGlobalMemoryPointer(*gmem); | ||||
|         } else { | ||||
|             UNREACHABLE(); | ||||
|             return {Constant(type_def, 0), result_type}; | ||||
|             return {v_float_zero, Type::Float}; | ||||
|         } | ||||
|         const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device)); | ||||
|         const Id semantics = Constant(t_uint, 0); | ||||
|         const Id value = AsUint(Visit(operation[1])); | ||||
| 
 | ||||
|         return {(this->*func)(t_uint, pointer, scope, semantics, value), Type::Uint}; | ||||
|     } | ||||
| 
 | ||||
|         const Id value = As(Visit(operation[1]), value_type); | ||||
| 
 | ||||
|         const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device)); | ||||
|         const Id semantics = Constant(type_def, 0); | ||||
| 
 | ||||
|         return {(this->*func)(type_def, pointer, scope, semantics, value), result_type}; | ||||
|     template <Id (Module::*func)(Id, Id, Id, Id, Id)> // func: the Module member function forwarded to Atomic.
 | ||||
|     Expression Reduce(Operation operation) { | ||||
|         Atomic<func>(operation); // Invoke the atomic for its effect only; the returned Expression is discarded.
 | ||||
|         return {}; // Reductions yield an empty Expression.
 | ||||
|     } | ||||
| 
 | ||||
|     Expression Branch(Operation operation) { | ||||
|  | @ -2547,21 +2548,35 @@ private: | |||
|         &SPIRVDecompiler::AtomicImageXor, | ||||
|         &SPIRVDecompiler::AtomicImageExchange, | ||||
| 
 | ||||
|         &SPIRVDecompiler::Atomic<&Module::OpAtomicExchange, Type::Uint>, | ||||
|         &SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd, Type::Uint>, | ||||
|         &SPIRVDecompiler::Atomic<&Module::OpAtomicUMin, Type::Uint>, | ||||
|         &SPIRVDecompiler::Atomic<&Module::OpAtomicUMax, Type::Uint>, | ||||
|         &SPIRVDecompiler::Atomic<&Module::OpAtomicAnd, Type::Uint>, | ||||
|         &SPIRVDecompiler::Atomic<&Module::OpAtomicOr, Type::Uint>, | ||||
|         &SPIRVDecompiler::Atomic<&Module::OpAtomicXor, Type::Uint>, | ||||
|         &SPIRVDecompiler::Atomic<&Module::OpAtomicExchange>, | ||||
|         &SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd>, | ||||
|         &SPIRVDecompiler::Atomic<&Module::OpAtomicUMin>, | ||||
|         &SPIRVDecompiler::Atomic<&Module::OpAtomicUMax>, | ||||
|         &SPIRVDecompiler::Atomic<&Module::OpAtomicAnd>, | ||||
|         &SPIRVDecompiler::Atomic<&Module::OpAtomicOr>, | ||||
|         &SPIRVDecompiler::Atomic<&Module::OpAtomicXor>, | ||||
| 
 | ||||
|         &SPIRVDecompiler::Atomic<&Module::OpAtomicExchange, Type::Int>, | ||||
|         &SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd, Type::Int>, | ||||
|         &SPIRVDecompiler::Atomic<&Module::OpAtomicSMin, Type::Int>, | ||||
|         &SPIRVDecompiler::Atomic<&Module::OpAtomicSMax, Type::Int>, | ||||
|         &SPIRVDecompiler::Atomic<&Module::OpAtomicAnd, Type::Int>, | ||||
|         &SPIRVDecompiler::Atomic<&Module::OpAtomicOr, Type::Int>, | ||||
|         &SPIRVDecompiler::Atomic<&Module::OpAtomicXor, Type::Int>, | ||||
|         &SPIRVDecompiler::Atomic<&Module::OpAtomicExchange>, | ||||
|         &SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd>, | ||||
|         &SPIRVDecompiler::Atomic<&Module::OpAtomicSMin>, | ||||
|         &SPIRVDecompiler::Atomic<&Module::OpAtomicSMax>, | ||||
|         &SPIRVDecompiler::Atomic<&Module::OpAtomicAnd>, | ||||
|         &SPIRVDecompiler::Atomic<&Module::OpAtomicOr>, | ||||
|         &SPIRVDecompiler::Atomic<&Module::OpAtomicXor>, | ||||
| 
 | ||||
|         &SPIRVDecompiler::Reduce<&Module::OpAtomicIAdd>, | ||||
|         &SPIRVDecompiler::Reduce<&Module::OpAtomicUMin>, | ||||
|         &SPIRVDecompiler::Reduce<&Module::OpAtomicUMax>, | ||||
|         &SPIRVDecompiler::Reduce<&Module::OpAtomicAnd>, | ||||
|         &SPIRVDecompiler::Reduce<&Module::OpAtomicOr>, | ||||
|         &SPIRVDecompiler::Reduce<&Module::OpAtomicXor>, | ||||
| 
 | ||||
|         &SPIRVDecompiler::Reduce<&Module::OpAtomicIAdd>, | ||||
|         &SPIRVDecompiler::Reduce<&Module::OpAtomicSMin>, | ||||
|         &SPIRVDecompiler::Reduce<&Module::OpAtomicSMax>, | ||||
|         &SPIRVDecompiler::Reduce<&Module::OpAtomicAnd>, | ||||
|         &SPIRVDecompiler::Reduce<&Module::OpAtomicOr>, | ||||
|         &SPIRVDecompiler::Reduce<&Module::OpAtomicXor>, | ||||
| 
 | ||||
|         &SPIRVDecompiler::Branch, | ||||
|         &SPIRVDecompiler::BranchIndirect, | ||||
|  |  | |||
|  | @ -3,7 +3,9 @@ | |||
| // Refer to the license.txt file included.
 | ||||
| 
 | ||||
| #include <algorithm> | ||||
| #include <utility> | ||||
| #include <vector> | ||||
| 
 | ||||
| #include <fmt/format.h> | ||||
| 
 | ||||
| #include "common/alignment.h" | ||||
|  | @ -16,6 +18,7 @@ | |||
| 
 | ||||
| namespace VideoCommon::Shader { | ||||
| 
 | ||||
| using std::move; | ||||
| using Tegra::Shader::AtomicOp; | ||||
| using Tegra::Shader::AtomicType; | ||||
| using Tegra::Shader::Attribute; | ||||
|  | @ -27,8 +30,7 @@ using Tegra::Shader::StoreType; | |||
| 
 | ||||
| namespace { | ||||
| 
 | ||||
| Node GetAtomOperation(AtomicOp op, bool is_signed, Node memory, Node data) { | ||||
|     const OperationCode operation_code = [op] { | ||||
| OperationCode GetAtomOperation(AtomicOp op) { | ||||
|     switch (op) { | ||||
|     case AtomicOp::Add: | ||||
|         return OperationCode::AtomicIAdd; | ||||
|  | @ -48,8 +50,6 @@ Node GetAtomOperation(AtomicOp op, bool is_signed, Node memory, Node data) { | |||
|         UNIMPLEMENTED_MSG("op={}", static_cast<int>(op)); | ||||
|         return OperationCode::AtomicIAdd; | ||||
|     } | ||||
|     }(); | ||||
|     return SignedOperation(operation_code, is_signed, std::move(memory), std::move(data)); | ||||
| } | ||||
| 
 | ||||
| bool IsUnaligned(Tegra::Shader::UniformType uniform_type) { | ||||
|  | @ -90,23 +90,22 @@ u32 GetMemorySize(Tegra::Shader::UniformType uniform_type) { | |||
| 
 | ||||
| Node ExtractUnaligned(Node value, Node address, u32 mask, u32 size) { | ||||
|     Node offset = Operation(OperationCode::UBitwiseAnd, address, Immediate(mask)); | ||||
|     offset = Operation(OperationCode::ULogicalShiftLeft, std::move(offset), Immediate(3)); | ||||
|     return Operation(OperationCode::UBitfieldExtract, std::move(value), std::move(offset), | ||||
|                      Immediate(size)); | ||||
|     offset = Operation(OperationCode::ULogicalShiftLeft, move(offset), Immediate(3)); | ||||
|     return Operation(OperationCode::UBitfieldExtract, move(value), move(offset), Immediate(size)); | ||||
| } | ||||
| 
 | ||||
| Node InsertUnaligned(Node dest, Node value, Node address, u32 mask, u32 size) { | ||||
|     Node offset = Operation(OperationCode::UBitwiseAnd, std::move(address), Immediate(mask)); | ||||
|     offset = Operation(OperationCode::ULogicalShiftLeft, std::move(offset), Immediate(3)); | ||||
|     return Operation(OperationCode::UBitfieldInsert, std::move(dest), std::move(value), | ||||
|                      std::move(offset), Immediate(size)); | ||||
|     Node offset = Operation(OperationCode::UBitwiseAnd, move(address), Immediate(mask)); | ||||
|     offset = Operation(OperationCode::ULogicalShiftLeft, move(offset), Immediate(3)); | ||||
|     return Operation(OperationCode::UBitfieldInsert, move(dest), move(value), move(offset), | ||||
|                      Immediate(size)); | ||||
| } | ||||
| 
 | ||||
| Node Sign16Extend(Node value) { | ||||
|     Node sign = Operation(OperationCode::UBitwiseAnd, value, Immediate(1U << 15)); | ||||
|     Node is_sign = Operation(OperationCode::LogicalUEqual, std::move(sign), Immediate(1U << 15)); | ||||
|     Node is_sign = Operation(OperationCode::LogicalUEqual, move(sign), Immediate(1U << 15)); | ||||
|     Node extend = Operation(OperationCode::Select, is_sign, Immediate(0xFFFF0000), Immediate(0)); | ||||
|     return Operation(OperationCode::UBitwiseOr, std::move(value), std::move(extend)); | ||||
|     return Operation(OperationCode::UBitwiseOr, move(value), move(extend)); | ||||
| } | ||||
| 
 | ||||
| } // Anonymous namespace
 | ||||
|  | @ -379,20 +378,36 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
| 
 | ||||
|             if (IsUnaligned(type)) { | ||||
|                 const u32 mask = GetUnalignedMask(type); | ||||
|                 value = InsertUnaligned(gmem, std::move(value), real_address, mask, size); | ||||
|                 value = InsertUnaligned(gmem, move(value), real_address, mask, size); | ||||
|             } | ||||
| 
 | ||||
|             bb.push_back(Operation(OperationCode::Assign, gmem, value)); | ||||
|         } | ||||
|         break; | ||||
|     } | ||||
|     case OpCode::Id::RED: { | ||||
|         // Only the U32 type with the Add operation is decoded here. Report anything else together
 | ||||
|         // with the offending field value, in the same style as the ATOM case below.
 | ||||
|         UNIMPLEMENTED_IF_MSG(instr.red.type != GlobalAtomicType::U32, "type={}", | ||||
|                              static_cast<int>(instr.red.type.Value())); | ||||
|         UNIMPLEMENTED_IF_MSG(instr.red.operation != AtomicOp::Add, "operation={}", | ||||
|                              static_cast<int>(instr.red.operation.Value())); | ||||
|         const auto [real_address, base_address, descriptor] = | ||||
|             TrackGlobalMemory(bb, instr, true, true); | ||||
|         if (!real_address || !base_address) { | ||||
|             // Tracking failed, skip atomic.
 | ||||
|             break; | ||||
|         } | ||||
|         Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); | ||||
|         Node value = GetRegister(instr.gpr0); | ||||
|         // The reduction is pushed as a statement; unlike ATOM, no destination register is written.
 | ||||
|         bb.push_back(Operation(OperationCode::ReduceIAdd, move(gmem), move(value))); | ||||
|         break; | ||||
|     } | ||||
|     case OpCode::Id::ATOM: { | ||||
|         UNIMPLEMENTED_IF_MSG(instr.atom.operation == AtomicOp::Inc || | ||||
|                                  instr.atom.operation == AtomicOp::Dec || | ||||
|                                  instr.atom.operation == AtomicOp::SafeAdd, | ||||
|                              "operation={}", static_cast<int>(instr.atom.operation.Value())); | ||||
|         UNIMPLEMENTED_IF_MSG(instr.atom.type == GlobalAtomicType::S64 || | ||||
|                                  instr.atom.type == GlobalAtomicType::U64, | ||||
|                                  instr.atom.type == GlobalAtomicType::U64 || | ||||
|                                  instr.atom.type == GlobalAtomicType::F16x2_FTZ_RN || | ||||
|                                  instr.atom.type == GlobalAtomicType::F32_FTZ_RN, | ||||
|                              "type={}", static_cast<int>(instr.atom.type.Value())); | ||||
| 
 | ||||
|         const auto [real_address, base_address, descriptor] = | ||||
|  | @ -403,11 +418,11 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
|         } | ||||
| 
 | ||||
|         const bool is_signed = | ||||
|             instr.atoms.type == AtomicType::S32 || instr.atoms.type == AtomicType::S64; | ||||
|             instr.atom.type == GlobalAtomicType::S32 || instr.atom.type == GlobalAtomicType::S64; | ||||
|         Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); | ||||
|         Node value = GetAtomOperation(static_cast<AtomicOp>(instr.atom.operation), is_signed, gmem, | ||||
|                                       GetRegister(instr.gpr20)); | ||||
|         SetRegister(bb, instr.gpr0, std::move(value)); | ||||
|         SetRegister(bb, instr.gpr0, | ||||
|                     SignedOperation(GetAtomOperation(instr.atom.operation), is_signed, gmem, | ||||
|                                     GetRegister(instr.gpr20))); | ||||
|         break; | ||||
|     } | ||||
|     case OpCode::Id::ATOMS: { | ||||
|  | @ -421,11 +436,10 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
|             instr.atoms.type == AtomicType::S32 || instr.atoms.type == AtomicType::S64; | ||||
|         const s32 offset = instr.atoms.GetImmediateOffset(); | ||||
|         Node address = GetRegister(instr.gpr8); | ||||
|         address = Operation(OperationCode::IAdd, std::move(address), Immediate(offset)); | ||||
|         Node value = | ||||
|             GetAtomOperation(static_cast<AtomicOp>(instr.atoms.operation), is_signed, | ||||
|                              GetSharedMemory(std::move(address)), GetRegister(instr.gpr20)); | ||||
|         SetRegister(bb, instr.gpr0, std::move(value)); | ||||
|         address = Operation(OperationCode::IAdd, move(address), Immediate(offset)); | ||||
|         SetRegister(bb, instr.gpr0, | ||||
|                     SignedOperation(GetAtomOperation(instr.atoms.operation), is_signed, | ||||
|                                     GetSharedMemory(move(address)), GetRegister(instr.gpr20))); | ||||
|         break; | ||||
|     } | ||||
|     case OpCode::Id::AL2P: { | ||||
|  |  | |||
|  | @ -178,6 +178,20 @@ enum class OperationCode { | |||
|     AtomicIOr,       /// (memory, int) -> int
 | ||||
|     AtomicIXor,      /// (memory, int) -> int
 | ||||
| 
 | ||||
|     ReduceUAdd, /// (memory, uint) -> void
 | ||||
|     ReduceUMin, /// (memory, uint) -> void
 | ||||
|     ReduceUMax, /// (memory, uint) -> void
 | ||||
|     ReduceUAnd, /// (memory, uint) -> void
 | ||||
|     ReduceUOr,  /// (memory, uint) -> void
 | ||||
|     ReduceUXor, /// (memory, uint) -> void
 | ||||
| 
 | ||||
|     ReduceIAdd, /// (memory, int) -> void
 | ||||
|     ReduceIMin, /// (memory, int) -> void
 | ||||
|     ReduceIMax, /// (memory, int) -> void
 | ||||
|     ReduceIAnd, /// (memory, int) -> void
 | ||||
|     ReduceIOr,  /// (memory, int) -> void
 | ||||
|     ReduceIXor, /// (memory, int) -> void
 | ||||
| 
 | ||||
|     Branch,         /// (uint branch_target) -> void
 | ||||
|     BranchIndirect, /// (uint branch_target) -> void
 | ||||
|     PushFlowStack,  /// (uint branch_target) -> void
 | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Fernando Sahmkow
						Fernando Sahmkow