forked from eden-emu/eden
		
	Merge pull request #3350 from ReinUsesLisp/atom
shader/memory: Implement ATOM.ADD
This commit is contained in:
		
						commit
						2db7adc42a
					
				
					 5 changed files with 86 additions and 39 deletions
				
			
		|  | @ -227,6 +227,28 @@ enum class AtomicOp : u64 { | ||||||
|     Exch = 8, |     Exch = 8, | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
|  | enum class GlobalAtomicOp : u64 { | ||||||
|  |     Add = 0, | ||||||
|  |     Min = 1, | ||||||
|  |     Max = 2, | ||||||
|  |     Inc = 3, | ||||||
|  |     Dec = 4, | ||||||
|  |     And = 5, | ||||||
|  |     Or = 6, | ||||||
|  |     Xor = 7, | ||||||
|  |     Exch = 8, | ||||||
|  |     SafeAdd = 10, | ||||||
|  | }; | ||||||
|  | 
 | ||||||
|  | enum class GlobalAtomicType : u64 { | ||||||
|  |     U32 = 0, | ||||||
|  |     S32 = 1, | ||||||
|  |     U64 = 2, | ||||||
|  |     F32_FTZ_RN = 3, | ||||||
|  |     F16x2_FTZ_RN = 4, | ||||||
|  |     S64 = 5, | ||||||
|  | }; | ||||||
|  | 
 | ||||||
| enum class UniformType : u64 { | enum class UniformType : u64 { | ||||||
|     UnsignedByte = 0, |     UnsignedByte = 0, | ||||||
|     SignedByte = 1, |     SignedByte = 1, | ||||||
|  | @ -957,6 +979,12 @@ union Instruction { | ||||||
|         BitField<46, 2, u64> cache_mode; |         BitField<46, 2, u64> cache_mode; | ||||||
|     } stg; |     } stg; | ||||||
| 
 | 
 | ||||||
|  |     union { | ||||||
|  |         BitField<52, 4, GlobalAtomicOp> operation; | ||||||
|  |         BitField<49, 3, GlobalAtomicType> type; | ||||||
|  |         BitField<28, 20, s64> offset; | ||||||
|  |     } atom; | ||||||
|  | 
 | ||||||
|     union { |     union { | ||||||
|         BitField<52, 4, AtomicOp> operation; |         BitField<52, 4, AtomicOp> operation; | ||||||
|         BitField<28, 2, AtomicType> type; |         BitField<28, 2, AtomicType> type; | ||||||
|  | @ -1690,6 +1718,7 @@ public: | ||||||
|         ST_S, |         ST_S, | ||||||
|         ST,    // Store in generic memory
 |         ST,    // Store in generic memory
 | ||||||
|         STG,   // Store in global memory
 |         STG,   // Store in global memory
 | ||||||
|  |         ATOM,  // Atomic operation on global memory
 | ||||||
|         ATOMS, // Atomic operation on shared memory
 |         ATOMS, // Atomic operation on shared memory
 | ||||||
|         AL2P,  // Transforms attribute memory into physical memory
 |         AL2P,  // Transforms attribute memory into physical memory
 | ||||||
|         TEX, |         TEX, | ||||||
|  | @ -1994,6 +2023,7 @@ private: | ||||||
|             INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"), |             INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"), | ||||||
|             INST("101-------------", Id::ST, Type::Memory, "ST"), |             INST("101-------------", Id::ST, Type::Memory, "ST"), | ||||||
|             INST("1110111011011---", Id::STG, Type::Memory, "STG"), |             INST("1110111011011---", Id::STG, Type::Memory, "STG"), | ||||||
|  |             INST("11101101--------", Id::ATOM, Type::Memory, "ATOM"), | ||||||
|             INST("11101100--------", Id::ATOMS, Type::Memory, "ATOMS"), |             INST("11101100--------", Id::ATOMS, Type::Memory, "ATOMS"), | ||||||
|             INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"), |             INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"), | ||||||
|             INST("110000----111---", Id::TEX, Type::Texture, "TEX"), |             INST("110000----111---", Id::TEX, Type::Texture, "TEX"), | ||||||
|  |  | ||||||
|  | @ -1857,10 +1857,7 @@ private: | ||||||
| 
 | 
 | ||||||
|     template <const std::string_view& opname, Type type> |     template <const std::string_view& opname, Type type> | ||||||
|     Expression Atomic(Operation operation) { |     Expression Atomic(Operation operation) { | ||||||
|         ASSERT(stage == ShaderType::Compute); |         return {fmt::format("atomic{}({}, {})", opname, Visit(operation[0]).GetCode(), | ||||||
|         auto& smem = std::get<SmemNode>(*operation[0]); |  | ||||||
| 
 |  | ||||||
|         return {fmt::format("atomic{}(smem[{} >> 2], {})", opname, Visit(smem.GetAddress()).AsInt(), |  | ||||||
|                             Visit(operation[1]).As(type)), |                             Visit(operation[1]).As(type)), | ||||||
|                 type}; |                 type}; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  | @ -1123,15 +1123,7 @@ private: | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|         if (const auto gmem = std::get_if<GmemNode>(&*node)) { |         if (const auto gmem = std::get_if<GmemNode>(&*node)) { | ||||||
|             const Id gmem_buffer = global_buffers.at(gmem->GetDescriptor()); |             return {OpLoad(t_uint, GetGlobalMemoryPointer(*gmem)), Type::Uint}; | ||||||
|             const Id real = AsUint(Visit(gmem->GetRealAddress())); |  | ||||||
|             const Id base = AsUint(Visit(gmem->GetBaseAddress())); |  | ||||||
| 
 |  | ||||||
|             Id offset = OpISub(t_uint, real, base); |  | ||||||
|             offset = OpUDiv(t_uint, offset, Constant(t_uint, 4U)); |  | ||||||
|             return {OpLoad(t_float, |  | ||||||
|                            OpAccessChain(t_gmem_float, gmem_buffer, Constant(t_uint, 0U), offset)), |  | ||||||
|                     Type::Float}; |  | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|         if (const auto lmem = std::get_if<LmemNode>(&*node)) { |         if (const auto lmem = std::get_if<LmemNode>(&*node)) { | ||||||
|  | @ -1142,10 +1134,7 @@ private: | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|         if (const auto smem = std::get_if<SmemNode>(&*node)) { |         if (const auto smem = std::get_if<SmemNode>(&*node)) { | ||||||
|             Id address = AsUint(Visit(smem->GetAddress())); |             return {OpLoad(t_uint, GetSharedMemoryPointer(*smem)), Type::Uint}; | ||||||
|             address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U)); |  | ||||||
|             const Id pointer = OpAccessChain(t_smem_uint, shared_memory, address); |  | ||||||
|             return {OpLoad(t_uint, pointer), Type::Uint}; |  | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|         if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) { |         if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) { | ||||||
|  | @ -1339,20 +1328,10 @@ private: | ||||||
|             target = {OpAccessChain(t_prv_float, local_memory, address), Type::Float}; |             target = {OpAccessChain(t_prv_float, local_memory, address), Type::Float}; | ||||||
| 
 | 
 | ||||||
|         } else if (const auto smem = std::get_if<SmemNode>(&*dest)) { |         } else if (const auto smem = std::get_if<SmemNode>(&*dest)) { | ||||||
|             ASSERT(stage == ShaderType::Compute); |             target = {GetSharedMemoryPointer(*smem), Type::Uint}; | ||||||
|             Id address = AsUint(Visit(smem->GetAddress())); |  | ||||||
|             address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U)); |  | ||||||
|             target = {OpAccessChain(t_smem_uint, shared_memory, address), Type::Uint}; |  | ||||||
| 
 | 
 | ||||||
|         } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { |         } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { | ||||||
|             const Id real = AsUint(Visit(gmem->GetRealAddress())); |             target = {GetGlobalMemoryPointer(*gmem), Type::Uint}; | ||||||
|             const Id base = AsUint(Visit(gmem->GetBaseAddress())); |  | ||||||
|             const Id diff = OpISub(t_uint, real, base); |  | ||||||
|             const Id offset = OpShiftRightLogical(t_uint, diff, Constant(t_uint, 2)); |  | ||||||
| 
 |  | ||||||
|             const Id gmem_buffer = global_buffers.at(gmem->GetDescriptor()); |  | ||||||
|             target = {OpAccessChain(t_gmem_float, gmem_buffer, Constant(t_uint, 0), offset), |  | ||||||
|                       Type::Float}; |  | ||||||
| 
 | 
 | ||||||
|         } else { |         } else { | ||||||
|             UNIMPLEMENTED(); |             UNIMPLEMENTED(); | ||||||
|  | @ -1804,11 +1783,16 @@ private: | ||||||
|         return {}; |         return {}; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     Expression UAtomicAdd(Operation operation) { |     Expression AtomicAdd(Operation operation) { | ||||||
|         const auto& smem = std::get<SmemNode>(*operation[0]); |         Id pointer; | ||||||
|         Id address = AsUint(Visit(smem.GetAddress())); |         if (const auto smem = std::get_if<SmemNode>(&*operation[0])) { | ||||||
|         address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U)); |             pointer = GetSharedMemoryPointer(*smem); | ||||||
|         const Id pointer = OpAccessChain(t_smem_uint, shared_memory, address); |         } else if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) { | ||||||
|  |             pointer = GetGlobalMemoryPointer(*gmem); | ||||||
|  |         } else { | ||||||
|  |             UNREACHABLE(); | ||||||
|  |             return {Constant(t_uint, 0), Type::Uint}; | ||||||
|  |         } | ||||||
| 
 | 
 | ||||||
|         const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device)); |         const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device)); | ||||||
|         const Id semantics = Constant(t_uint, 0U); |         const Id semantics = Constant(t_uint, 0U); | ||||||
|  | @ -2243,6 +2227,22 @@ private: | ||||||
|         return {}; |         return {}; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  |     Id GetGlobalMemoryPointer(const GmemNode& gmem) { | ||||||
|  |         const Id real = AsUint(Visit(gmem.GetRealAddress())); | ||||||
|  |         const Id base = AsUint(Visit(gmem.GetBaseAddress())); | ||||||
|  |         const Id diff = OpISub(t_uint, real, base); | ||||||
|  |         const Id offset = OpShiftRightLogical(t_uint, diff, Constant(t_uint, 2)); | ||||||
|  |         const Id buffer = global_buffers.at(gmem.GetDescriptor()); | ||||||
|  |         return OpAccessChain(t_gmem_uint, buffer, Constant(t_uint, 0), offset); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     Id GetSharedMemoryPointer(const SmemNode& smem) { | ||||||
|  |         ASSERT(stage == ShaderType::Compute); | ||||||
|  |         Id address = AsUint(Visit(smem.GetAddress())); | ||||||
|  |         address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U)); | ||||||
|  |         return OpAccessChain(t_smem_uint, shared_memory, address); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|     static constexpr std::array operation_decompilers = { |     static constexpr std::array operation_decompilers = { | ||||||
|         &SPIRVDecompiler::Assign, |         &SPIRVDecompiler::Assign, | ||||||
| 
 | 
 | ||||||
|  | @ -2389,7 +2389,7 @@ private: | ||||||
|         &SPIRVDecompiler::AtomicImageXor, |         &SPIRVDecompiler::AtomicImageXor, | ||||||
|         &SPIRVDecompiler::AtomicImageExchange, |         &SPIRVDecompiler::AtomicImageExchange, | ||||||
| 
 | 
 | ||||||
|         &SPIRVDecompiler::UAtomicAdd, |         &SPIRVDecompiler::AtomicAdd, | ||||||
| 
 | 
 | ||||||
|         &SPIRVDecompiler::Branch, |         &SPIRVDecompiler::Branch, | ||||||
|         &SPIRVDecompiler::BranchIndirect, |         &SPIRVDecompiler::BranchIndirect, | ||||||
|  | @ -2485,9 +2485,9 @@ private: | ||||||
| 
 | 
 | ||||||
|     Id t_smem_uint{}; |     Id t_smem_uint{}; | ||||||
| 
 | 
 | ||||||
|     const Id t_gmem_float = TypePointer(spv::StorageClass::StorageBuffer, t_float); |     const Id t_gmem_uint = TypePointer(spv::StorageClass::StorageBuffer, t_uint); | ||||||
|     const Id t_gmem_array = |     const Id t_gmem_array = | ||||||
|         Name(Decorate(TypeRuntimeArray(t_float), spv::Decoration::ArrayStride, 4U), "GmemArray"); |         Name(Decorate(TypeRuntimeArray(t_uint), spv::Decoration::ArrayStride, 4U), "GmemArray"); | ||||||
|     const Id t_gmem_struct = MemberDecorate( |     const Id t_gmem_struct = MemberDecorate( | ||||||
|         Decorate(TypeStruct(t_gmem_array), spv::Decoration::Block), 0, spv::Decoration::Offset, 0); |         Decorate(TypeStruct(t_gmem_array), spv::Decoration::Block), 0, spv::Decoration::Offset, 0); | ||||||
|     const Id t_gmem_ssbo = TypePointer(spv::StorageClass::StorageBuffer, t_gmem_struct); |     const Id t_gmem_ssbo = TypePointer(spv::StorageClass::StorageBuffer, t_gmem_struct); | ||||||
|  |  | ||||||
|  | @ -19,6 +19,8 @@ namespace VideoCommon::Shader { | ||||||
| using Tegra::Shader::AtomicOp; | using Tegra::Shader::AtomicOp; | ||||||
| using Tegra::Shader::AtomicType; | using Tegra::Shader::AtomicType; | ||||||
| using Tegra::Shader::Attribute; | using Tegra::Shader::Attribute; | ||||||
|  | using Tegra::Shader::GlobalAtomicOp; | ||||||
|  | using Tegra::Shader::GlobalAtomicType; | ||||||
| using Tegra::Shader::Instruction; | using Tegra::Shader::Instruction; | ||||||
| using Tegra::Shader::OpCode; | using Tegra::Shader::OpCode; | ||||||
| using Tegra::Shader::Register; | using Tegra::Shader::Register; | ||||||
|  | @ -335,6 +337,24 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | ||||||
|         } |         } | ||||||
|         break; |         break; | ||||||
|     } |     } | ||||||
|  |     case OpCode::Id::ATOM: { | ||||||
|  |         UNIMPLEMENTED_IF_MSG(instr.atom.operation != GlobalAtomicOp::Add, "operation={}", | ||||||
|  |                              static_cast<int>(instr.atom.operation.Value())); | ||||||
|  |         UNIMPLEMENTED_IF_MSG(instr.atom.type != GlobalAtomicType::S32, "type={}", | ||||||
|  |                              static_cast<int>(instr.atom.type.Value())); | ||||||
|  | 
 | ||||||
|  |         const auto [real_address, base_address, descriptor] = | ||||||
|  |             TrackGlobalMemory(bb, instr, true, true); | ||||||
|  |         if (!real_address || !base_address) { | ||||||
|  |             // Tracking failed, skip atomic.
 | ||||||
|  |             break; | ||||||
|  |         } | ||||||
|  | 
 | ||||||
|  |         Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); | ||||||
|  |         Node value = Operation(OperationCode::AtomicAdd, std::move(gmem), GetRegister(instr.gpr20)); | ||||||
|  |         SetRegister(bb, instr.gpr0, std::move(value)); | ||||||
|  |         break; | ||||||
|  |     } | ||||||
|     case OpCode::Id::ATOMS: { |     case OpCode::Id::ATOMS: { | ||||||
|         UNIMPLEMENTED_IF_MSG(instr.atoms.operation != AtomicOp::Add, "operation={}", |         UNIMPLEMENTED_IF_MSG(instr.atoms.operation != AtomicOp::Add, "operation={}", | ||||||
|                              static_cast<int>(instr.atoms.operation.Value())); |                              static_cast<int>(instr.atoms.operation.Value())); | ||||||
|  | @ -348,7 +368,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | ||||||
|         Node memory = GetSharedMemory(std::move(address)); |         Node memory = GetSharedMemory(std::move(address)); | ||||||
|         Node data = GetRegister(instr.gpr20); |         Node data = GetRegister(instr.gpr20); | ||||||
| 
 | 
 | ||||||
|         Node value = Operation(OperationCode::UAtomicAdd, std::move(memory), std::move(data)); |         Node value = Operation(OperationCode::AtomicAdd, std::move(memory), std::move(data)); | ||||||
|         SetRegister(bb, instr.gpr0, std::move(value)); |         SetRegister(bb, instr.gpr0, std::move(value)); | ||||||
|         break; |         break; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  | @ -162,7 +162,7 @@ enum class OperationCode { | ||||||
|     AtomicImageXor,      /// (MetaImage, int[N] coords) -> void
 |     AtomicImageXor,      /// (MetaImage, int[N] coords) -> void
 | ||||||
|     AtomicImageExchange, /// (MetaImage, int[N] coords) -> void
 |     AtomicImageExchange, /// (MetaImage, int[N] coords) -> void
 | ||||||
| 
 | 
 | ||||||
|     UAtomicAdd, /// (smem, uint) -> uint
 |     AtomicAdd, /// (memory, {u}int) -> {u}int
 | ||||||
| 
 | 
 | ||||||
|     Branch,         /// (uint branch_target) -> void
 |     Branch,         /// (uint branch_target) -> void
 | ||||||
|     BranchIndirect, /// (uint branch_target) -> void
 |     BranchIndirect, /// (uint branch_target) -> void
 | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 bunnei
						bunnei