forked from eden-emu/eden
		
	Merge pull request #3350 from ReinUsesLisp/atom
shader/memory: Implement ATOM.ADD
This commit is contained in:
		
						commit
						2db7adc42a
					
				
					 5 changed files with 86 additions and 39 deletions
				
			
		|  | @ -227,6 +227,28 @@ enum class AtomicOp : u64 { | |||
|     Exch = 8, | ||||
| }; | ||||
| 
 | ||||
// Operation selector for the ATOM instruction (atomic operation on global memory).
// It is the value type of the `operation` bit field in the instruction's `atom` encoding.
// The numbering is not contiguous: no enumerator is defined for value 9.
| enum class GlobalAtomicOp : u64 { | ||||
|     Add = 0, | ||||
|     Min = 1, | ||||
|     Max = 2, | ||||
|     Inc = 3, | ||||
|     Dec = 4, | ||||
|     And = 5, | ||||
|     Or = 6, | ||||
|     Xor = 7, | ||||
|     Exch = 8, | ||||
|     SafeAdd = 10, | ||||
| }; | ||||
| 
 | ||||
// Operand type selector for the ATOM instruction (atomic operation on global memory).
// It is the value type of the `type` bit field in the instruction's `atom` encoding.
// NOTE(review): the FTZ_RN suffixes presumably stand for flush-to-zero and
// round-to-nearest -- confirm against the ISA documentation.
| enum class GlobalAtomicType : u64 { | ||||
|     U32 = 0, | ||||
|     S32 = 1, | ||||
|     U64 = 2, | ||||
|     F32_FTZ_RN = 3, | ||||
|     F16x2_FTZ_RN = 4, | ||||
|     S64 = 5, | ||||
| }; | ||||
| 
 | ||||
| enum class UniformType : u64 { | ||||
|     UnsignedByte = 0, | ||||
|     SignedByte = 1, | ||||
|  | @ -957,6 +979,12 @@ union Instruction { | |||
|         BitField<46, 2, u64> cache_mode; | ||||
|     } stg; | ||||
| 
 | ||||
// Encoding fields of the ATOM instruction (atomic operation on global memory).
// NOTE(review): BitField's template arguments look like <first bit, bit count,
// value type> -- confirm against the BitField definition.
|     union { | ||||
// Which atomic operation to perform.
|         BitField<52, 4, GlobalAtomicOp> operation; | ||||
// Operand type the operation works on.
|         BitField<49, 3, GlobalAtomicType> type; | ||||
// Declared with a signed value type (s64); presumably an immediate address
// offset -- TODO confirm how the decoder consumes it.
|         BitField<28, 20, s64> offset; | ||||
|     } atom; | ||||
| 
 | ||||
|     union { | ||||
|         BitField<52, 4, AtomicOp> operation; | ||||
|         BitField<28, 2, AtomicType> type; | ||||
|  | @ -1690,6 +1718,7 @@ public: | |||
|         ST_S, | ||||
|         ST,    // Store in generic memory
 | ||||
|         STG,   // Store in global memory
 | ||||
|         ATOM,  // Atomic operation on global memory
 | ||||
|         ATOMS, // Atomic operation on shared memory
 | ||||
|         AL2P,  // Transforms attribute memory into physical memory
 | ||||
|         TEX, | ||||
|  | @ -1994,6 +2023,7 @@ private: | |||
|             INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"), | ||||
|             INST("101-------------", Id::ST, Type::Memory, "ST"), | ||||
|             INST("1110111011011---", Id::STG, Type::Memory, "STG"), | ||||
|             INST("11101101--------", Id::ATOM, Type::Memory, "ATOM"), | ||||
|             INST("11101100--------", Id::ATOMS, Type::Memory, "ATOMS"), | ||||
|             INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"), | ||||
|             INST("110000----111---", Id::TEX, Type::Texture, "TEX"), | ||||
|  |  | |||
|  | @ -1857,10 +1857,7 @@ private: | |||
| 
 | ||||
|     template <const std::string_view& opname, Type type> | ||||
|     Expression Atomic(Operation operation) { | ||||
|         ASSERT(stage == ShaderType::Compute); | ||||
|         auto& smem = std::get<SmemNode>(*operation[0]); | ||||
| 
 | ||||
|         return {fmt::format("atomic{}(smem[{} >> 2], {})", opname, Visit(smem.GetAddress()).AsInt(), | ||||
|         return {fmt::format("atomic{}({}, {})", opname, Visit(operation[0]).GetCode(), | ||||
|                             Visit(operation[1]).As(type)), | ||||
|                 type}; | ||||
|     } | ||||
|  |  | |||
|  | @ -1123,15 +1123,7 @@ private: | |||
|         } | ||||
| 
 | ||||
|         if (const auto gmem = std::get_if<GmemNode>(&*node)) { | ||||
|             const Id gmem_buffer = global_buffers.at(gmem->GetDescriptor()); | ||||
|             const Id real = AsUint(Visit(gmem->GetRealAddress())); | ||||
|             const Id base = AsUint(Visit(gmem->GetBaseAddress())); | ||||
| 
 | ||||
|             Id offset = OpISub(t_uint, real, base); | ||||
|             offset = OpUDiv(t_uint, offset, Constant(t_uint, 4U)); | ||||
|             return {OpLoad(t_float, | ||||
|                            OpAccessChain(t_gmem_float, gmem_buffer, Constant(t_uint, 0U), offset)), | ||||
|                     Type::Float}; | ||||
|             return {OpLoad(t_uint, GetGlobalMemoryPointer(*gmem)), Type::Uint}; | ||||
|         } | ||||
| 
 | ||||
|         if (const auto lmem = std::get_if<LmemNode>(&*node)) { | ||||
|  | @ -1142,10 +1134,7 @@ private: | |||
|         } | ||||
| 
 | ||||
|         if (const auto smem = std::get_if<SmemNode>(&*node)) { | ||||
|             Id address = AsUint(Visit(smem->GetAddress())); | ||||
|             address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U)); | ||||
|             const Id pointer = OpAccessChain(t_smem_uint, shared_memory, address); | ||||
|             return {OpLoad(t_uint, pointer), Type::Uint}; | ||||
|             return {OpLoad(t_uint, GetSharedMemoryPointer(*smem)), Type::Uint}; | ||||
|         } | ||||
| 
 | ||||
|         if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) { | ||||
|  | @ -1339,20 +1328,10 @@ private: | |||
|             target = {OpAccessChain(t_prv_float, local_memory, address), Type::Float}; | ||||
| 
 | ||||
|         } else if (const auto smem = std::get_if<SmemNode>(&*dest)) { | ||||
|             ASSERT(stage == ShaderType::Compute); | ||||
|             Id address = AsUint(Visit(smem->GetAddress())); | ||||
|             address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U)); | ||||
|             target = {OpAccessChain(t_smem_uint, shared_memory, address), Type::Uint}; | ||||
|             target = {GetSharedMemoryPointer(*smem), Type::Uint}; | ||||
| 
 | ||||
|         } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { | ||||
|             const Id real = AsUint(Visit(gmem->GetRealAddress())); | ||||
|             const Id base = AsUint(Visit(gmem->GetBaseAddress())); | ||||
|             const Id diff = OpISub(t_uint, real, base); | ||||
|             const Id offset = OpShiftRightLogical(t_uint, diff, Constant(t_uint, 2)); | ||||
| 
 | ||||
|             const Id gmem_buffer = global_buffers.at(gmem->GetDescriptor()); | ||||
|             target = {OpAccessChain(t_gmem_float, gmem_buffer, Constant(t_uint, 0), offset), | ||||
|                       Type::Float}; | ||||
|             target = {GetGlobalMemoryPointer(*gmem), Type::Uint}; | ||||
| 
 | ||||
|         } else { | ||||
|             UNIMPLEMENTED(); | ||||
|  | @ -1804,11 +1783,16 @@ private: | |||
|         return {}; | ||||
|     } | ||||
| 
 | ||||
|     Expression UAtomicAdd(Operation operation) { | ||||
|         const auto& smem = std::get<SmemNode>(*operation[0]); | ||||
|         Id address = AsUint(Visit(smem.GetAddress())); | ||||
|         address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U)); | ||||
|         const Id pointer = OpAccessChain(t_smem_uint, shared_memory, address); | ||||
|     Expression AtomicAdd(Operation operation) { | ||||
|         Id pointer; | ||||
|         if (const auto smem = std::get_if<SmemNode>(&*operation[0])) { | ||||
|             pointer = GetSharedMemoryPointer(*smem); | ||||
|         } else if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) { | ||||
|             pointer = GetGlobalMemoryPointer(*gmem); | ||||
|         } else { | ||||
|             UNREACHABLE(); | ||||
|             return {Constant(t_uint, 0), Type::Uint}; | ||||
|         } | ||||
| 
 | ||||
|         const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device)); | ||||
|         const Id semantics = Constant(t_uint, 0U); | ||||
|  | @ -2243,6 +2227,22 @@ private: | |||
|         return {}; | ||||
|     } | ||||
| 
 | ||||
// Builds the pointer used to access the global memory element described by `gmem`.
// The element index is (real address - base address) >> 2, and the buffer is
// looked up in global_buffers using the node's descriptor.
// Returns the OpAccessChain result, typed as t_gmem_uint.
|     Id GetGlobalMemoryPointer(const GmemNode& gmem) { | ||||
|         const Id real = AsUint(Visit(gmem.GetRealAddress())); | ||||
|         const Id base = AsUint(Visit(gmem.GetBaseAddress())); | ||||
|         const Id diff = OpISub(t_uint, real, base); | ||||
// Shift right by 2 (divide by 4); presumably converts a byte offset into a
// 32-bit element index -- TODO confirm the address units.
|         const Id offset = OpShiftRightLogical(t_uint, diff, Constant(t_uint, 2)); | ||||
|         const Id buffer = global_buffers.at(gmem.GetDescriptor()); | ||||
// The constant 0 presumably selects the array member of the buffer struct;
// `offset` then indexes into that array.
|         return OpAccessChain(t_gmem_uint, buffer, Constant(t_uint, 0), offset); | ||||
|     } | ||||
| 
 | ||||
// Builds the pointer used to access the shared memory element described by `smem`.
// Asserts that the current shader stage is Compute.
// Returns the OpAccessChain result into shared_memory, typed as t_smem_uint.
|     Id GetSharedMemoryPointer(const SmemNode& smem) { | ||||
|         ASSERT(stage == ShaderType::Compute); | ||||
|         Id address = AsUint(Visit(smem.GetAddress())); | ||||
// Shift right by 2 (divide by 4); presumably converts a byte address into a
// 32-bit element index -- TODO confirm the address units.
|         address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U)); | ||||
|         return OpAccessChain(t_smem_uint, shared_memory, address); | ||||
|     } | ||||
| 
 | ||||
|     static constexpr std::array operation_decompilers = { | ||||
|         &SPIRVDecompiler::Assign, | ||||
| 
 | ||||
|  | @ -2389,7 +2389,7 @@ private: | |||
|         &SPIRVDecompiler::AtomicImageXor, | ||||
|         &SPIRVDecompiler::AtomicImageExchange, | ||||
| 
 | ||||
|         &SPIRVDecompiler::UAtomicAdd, | ||||
|         &SPIRVDecompiler::AtomicAdd, | ||||
| 
 | ||||
|         &SPIRVDecompiler::Branch, | ||||
|         &SPIRVDecompiler::BranchIndirect, | ||||
|  | @ -2485,9 +2485,9 @@ private: | |||
| 
 | ||||
|     Id t_smem_uint{}; | ||||
| 
 | ||||
|     const Id t_gmem_float = TypePointer(spv::StorageClass::StorageBuffer, t_float); | ||||
|     const Id t_gmem_uint = TypePointer(spv::StorageClass::StorageBuffer, t_uint); | ||||
|     const Id t_gmem_array = | ||||
|         Name(Decorate(TypeRuntimeArray(t_float), spv::Decoration::ArrayStride, 4U), "GmemArray"); | ||||
|         Name(Decorate(TypeRuntimeArray(t_uint), spv::Decoration::ArrayStride, 4U), "GmemArray"); | ||||
|     const Id t_gmem_struct = MemberDecorate( | ||||
|         Decorate(TypeStruct(t_gmem_array), spv::Decoration::Block), 0, spv::Decoration::Offset, 0); | ||||
|     const Id t_gmem_ssbo = TypePointer(spv::StorageClass::StorageBuffer, t_gmem_struct); | ||||
|  |  | |||
|  | @ -19,6 +19,8 @@ namespace VideoCommon::Shader { | |||
| using Tegra::Shader::AtomicOp; | ||||
| using Tegra::Shader::AtomicType; | ||||
| using Tegra::Shader::Attribute; | ||||
| using Tegra::Shader::GlobalAtomicOp; | ||||
| using Tegra::Shader::GlobalAtomicType; | ||||
| using Tegra::Shader::Instruction; | ||||
| using Tegra::Shader::OpCode; | ||||
| using Tegra::Shader::Register; | ||||
|  | @ -335,6 +337,24 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
|         } | ||||
|         break; | ||||
|     } | ||||
|     case OpCode::Id::ATOM: { | ||||
|         UNIMPLEMENTED_IF_MSG(instr.atom.operation != GlobalAtomicOp::Add, "operation={}", | ||||
|                              static_cast<int>(instr.atom.operation.Value())); | ||||
|         UNIMPLEMENTED_IF_MSG(instr.atom.type != GlobalAtomicType::S32, "type={}", | ||||
|                              static_cast<int>(instr.atom.type.Value())); | ||||
| 
 | ||||
|         const auto [real_address, base_address, descriptor] = | ||||
|             TrackGlobalMemory(bb, instr, true, true); | ||||
|         if (!real_address || !base_address) { | ||||
|             // Tracking failed, skip atomic.
 | ||||
|             break; | ||||
|         } | ||||
| 
 | ||||
|         Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); | ||||
|         Node value = Operation(OperationCode::AtomicAdd, std::move(gmem), GetRegister(instr.gpr20)); | ||||
|         SetRegister(bb, instr.gpr0, std::move(value)); | ||||
|         break; | ||||
|     } | ||||
|     case OpCode::Id::ATOMS: { | ||||
|         UNIMPLEMENTED_IF_MSG(instr.atoms.operation != AtomicOp::Add, "operation={}", | ||||
|                              static_cast<int>(instr.atoms.operation.Value())); | ||||
|  | @ -348,7 +368,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { | |||
|         Node memory = GetSharedMemory(std::move(address)); | ||||
|         Node data = GetRegister(instr.gpr20); | ||||
| 
 | ||||
|         Node value = Operation(OperationCode::UAtomicAdd, std::move(memory), std::move(data)); | ||||
|         Node value = Operation(OperationCode::AtomicAdd, std::move(memory), std::move(data)); | ||||
|         SetRegister(bb, instr.gpr0, std::move(value)); | ||||
|         break; | ||||
|     } | ||||
|  |  | |||
|  | @ -162,7 +162,7 @@ enum class OperationCode { | |||
|     AtomicImageXor,      /// (MetaImage, int[N] coords) -> void
 | ||||
|     AtomicImageExchange, /// (MetaImage, int[N] coords) -> void
 | ||||
| 
 | ||||
|     UAtomicAdd, /// (smem, uint) -> uint
 | ||||
|     AtomicAdd, /// (memory, {u}int) -> {u}int
 | ||||
| 
 | ||||
|     Branch,         /// (uint branch_target) -> void
 | ||||
|     BranchIndirect, /// (uint branch_target) -> void
 | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 bunnei
						bunnei