forked from eden-emu/eden
		
	shader: Split SSY and PBK stack
Hardware testing revealed that SSY and PBK push to separate stacks,
allowing code like this:
        SSY label1;
        PBK label2;
        SYNC;
label1: BRK;
label2: EXIT;
			
			
This commit is contained in:
		
							parent
							
								
									51f8ffce6b
								
							
						
					
					
						commit
						e8bd976b4d
					
				
					 4 changed files with 78 additions and 27 deletions
				
			
		|  | @ -143,6 +143,24 @@ u32 GetGenericAttributeIndex(Attribute::Index index) { | |||
|     return static_cast<u32>(index) - static_cast<u32>(Attribute::Index::Attribute_0); | ||||
| } | ||||
| 
 | ||||
| constexpr const char* GetFlowStackPrefix(MetaStackClass stack) { | ||||
|     switch (stack) { | ||||
|     case MetaStackClass::Ssy: | ||||
|         return "ssy"; | ||||
|     case MetaStackClass::Pbk: | ||||
|         return "pbk"; | ||||
|     } | ||||
|     return {}; | ||||
| } | ||||
| 
 | ||||
| std::string FlowStackName(MetaStackClass stack) { | ||||
|     return fmt::format("{}_flow_stack", GetFlowStackPrefix(stack)); | ||||
| } | ||||
| 
 | ||||
| std::string FlowStackTopName(MetaStackClass stack) { | ||||
|     return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack)); | ||||
| } | ||||
| 
 | ||||
| class GLSLDecompiler final { | ||||
| public: | ||||
|     explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ShaderStage stage, | ||||
|  | @ -173,8 +191,10 @@ public: | |||
|         // TODO(Subv): Figure out the actual depth of the flow stack, for now it seems
 | ||||
|         // unlikely that shaders will use 20 nested SSYs and PBKs.
 | ||||
|         constexpr u32 FLOW_STACK_SIZE = 20; | ||||
|         code.AddLine("uint flow_stack[{}];", FLOW_STACK_SIZE); | ||||
|         code.AddLine("uint flow_stack_top = 0u;"); | ||||
|         for (const auto stack : std::array{MetaStackClass::Ssy, MetaStackClass::Pbk}) { | ||||
|             code.AddLine("uint {}[{}];", FlowStackName(stack), FLOW_STACK_SIZE); | ||||
|             code.AddLine("uint {} = 0u;", FlowStackTopName(stack)); | ||||
|         } | ||||
| 
 | ||||
|         code.AddLine("while (true) {{"); | ||||
|         ++code.scope; | ||||
|  | @ -1438,15 +1458,18 @@ private: | |||
|     } | ||||
| 
 | ||||
|     std::string PushFlowStack(Operation operation) { | ||||
|         const auto stack = std::get<MetaStackClass>(operation.GetMeta()); | ||||
|         const auto target = std::get_if<ImmediateNode>(&*operation[0]); | ||||
|         UNIMPLEMENTED_IF(!target); | ||||
| 
 | ||||
|         code.AddLine("flow_stack[flow_stack_top++] = 0x{:x}u;", target->GetValue()); | ||||
|         code.AddLine("{}[{}++] = 0x{:x}u;", FlowStackName(stack), FlowStackTopName(stack), | ||||
|                      target->GetValue()); | ||||
|         return {}; | ||||
|     } | ||||
| 
 | ||||
|     std::string PopFlowStack(Operation operation) { | ||||
|         code.AddLine("jmp_to = flow_stack[--flow_stack_top];"); | ||||
|         const auto stack = std::get<MetaStackClass>(operation.GetMeta()); | ||||
|         code.AddLine("jmp_to = {}[--{}];", FlowStackName(stack), FlowStackTopName(stack)); | ||||
|         code.AddLine("break;"); | ||||
|         return {}; | ||||
|     } | ||||
|  |  | |||
|  | @ -132,20 +132,16 @@ public: | |||
|             branch_labels.push_back(label); | ||||
|         } | ||||
| 
 | ||||
|         // TODO(Rodrigo): Figure out the actual depth of the flow stack, for now it seems unlikely
 | ||||
|         // that shaders will use 20 nested SSYs and PBKs.
 | ||||
|         constexpr u32 FLOW_STACK_SIZE = 20; | ||||
|         const Id flow_stack_type = TypeArray(t_uint, Constant(t_uint, FLOW_STACK_SIZE)); | ||||
|         jmp_to = Emit(OpVariable(TypePointer(spv::StorageClass::Function, t_uint), | ||||
|                                  spv::StorageClass::Function, Constant(t_uint, first_address))); | ||||
|         flow_stack = Emit(OpVariable(TypePointer(spv::StorageClass::Function, flow_stack_type), | ||||
|                                      spv::StorageClass::Function, ConstantNull(flow_stack_type))); | ||||
|         flow_stack_top = | ||||
|             Emit(OpVariable(t_func_uint, spv::StorageClass::Function, Constant(t_uint, 0))); | ||||
|         std::tie(ssy_flow_stack, ssy_flow_stack_top) = CreateFlowStack(); | ||||
|         std::tie(pbk_flow_stack, pbk_flow_stack_top) = CreateFlowStack(); | ||||
| 
 | ||||
|         Name(jmp_to, "jmp_to"); | ||||
|         Name(flow_stack, "flow_stack"); | ||||
|         Name(flow_stack_top, "flow_stack_top"); | ||||
|         Name(ssy_flow_stack, "ssy_flow_stack"); | ||||
|         Name(ssy_flow_stack_top, "ssy_flow_stack_top"); | ||||
|         Name(pbk_flow_stack, "pbk_flow_stack"); | ||||
|         Name(pbk_flow_stack_top, "pbk_flow_stack_top"); | ||||
| 
 | ||||
|         Emit(OpBranch(loop_label)); | ||||
|         Emit(loop_label); | ||||
|  | @ -952,6 +948,7 @@ private: | |||
|         const auto target = std::get_if<ImmediateNode>(&*operation[0]); | ||||
|         ASSERT(target); | ||||
| 
 | ||||
|         const auto [flow_stack, flow_stack_top] = GetFlowStack(operation); | ||||
|         const Id current = Emit(OpLoad(t_uint, flow_stack_top)); | ||||
|         const Id next = Emit(OpIAdd(t_uint, current, Constant(t_uint, 1))); | ||||
|         const Id access = Emit(OpAccessChain(t_func_uint, flow_stack, current)); | ||||
|  | @ -962,6 +959,7 @@ private: | |||
|     } | ||||
| 
 | ||||
|     Id PopFlowStack(Operation operation) { | ||||
|         const auto [flow_stack, flow_stack_top] = GetFlowStack(operation); | ||||
|         const Id current = Emit(OpLoad(t_uint, flow_stack_top)); | ||||
|         const Id previous = Emit(OpISub(t_uint, current, Constant(t_uint, 1))); | ||||
|         const Id access = Emit(OpAccessChain(t_func_uint, flow_stack, previous)); | ||||
|  | @ -1172,6 +1170,31 @@ private: | |||
|         Emit(skip_label); | ||||
|     } | ||||
| 
 | ||||
|     std::tuple<Id, Id> CreateFlowStack() { | ||||
|         // TODO(Rodrigo): Figure out the actual depth of the flow stack, for now it seems unlikely
 | ||||
|         // that shaders will use 20 nested SSYs and PBKs.
 | ||||
|         constexpr u32 FLOW_STACK_SIZE = 20; | ||||
|         constexpr auto storage_class = spv::StorageClass::Function; | ||||
| 
 | ||||
|         const Id flow_stack_type = TypeArray(t_uint, Constant(t_uint, FLOW_STACK_SIZE)); | ||||
|         const Id stack = Emit(OpVariable(TypePointer(storage_class, flow_stack_type), storage_class, | ||||
|                                          ConstantNull(flow_stack_type))); | ||||
|         const Id top = Emit(OpVariable(t_func_uint, storage_class, Constant(t_uint, 0))); | ||||
|         return std::tie(stack, top); | ||||
|     } | ||||
| 
 | ||||
|     std::pair<Id, Id> GetFlowStack(Operation operation) { | ||||
|         const auto stack_class = std::get<MetaStackClass>(operation.GetMeta()); | ||||
|         switch (stack_class) { | ||||
|         case MetaStackClass::Ssy: | ||||
|             return {ssy_flow_stack, ssy_flow_stack_top}; | ||||
|         case MetaStackClass::Pbk: | ||||
|             return {pbk_flow_stack, pbk_flow_stack_top}; | ||||
|         } | ||||
|         UNREACHABLE(); | ||||
|         return {}; | ||||
|     } | ||||
| 
 | ||||
|     static constexpr OperationDecompilersArray operation_decompilers = { | ||||
|         &SPIRVDecompiler::Assign, | ||||
| 
 | ||||
|  | @ -1414,8 +1437,10 @@ private: | |||
| 
 | ||||
|     Id execute_function{}; | ||||
|     Id jmp_to{}; | ||||
|     Id flow_stack_top{}; | ||||
|     Id flow_stack{}; | ||||
|     Id ssy_flow_stack_top{}; | ||||
|     Id pbk_flow_stack_top{}; | ||||
|     Id ssy_flow_stack{}; | ||||
|     Id pbk_flow_stack{}; | ||||
|     Id continue_label{}; | ||||
|     std::map<u32, Id> labels; | ||||
| }; | ||||
|  |  | |||
|  | @ -109,22 +109,20 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { | |||
|         UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, | ||||
|                              "Constant buffer flow is not supported"); | ||||
| 
 | ||||
|         // The SSY opcode tells the GPU where to re-converge divergent execution paths, it sets the
 | ||||
|         // target of the jump that the SYNC instruction will make. The SSY opcode has a similar
 | ||||
|         // structure to the BRA opcode.
 | ||||
|         // The SSY opcode tells the GPU where to re-converge divergent execution paths with SYNC.
 | ||||
|         const u32 target = pc + instr.bra.GetBranchTarget(); | ||||
|         bb.push_back(Operation(OperationCode::PushFlowStack, Immediate(target))); | ||||
|         bb.push_back( | ||||
|             Operation(OperationCode::PushFlowStack, MetaStackClass::Ssy, Immediate(target))); | ||||
|         break; | ||||
|     } | ||||
|     case OpCode::Id::PBK: { | ||||
|         UNIMPLEMENTED_IF_MSG(instr.bra.constant_buffer != 0, | ||||
|                              "Constant buffer PBK is not supported"); | ||||
| 
 | ||||
|         // PBK pushes to a stack the address where BRK will jump to. This shares stack with SSY but
 | ||||
|         // using SYNC on a PBK address will kill the shader execution. We don't emulate this because
 | ||||
|         // it's very unlikely a driver will emit such invalid shader.
 | ||||
|         // PBK pushes to a stack the address where BRK will jump to.
 | ||||
|         const u32 target = pc + instr.bra.GetBranchTarget(); | ||||
|         bb.push_back(Operation(OperationCode::PushFlowStack, Immediate(target))); | ||||
|         bb.push_back( | ||||
|             Operation(OperationCode::PushFlowStack, MetaStackClass::Pbk, Immediate(target))); | ||||
|         break; | ||||
|     } | ||||
|     case OpCode::Id::SYNC: { | ||||
|  | @ -133,7 +131,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { | |||
|                              static_cast<u32>(cc)); | ||||
| 
 | ||||
|         // The SYNC opcode jumps to the address previously set by the SSY opcode
 | ||||
|         bb.push_back(Operation(OperationCode::PopFlowStack)); | ||||
|         bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Ssy)); | ||||
|         break; | ||||
|     } | ||||
|     case OpCode::Id::BRK: { | ||||
|  | @ -142,7 +140,7 @@ u32 ShaderIR::DecodeOther(NodeBlock& bb, u32 pc) { | |||
|                              static_cast<u32>(cc)); | ||||
| 
 | ||||
|         // The BRK opcode jumps to the address previously set by the PBK opcode
 | ||||
|         bb.push_back(Operation(OperationCode::PopFlowStack)); | ||||
|         bb.push_back(Operation(OperationCode::PopFlowStack, MetaStackClass::Pbk)); | ||||
|         break; | ||||
|     } | ||||
|     case OpCode::Id::IPA: { | ||||
|  |  | |||
|  | @ -174,6 +174,11 @@ enum class InternalFlag { | |||
|     Amount = 4, | ||||
| }; | ||||
| 
 | ||||
/// Identifies which flow stack a push or pop operation works on.
enum class MetaStackClass {
    /// Stack pushed by SSY and popped by SYNC.
    Ssy,
    /// Stack pushed by PBK and popped by BRK.
    Pbk,
};
| 
 | ||||
| class OperationNode; | ||||
| class ConditionalNode; | ||||
| class GprNode; | ||||
|  | @ -285,7 +290,7 @@ struct MetaTexture { | |||
| }; | ||||
| 
 | ||||
| /// Parameters that modify an operation but are not part of any particular operand
 | ||||
| using Meta = std::variant<MetaArithmetic, MetaTexture, Tegra::Shader::HalfType>; | ||||
| using Meta = std::variant<MetaArithmetic, MetaTexture, MetaStackClass, Tegra::Shader::HalfType>; | ||||
| 
 | ||||
| /// Holds any kind of operation that can be done in the IR
 | ||||
| class OperationNode final { | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 ReinUsesLisp
						ReinUsesLisp