forked from eden-emu/eden
		
	Merge pull request #6900 from ameerj/attr-reorder
structured_control_flow: Add DemoteCombinationPass
This commit is contained in:
		
						commit
						59cf0848c7
					
				
					 7 changed files with 140 additions and 10 deletions
				
			
		|  | @ -20,6 +20,7 @@ | |||
| #include "shader_recompiler/frontend/maxwell/decode.h" | ||||
| #include "shader_recompiler/frontend/maxwell/structured_control_flow.h" | ||||
| #include "shader_recompiler/frontend/maxwell/translate/translate.h" | ||||
| #include "shader_recompiler/host_translate_info.h" | ||||
| #include "shader_recompiler/object_pool.h" | ||||
| 
 | ||||
| namespace Shader::Maxwell { | ||||
|  | @ -652,7 +653,7 @@ class TranslatePass { | |||
| public: | ||||
|     TranslatePass(ObjectPool<IR::Inst>& inst_pool_, ObjectPool<IR::Block>& block_pool_, | ||||
|                   ObjectPool<Statement>& stmt_pool_, Environment& env_, Statement& root_stmt, | ||||
|                   IR::AbstractSyntaxList& syntax_list_) | ||||
|                   IR::AbstractSyntaxList& syntax_list_, const HostTranslateInfo& host_info) | ||||
|         : stmt_pool{stmt_pool_}, inst_pool{inst_pool_}, block_pool{block_pool_}, env{env_}, | ||||
|           syntax_list{syntax_list_} { | ||||
|         Visit(root_stmt, nullptr, nullptr); | ||||
|  | @ -660,6 +661,9 @@ public: | |||
|         IR::Block& first_block{*syntax_list.front().data.block}; | ||||
|         IR::IREmitter ir(first_block, first_block.begin()); | ||||
|         ir.Prologue(); | ||||
|         if (uses_demote_to_helper && host_info.needs_demote_reorder) { | ||||
|             DemoteCombinationPass(); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
| private: | ||||
|  | @ -809,7 +813,14 @@ private: | |||
|             } | ||||
|             case StatementType::Return: { | ||||
|                 ensure_block(); | ||||
|                 IR::IREmitter{*current_block}.Epilogue(); | ||||
|                 IR::Block* return_block{block_pool.Create(inst_pool)}; | ||||
|                 IR::IREmitter{*return_block}.Epilogue(); | ||||
|                 current_block->AddBranch(return_block); | ||||
| 
 | ||||
|                 auto& merge{syntax_list.emplace_back()}; | ||||
|                 merge.type = IR::AbstractSyntaxNode::Type::Block; | ||||
|                 merge.data.block = return_block; | ||||
| 
 | ||||
|                 current_block = nullptr; | ||||
|                 syntax_list.emplace_back().type = IR::AbstractSyntaxNode::Type::Return; | ||||
|                 break; | ||||
|  | @ -824,6 +835,7 @@ private: | |||
|                 auto& merge{syntax_list.emplace_back()}; | ||||
|                 merge.type = IR::AbstractSyntaxNode::Type::Block; | ||||
|                 merge.data.block = demote_block; | ||||
|                 uses_demote_to_helper = true; | ||||
|                 break; | ||||
|             } | ||||
|             case StatementType::Unreachable: { | ||||
|  | @ -855,11 +867,117 @@ private: | |||
|         return block_pool.Create(inst_pool); | ||||
|     } | ||||
| 
 | ||||
|     void DemoteCombinationPass() { | ||||
|         using Type = IR::AbstractSyntaxNode::Type; | ||||
|         std::vector<IR::Block*> demote_blocks; | ||||
|         std::vector<IR::U1> demote_conds; | ||||
|         u32 num_epilogues{}; | ||||
|         u32 branch_depth{}; | ||||
|         for (const IR::AbstractSyntaxNode& node : syntax_list) { | ||||
|             if (node.type == Type::If) { | ||||
|                 ++branch_depth; | ||||
|             } | ||||
|             if (node.type == Type::EndIf) { | ||||
|                 --branch_depth; | ||||
|             } | ||||
|             if (node.type != Type::Block) { | ||||
|                 continue; | ||||
|             } | ||||
|             if (branch_depth > 1) { | ||||
|                 // Skip reordering nested demote branches.
 | ||||
|                 continue; | ||||
|             } | ||||
|             for (const IR::Inst& inst : node.data.block->Instructions()) { | ||||
|                 const IR::Opcode op{inst.GetOpcode()}; | ||||
|                 if (op == IR::Opcode::DemoteToHelperInvocation) { | ||||
|                     demote_blocks.push_back(node.data.block); | ||||
|                     break; | ||||
|                 } | ||||
|                 if (op == IR::Opcode::Epilogue) { | ||||
|                     ++num_epilogues; | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|         if (demote_blocks.size() == 0) { | ||||
|             return; | ||||
|         } | ||||
|         if (num_epilogues > 1) { | ||||
|             LOG_DEBUG(Shader, "Combining demotes with more than one return is not implemented."); | ||||
|             return; | ||||
|         } | ||||
|         s64 last_iterator_offset{}; | ||||
|         auto& asl{syntax_list}; | ||||
|         for (const IR::Block* demote_block : demote_blocks) { | ||||
|             const auto start_it{asl.begin() + last_iterator_offset}; | ||||
|             auto asl_it{std::find_if(start_it, asl.end(), [&](const IR::AbstractSyntaxNode& asn) { | ||||
|                 return asn.type == Type::If && asn.data.if_node.body == demote_block; | ||||
|             })}; | ||||
|             if (asl_it == asl.end()) { | ||||
|                 // Demote without a conditional branch.
 | ||||
|                 // No need to proceed since all fragment instances will be demoted regardless.
 | ||||
|                 return; | ||||
|             } | ||||
|             const IR::Block* const end_if = asl_it->data.if_node.merge; | ||||
|             demote_conds.push_back(asl_it->data.if_node.cond); | ||||
|             last_iterator_offset = std::distance(asl.begin(), asl_it); | ||||
| 
 | ||||
|             asl_it = asl.erase(asl_it); | ||||
|             asl_it = std::find_if(asl_it, asl.end(), [&](const IR::AbstractSyntaxNode& asn) { | ||||
|                 return asn.type == Type::Block && asn.data.block == demote_block; | ||||
|             }); | ||||
| 
 | ||||
|             asl_it = asl.erase(asl_it); | ||||
|             asl_it = std::find_if(asl_it, asl.end(), [&](const IR::AbstractSyntaxNode& asn) { | ||||
|                 return asn.type == Type::EndIf && asn.data.end_if.merge == end_if; | ||||
|             }); | ||||
|             asl_it = asl.erase(asl_it); | ||||
|         } | ||||
|         const auto epilogue_func{[](const IR::AbstractSyntaxNode& asn) { | ||||
|             if (asn.type != Type::Block) { | ||||
|                 return false; | ||||
|             } | ||||
|             for (const auto& inst : asn.data.block->Instructions()) { | ||||
|                 if (inst.GetOpcode() == IR::Opcode::Epilogue) { | ||||
|                     return true; | ||||
|                 } | ||||
|             } | ||||
|             return false; | ||||
|         }}; | ||||
|         const auto reverse_it{std::find_if(asl.rbegin(), asl.rend(), epilogue_func)}; | ||||
|         const auto return_block_it{(reverse_it + 1).base()}; | ||||
| 
 | ||||
|         IR::IREmitter ir{*(return_block_it - 1)->data.block}; | ||||
|         IR::U1 cond(IR::Value(false)); | ||||
|         for (const auto& demote_cond : demote_conds) { | ||||
|             cond = ir.LogicalOr(cond, demote_cond); | ||||
|         } | ||||
|         cond.Inst()->DestructiveAddUsage(1); | ||||
| 
 | ||||
|         IR::AbstractSyntaxNode demote_if_node{}; | ||||
|         demote_if_node.type = Type::If; | ||||
|         demote_if_node.data.if_node.cond = cond; | ||||
|         demote_if_node.data.if_node.body = demote_blocks[0]; | ||||
|         demote_if_node.data.if_node.merge = return_block_it->data.block; | ||||
| 
 | ||||
|         IR::AbstractSyntaxNode demote_node{}; | ||||
|         demote_node.type = Type::Block; | ||||
|         demote_node.data.block = demote_blocks[0]; | ||||
| 
 | ||||
|         IR::AbstractSyntaxNode demote_endif_node{}; | ||||
|         demote_endif_node.type = Type::EndIf; | ||||
|         demote_endif_node.data.end_if.merge = return_block_it->data.block; | ||||
| 
 | ||||
|         asl.insert(return_block_it, demote_endif_node); | ||||
|         asl.insert(return_block_it, demote_node); | ||||
|         asl.insert(return_block_it, demote_if_node); | ||||
|     } | ||||
| 
 | ||||
|     ObjectPool<Statement>& stmt_pool; | ||||
|     ObjectPool<IR::Inst>& inst_pool; | ||||
|     ObjectPool<IR::Block>& block_pool; | ||||
|     Environment& env; | ||||
|     IR::AbstractSyntaxList& syntax_list; | ||||
|     bool uses_demote_to_helper{}; | ||||
| 
 | ||||
| // TODO: C++20 Remove this when all compilers support constexpr std::vector
 | ||||
| #if __cpp_lib_constexpr_vector >= 201907 | ||||
|  | @ -871,12 +989,13 @@ private: | |||
| } // Anonymous namespace
 | ||||
| 
 | ||||
| IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool, | ||||
|                                 Environment& env, Flow::CFG& cfg) { | ||||
|                                 Environment& env, Flow::CFG& cfg, | ||||
|                                 const HostTranslateInfo& host_info) { | ||||
|     ObjectPool<Statement> stmt_pool{64}; | ||||
|     GotoPass goto_pass{cfg, stmt_pool}; | ||||
|     Statement& root{goto_pass.RootStatement()}; | ||||
|     IR::AbstractSyntaxList syntax_list; | ||||
|     TranslatePass{inst_pool, block_pool, stmt_pool, env, root, syntax_list}; | ||||
|     TranslatePass{inst_pool, block_pool, stmt_pool, env, root, syntax_list, host_info}; | ||||
|     return syntax_list; | ||||
| } | ||||
| 
 | ||||
|  |  | |||
|  | @ -11,10 +11,13 @@ | |||
| #include "shader_recompiler/frontend/maxwell/control_flow.h" | ||||
| #include "shader_recompiler/object_pool.h" | ||||
| 
 | ||||
| namespace Shader::Maxwell { | ||||
| namespace Shader { | ||||
| struct HostTranslateInfo; | ||||
| namespace Maxwell { | ||||
| 
 | ||||
| [[nodiscard]] IR::AbstractSyntaxList BuildASL(ObjectPool<IR::Inst>& inst_pool, | ||||
|                                               ObjectPool<IR::Block>& block_pool, Environment& env, | ||||
|                                               Flow::CFG& cfg); | ||||
|                                               Flow::CFG& cfg, const HostTranslateInfo& host_info); | ||||
| 
 | ||||
| } // namespace Shader::Maxwell
 | ||||
| } // namespace Maxwell
 | ||||
| } // namespace Shader
 | ||||
|  |  | |||
|  | @ -130,7 +130,7 @@ void AddNVNStorageBuffers(IR::Program& program) { | |||
| IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool, | ||||
|                              Environment& env, Flow::CFG& cfg, const HostTranslateInfo& host_info) { | ||||
|     IR::Program program; | ||||
|     program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg); | ||||
|     program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg, host_info); | ||||
|     program.blocks = GenerateBlocks(program.syntax_list); | ||||
|     program.post_order_blocks = PostOrder(program.syntax_list.front()); | ||||
|     program.stage = env.ShaderStage(); | ||||
|  |  | |||
|  | @ -13,6 +13,7 @@ namespace Shader { | |||
| struct HostTranslateInfo { | ||||
|     bool support_float16{};      ///< True when the device supports 16-bit floats
 | ||||
|     bool support_int64{};        ///< True when the device supports 64-bit integers
 | ||||
|     bool needs_demote_reorder{}; ///< True when the device needs DemoteToHelperInvocation reordered
 | ||||
| }; | ||||
| 
 | ||||
| } // namespace Shader
 | ||||
|  |  | |||
|  | @ -156,6 +156,10 @@ public: | |||
|         return shader_backend; | ||||
|     } | ||||
| 
 | ||||
|     bool IsAmd() const { | ||||
|         return vendor_name == "ATI Technologies Inc."; | ||||
|     } | ||||
| 
 | ||||
| private: | ||||
|     static bool TestVariableAoffi(); | ||||
|     static bool TestPreciseBug(); | ||||
|  |  | |||
|  | @ -219,6 +219,7 @@ ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindo | |||
|       host_info{ | ||||
|           .support_float16 = false, | ||||
|           .support_int64 = device.HasShaderInt64(), | ||||
|           .needs_demote_reorder = device.IsAmd(), | ||||
|       } { | ||||
|     if (use_asynchronous_shaders) { | ||||
|         workers = CreateWorkers(); | ||||
|  |  | |||
|  | @ -325,6 +325,8 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::Engines::Maxw | |||
|     host_info = Shader::HostTranslateInfo{ | ||||
|         .support_float16 = device.IsFloat16Supported(), | ||||
|         .support_int64 = device.IsShaderInt64Supported(), | ||||
|         .needs_demote_reorder = driver_id == VK_DRIVER_ID_AMD_PROPRIETARY_KHR || | ||||
|                                 driver_id == VK_DRIVER_ID_AMD_OPEN_SOURCE_KHR, | ||||
|     }; | ||||
| } | ||||
| 
 | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 bunnei
						bunnei