forked from eden-emu/eden
		
	 d54d7de40e
			
		
	
	
		d54d7de40e
		
	
	
	
	
		
			
			This commit regresses VertexA shaders, their transformation pass has to be adapted to the new control flow.
		
			
				
	
	
		
			193 lines
		
	
	
	
		
			7.2 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			193 lines
		
	
	
	
		
			7.2 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
| // Copyright 2021 yuzu Emulator Project
 | |
| // Licensed under GPLv2 or any later version
 | |
| // Refer to the license.txt file included.
 | |
| 
 | |
| #include <algorithm>
 | |
| #include <memory>
 | |
| #include <ranges>
 | |
| #include <vector>
 | |
| 
 | |
| #include "shader_recompiler/frontend/ir/basic_block.h"
 | |
| #include "shader_recompiler/frontend/ir/post_order.h"
 | |
| #include "shader_recompiler/frontend/maxwell/program.h"
 | |
| #include "shader_recompiler/frontend/maxwell/structured_control_flow.h"
 | |
| #include "shader_recompiler/frontend/maxwell/translate/translate.h"
 | |
| #include "shader_recompiler/ir_opt/passes.h"
 | |
| 
 | |
| namespace Shader::Maxwell {
 | |
| namespace {
 | |
| IR::BlockList GenerateBlocks(const IR::AbstractSyntaxList& syntax_list) {
 | |
|     auto syntax_blocks{syntax_list | std::views::filter([](const auto& node) {
 | |
|                            return node.type == IR::AbstractSyntaxNode::Type::Block;
 | |
|                        })};
 | |
|     IR::BlockList blocks(std::ranges::distance(syntax_blocks));
 | |
|     std::ranges::transform(syntax_blocks, blocks.begin(),
 | |
|                            [](const IR::AbstractSyntaxNode& node) { return node.block; });
 | |
|     return blocks;
 | |
| }
 | |
| 
 | |
| void RemoveUnreachableBlocks(IR::Program& program) {
 | |
|     // Some blocks might be unreachable if a function call exists unconditionally
 | |
|     // If this happens the number of blocks and post order blocks will mismatch
 | |
|     if (program.blocks.size() == program.post_order_blocks.size()) {
 | |
|         return;
 | |
|     }
 | |
|     const auto begin{program.blocks.begin() + 1};
 | |
|     const auto end{program.blocks.end()};
 | |
|     const auto pred{[](IR::Block* block) { return block->ImmPredecessors().empty(); }};
 | |
|     program.blocks.erase(std::remove_if(begin, end, pred), end);
 | |
| }
 | |
| 
 | |
| void CollectInterpolationInfo(Environment& env, IR::Program& program) {
 | |
|     if (program.stage != Stage::Fragment) {
 | |
|         return;
 | |
|     }
 | |
|     const ProgramHeader& sph{env.SPH()};
 | |
|     for (size_t index = 0; index < program.info.input_generics.size(); ++index) {
 | |
|         std::optional<PixelImap> imap;
 | |
|         for (const PixelImap value : sph.ps.GenericInputMap(static_cast<u32>(index))) {
 | |
|             if (value == PixelImap::Unused) {
 | |
|                 continue;
 | |
|             }
 | |
|             if (imap && imap != value) {
 | |
|                 throw NotImplementedException("Per component interpolation");
 | |
|             }
 | |
|             imap = value;
 | |
|         }
 | |
|         if (!imap) {
 | |
|             continue;
 | |
|         }
 | |
|         program.info.input_generics[index].interpolation = [&] {
 | |
|             switch (*imap) {
 | |
|             case PixelImap::Unused:
 | |
|             case PixelImap::Perspective:
 | |
|                 return Interpolation::Smooth;
 | |
|             case PixelImap::Constant:
 | |
|                 return Interpolation::Flat;
 | |
|             case PixelImap::ScreenLinear:
 | |
|                 return Interpolation::NoPerspective;
 | |
|             }
 | |
|             throw NotImplementedException("Unknown interpolation {}", *imap);
 | |
|         }();
 | |
|     }
 | |
| }
 | |
| 
 | |
| void AddNVNStorageBuffers(IR::Program& program) {
 | |
|     if (!program.info.uses_global_memory) {
 | |
|         return;
 | |
|     }
 | |
|     const u32 driver_cbuf{0};
 | |
|     const u32 descriptor_size{0x10};
 | |
|     const u32 num_buffers{16};
 | |
|     const u32 base{[&] {
 | |
|         switch (program.stage) {
 | |
|         case Stage::VertexA:
 | |
|         case Stage::VertexB:
 | |
|             return 0x110u;
 | |
|         case Stage::TessellationControl:
 | |
|             return 0x210u;
 | |
|         case Stage::TessellationEval:
 | |
|             return 0x310u;
 | |
|         case Stage::Geometry:
 | |
|             return 0x410u;
 | |
|         case Stage::Fragment:
 | |
|             return 0x510u;
 | |
|         case Stage::Compute:
 | |
|             return 0x310u;
 | |
|         }
 | |
|         throw InvalidArgument("Invalid stage {}", program.stage);
 | |
|     }()};
 | |
|     auto& descs{program.info.storage_buffers_descriptors};
 | |
|     for (u32 index = 0; index < num_buffers; ++index) {
 | |
|         if (!program.info.nvn_buffer_used[index]) {
 | |
|             continue;
 | |
|         }
 | |
|         const u32 offset{base + index * descriptor_size};
 | |
|         const auto it{std::ranges::find(descs, offset, &StorageBufferDescriptor::cbuf_offset)};
 | |
|         if (it != descs.end()) {
 | |
|             it->is_written |= program.info.stores_global_memory;
 | |
|             continue;
 | |
|         }
 | |
|         descs.push_back({
 | |
|             .cbuf_index = driver_cbuf,
 | |
|             .cbuf_offset = offset,
 | |
|             .count = 1,
 | |
|             .is_written = program.info.stores_global_memory,
 | |
|         });
 | |
|     }
 | |
| }
 | |
| } // Anonymous namespace
 | |
| 
 | |
| IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Block>& block_pool,
 | |
|                              Environment& env, Flow::CFG& cfg) {
 | |
|     IR::Program program;
 | |
|     program.syntax_list = BuildASL(inst_pool, block_pool, env, cfg);
 | |
|     program.blocks = GenerateBlocks(program.syntax_list);
 | |
|     program.post_order_blocks = PostOrder(program.syntax_list.front());
 | |
|     program.stage = env.ShaderStage();
 | |
|     program.local_memory_size = env.LocalMemorySize();
 | |
|     switch (program.stage) {
 | |
|     case Stage::TessellationControl: {
 | |
|         const ProgramHeader& sph{env.SPH()};
 | |
|         program.invocations = sph.common2.threads_per_input_primitive;
 | |
|         break;
 | |
|     }
 | |
|     case Stage::Geometry: {
 | |
|         const ProgramHeader& sph{env.SPH()};
 | |
|         program.output_topology = sph.common3.output_topology;
 | |
|         program.output_vertices = sph.common4.max_output_vertices;
 | |
|         program.invocations = sph.common2.threads_per_input_primitive;
 | |
|         break;
 | |
|     }
 | |
|     case Stage::Compute:
 | |
|         program.workgroup_size = env.WorkgroupSize();
 | |
|         program.shared_memory_size = env.SharedMemorySize();
 | |
|         break;
 | |
|     default:
 | |
|         break;
 | |
|     }
 | |
|     RemoveUnreachableBlocks(program);
 | |
| 
 | |
|     // Replace instructions before the SSA rewrite
 | |
|     Optimization::LowerFp16ToFp32(program);
 | |
| 
 | |
|     Optimization::SsaRewritePass(program);
 | |
| 
 | |
|     Optimization::GlobalMemoryToStorageBufferPass(program);
 | |
|     Optimization::TexturePass(env, program);
 | |
| 
 | |
|     Optimization::ConstantPropagationPass(program);
 | |
|     Optimization::DeadCodeEliminationPass(program);
 | |
|     Optimization::VerificationPass(program);
 | |
|     Optimization::CollectShaderInfoPass(env, program);
 | |
|     CollectInterpolationInfo(env, program);
 | |
|     AddNVNStorageBuffers(program);
 | |
|     return program;
 | |
| }
 | |
| 
 | |
| IR::Program MergeDualVertexPrograms(IR::Program& vertex_a, IR::Program& vertex_b,
 | |
|                                     Environment& env_vertex_b) {
 | |
|     IR::Program result{};
 | |
|     Optimization::VertexATransformPass(vertex_a);
 | |
|     Optimization::VertexBTransformPass(vertex_b);
 | |
|     std::swap(result.blocks, vertex_a.blocks);
 | |
|     result.blocks.insert(result.blocks.end(), vertex_b.blocks.begin(), vertex_b.blocks.end());
 | |
|     result.stage = Stage::VertexB;
 | |
|     result.info = vertex_a.info;
 | |
|     result.local_memory_size = std::max(vertex_a.local_memory_size, vertex_b.local_memory_size);
 | |
| 
 | |
|     for (size_t index = 0; index < 32; ++index) {
 | |
|         result.info.input_generics[index].used |= vertex_b.info.input_generics[index].used;
 | |
|         result.info.stores_generics[index] |= vertex_b.info.stores_generics[index];
 | |
|     }
 | |
|     Optimization::JoinTextureInfo(result.info, vertex_b.info);
 | |
|     Optimization::JoinStorageInfo(result.info, vertex_b.info);
 | |
|     Optimization::DualVertexJoinPass(result);
 | |
|     result.post_order_blocks = PostOrder(result.syntax_list.front());
 | |
|     Optimization::DeadCodeEliminationPass(result);
 | |
|     Optimization::VerificationPass(result);
 | |
|     Optimization::CollectShaderInfoPass(env_vertex_b, result);
 | |
|     return result;
 | |
| }
 | |
| 
 | |
| } // namespace Shader::Maxwell
 |