forked from eden-emu/eden
		
	shader/registry: Store graphics and compute metadata
Store information GLSL forces us to provide but it's dynamic state in hardware (workgroup sizes, primitive topology, shared memory size).
This commit is contained in:
		
							parent
							
								
									99be31c902
								
							
						
					
					
						commit
						22052e73de
					
				
					 8 changed files with 176 additions and 75 deletions
				
			
		|  | @ -166,8 +166,9 @@ std::string MakeShaderID(u64 unique_identifier, ShaderType shader_type) { | ||||||
| 
 | 
 | ||||||
| std::shared_ptr<Registry> MakeRegistry(const ShaderDiskCacheEntry& entry) { | std::shared_ptr<Registry> MakeRegistry(const ShaderDiskCacheEntry& entry) { | ||||||
|     const VideoCore::GuestDriverProfile guest_profile{entry.texture_handler_size}; |     const VideoCore::GuestDriverProfile guest_profile{entry.texture_handler_size}; | ||||||
|     auto registry = std::make_shared<Registry>(entry.type, guest_profile); |     const VideoCommon::Shader::SerializedRegistryInfo info{guest_profile, entry.bound_buffer, | ||||||
|     registry->SetBoundBuffer(entry.bound_buffer); |                                                            entry.graphics_info, entry.compute_info}; | ||||||
|  |     const auto registry = std::make_shared<Registry>(entry.type, info); | ||||||
|     for (const auto& [address, value] : entry.keys) { |     for (const auto& [address, value] : entry.keys) { | ||||||
|         const auto [buffer, offset] = address; |         const auto [buffer, offset] = address; | ||||||
|         registry->InsertKey(buffer, offset, value); |         registry->InsertKey(buffer, offset, value); | ||||||
|  | @ -184,9 +185,9 @@ std::shared_ptr<Registry> MakeRegistry(const ShaderDiskCacheEntry& entry) { | ||||||
| 
 | 
 | ||||||
| std::shared_ptr<OGLProgram> BuildShader(const Device& device, ShaderType shader_type, | std::shared_ptr<OGLProgram> BuildShader(const Device& device, ShaderType shader_type, | ||||||
|                                         u64 unique_identifier, const ShaderIR& ir, |                                         u64 unique_identifier, const ShaderIR& ir, | ||||||
|                                         bool hint_retrievable = false) { |                                         const Registry& registry, bool hint_retrievable = false) { | ||||||
|     LOG_INFO(Render_OpenGL, "{}", MakeShaderID(unique_identifier, shader_type)); |     LOG_INFO(Render_OpenGL, "{}", MakeShaderID(unique_identifier, shader_type)); | ||||||
|     const std::string glsl = DecompileShader(device, ir, shader_type); |     const std::string glsl = DecompileShader(device, ir, registry, shader_type); | ||||||
|     OGLShader shader; |     OGLShader shader; | ||||||
|     shader.Create(glsl.c_str(), GetGLShaderType(shader_type)); |     shader.Create(glsl.c_str(), GetGLShaderType(shader_type)); | ||||||
| 
 | 
 | ||||||
|  | @ -239,7 +240,7 @@ Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params, | ||||||
|     // if (!code_b.empty()) {
 |     // if (!code_b.empty()) {
 | ||||||
|     //     ir_b.emplace(code_b, STAGE_MAIN_OFFSET);
 |     //     ir_b.emplace(code_b, STAGE_MAIN_OFFSET);
 | ||||||
|     // }
 |     // }
 | ||||||
|     auto program = BuildShader(params.device, shader_type, params.unique_identifier, ir); |     auto program = BuildShader(params.device, shader_type, params.unique_identifier, ir, *registry); | ||||||
| 
 | 
 | ||||||
|     ShaderDiskCacheEntry entry; |     ShaderDiskCacheEntry entry; | ||||||
|     entry.type = shader_type; |     entry.type = shader_type; | ||||||
|  | @ -247,6 +248,7 @@ Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params, | ||||||
|     entry.code_b = std::move(code_b); |     entry.code_b = std::move(code_b); | ||||||
|     entry.unique_identifier = params.unique_identifier; |     entry.unique_identifier = params.unique_identifier; | ||||||
|     entry.bound_buffer = registry->GetBoundBuffer(); |     entry.bound_buffer = registry->GetBoundBuffer(); | ||||||
|  |     entry.graphics_info = registry->GetGraphicsInfo(); | ||||||
|     entry.keys = registry->GetKeys(); |     entry.keys = registry->GetKeys(); | ||||||
|     entry.bound_samplers = registry->GetBoundSamplers(); |     entry.bound_samplers = registry->GetBoundSamplers(); | ||||||
|     entry.bindless_samplers = registry->GetBindlessSamplers(); |     entry.bindless_samplers = registry->GetBindlessSamplers(); | ||||||
|  | @ -260,16 +262,18 @@ Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params, | ||||||
| Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) { | Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode code) { | ||||||
|     const std::size_t size_in_bytes = code.size() * sizeof(u64); |     const std::size_t size_in_bytes = code.size() * sizeof(u64); | ||||||
| 
 | 
 | ||||||
|     auto registry = |     auto& engine = params.system.GPU().KeplerCompute(); | ||||||
|         std::make_shared<Registry>(ShaderType::Compute, params.system.GPU().KeplerCompute()); |     auto registry = std::make_shared<Registry>(ShaderType::Compute, engine); | ||||||
|     const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, *registry); |     const ShaderIR ir(code, KERNEL_MAIN_OFFSET, COMPILER_SETTINGS, *registry); | ||||||
|     auto program = BuildShader(params.device, ShaderType::Compute, params.unique_identifier, ir); |     const u64 uid = params.unique_identifier; | ||||||
|  |     auto program = BuildShader(params.device, ShaderType::Compute, uid, ir, *registry); | ||||||
| 
 | 
 | ||||||
|     ShaderDiskCacheEntry entry; |     ShaderDiskCacheEntry entry; | ||||||
|     entry.type = ShaderType::Compute; |     entry.type = ShaderType::Compute; | ||||||
|     entry.code = std::move(code); |     entry.code = std::move(code); | ||||||
|     entry.unique_identifier = params.unique_identifier; |     entry.unique_identifier = uid; | ||||||
|     entry.bound_buffer = registry->GetBoundBuffer(); |     entry.bound_buffer = registry->GetBoundBuffer(); | ||||||
|  |     entry.compute_info = registry->GetComputeInfo(); | ||||||
|     entry.keys = registry->GetKeys(); |     entry.keys = registry->GetKeys(); | ||||||
|     entry.bound_samplers = registry->GetBoundSamplers(); |     entry.bound_samplers = registry->GetBoundSamplers(); | ||||||
|     entry.bindless_samplers = registry->GetBindlessSamplers(); |     entry.bindless_samplers = registry->GetBindlessSamplers(); | ||||||
|  | @ -331,8 +335,8 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, | ||||||
|                 return; |                 return; | ||||||
|             } |             } | ||||||
|             const auto& entry = (*transferable)[i]; |             const auto& entry = (*transferable)[i]; | ||||||
|             const u64 unique_identifier = entry.unique_identifier; |             const u64 uid = entry.unique_identifier; | ||||||
|             const auto it = find_precompiled(unique_identifier); |             const auto it = find_precompiled(uid); | ||||||
|             const auto precompiled_entry = it != gl_cache.end() ? &*it : nullptr; |             const auto precompiled_entry = it != gl_cache.end() ? &*it : nullptr; | ||||||
| 
 | 
 | ||||||
|             const bool is_compute = entry.type == ShaderType::Compute; |             const bool is_compute = entry.type == ShaderType::Compute; | ||||||
|  | @ -350,7 +354,7 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading, | ||||||
|             } |             } | ||||||
|             if (!program) { |             if (!program) { | ||||||
|                 // Otherwise compile it from GLSL
 |                 // Otherwise compile it from GLSL
 | ||||||
|                 program = BuildShader(device, entry.type, unique_identifier, ir, true); |                 program = BuildShader(device, entry.type, uid, ir, *registry, true); | ||||||
|             } |             } | ||||||
| 
 | 
 | ||||||
|             PrecompiledShader shader; |             PrecompiledShader shader; | ||||||
|  |  | ||||||
|  | @ -36,6 +36,7 @@ using Tegra::Shader::IpaInterpMode; | ||||||
| using Tegra::Shader::IpaMode; | using Tegra::Shader::IpaMode; | ||||||
| using Tegra::Shader::IpaSampleMode; | using Tegra::Shader::IpaSampleMode; | ||||||
| using Tegra::Shader::Register; | using Tegra::Shader::Register; | ||||||
|  | using VideoCommon::Shader::Registry; | ||||||
| 
 | 
 | ||||||
| using namespace std::string_literals; | using namespace std::string_literals; | ||||||
| using namespace VideoCommon::Shader; | using namespace VideoCommon::Shader; | ||||||
|  | @ -288,6 +289,30 @@ const char* GetImageTypeDeclaration(Tegra::Shader::ImageType image_type) { | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | /// Describes primitive behavior on geometry shaders
 | ||||||
|  | std::pair<const char*, u32> GetPrimitiveDescription(Maxwell::PrimitiveTopology topology) { | ||||||
|  |     switch (topology) { | ||||||
|  |     case Maxwell::PrimitiveTopology::Points: | ||||||
|  |         return {"points", 1}; | ||||||
|  |     case Maxwell::PrimitiveTopology::Lines: | ||||||
|  |     case Maxwell::PrimitiveTopology::LineStrip: | ||||||
|  |         return {"lines", 2}; | ||||||
|  |     case Maxwell::PrimitiveTopology::LinesAdjacency: | ||||||
|  |     case Maxwell::PrimitiveTopology::LineStripAdjacency: | ||||||
|  |         return {"lines_adjacency", 4}; | ||||||
|  |     case Maxwell::PrimitiveTopology::Triangles: | ||||||
|  |     case Maxwell::PrimitiveTopology::TriangleStrip: | ||||||
|  |     case Maxwell::PrimitiveTopology::TriangleFan: | ||||||
|  |         return {"triangles", 3}; | ||||||
|  |     case Maxwell::PrimitiveTopology::TrianglesAdjacency: | ||||||
|  |     case Maxwell::PrimitiveTopology::TriangleStripAdjacency: | ||||||
|  |         return {"triangles_adjacency", 6}; | ||||||
|  |     default: | ||||||
|  |         UNIMPLEMENTED_MSG("topology={}", static_cast<int>(topology)); | ||||||
|  |         return {"points", 1}; | ||||||
|  |     } | ||||||
|  | } | ||||||
|  | 
 | ||||||
| /// Generates code to use for a swizzle operation.
 | /// Generates code to use for a swizzle operation.
 | ||||||
| constexpr const char* GetSwizzle(std::size_t element) { | constexpr const char* GetSwizzle(std::size_t element) { | ||||||
|     constexpr std::array swizzle = {".x", ".y", ".z", ".w"}; |     constexpr std::array swizzle = {".x", ".y", ".z", ".w"}; | ||||||
|  | @ -367,15 +392,17 @@ std::string FlowStackTopName(MetaStackClass stack) { | ||||||
| 
 | 
 | ||||||
| class GLSLDecompiler final { | class GLSLDecompiler final { | ||||||
| public: | public: | ||||||
|     explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ShaderType stage, |     explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, const Registry& registry, | ||||||
|                             std::string_view suffix) |                             ShaderType stage, std::string_view suffix) | ||||||
|         : device{device}, ir{ir}, stage{stage}, suffix{suffix}, header{ir.GetHeader()} {} |         : device{device}, ir{ir}, registry{registry}, stage{stage}, suffix{suffix}, | ||||||
|  |           header{ir.GetHeader()} {} | ||||||
| 
 | 
 | ||||||
|     void Decompile() { |     void Decompile() { | ||||||
|         DeclareHeader(); |         DeclareHeader(); | ||||||
|         DeclareVertex(); |         DeclareVertex(); | ||||||
|         DeclareGeometry(); |         DeclareGeometry(); | ||||||
|         DeclareFragment(); |         DeclareFragment(); | ||||||
|  |         DeclareCompute(); | ||||||
|         DeclareRegisters(); |         DeclareRegisters(); | ||||||
|         DeclareCustomVariables(); |         DeclareCustomVariables(); | ||||||
|         DeclarePredicates(); |         DeclarePredicates(); | ||||||
|  | @ -489,9 +516,15 @@ private: | ||||||
|             return; |             return; | ||||||
|         } |         } | ||||||
| 
 | 
 | ||||||
|  |         const auto& info = registry.GetGraphicsInfo(); | ||||||
|  |         const auto input_topology = info.primitive_topology; | ||||||
|  |         const auto [glsl_topology, max_vertices] = GetPrimitiveDescription(input_topology); | ||||||
|  |         max_input_vertices = max_vertices; | ||||||
|  |         code.AddLine("layout ({}) in;", glsl_topology); | ||||||
|  | 
 | ||||||
|         const auto topology = GetTopologyName(header.common3.output_topology); |         const auto topology = GetTopologyName(header.common3.output_topology); | ||||||
|         const auto max_vertices = header.common4.max_output_vertices.Value(); |         const auto max_output_vertices = header.common4.max_output_vertices.Value(); | ||||||
|         code.AddLine("layout ({}, max_vertices = {}) out;", topology, max_vertices); |         code.AddLine("layout ({}, max_vertices = {}) out;", topology, max_output_vertices); | ||||||
|         code.AddNewLine(); |         code.AddNewLine(); | ||||||
| 
 | 
 | ||||||
|         code.AddLine("in gl_PerVertex {{"); |         code.AddLine("in gl_PerVertex {{"); | ||||||
|  | @ -513,7 +546,8 @@ private: | ||||||
|             if (!IsRenderTargetEnabled(render_target)) { |             if (!IsRenderTargetEnabled(render_target)) { | ||||||
|                 continue; |                 continue; | ||||||
|             } |             } | ||||||
|             code.AddLine("layout (location = {}) out vec4 frag_color{};", render_target, render_target); |             code.AddLine("layout (location = {}) out vec4 frag_color{};", render_target, | ||||||
|  |                          render_target); | ||||||
|             any = true; |             any = true; | ||||||
|         } |         } | ||||||
|         if (any) { |         if (any) { | ||||||
|  | @ -521,6 +555,20 @@ private: | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  |     void DeclareCompute() { | ||||||
|  |         if (stage != ShaderType::Compute) { | ||||||
|  |             return; | ||||||
|  |         } | ||||||
|  |         const auto& info = registry.GetComputeInfo(); | ||||||
|  |         if (const u32 size = info.shared_memory_size_in_words; size > 0) { | ||||||
|  |             code.AddLine("shared uint smem[];", size); | ||||||
|  |             code.AddNewLine(); | ||||||
|  |         } | ||||||
|  |         code.AddLine("layout (local_size_x = {}, local_size_y = {}, local_size_z = {}) in;", | ||||||
|  |                      info.workgroup_size[0], info.workgroup_size[1], info.workgroup_size[2]); | ||||||
|  |         code.AddNewLine(); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|     void DeclareVertexRedeclarations() { |     void DeclareVertexRedeclarations() { | ||||||
|         code.AddLine("out gl_PerVertex {{"); |         code.AddLine("out gl_PerVertex {{"); | ||||||
|         ++code.scope; |         ++code.scope; | ||||||
|  | @ -596,18 +644,16 @@ private: | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     void DeclareLocalMemory() { |     void DeclareLocalMemory() { | ||||||
|  |         u64 local_memory_size = 0; | ||||||
|         if (stage == ShaderType::Compute) { |         if (stage == ShaderType::Compute) { | ||||||
|             code.AddLine("#ifdef LOCAL_MEMORY_SIZE"); |             local_memory_size = registry.GetComputeInfo().local_memory_size_in_words * 4ULL; | ||||||
|             code.AddLine("uint {}[LOCAL_MEMORY_SIZE];", GetLocalMemory()); |         } else { | ||||||
|             code.AddLine("#endif"); |             local_memory_size = header.GetLocalMemorySize(); | ||||||
|             return; |  | ||||||
|         } |         } | ||||||
| 
 |  | ||||||
|         const u64 local_memory_size = header.GetLocalMemorySize(); |  | ||||||
|         if (local_memory_size == 0) { |         if (local_memory_size == 0) { | ||||||
|             return; |             return; | ||||||
|         } |         } | ||||||
|         const auto element_count = Common::AlignUp(local_memory_size, 4) / 4; |         const u64 element_count = Common::AlignUp(local_memory_size, 4) / 4; | ||||||
|         code.AddLine("uint {}[{}];", GetLocalMemory(), element_count); |         code.AddLine("uint {}[{}];", GetLocalMemory(), element_count); | ||||||
|         code.AddNewLine(); |         code.AddNewLine(); | ||||||
|     } |     } | ||||||
|  | @ -996,7 +1042,8 @@ private: | ||||||
|                 // TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games
 |                 // TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games
 | ||||||
|                 // set an 0x80000000 index for those and the shader fails to build. Find out why
 |                 // set an 0x80000000 index for those and the shader fails to build. Find out why
 | ||||||
|                 // this happens and what's its intent.
 |                 // this happens and what's its intent.
 | ||||||
|                 return fmt::format("gs_{}[{} % MAX_VERTEX_INPUT]", name, Visit(buffer).AsUint()); |                 return fmt::format("gs_{}[{} % {}]", name, Visit(buffer).AsUint(), | ||||||
|  |                                    max_input_vertices.value()); | ||||||
|             } |             } | ||||||
|             return std::string(name); |             return std::string(name); | ||||||
|         }; |         }; | ||||||
|  | @ -2428,11 +2475,14 @@ private: | ||||||
| 
 | 
 | ||||||
|     const Device& device; |     const Device& device; | ||||||
|     const ShaderIR& ir; |     const ShaderIR& ir; | ||||||
|  |     const Registry& registry; | ||||||
|     const ShaderType stage; |     const ShaderType stage; | ||||||
|     const std::string_view suffix; |     const std::string_view suffix; | ||||||
|     const Header header; |     const Header header; | ||||||
| 
 | 
 | ||||||
|     ShaderWriter code; |     ShaderWriter code; | ||||||
|  | 
 | ||||||
|  |     std::optional<u32> max_input_vertices; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| std::string GetFlowVariable(u32 index) { | std::string GetFlowVariable(u32 index) { | ||||||
|  | @ -2647,9 +2697,9 @@ ShaderEntries MakeEntries(const VideoCommon::Shader::ShaderIR& ir) { | ||||||
|     return entries; |     return entries; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| std::string DecompileShader(const Device& device, const ShaderIR& ir, ShaderType stage, | std::string DecompileShader(const Device& device, const ShaderIR& ir, const Registry& registry, | ||||||
|                             std::string_view suffix) { |                             ShaderType stage, std::string_view suffix) { | ||||||
|     GLSLDecompiler decompiler(device, ir, stage, suffix); |     GLSLDecompiler decompiler(device, ir, registry, stage, suffix); | ||||||
|     decompiler.Decompile(); |     decompiler.Decompile(); | ||||||
|     return decompiler.GetResult(); |     return decompiler.GetResult(); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | @ -12,12 +12,9 @@ | ||||||
| #include "common/common_types.h" | #include "common/common_types.h" | ||||||
| #include "video_core/engines/maxwell_3d.h" | #include "video_core/engines/maxwell_3d.h" | ||||||
| #include "video_core/engines/shader_type.h" | #include "video_core/engines/shader_type.h" | ||||||
|  | #include "video_core/shader/registry.h" | ||||||
| #include "video_core/shader/shader_ir.h" | #include "video_core/shader/shader_ir.h" | ||||||
| 
 | 
 | ||||||
| namespace VideoCommon::Shader { |  | ||||||
| class ShaderIR; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| namespace OpenGL { | namespace OpenGL { | ||||||
| 
 | 
 | ||||||
| class Device; | class Device; | ||||||
|  | @ -80,6 +77,7 @@ struct ShaderEntries { | ||||||
| ShaderEntries MakeEntries(const VideoCommon::Shader::ShaderIR& ir); | ShaderEntries MakeEntries(const VideoCommon::Shader::ShaderIR& ir); | ||||||
| 
 | 
 | ||||||
| std::string DecompileShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir, | std::string DecompileShader(const Device& device, const VideoCommon::Shader::ShaderIR& ir, | ||||||
|  |                             const VideoCommon::Shader::Registry& registry, | ||||||
|                             Tegra::Engines::ShaderType stage, std::string_view suffix = {}); |                             Tegra::Engines::ShaderType stage, std::string_view suffix = {}); | ||||||
| 
 | 
 | ||||||
| } // namespace OpenGL
 | } // namespace OpenGL
 | ||||||
|  |  | ||||||
|  | @ -48,7 +48,7 @@ struct BindlessSamplerKey { | ||||||
|     Tegra::Engines::SamplerDescriptor sampler; |     Tegra::Engines::SamplerDescriptor sampler; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| constexpr u32 NativeVersion = 16; | constexpr u32 NativeVersion = 17; | ||||||
| 
 | 
 | ||||||
| ShaderCacheVersionHash GetShaderCacheVersionHash() { | ShaderCacheVersionHash GetShaderCacheVersionHash() { | ||||||
|     ShaderCacheVersionHash hash{}; |     ShaderCacheVersionHash hash{}; | ||||||
|  | @ -83,15 +83,16 @@ bool ShaderDiskCacheEntry::Load(FileUtil::IOFile& file) { | ||||||
|         return false; |         return false; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     bool is_texture_handler_size_known; |     u8 is_texture_handler_size_known; | ||||||
|     u32 texture_handler_size_value; |     u32 texture_handler_size_value; | ||||||
|     u32 num_keys; |     u32 num_keys; | ||||||
|     u32 num_bound_samplers; |     u32 num_bound_samplers; | ||||||
|     u32 num_bindless_samplers; |     u32 num_bindless_samplers; | ||||||
|     if (file.ReadArray(&unique_identifier, 1) != 1 || file.ReadArray(&bound_buffer, 1) != 1 || |     if (file.ReadArray(&unique_identifier, 1) != 1 || file.ReadArray(&bound_buffer, 1) != 1 || | ||||||
|         file.ReadArray(&is_texture_handler_size_known, 1) != 1 || |         file.ReadArray(&is_texture_handler_size_known, 1) != 1 || | ||||||
|         file.ReadArray(&texture_handler_size_value, 1) != 1 || file.ReadArray(&num_keys, 1) != 1 || |         file.ReadArray(&texture_handler_size_value, 1) != 1 || | ||||||
|         file.ReadArray(&num_bound_samplers, 1) != 1 || |         file.ReadArray(&graphics_info, 1) != 1 || file.ReadArray(&compute_info, 1) != 1 || | ||||||
|  |         file.ReadArray(&num_keys, 1) != 1 || file.ReadArray(&num_bound_samplers, 1) != 1 || | ||||||
|         file.ReadArray(&num_bindless_samplers, 1) != 1) { |         file.ReadArray(&num_bindless_samplers, 1) != 1) { | ||||||
|         return false; |         return false; | ||||||
|     } |     } | ||||||
|  | @ -136,8 +137,9 @@ bool ShaderDiskCacheEntry::Save(FileUtil::IOFile& file) const { | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     if (file.WriteObject(unique_identifier) != 1 || file.WriteObject(bound_buffer) != 1 || |     if (file.WriteObject(unique_identifier) != 1 || file.WriteObject(bound_buffer) != 1 || | ||||||
|         file.WriteObject(texture_handler_size.has_value()) != 1 || |         file.WriteObject(static_cast<u8>(texture_handler_size.has_value())) != 1 || | ||||||
|         file.WriteObject(texture_handler_size.value_or(0)) != 1 || |         file.WriteObject(texture_handler_size.value_or(0)) != 1 || | ||||||
|  |         file.WriteObject(graphics_info) != 1 || file.WriteObject(compute_info) != 1 || | ||||||
|         file.WriteObject(static_cast<u32>(keys.size())) != 1 || |         file.WriteObject(static_cast<u32>(keys.size())) != 1 || | ||||||
|         file.WriteObject(static_cast<u32>(bound_samplers.size())) != 1 || |         file.WriteObject(static_cast<u32>(bound_samplers.size())) != 1 || | ||||||
|         file.WriteObject(static_cast<u32>(bindless_samplers.size())) != 1) { |         file.WriteObject(static_cast<u32>(bindless_samplers.size())) != 1) { | ||||||
|  |  | ||||||
|  | @ -51,8 +51,10 @@ struct ShaderDiskCacheEntry { | ||||||
|     ProgramCode code_b; |     ProgramCode code_b; | ||||||
| 
 | 
 | ||||||
|     u64 unique_identifier = 0; |     u64 unique_identifier = 0; | ||||||
|     u32 bound_buffer = 0; |  | ||||||
|     std::optional<u32> texture_handler_size; |     std::optional<u32> texture_handler_size; | ||||||
|  |     u32 bound_buffer = 0; | ||||||
|  |     VideoCommon::Shader::GraphicsInfo graphics_info; | ||||||
|  |     VideoCommon::Shader::ComputeInfo compute_info; | ||||||
|     VideoCommon::Shader::KeyMap keys; |     VideoCommon::Shader::KeyMap keys; | ||||||
|     VideoCommon::Shader::BoundSamplerMap bound_samplers; |     VideoCommon::Shader::BoundSamplerMap bound_samplers; | ||||||
|     VideoCommon::Shader::BindlessSamplerMap bindless_samplers; |     VideoCommon::Shader::BindlessSamplerMap bindless_samplers; | ||||||
|  |  | ||||||
|  | @ -6,21 +6,55 @@ | ||||||
| #include <tuple> | #include <tuple> | ||||||
| 
 | 
 | ||||||
| #include "common/common_types.h" | #include "common/common_types.h" | ||||||
|  | #include "video_core/engines/kepler_compute.h" | ||||||
| #include "video_core/engines/maxwell_3d.h" | #include "video_core/engines/maxwell_3d.h" | ||||||
| #include "video_core/engines/shader_type.h" | #include "video_core/engines/shader_type.h" | ||||||
| #include "video_core/shader/registry.h" | #include "video_core/shader/registry.h" | ||||||
| 
 | 
 | ||||||
| namespace VideoCommon::Shader { | namespace VideoCommon::Shader { | ||||||
| 
 | 
 | ||||||
|  | using Tegra::Engines::ConstBufferEngineInterface; | ||||||
| using Tegra::Engines::SamplerDescriptor; | using Tegra::Engines::SamplerDescriptor; | ||||||
|  | using Tegra::Engines::ShaderType; | ||||||
| 
 | 
 | ||||||
| Registry::Registry(Tegra::Engines::ShaderType shader_stage, | namespace { | ||||||
|                    VideoCore::GuestDriverProfile stored_guest_driver_profile) | 
 | ||||||
|     : stage{shader_stage}, stored_guest_driver_profile{stored_guest_driver_profile} {} | GraphicsInfo MakeGraphicsInfo(ShaderType shader_stage, ConstBufferEngineInterface& engine) { | ||||||
|  |     if (shader_stage == ShaderType::Compute) { | ||||||
|  |         return {}; | ||||||
|  |     } | ||||||
|  |     auto& graphics = static_cast<Tegra::Engines::Maxwell3D&>(engine); | ||||||
|  | 
 | ||||||
|  |     GraphicsInfo info; | ||||||
|  |     info.primitive_topology = graphics.regs.draw.topology; | ||||||
|  |     return info; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | ComputeInfo MakeComputeInfo(ShaderType shader_stage, ConstBufferEngineInterface& engine) { | ||||||
|  |     if (shader_stage != ShaderType::Compute) { | ||||||
|  |         return {}; | ||||||
|  |     } | ||||||
|  |     auto& compute = static_cast<Tegra::Engines::KeplerCompute&>(engine); | ||||||
|  |     const auto& launch = compute.launch_description; | ||||||
|  | 
 | ||||||
|  |     ComputeInfo info; | ||||||
|  |     info.workgroup_size = {launch.block_dim_x, launch.block_dim_y, launch.block_dim_z}; | ||||||
|  |     info.local_memory_size_in_words = launch.local_pos_alloc; | ||||||
|  |     info.shared_memory_size_in_words = launch.shared_alloc; | ||||||
|  |     return info; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | } // Anonymous namespace
 | ||||||
|  | 
 | ||||||
|  | Registry::Registry(Tegra::Engines::ShaderType shader_stage, const SerializedRegistryInfo& info) | ||||||
|  |     : stage{shader_stage}, stored_guest_driver_profile{info.guest_driver_profile}, | ||||||
|  |       bound_buffer{info.bound_buffer}, graphics_info{info.graphics}, compute_info{info.compute} {} | ||||||
| 
 | 
 | ||||||
| Registry::Registry(Tegra::Engines::ShaderType shader_stage, | Registry::Registry(Tegra::Engines::ShaderType shader_stage, | ||||||
|                    Tegra::Engines::ConstBufferEngineInterface& engine) |                    Tegra::Engines::ConstBufferEngineInterface& engine) | ||||||
|     : stage{shader_stage}, engine{&engine} {} |     : stage{shader_stage}, engine{&engine}, bound_buffer{engine.GetBoundBuffer()}, | ||||||
|  |       graphics_info{MakeGraphicsInfo(shader_stage, engine)}, compute_info{MakeComputeInfo( | ||||||
|  |                                                                  shader_stage, engine)} {} | ||||||
| 
 | 
 | ||||||
| Registry::~Registry() = default; | Registry::~Registry() = default; | ||||||
| 
 | 
 | ||||||
|  | @ -67,18 +101,6 @@ std::optional<Tegra::Engines::SamplerDescriptor> Registry::ObtainBindlessSampler | ||||||
|     return value; |     return value; | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| std::optional<u32> Registry::ObtainBoundBuffer() { |  | ||||||
|     if (bound_buffer_saved) { |  | ||||||
|         return bound_buffer; |  | ||||||
|     } |  | ||||||
|     if (!engine) { |  | ||||||
|         return std::nullopt; |  | ||||||
|     } |  | ||||||
|     bound_buffer_saved = true; |  | ||||||
|     bound_buffer = engine->GetBoundBuffer(); |  | ||||||
|     return bound_buffer; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void Registry::InsertKey(u32 buffer, u32 offset, u32 value) { | void Registry::InsertKey(u32 buffer, u32 offset, u32 value) { | ||||||
|     keys.insert_or_assign({buffer, offset}, value); |     keys.insert_or_assign({buffer, offset}, value); | ||||||
| } | } | ||||||
|  | @ -91,11 +113,6 @@ void Registry::InsertBindlessSampler(u32 buffer, u32 offset, SamplerDescriptor s | ||||||
|     bindless_samplers.insert_or_assign({buffer, offset}, sampler); |     bindless_samplers.insert_or_assign({buffer, offset}, sampler); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void Registry::SetBoundBuffer(u32 buffer) { |  | ||||||
|     bound_buffer_saved = true; |  | ||||||
|     bound_buffer = buffer; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| bool Registry::IsConsistent() const { | bool Registry::IsConsistent() const { | ||||||
|     if (!engine) { |     if (!engine) { | ||||||
|         return true; |         return true; | ||||||
|  |  | ||||||
|  | @ -4,11 +4,16 @@ | ||||||
| 
 | 
 | ||||||
| #pragma once | #pragma once | ||||||
| 
 | 
 | ||||||
|  | #include <array> | ||||||
| #include <optional> | #include <optional> | ||||||
|  | #include <type_traits> | ||||||
| #include <unordered_map> | #include <unordered_map> | ||||||
|  | #include <utility> | ||||||
|  | 
 | ||||||
| #include "common/common_types.h" | #include "common/common_types.h" | ||||||
| #include "common/hash.h" | #include "common/hash.h" | ||||||
| #include "video_core/engines/const_buffer_engine_interface.h" | #include "video_core/engines/const_buffer_engine_interface.h" | ||||||
|  | #include "video_core/engines/maxwell_3d.h" | ||||||
| #include "video_core/engines/shader_type.h" | #include "video_core/engines/shader_type.h" | ||||||
| #include "video_core/guest_driver.h" | #include "video_core/guest_driver.h" | ||||||
| 
 | 
 | ||||||
|  | @ -19,6 +24,25 @@ using BoundSamplerMap = std::unordered_map<u32, Tegra::Engines::SamplerDescripto | ||||||
| using BindlessSamplerMap = | using BindlessSamplerMap = | ||||||
|     std::unordered_map<std::pair<u32, u32>, Tegra::Engines::SamplerDescriptor, Common::PairHash>; |     std::unordered_map<std::pair<u32, u32>, Tegra::Engines::SamplerDescriptor, Common::PairHash>; | ||||||
| 
 | 
 | ||||||
|  | struct GraphicsInfo { | ||||||
|  |     Tegra::Engines::Maxwell3D::Regs::PrimitiveTopology primitive_topology{}; | ||||||
|  | }; | ||||||
|  | static_assert(std::is_trivially_copyable_v<GraphicsInfo>); | ||||||
|  | 
 | ||||||
|  | struct ComputeInfo { | ||||||
|  |     std::array<u32, 3> workgroup_size{}; | ||||||
|  |     u32 shared_memory_size_in_words = 0; | ||||||
|  |     u32 local_memory_size_in_words = 0; | ||||||
|  | }; | ||||||
|  | static_assert(std::is_trivially_copyable_v<ComputeInfo>); | ||||||
|  | 
 | ||||||
|  | struct SerializedRegistryInfo { | ||||||
|  |     VideoCore::GuestDriverProfile guest_driver_profile; | ||||||
|  |     u32 bound_buffer = 0; | ||||||
|  |     GraphicsInfo graphics; | ||||||
|  |     ComputeInfo compute; | ||||||
|  | }; | ||||||
|  | 
 | ||||||
| /**
 | /**
 | ||||||
|  * The Registry is a class use to interface the 3D and compute engines with the shader compiler. |  * The Registry is a class use to interface the 3D and compute engines with the shader compiler. | ||||||
|  * With it, the shader can obtain required data from GPU state and store it for disk shader |  * With it, the shader can obtain required data from GPU state and store it for disk shader | ||||||
|  | @ -26,8 +50,7 @@ using BindlessSamplerMap = | ||||||
|  */ |  */ | ||||||
| class Registry { | class Registry { | ||||||
| public: | public: | ||||||
|     explicit Registry(Tegra::Engines::ShaderType shader_stage, |     explicit Registry(Tegra::Engines::ShaderType shader_stage, const SerializedRegistryInfo& info); | ||||||
|                       VideoCore::GuestDriverProfile stored_guest_driver_profile); |  | ||||||
| 
 | 
 | ||||||
|     explicit Registry(Tegra::Engines::ShaderType shader_stage, |     explicit Registry(Tegra::Engines::ShaderType shader_stage, | ||||||
|                       Tegra::Engines::ConstBufferEngineInterface& engine); |                       Tegra::Engines::ConstBufferEngineInterface& engine); | ||||||
|  | @ -42,8 +65,6 @@ public: | ||||||
| 
 | 
 | ||||||
|     std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset); |     std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset); | ||||||
| 
 | 
 | ||||||
|     std::optional<u32> ObtainBoundBuffer(); |  | ||||||
| 
 |  | ||||||
|     /// Inserts a key.
 |     /// Inserts a key.
 | ||||||
|     void InsertKey(u32 buffer, u32 offset, u32 value); |     void InsertKey(u32 buffer, u32 offset, u32 value); | ||||||
| 
 | 
 | ||||||
|  | @ -53,9 +74,6 @@ public: | ||||||
|     /// Inserts a bindless sampler key.
 |     /// Inserts a bindless sampler key.
 | ||||||
|     void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler); |     void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler); | ||||||
| 
 | 
 | ||||||
|     /// Set the bound buffer for this registry.
 |  | ||||||
|     void SetBoundBuffer(u32 buffer); |  | ||||||
| 
 |  | ||||||
|     /// Checks keys and samplers against engine's current const buffers.
 |     /// Checks keys and samplers against engine's current const buffers.
 | ||||||
|     /// Returns true if they are the same value, false otherwise.
 |     /// Returns true if they are the same value, false otherwise.
 | ||||||
|     bool IsConsistent() const; |     bool IsConsistent() const; | ||||||
|  | @ -83,6 +101,18 @@ public: | ||||||
|         return bound_buffer; |         return bound_buffer; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|  |     /// Returns compute information from this shader
 | ||||||
|  |     const GraphicsInfo& GetGraphicsInfo() const { | ||||||
|  |         ASSERT(stage != Tegra::Engines::ShaderType::Compute); | ||||||
|  |         return graphics_info; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     /// Returns compute information from this shader
 | ||||||
|  |     const ComputeInfo& GetComputeInfo() const { | ||||||
|  |         ASSERT(stage == Tegra::Engines::ShaderType::Compute); | ||||||
|  |         return compute_info; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|     /// Obtains access to the guest driver's profile.
 |     /// Obtains access to the guest driver's profile.
 | ||||||
|     VideoCore::GuestDriverProfile& AccessGuestDriverProfile() { |     VideoCore::GuestDriverProfile& AccessGuestDriverProfile() { | ||||||
|         return engine ? engine->AccessGuestDriverProfile() : stored_guest_driver_profile; |         return engine ? engine->AccessGuestDriverProfile() : stored_guest_driver_profile; | ||||||
|  | @ -95,8 +125,9 @@ private: | ||||||
|     KeyMap keys; |     KeyMap keys; | ||||||
|     BoundSamplerMap bound_samplers; |     BoundSamplerMap bound_samplers; | ||||||
|     BindlessSamplerMap bindless_samplers; |     BindlessSamplerMap bindless_samplers; | ||||||
|     bool bound_buffer_saved{}; |     u32 bound_buffer; | ||||||
|     u32 bound_buffer{}; |     GraphicsInfo graphics_info; | ||||||
|  |     ComputeInfo compute_info; | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| } // namespace VideoCommon::Shader
 | } // namespace VideoCommon::Shader
 | ||||||
|  |  | ||||||
|  | @ -81,14 +81,11 @@ std::tuple<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, cons | ||||||
|                 MakeTrackSampler<BindlessSamplerNode>(cbuf->GetIndex(), immediate->GetValue()); |                 MakeTrackSampler<BindlessSamplerNode>(cbuf->GetIndex(), immediate->GetValue()); | ||||||
|             return {tracked, track}; |             return {tracked, track}; | ||||||
|         } else if (const auto operation = std::get_if<OperationNode>(&*offset)) { |         } else if (const auto operation = std::get_if<OperationNode>(&*offset)) { | ||||||
|             const auto bound_buffer = registry.ObtainBoundBuffer(); |             const u32 bound_buffer = registry.GetBoundBuffer(); | ||||||
|             if (!bound_buffer) { |             if (bound_buffer != cbuf->GetIndex()) { | ||||||
|                 return {}; |                 return {}; | ||||||
|             } |             } | ||||||
|             if (*bound_buffer != cbuf->GetIndex()) { |             const auto pair = DecoupleIndirectRead(*operation); | ||||||
|                 return {}; |  | ||||||
|             } |  | ||||||
|             auto pair = DecoupleIndirectRead(*operation); |  | ||||||
|             if (!pair) { |             if (!pair) { | ||||||
|                 return {}; |                 return {}; | ||||||
|             } |             } | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 ReinUsesLisp
						ReinUsesLisp