forked from eden-emu/eden
		
	Merge pull request #2734 from ReinUsesLisp/compute-shaders
gl_rasterizer: Implement compute shaders
This commit is contained in:
		
						commit
						92195406c7
					
				
					 15 changed files with 358 additions and 141 deletions
				
			
		|  | @ -50,13 +50,14 @@ void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) { | |||
| } | ||||
| 
 | ||||
| void KeplerCompute::ProcessLaunch() { | ||||
| 
 | ||||
|     const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address(); | ||||
|     memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description, | ||||
|                                    LaunchParams::NUM_LAUNCH_PARAMETERS * sizeof(u32)); | ||||
| 
 | ||||
|     const GPUVAddr code_loc = regs.code_loc.Address() + launch_description.program_start; | ||||
|     LOG_WARNING(HW_GPU, "Compute Kernel Execute at Address 0x{:016x}, STUBBED", code_loc); | ||||
|     const GPUVAddr code_addr = regs.code_loc.Address() + launch_description.program_start; | ||||
|     LOG_TRACE(HW_GPU, "Compute invocation launched at address 0x{:016x}", code_addr); | ||||
| 
 | ||||
|     rasterizer.DispatchCompute(code_addr); | ||||
| } | ||||
| 
 | ||||
| } // namespace Tegra::Engines
 | ||||
|  |  | |||
|  | @ -50,6 +50,14 @@ const Engines::Maxwell3D& GPU::Maxwell3D() const { | |||
|     return *maxwell_3d; | ||||
| } | ||||
| 
 | ||||
/// Mutable accessor: dereferences `kepler_compute` and returns the engine it points to.
| Engines::KeplerCompute& GPU::KeplerCompute() { | ||||
|     return *kepler_compute; | ||||
| } | ||||
| 
 | ||||
/// Const accessor: dereferences `kepler_compute` and returns the engine it points to, read-only.
| const Engines::KeplerCompute& GPU::KeplerCompute() const { | ||||
|     return *kepler_compute; | ||||
| } | ||||
| 
 | ||||
| MemoryManager& GPU::MemoryManager() { | ||||
|     return *memory_manager; | ||||
| } | ||||
|  |  | |||
|  | @ -155,6 +155,12 @@ public: | |||
|     /// Returns a const reference to the Maxwell3D GPU engine.
 | ||||
|     const Engines::Maxwell3D& Maxwell3D() const; | ||||
| 
 | ||||
|     /// Returns a reference to the KeplerCompute GPU engine.
 | ||||
|     Engines::KeplerCompute& KeplerCompute(); | ||||
| 
 | ||||
|     /// Returns a const reference to the KeplerCompute GPU engine.
 | ||||
|     const Engines::KeplerCompute& KeplerCompute() const; | ||||
| 
 | ||||
|     /// Returns a reference to the GPU memory manager.
 | ||||
|     Tegra::MemoryManager& MemoryManager(); | ||||
| 
 | ||||
|  |  | |||
|  | @ -34,6 +34,9 @@ public: | |||
|     /// Clear the current framebuffer
 | ||||
|     virtual void Clear() = 0; | ||||
| 
 | ||||
|     /// Dispatches a compute shader invocation
 | ||||
|     virtual void DispatchCompute(GPUVAddr code_addr) = 0; | ||||
| 
 | ||||
|     /// Notify rasterizer that all caches should be flushed to Switch memory
 | ||||
|     virtual void FlushAll() = 0; | ||||
| 
 | ||||
|  |  | |||
|  | @ -4,6 +4,7 @@ | |||
| 
 | ||||
| #include <algorithm> | ||||
| #include <array> | ||||
| #include <bitset> | ||||
| #include <memory> | ||||
| #include <string> | ||||
| #include <string_view> | ||||
|  | @ -19,6 +20,7 @@ | |||
| #include "core/core.h" | ||||
| #include "core/hle/kernel/process.h" | ||||
| #include "core/settings.h" | ||||
| #include "video_core/engines/kepler_compute.h" | ||||
| #include "video_core/engines/maxwell_3d.h" | ||||
| #include "video_core/memory_manager.h" | ||||
| #include "video_core/renderer_opengl/gl_rasterizer.h" | ||||
|  | @ -326,9 +328,9 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { | |||
| 
 | ||||
|         Shader shader{shader_cache.GetStageProgram(program)}; | ||||
| 
 | ||||
|         const auto stage_enum{static_cast<Maxwell::ShaderStage>(stage)}; | ||||
|         const auto stage_enum = static_cast<Maxwell::ShaderStage>(stage); | ||||
|         SetupDrawConstBuffers(stage_enum, shader); | ||||
|         SetupGlobalRegions(stage_enum, shader); | ||||
|         SetupDrawGlobalMemory(stage_enum, shader); | ||||
|         const auto texture_buffer_usage{SetupTextures(stage_enum, shader, base_bindings)}; | ||||
| 
 | ||||
|         const ProgramVariant variant{base_bindings, primitive_mode, texture_buffer_usage}; | ||||
|  | @ -783,6 +785,45 @@ void RasterizerOpenGL::DrawArrays() { | |||
|     gpu.dirty.memory_general = false; | ||||
| } | ||||
| 
 | ||||
| void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { | ||||
|     if (!GLAD_GL_ARB_compute_variable_group_size) { | ||||
|         LOG_ERROR(Render_OpenGL, "Compute is currently not supported on this device due to the " | ||||
|                                  "lack of GL_ARB_compute_variable_group_size"); | ||||
|         return; | ||||
|     } | ||||
| 
 | ||||
|     auto kernel = shader_cache.GetComputeKernel(code_addr); | ||||
|     const auto [program, next_bindings] = kernel->GetProgramHandle({}); | ||||
|     state.draw.shader_program = program; | ||||
|     state.draw.program_pipeline = 0; | ||||
| 
 | ||||
|     const std::size_t buffer_size = | ||||
|         Tegra::Engines::KeplerCompute::NumConstBuffers * | ||||
|         (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment()); | ||||
|     buffer_cache.Map(buffer_size); | ||||
| 
 | ||||
|     bind_ubo_pushbuffer.Setup(0); | ||||
|     bind_ssbo_pushbuffer.Setup(0); | ||||
| 
 | ||||
|     SetupComputeConstBuffers(kernel); | ||||
|     SetupComputeGlobalMemory(kernel); | ||||
| 
 | ||||
|     // TODO(Rodrigo): Bind images and samplers
 | ||||
| 
 | ||||
|     buffer_cache.Unmap(); | ||||
| 
 | ||||
|     bind_ubo_pushbuffer.Bind(); | ||||
|     bind_ssbo_pushbuffer.Bind(); | ||||
| 
 | ||||
|     state.ApplyShaderProgram(); | ||||
|     state.ApplyProgramPipeline(); | ||||
| 
 | ||||
|     const auto& launch_desc = system.GPU().KeplerCompute().launch_description; | ||||
|     glDispatchComputeGroupSizeARB(launch_desc.grid_dim_x, launch_desc.grid_dim_y, | ||||
|                                   launch_desc.grid_dim_z, launch_desc.block_dim_x, | ||||
|                                   launch_desc.block_dim_y, launch_desc.block_dim_z); | ||||
| } | ||||
| 
 | ||||
| void RasterizerOpenGL::FlushAll() {} | ||||
| 
 | ||||
| void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) { | ||||
|  | @ -856,12 +897,25 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, | |||
| void RasterizerOpenGL::SetupDrawConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, | ||||
|                                              const Shader& shader) { | ||||
|     MICROPROFILE_SCOPE(OpenGL_UBO); | ||||
|     const auto stage_index = static_cast<std::size_t>(stage); | ||||
|     const auto& shader_stage = system.GPU().Maxwell3D().state.shader_stages[stage_index]; | ||||
| 
 | ||||
|     // Upload only the enabled buffers from the 16 constbuffers of each shader stage
 | ||||
|     const auto& stages = system.GPU().Maxwell3D().state.shader_stages; | ||||
|     const auto& shader_stage = stages[static_cast<std::size_t>(stage)]; | ||||
|     for (const auto& entry : shader->GetShaderEntries().const_buffers) { | ||||
|         SetupConstBuffer(shader_stage.const_buffers[entry.GetIndex()], entry); | ||||
|         const auto& buffer = shader_stage.const_buffers[entry.GetIndex()]; | ||||
|         SetupConstBuffer(buffer, entry); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) { | ||||
|     MICROPROFILE_SCOPE(OpenGL_UBO); | ||||
|     const auto& launch_desc = system.GPU().KeplerCompute().launch_description; | ||||
|     for (const auto& entry : kernel->GetShaderEntries().const_buffers) { | ||||
|         const auto& config = launch_desc.const_buffer_config[entry.GetIndex()]; | ||||
|         const std::bitset<8> mask = launch_desc.memory_config.const_buffer_enable_mask.Value(); | ||||
|         Tegra::Engines::ConstBufferInfo buffer; | ||||
|         buffer.address = config.Address(); | ||||
|         buffer.size = config.size; | ||||
|         buffer.enabled = mask[entry.GetIndex()]; | ||||
|         SetupConstBuffer(buffer, entry); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
|  | @ -882,24 +936,39 @@ void RasterizerOpenGL::SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& b | |||
|     bind_ubo_pushbuffer.Push(cbuf, offset, size); | ||||
| } | ||||
| 
 | ||||
| void RasterizerOpenGL::SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, | ||||
| void RasterizerOpenGL::SetupDrawGlobalMemory(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, | ||||
|                                              const Shader& shader) { | ||||
|     auto& gpu{system.GPU()}; | ||||
|     auto& memory_manager{gpu.MemoryManager()}; | ||||
|     const auto cbufs{gpu.Maxwell3D().state.shader_stages[static_cast<std::size_t>(stage)]}; | ||||
|     const auto alignment{device.GetShaderStorageBufferAlignment()}; | ||||
| 
 | ||||
|     for (const auto& entry : shader->GetShaderEntries().global_memory_entries) { | ||||
|         const auto addr{cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset()}; | ||||
|         const auto actual_addr{memory_manager.Read<u64>(addr)}; | ||||
|         const auto gpu_addr{memory_manager.Read<u64>(addr)}; | ||||
|         const auto size{memory_manager.Read<u32>(addr + 8)}; | ||||
| 
 | ||||
|         const auto [ssbo, buffer_offset] = | ||||
|             buffer_cache.UploadMemory(actual_addr, size, alignment, true, entry.IsWritten()); | ||||
|         bind_ssbo_pushbuffer.Push(ssbo, buffer_offset, static_cast<GLsizeiptr>(size)); | ||||
|         SetupGlobalMemory(entry, gpu_addr, size); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void RasterizerOpenGL::SetupComputeGlobalMemory(const Shader& kernel) { | ||||
|     auto& gpu{system.GPU()}; | ||||
|     auto& memory_manager{gpu.MemoryManager()}; | ||||
|     const auto cbufs{gpu.KeplerCompute().launch_description.const_buffer_config}; | ||||
|     for (const auto& entry : kernel->GetShaderEntries().global_memory_entries) { | ||||
|         const auto addr{cbufs[entry.GetCbufIndex()].Address() + entry.GetCbufOffset()}; | ||||
|         const auto gpu_addr{memory_manager.Read<u64>(addr)}; | ||||
|         const auto size{memory_manager.Read<u32>(addr + 8)}; | ||||
|         SetupGlobalMemory(entry, gpu_addr, size); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void RasterizerOpenGL::SetupGlobalMemory(const GLShader::GlobalMemoryEntry& entry, | ||||
|                                          GPUVAddr gpu_addr, std::size_t size) { | ||||
|     const auto alignment{device.GetShaderStorageBufferAlignment()}; | ||||
|     const auto [ssbo, buffer_offset] = | ||||
|         buffer_cache.UploadMemory(gpu_addr, size, alignment, true, entry.IsWritten()); | ||||
|     bind_ssbo_pushbuffer.Push(ssbo, buffer_offset, static_cast<GLsizeiptr>(size)); | ||||
| } | ||||
| 
 | ||||
| TextureBufferUsage RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& shader, | ||||
|                                                    BaseBindings base_bindings) { | ||||
|     MICROPROFILE_SCOPE(OpenGL_Texture); | ||||
|  |  | |||
|  | @ -58,6 +58,7 @@ public: | |||
| 
 | ||||
|     void DrawArrays() override; | ||||
|     void Clear() override; | ||||
|     void DispatchCompute(GPUVAddr code_addr) override; | ||||
|     void FlushAll() override; | ||||
|     void FlushRegion(CacheAddr addr, u64 size) override; | ||||
|     void InvalidateRegion(CacheAddr addr, u64 size) override; | ||||
|  | @ -115,14 +116,24 @@ private: | |||
|     void SetupDrawConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, | ||||
|                                const Shader& shader); | ||||
| 
 | ||||
|     /// Configures the current constbuffers to use for the kernel invocation.
 | ||||
|     void SetupComputeConstBuffers(const Shader& kernel); | ||||
| 
 | ||||
|     /// Configures a constant buffer.
 | ||||
|     void SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& buffer, | ||||
|                           const GLShader::ConstBufferEntry& entry); | ||||
| 
 | ||||
|     /// Configures the current global memory entries to use for the draw command.
 | ||||
|     void SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, | ||||
|     void SetupDrawGlobalMemory(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, | ||||
|                                const Shader& shader); | ||||
| 
 | ||||
|     /// Configures the current global memory entries to use for the kernel invocation.
 | ||||
|     void SetupComputeGlobalMemory(const Shader& kernel); | ||||
| 
 | ||||
|     /// Configures a global memory entry.
 | ||||
|     void SetupGlobalMemory(const GLShader::GlobalMemoryEntry& entry, GPUVAddr gpu_addr, | ||||
|                            std::size_t size); | ||||
| 
 | ||||
|     /// Configures the current textures to use for the draw command. Returns shaders texture buffer
 | ||||
|     /// usage.
 | ||||
|     TextureBufferUsage SetupTextures(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, | ||||
|  |  | |||
|  | @ -23,13 +23,13 @@ namespace OpenGL { | |||
| 
 | ||||
| using VideoCommon::Shader::ProgramCode; | ||||
| 
 | ||||
| // One UBO is always reserved for emulation values
 | ||||
| constexpr u32 RESERVED_UBOS = 1; | ||||
| // One UBO is always reserved for emulation values on staged shaders
 | ||||
| constexpr u32 STAGE_RESERVED_UBOS = 1; | ||||
| 
 | ||||
| struct UnspecializedShader { | ||||
|     std::string code; | ||||
|     GLShader::ShaderEntries entries; | ||||
|     Maxwell::ShaderProgram program_type; | ||||
|     ProgramType program_type; | ||||
| }; | ||||
| 
 | ||||
| namespace { | ||||
|  | @ -55,15 +55,17 @@ ProgramCode GetShaderCode(Tegra::MemoryManager& memory_manager, const GPUVAddr g | |||
| } | ||||
| 
 | ||||
| /// Gets the OpenGL shader type from a program type
 | ||||
| constexpr GLenum GetShaderType(Maxwell::ShaderProgram program_type) { | ||||
| constexpr GLenum GetShaderType(ProgramType program_type) { | ||||
|     switch (program_type) { | ||||
|     case Maxwell::ShaderProgram::VertexA: | ||||
|     case Maxwell::ShaderProgram::VertexB: | ||||
|     case ProgramType::VertexA: | ||||
|     case ProgramType::VertexB: | ||||
|         return GL_VERTEX_SHADER; | ||||
|     case Maxwell::ShaderProgram::Geometry: | ||||
|     case ProgramType::Geometry: | ||||
|         return GL_GEOMETRY_SHADER; | ||||
|     case Maxwell::ShaderProgram::Fragment: | ||||
|     case ProgramType::Fragment: | ||||
|         return GL_FRAGMENT_SHADER; | ||||
|     case ProgramType::Compute: | ||||
|         return GL_COMPUTE_SHADER; | ||||
|     default: | ||||
|         return GL_NONE; | ||||
|     } | ||||
|  | @ -100,6 +102,25 @@ constexpr std::tuple<const char*, const char*, u32> GetPrimitiveDescription(GLen | |||
|     } | ||||
| } | ||||
| 
 | ||||
| ProgramType GetProgramType(Maxwell::ShaderProgram program) { | ||||
|     switch (program) { | ||||
|     case Maxwell::ShaderProgram::VertexA: | ||||
|         return ProgramType::VertexA; | ||||
|     case Maxwell::ShaderProgram::VertexB: | ||||
|         return ProgramType::VertexB; | ||||
|     case Maxwell::ShaderProgram::TesselationControl: | ||||
|         return ProgramType::TessellationControl; | ||||
|     case Maxwell::ShaderProgram::TesselationEval: | ||||
|         return ProgramType::TessellationEval; | ||||
|     case Maxwell::ShaderProgram::Geometry: | ||||
|         return ProgramType::Geometry; | ||||
|     case Maxwell::ShaderProgram::Fragment: | ||||
|         return ProgramType::Fragment; | ||||
|     } | ||||
|     UNREACHABLE(); | ||||
|     return {}; | ||||
| } | ||||
| 
 | ||||
| /// Calculates the size of a program stream
 | ||||
| std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) { | ||||
|     constexpr std::size_t start_offset = 10; | ||||
|  | @ -128,13 +149,13 @@ std::size_t CalculateProgramSize(const GLShader::ProgramCode& program) { | |||
| } | ||||
| 
 | ||||
| /// Hashes one (or two) program streams
 | ||||
| u64 GetUniqueIdentifier(Maxwell::ShaderProgram program_type, const ProgramCode& code, | ||||
| u64 GetUniqueIdentifier(ProgramType program_type, const ProgramCode& code, | ||||
|                         const ProgramCode& code_b, std::size_t size_a = 0, std::size_t size_b = 0) { | ||||
|     if (size_a == 0) { | ||||
|         size_a = CalculateProgramSize(code); | ||||
|     } | ||||
|     u64 unique_identifier = Common::CityHash64(reinterpret_cast<const char*>(code.data()), size_a); | ||||
|     if (program_type != Maxwell::ShaderProgram::VertexA) { | ||||
|     if (program_type != ProgramType::VertexA) { | ||||
|         return unique_identifier; | ||||
|     } | ||||
|     // VertexA programs include two programs
 | ||||
|  | @ -152,12 +173,12 @@ u64 GetUniqueIdentifier(Maxwell::ShaderProgram program_type, const ProgramCode& | |||
| } | ||||
| 
 | ||||
| /// Creates an unspecialized program from code streams
 | ||||
| GLShader::ProgramResult CreateProgram(const Device& device, Maxwell::ShaderProgram program_type, | ||||
| GLShader::ProgramResult CreateProgram(const Device& device, ProgramType program_type, | ||||
|                                       ProgramCode program_code, ProgramCode program_code_b) { | ||||
|     GLShader::ShaderSetup setup(program_code); | ||||
|     setup.program.size_a = CalculateProgramSize(program_code); | ||||
|     setup.program.size_b = 0; | ||||
|     if (program_type == Maxwell::ShaderProgram::VertexA) { | ||||
|     if (program_type == ProgramType::VertexA) { | ||||
|         // VertexB is always enabled, so when VertexA is enabled, we have two vertex shaders.
 | ||||
|         // Conventional HW does not support this, so we combine VertexA and VertexB into one
 | ||||
|         // stage here.
 | ||||
|  | @ -168,22 +189,23 @@ GLShader::ProgramResult CreateProgram(const Device& device, Maxwell::ShaderProgr | |||
|         program_type, program_code, program_code_b, setup.program.size_a, setup.program.size_b); | ||||
| 
 | ||||
|     switch (program_type) { | ||||
|     case Maxwell::ShaderProgram::VertexA: | ||||
|     case Maxwell::ShaderProgram::VertexB: | ||||
|     case ProgramType::VertexA: | ||||
|     case ProgramType::VertexB: | ||||
|         return GLShader::GenerateVertexShader(device, setup); | ||||
|     case Maxwell::ShaderProgram::Geometry: | ||||
|     case ProgramType::Geometry: | ||||
|         return GLShader::GenerateGeometryShader(device, setup); | ||||
|     case Maxwell::ShaderProgram::Fragment: | ||||
|     case ProgramType::Fragment: | ||||
|         return GLShader::GenerateFragmentShader(device, setup); | ||||
|     case ProgramType::Compute: | ||||
|         return GLShader::GenerateComputeShader(device, setup); | ||||
|     default: | ||||
|         LOG_CRITICAL(HW_GPU, "Unimplemented program_type={}", static_cast<u32>(program_type)); | ||||
|         UNREACHABLE(); | ||||
|         UNIMPLEMENTED_MSG("Unimplemented program_type={}", static_cast<u32>(program_type)); | ||||
|         return {}; | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEntries& entries, | ||||
|                                Maxwell::ShaderProgram program_type, const ProgramVariant& variant, | ||||
|                                ProgramType program_type, const ProgramVariant& variant, | ||||
|                                bool hint_retrievable = false) { | ||||
|     auto base_bindings{variant.base_bindings}; | ||||
|     const auto primitive_mode{variant.primitive_mode}; | ||||
|  | @ -194,7 +216,14 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn | |||
|     if (entries.shader_viewport_layer_array) { | ||||
|         source += "#extension GL_ARB_shader_viewport_layer_array : enable\n"; | ||||
|     } | ||||
|     source += fmt::format("\n#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++); | ||||
|     if (program_type == ProgramType::Compute) { | ||||
|         source += "#extension GL_ARB_compute_variable_group_size : require\n"; | ||||
|     } | ||||
|     source += '\n'; | ||||
| 
 | ||||
|     if (program_type != ProgramType::Compute) { | ||||
|         source += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++); | ||||
|     } | ||||
| 
 | ||||
|     for (const auto& cbuf : entries.const_buffers) { | ||||
|         source += | ||||
|  | @ -221,13 +250,16 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn | |||
|         source += fmt::format("#define SAMPLER_{}_IS_BUFFER", i); | ||||
|     } | ||||
| 
 | ||||
|     if (program_type == Maxwell::ShaderProgram::Geometry) { | ||||
|     if (program_type == ProgramType::Geometry) { | ||||
|         const auto [glsl_topology, debug_name, max_vertices] = | ||||
|             GetPrimitiveDescription(primitive_mode); | ||||
| 
 | ||||
|         source += "layout (" + std::string(glsl_topology) + ") in;\n"; | ||||
|         source += "#define MAX_VERTEX_INPUT " + std::to_string(max_vertices) + '\n'; | ||||
|     } | ||||
|     if (program_type == ProgramType::Compute) { | ||||
|         source += "layout (local_size_variable) in;\n"; | ||||
|     } | ||||
| 
 | ||||
|     source += code; | ||||
| 
 | ||||
|  | @ -255,7 +287,7 @@ std::set<GLenum> GetSupportedFormats() { | |||
| 
 | ||||
| } // Anonymous namespace
 | ||||
| 
 | ||||
| CachedShader::CachedShader(const ShaderParameters& params, Maxwell::ShaderProgram program_type, | ||||
| CachedShader::CachedShader(const ShaderParameters& params, ProgramType program_type, | ||||
|                            GLShader::ProgramResult result) | ||||
|     : RasterizerCacheObject{params.host_ptr}, host_ptr{params.host_ptr}, cpu_addr{params.cpu_addr}, | ||||
|       unique_identifier{params.unique_identifier}, program_type{program_type}, | ||||
|  | @ -268,29 +300,50 @@ Shader CachedShader::CreateStageFromMemory(const ShaderParameters& params, | |||
|                                            ProgramCode&& program_code_b) { | ||||
|     const auto code_size{CalculateProgramSize(program_code)}; | ||||
|     const auto code_size_b{CalculateProgramSize(program_code_b)}; | ||||
|     auto result{CreateProgram(params.device, program_type, program_code, program_code_b)}; | ||||
|     auto result{ | ||||
|         CreateProgram(params.device, GetProgramType(program_type), program_code, program_code_b)}; | ||||
|     if (result.first.empty()) { | ||||
|         // TODO(Rodrigo): Unimplemented shader stages hit here, avoid using these for now
 | ||||
|         return {}; | ||||
|     } | ||||
| 
 | ||||
|     params.disk_cache.SaveRaw(ShaderDiskCacheRaw( | ||||
|         params.unique_identifier, program_type, static_cast<u32>(code_size / sizeof(u64)), | ||||
|         static_cast<u32>(code_size_b / sizeof(u64)), std::move(program_code), | ||||
|         std::move(program_code_b))); | ||||
|         params.unique_identifier, GetProgramType(program_type), | ||||
|         static_cast<u32>(code_size / sizeof(u64)), static_cast<u32>(code_size_b / sizeof(u64)), | ||||
|         std::move(program_code), std::move(program_code_b))); | ||||
| 
 | ||||
|     return std::shared_ptr<CachedShader>(new CachedShader(params, program_type, std::move(result))); | ||||
|     return std::shared_ptr<CachedShader>( | ||||
|         new CachedShader(params, GetProgramType(program_type), std::move(result))); | ||||
| } | ||||
| 
 | ||||
| Shader CachedShader::CreateStageFromCache(const ShaderParameters& params, | ||||
|                                           Maxwell::ShaderProgram program_type, | ||||
|                                           GLShader::ProgramResult result) { | ||||
|     return std::shared_ptr<CachedShader>(new CachedShader(params, program_type, std::move(result))); | ||||
|     return std::shared_ptr<CachedShader>( | ||||
|         new CachedShader(params, GetProgramType(program_type), std::move(result))); | ||||
| } | ||||
| 
 | ||||
| Shader CachedShader::CreateKernelFromMemory(const ShaderParameters& params, ProgramCode&& code) { | ||||
|     auto result{CreateProgram(params.device, ProgramType::Compute, code, {})}; | ||||
| 
 | ||||
|     const auto code_size{CalculateProgramSize(code)}; | ||||
|     params.disk_cache.SaveRaw(ShaderDiskCacheRaw(params.unique_identifier, ProgramType::Compute, | ||||
|                                                  static_cast<u32>(code_size / sizeof(u64)), 0, | ||||
|                                                  std::move(code), {})); | ||||
| 
 | ||||
|     return std::shared_ptr<CachedShader>( | ||||
|         new CachedShader(params, ProgramType::Compute, std::move(result))); | ||||
| } | ||||
| 
 | ||||
| Shader CachedShader::CreateKernelFromCache(const ShaderParameters& params, | ||||
|                                            GLShader::ProgramResult result) { | ||||
|     return std::shared_ptr<CachedShader>( | ||||
|         new CachedShader(params, ProgramType::Compute, std::move(result))); | ||||
| } | ||||
| 
 | ||||
| std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(const ProgramVariant& variant) { | ||||
|     GLuint handle{}; | ||||
|     if (program_type == Maxwell::ShaderProgram::Geometry) { | ||||
|     if (program_type == ProgramType::Geometry) { | ||||
|         handle = GetGeometryShader(variant); | ||||
|     } else { | ||||
|         const auto [entry, is_cache_miss] = programs.try_emplace(variant); | ||||
|  | @ -308,8 +361,11 @@ std::tuple<GLuint, BaseBindings> CachedShader::GetProgramHandle(const ProgramVar | |||
|         handle = program->handle; | ||||
|     } | ||||
| 
 | ||||
|     auto base_bindings{variant.base_bindings}; | ||||
|     base_bindings.cbuf += static_cast<u32>(entries.const_buffers.size()) + RESERVED_UBOS; | ||||
|     auto base_bindings = variant.base_bindings; | ||||
|     base_bindings.cbuf += static_cast<u32>(entries.const_buffers.size()); | ||||
|     if (program_type != ProgramType::Compute) { | ||||
|         base_bindings.cbuf += STAGE_RESERVED_UBOS; | ||||
|     } | ||||
|     base_bindings.gmem += static_cast<u32>(entries.global_memory_entries.size()); | ||||
|     base_bindings.sampler += static_cast<u32>(entries.samplers.size()); | ||||
| 
 | ||||
|  | @ -589,13 +645,15 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { | |||
|     // No shader found - create a new one
 | ||||
|     ProgramCode program_code{GetShaderCode(memory_manager, program_addr, host_ptr)}; | ||||
|     ProgramCode program_code_b; | ||||
|     if (program == Maxwell::ShaderProgram::VertexA) { | ||||
|     const bool is_program_a{program == Maxwell::ShaderProgram::VertexA}; | ||||
|     if (is_program_a) { | ||||
|         const GPUVAddr program_addr_b{GetShaderAddress(system, Maxwell::ShaderProgram::VertexB)}; | ||||
|         program_code_b = GetShaderCode(memory_manager, program_addr_b, | ||||
|                                        memory_manager.GetPointer(program_addr_b)); | ||||
|     } | ||||
| 
 | ||||
|     const auto unique_identifier = GetUniqueIdentifier(program, program_code, program_code_b); | ||||
|     const auto unique_identifier = | ||||
|         GetUniqueIdentifier(GetProgramType(program), program_code, program_code_b); | ||||
|     const auto cpu_addr{*memory_manager.GpuToCpuAddress(program_addr)}; | ||||
|     const ShaderParameters params{disk_cache, precompiled_programs, device, cpu_addr, | ||||
|                                   host_ptr,   unique_identifier}; | ||||
|  | @ -612,4 +670,30 @@ Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) { | |||
|     return last_shaders[static_cast<std::size_t>(program)] = shader; | ||||
| } | ||||
| 
 | ||||
| Shader ShaderCacheOpenGL::GetComputeKernel(GPUVAddr code_addr) { | ||||
|     auto& memory_manager{system.GPU().MemoryManager()}; | ||||
|     const auto host_ptr{memory_manager.GetPointer(code_addr)}; | ||||
|     auto kernel = TryGet(host_ptr); | ||||
|     if (kernel) { | ||||
|         return kernel; | ||||
|     } | ||||
| 
 | ||||
|     // No kernel found - create a new one
 | ||||
|     auto code{GetShaderCode(memory_manager, code_addr, host_ptr)}; | ||||
|     const auto unique_identifier{GetUniqueIdentifier(ProgramType::Compute, code, {})}; | ||||
|     const auto cpu_addr{*memory_manager.GpuToCpuAddress(code_addr)}; | ||||
|     const ShaderParameters params{disk_cache, precompiled_programs, device, cpu_addr, | ||||
|                                   host_ptr,   unique_identifier}; | ||||
| 
 | ||||
|     const auto found = precompiled_shaders.find(unique_identifier); | ||||
|     if (found == precompiled_shaders.end()) { | ||||
|         kernel = CachedShader::CreateKernelFromMemory(params, std::move(code)); | ||||
|     } else { | ||||
|         kernel = CachedShader::CreateKernelFromCache(params, found->second); | ||||
|     } | ||||
| 
 | ||||
|     Register(kernel); | ||||
|     return kernel; | ||||
| } | ||||
| 
 | ||||
| } // namespace OpenGL
 | ||||
|  |  | |||
|  | @ -61,6 +61,11 @@ public: | |||
|                                        Maxwell::ShaderProgram program_type, | ||||
|                                        GLShader::ProgramResult result); | ||||
| 
 | ||||
|     static Shader CreateKernelFromMemory(const ShaderParameters& params, ProgramCode&& code); | ||||
| 
 | ||||
|     static Shader CreateKernelFromCache(const ShaderParameters& params, | ||||
|                                         GLShader::ProgramResult result); | ||||
| 
 | ||||
|     VAddr GetCpuAddr() const override { | ||||
|         return cpu_addr; | ||||
|     } | ||||
|  | @ -78,7 +83,7 @@ public: | |||
|     std::tuple<GLuint, BaseBindings> GetProgramHandle(const ProgramVariant& variant); | ||||
| 
 | ||||
| private: | ||||
|     explicit CachedShader(const ShaderParameters& params, Maxwell::ShaderProgram program_type, | ||||
|     explicit CachedShader(const ShaderParameters& params, ProgramType program_type, | ||||
|                           GLShader::ProgramResult result); | ||||
| 
 | ||||
|     // Geometry programs. These are needed because GLSL needs an input topology but it's not
 | ||||
|  | @ -104,7 +109,7 @@ private: | |||
|     u8* host_ptr{}; | ||||
|     VAddr cpu_addr{}; | ||||
|     u64 unique_identifier{}; | ||||
|     Maxwell::ShaderProgram program_type{}; | ||||
|     ProgramType program_type{}; | ||||
|     ShaderDiskCacheOpenGL& disk_cache; | ||||
|     const PrecompiledPrograms& precompiled_programs; | ||||
| 
 | ||||
|  | @ -132,6 +137,9 @@ public: | |||
|     /// Gets the current specified shader stage program
 | ||||
|     Shader GetStageProgram(Maxwell::ShaderProgram program); | ||||
| 
 | ||||
|     /// Gets the compute kernel located at the passed address
 | ||||
|     Shader GetComputeKernel(GPUVAddr code_addr); | ||||
| 
 | ||||
| protected: | ||||
|     // We do not have to flush this cache as things in it are never modified by us.
 | ||||
|     void FlushObjectInner(const Shader& object) override {} | ||||
|  |  | |||
|  | @ -37,7 +37,6 @@ using namespace std::string_literals; | |||
| using namespace VideoCommon::Shader; | ||||
| 
 | ||||
| using Maxwell = Tegra::Engines::Maxwell3D::Regs; | ||||
| using ShaderStage = Tegra::Engines::Maxwell3D::Regs::ShaderStage; | ||||
| using Operation = const OperationNode&; | ||||
| 
 | ||||
| enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat }; | ||||
|  | @ -162,9 +161,13 @@ std::string FlowStackTopName(MetaStackClass stack) { | |||
|     return fmt::format("{}_flow_stack_top", GetFlowStackPrefix(stack)); | ||||
| } | ||||
| 
 | ||||
| constexpr bool IsVertexShader(ProgramType stage) { | ||||
|     return stage == ProgramType::VertexA || stage == ProgramType::VertexB; | ||||
| } | ||||
| 
 | ||||
| class GLSLDecompiler final { | ||||
| public: | ||||
|     explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ShaderStage stage, | ||||
|     explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ProgramType stage, | ||||
|                             std::string suffix) | ||||
|         : device{device}, ir{ir}, stage{stage}, suffix{suffix}, header{ir.GetHeader()} {} | ||||
| 
 | ||||
|  | @ -248,21 +251,21 @@ public: | |||
|         } | ||||
|         entries.clip_distances = ir.GetClipDistances(); | ||||
|         entries.shader_viewport_layer_array = | ||||
|             stage == ShaderStage::Vertex && (ir.UsesLayer() || ir.UsesViewportIndex()); | ||||
|             IsVertexShader(stage) && (ir.UsesLayer() || ir.UsesViewportIndex()); | ||||
|         entries.shader_length = ir.GetLength(); | ||||
|         return entries; | ||||
|     } | ||||
| 
 | ||||
| private: | ||||
|     void DeclareVertex() { | ||||
|         if (stage != ShaderStage::Vertex) | ||||
|         if (!IsVertexShader(stage)) | ||||
|             return; | ||||
| 
 | ||||
|         DeclareVertexRedeclarations(); | ||||
|     } | ||||
| 
 | ||||
|     void DeclareGeometry() { | ||||
|         if (stage != ShaderStage::Geometry) { | ||||
|         if (stage != ProgramType::Geometry) { | ||||
|             return; | ||||
|         } | ||||
| 
 | ||||
|  | @ -293,14 +296,14 @@ private: | |||
|                 break; | ||||
|             } | ||||
|         } | ||||
|         if (stage != ShaderStage::Vertex || device.HasVertexViewportLayer()) { | ||||
|         if (!IsVertexShader(stage) || device.HasVertexViewportLayer()) { | ||||
|             if (ir.UsesLayer()) { | ||||
|                 code.AddLine("int gl_Layer;"); | ||||
|             } | ||||
|             if (ir.UsesViewportIndex()) { | ||||
|                 code.AddLine("int gl_ViewportIndex;"); | ||||
|             } | ||||
|         } else if ((ir.UsesLayer() || ir.UsesViewportIndex()) && stage == ShaderStage::Vertex && | ||||
|         } else if ((ir.UsesLayer() || ir.UsesViewportIndex()) && IsVertexShader(stage) && | ||||
|                    !device.HasVertexViewportLayer()) { | ||||
|             LOG_ERROR( | ||||
|                 Render_OpenGL, | ||||
|  | @ -337,12 +340,17 @@ private: | |||
|     } | ||||
| 
 | ||||
|     void DeclareLocalMemory() { | ||||
|         if (const u64 local_memory_size = header.GetLocalMemorySize(); local_memory_size > 0) { | ||||
|         // TODO(Rodrigo): Unstub kernel local memory size and pass it from a register at
 | ||||
|         // specialization time.
 | ||||
|         const u64 local_memory_size = | ||||
|             stage == ProgramType::Compute ? 0x400 : header.GetLocalMemorySize(); | ||||
|         if (local_memory_size == 0) { | ||||
|             return; | ||||
|         } | ||||
|         const auto element_count = Common::AlignUp(local_memory_size, 4) / 4; | ||||
|         code.AddLine("float {}[{}];", GetLocalMemory(), element_count); | ||||
|         code.AddNewLine(); | ||||
|     } | ||||
|     } | ||||
| 
 | ||||
|     void DeclareInternalFlags() { | ||||
|         for (u32 flag = 0; flag < static_cast<u32>(InternalFlag::Amount); flag++) { | ||||
|  | @ -395,12 +403,12 @@ private: | |||
|         const u32 location{GetGenericAttributeIndex(index)}; | ||||
| 
 | ||||
|         std::string name{GetInputAttribute(index)}; | ||||
|         if (stage == ShaderStage::Geometry) { | ||||
|         if (stage == ProgramType::Geometry) { | ||||
|             name = "gs_" + name + "[]"; | ||||
|         } | ||||
| 
 | ||||
|         std::string suffix; | ||||
|         if (stage == ShaderStage::Fragment) { | ||||
|         if (stage == ProgramType::Fragment) { | ||||
|             const auto input_mode{header.ps.GetAttributeUse(location)}; | ||||
|             if (skip_unused && input_mode == AttributeUse::Unused) { | ||||
|                 return; | ||||
|  | @ -412,7 +420,7 @@ private: | |||
|     } | ||||
| 
 | ||||
|     void DeclareOutputAttributes() { | ||||
|         if (ir.HasPhysicalAttributes() && stage != ShaderStage::Fragment) { | ||||
|         if (ir.HasPhysicalAttributes() && stage != ProgramType::Fragment) { | ||||
|             for (u32 i = 0; i < GetNumPhysicalVaryings(); ++i) { | ||||
|                 DeclareOutputAttribute(ToGenericAttribute(i)); | ||||
|             } | ||||
|  | @ -534,7 +542,7 @@ private: | |||
|                 constexpr u32 element_stride{4}; | ||||
|                 const u32 address{generic_base + index * generic_stride + element * element_stride}; | ||||
| 
 | ||||
|                 const bool declared{stage != ShaderStage::Fragment || | ||||
|                 const bool declared{stage != ProgramType::Fragment || | ||||
|                                     header.ps.GetAttributeUse(index) != AttributeUse::Unused}; | ||||
|                 const std::string value{declared ? ReadAttribute(attribute, element) : "0"}; | ||||
|                 code.AddLine("case 0x{:x}: return {};", address, value); | ||||
|  | @ -638,7 +646,7 @@ private: | |||
|         } | ||||
| 
 | ||||
|         if (const auto abuf = std::get_if<AbufNode>(&*node)) { | ||||
|             UNIMPLEMENTED_IF_MSG(abuf->IsPhysicalBuffer() && stage == ShaderStage::Geometry, | ||||
|             UNIMPLEMENTED_IF_MSG(abuf->IsPhysicalBuffer() && stage == ProgramType::Geometry, | ||||
|                                  "Physical attributes in geometry shaders are not implemented"); | ||||
|             if (abuf->IsPhysicalBuffer()) { | ||||
|                 return fmt::format("readPhysicalAttribute(ftou({}))", | ||||
|  | @ -693,6 +701,9 @@ private: | |||
|         } | ||||
| 
 | ||||
|         if (const auto lmem = std::get_if<LmemNode>(&*node)) { | ||||
|             if (stage == ProgramType::Compute) { | ||||
|                 LOG_WARNING(Render_OpenGL, "Local memory is stubbed on compute shaders"); | ||||
|             } | ||||
|             return fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress())); | ||||
|         } | ||||
| 
 | ||||
|  | @ -722,7 +733,7 @@ private: | |||
| 
 | ||||
|     std::string ReadAttribute(Attribute::Index attribute, u32 element, const Node& buffer = {}) { | ||||
|         const auto GeometryPass = [&](std::string_view name) { | ||||
|             if (stage == ShaderStage::Geometry && buffer) { | ||||
|             if (stage == ProgramType::Geometry && buffer) { | ||||
|                 // TODO(Rodrigo): Guard geometry inputs against out of bound reads. Some games
 | ||||
|                 // set an 0x80000000 index for those and the shader fails to build. Find out why
 | ||||
|                 // this happens and what its intent is.
 | ||||
|  | @ -734,10 +745,10 @@ private: | |||
|         switch (attribute) { | ||||
|         case Attribute::Index::Position: | ||||
|             switch (stage) { | ||||
|             case ShaderStage::Geometry: | ||||
|             case ProgramType::Geometry: | ||||
|                 return fmt::format("gl_in[ftou({})].gl_Position{}", Visit(buffer), | ||||
|                                    GetSwizzle(element)); | ||||
|             case ShaderStage::Fragment: | ||||
|             case ProgramType::Fragment: | ||||
|                 return element == 3 ? "1.0f" : ("gl_FragCoord"s + GetSwizzle(element)); | ||||
|             default: | ||||
|                 UNREACHABLE(); | ||||
|  | @ -758,7 +769,7 @@ private: | |||
|             // TODO(Subv): Find out what the values are for the first two elements when inside a
 | ||||
|             // vertex shader, and what's the value of the fourth element when inside a Tess Eval
 | ||||
|             // shader.
 | ||||
|             ASSERT(stage == ShaderStage::Vertex); | ||||
|             ASSERT(IsVertexShader(stage)); | ||||
|             switch (element) { | ||||
|             case 2: | ||||
|                 // Config pack's first value is instance_id.
 | ||||
|  | @ -770,7 +781,7 @@ private: | |||
|             return "0"; | ||||
|         case Attribute::Index::FrontFacing: | ||||
|             // TODO(Subv): Find out what the values are for the other elements.
 | ||||
|             ASSERT(stage == ShaderStage::Fragment); | ||||
|             ASSERT(stage == ProgramType::Fragment); | ||||
|             switch (element) { | ||||
|             case 3: | ||||
|                 return "itof(gl_FrontFacing ? -1 : 0)"; | ||||
|  | @ -792,7 +803,7 @@ private: | |||
|             return value; | ||||
|         } | ||||
|         // There's a bug in NVidia's proprietary drivers that makes precise fail on fragment shaders
 | ||||
|         const std::string precise = stage != ShaderStage::Fragment ? "precise " : ""; | ||||
|         const std::string precise = stage != ProgramType::Fragment ? "precise " : ""; | ||||
| 
 | ||||
|         const std::string temporary = code.GenerateTemporary(); | ||||
|         code.AddLine("{}float {} = {};", precise, temporary, value); | ||||
|  | @ -827,12 +838,12 @@ private: | |||
|                 UNIMPLEMENTED(); | ||||
|                 return {}; | ||||
|             case 1: | ||||
|                 if (stage == ShaderStage::Vertex && !device.HasVertexViewportLayer()) { | ||||
|                 if (IsVertexShader(stage) && !device.HasVertexViewportLayer()) { | ||||
|                     return {}; | ||||
|                 } | ||||
|                 return std::make_pair("gl_Layer", true); | ||||
|             case 2: | ||||
|                 if (stage == ShaderStage::Vertex && !device.HasVertexViewportLayer()) { | ||||
|                 if (IsVertexShader(stage) && !device.HasVertexViewportLayer()) { | ||||
|                     return {}; | ||||
|                 } | ||||
|                 return std::make_pair("gl_ViewportIndex", true); | ||||
|  | @ -1069,6 +1080,9 @@ private: | |||
|             target = result->first; | ||||
|             is_integer = result->second; | ||||
|         } else if (const auto lmem = std::get_if<LmemNode>(&*dest)) { | ||||
|             if (stage == ProgramType::Compute) { | ||||
|                 LOG_WARNING(Render_OpenGL, "Local memory is stubbed on compute shaders"); | ||||
|             } | ||||
|             target = fmt::format("{}[ftou({}) / 4]", GetLocalMemory(), Visit(lmem->GetAddress())); | ||||
|         } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { | ||||
|             const std::string real = Visit(gmem->GetRealAddress()); | ||||
|  | @ -1622,7 +1636,7 @@ private: | |||
|     } | ||||
| 
 | ||||
|     std::string Exit(Operation operation) { | ||||
|         if (stage != ShaderStage::Fragment) { | ||||
|         if (stage != ProgramType::Fragment) { | ||||
|             code.AddLine("return;"); | ||||
|             return {}; | ||||
|         } | ||||
|  | @ -1673,7 +1687,7 @@ private: | |||
|     } | ||||
| 
 | ||||
|     std::string EmitVertex(Operation operation) { | ||||
|         ASSERT_MSG(stage == ShaderStage::Geometry, | ||||
|         ASSERT_MSG(stage == ProgramType::Geometry, | ||||
|                    "EmitVertex is expected to be used in a geometry shader."); | ||||
| 
 | ||||
|         // If a geometry shader is attached, it will always flip (it's the last stage before
 | ||||
|  | @ -1684,7 +1698,7 @@ private: | |||
|     } | ||||
| 
 | ||||
|     std::string EndPrimitive(Operation operation) { | ||||
|         ASSERT_MSG(stage == ShaderStage::Geometry, | ||||
|         ASSERT_MSG(stage == ProgramType::Geometry, | ||||
|                    "EndPrimitive is expected to be used in a geometry shader."); | ||||
| 
 | ||||
|         code.AddLine("EndPrimitive();"); | ||||
|  | @ -1919,7 +1933,7 @@ private: | |||
|     } | ||||
| 
 | ||||
|     u32 GetNumPhysicalInputAttributes() const { | ||||
|         return stage == ShaderStage::Vertex ? GetNumPhysicalAttributes() : GetNumPhysicalVaryings(); | ||||
|         return IsVertexShader(stage) ? GetNumPhysicalAttributes() : GetNumPhysicalVaryings(); | ||||
|     } | ||||
| 
 | ||||
|     u32 GetNumPhysicalAttributes() const { | ||||
|  | @ -1932,7 +1946,7 @@ private: | |||
| 
 | ||||
|     const Device& device; | ||||
|     const ShaderIR& ir; | ||||
|     const ShaderStage stage; | ||||
|     const ProgramType stage; | ||||
|     const std::string suffix; | ||||
|     const Header header; | ||||
| 
 | ||||
|  | @ -1963,7 +1977,7 @@ std::string GetCommonDeclarations() { | |||
|         MAX_CONSTBUFFER_ELEMENTS); | ||||
| } | ||||
| 
 | ||||
| ProgramResult Decompile(const Device& device, const ShaderIR& ir, Maxwell::ShaderStage stage, | ||||
| ProgramResult Decompile(const Device& device, const ShaderIR& ir, ProgramType stage, | ||||
|                         const std::string& suffix) { | ||||
|     GLSLDecompiler decompiler(device, ir, stage, suffix); | ||||
|     decompiler.Decompile(); | ||||
|  |  | |||
|  | @ -12,14 +12,26 @@ | |||
| #include "video_core/engines/maxwell_3d.h" | ||||
| #include "video_core/shader/shader_ir.h" | ||||
| 
 | ||||
| namespace OpenGL { | ||||
| class Device; | ||||
| } | ||||
| 
 | ||||
| namespace VideoCommon::Shader { | ||||
| class ShaderIR; | ||||
| } | ||||
| 
 | ||||
| namespace OpenGL { | ||||
| 
 | ||||
| class Device; | ||||
| 
 | ||||
| enum class ProgramType : u32 { | ||||
|     VertexA = 0, | ||||
|     VertexB = 1, | ||||
|     TessellationControl = 2, | ||||
|     TessellationEval = 3, | ||||
|     Geometry = 4, | ||||
|     Fragment = 5, | ||||
|     Compute = 6 | ||||
| }; | ||||
| 
 | ||||
| } // namespace OpenGL
 | ||||
| 
 | ||||
| namespace OpenGL::GLShader { | ||||
| 
 | ||||
| struct ShaderEntries; | ||||
|  | @ -85,6 +97,6 @@ struct ShaderEntries { | |||
| std::string GetCommonDeclarations(); | ||||
| 
 | ||||
| ProgramResult Decompile(const Device& device, const VideoCommon::Shader::ShaderIR& ir, | ||||
|                         Maxwell::ShaderStage stage, const std::string& suffix); | ||||
|                         ProgramType stage, const std::string& suffix); | ||||
| 
 | ||||
| } // namespace OpenGL::GLShader
 | ||||
|  |  | |||
|  | @ -51,7 +51,7 @@ ShaderCacheVersionHash GetShaderCacheVersionHash() { | |||
| 
 | ||||
| } // namespace
 | ||||
| 
 | ||||
| ShaderDiskCacheRaw::ShaderDiskCacheRaw(u64 unique_identifier, Maxwell::ShaderProgram program_type, | ||||
| ShaderDiskCacheRaw::ShaderDiskCacheRaw(u64 unique_identifier, ProgramType program_type, | ||||
|                                        u32 program_code_size, u32 program_code_size_b, | ||||
|                                        ProgramCode program_code, ProgramCode program_code_b) | ||||
|     : unique_identifier{unique_identifier}, program_type{program_type}, | ||||
|  |  | |||
|  | @ -18,7 +18,6 @@ | |||
| #include "common/assert.h" | ||||
| #include "common/common_types.h" | ||||
| #include "core/file_sys/vfs_vector.h" | ||||
| #include "video_core/engines/maxwell_3d.h" | ||||
| #include "video_core/renderer_opengl/gl_shader_gen.h" | ||||
| 
 | ||||
| namespace Core { | ||||
|  | @ -34,14 +33,11 @@ namespace OpenGL { | |||
| struct ShaderDiskCacheUsage; | ||||
| struct ShaderDiskCacheDump; | ||||
| 
 | ||||
| using ShaderDumpsMap = std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>; | ||||
| 
 | ||||
| using ProgramCode = std::vector<u64>; | ||||
| using Maxwell = Tegra::Engines::Maxwell3D::Regs; | ||||
| 
 | ||||
| using ShaderDumpsMap = std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>; | ||||
| using TextureBufferUsage = std::bitset<64>; | ||||
| 
 | ||||
| /// Allocated bindings used by an OpenGL shader program.
 | ||||
| /// Allocated bindings used by an OpenGL shader program
 | ||||
| struct BaseBindings { | ||||
|     u32 cbuf{}; | ||||
|     u32 gmem{}; | ||||
|  | @ -126,7 +122,7 @@ namespace OpenGL { | |||
| /// Describes a shader and how it's used by the guest GPU
 | ||||
| class ShaderDiskCacheRaw { | ||||
| public: | ||||
|     explicit ShaderDiskCacheRaw(u64 unique_identifier, Maxwell::ShaderProgram program_type, | ||||
|     explicit ShaderDiskCacheRaw(u64 unique_identifier, ProgramType program_type, | ||||
|                                 u32 program_code_size, u32 program_code_size_b, | ||||
|                                 ProgramCode program_code, ProgramCode program_code_b); | ||||
|     ShaderDiskCacheRaw(); | ||||
|  | @ -141,30 +137,13 @@ public: | |||
|     } | ||||
| 
 | ||||
|     bool HasProgramA() const { | ||||
|         return program_type == Maxwell::ShaderProgram::VertexA; | ||||
|         return program_type == ProgramType::VertexA; | ||||
|     } | ||||
| 
 | ||||
|     Maxwell::ShaderProgram GetProgramType() const { | ||||
|     ProgramType GetProgramType() const { | ||||
|         return program_type; | ||||
|     } | ||||
| 
 | ||||
|     Maxwell::ShaderStage GetProgramStage() const { | ||||
|         switch (program_type) { | ||||
|         case Maxwell::ShaderProgram::VertexA: | ||||
|         case Maxwell::ShaderProgram::VertexB: | ||||
|             return Maxwell::ShaderStage::Vertex; | ||||
|         case Maxwell::ShaderProgram::TesselationControl: | ||||
|             return Maxwell::ShaderStage::TesselationControl; | ||||
|         case Maxwell::ShaderProgram::TesselationEval: | ||||
|             return Maxwell::ShaderStage::TesselationEval; | ||||
|         case Maxwell::ShaderProgram::Geometry: | ||||
|             return Maxwell::ShaderStage::Geometry; | ||||
|         case Maxwell::ShaderProgram::Fragment: | ||||
|             return Maxwell::ShaderStage::Fragment; | ||||
|         } | ||||
|         UNREACHABLE(); | ||||
|     } | ||||
| 
 | ||||
|     const ProgramCode& GetProgramCode() const { | ||||
|         return program_code; | ||||
|     } | ||||
|  | @ -175,7 +154,7 @@ public: | |||
| 
 | ||||
| private: | ||||
|     u64 unique_identifier{}; | ||||
|     Maxwell::ShaderProgram program_type{}; | ||||
|     ProgramType program_type{}; | ||||
|     u32 program_code_size{}; | ||||
|     u32 program_code_size_b{}; | ||||
| 
 | ||||
|  |  | |||
|  | @ -14,7 +14,8 @@ using Tegra::Engines::Maxwell3D; | |||
| using VideoCommon::Shader::ProgramCode; | ||||
| using VideoCommon::Shader::ShaderIR; | ||||
| 
 | ||||
| static constexpr u32 PROGRAM_OFFSET{10}; | ||||
| static constexpr u32 PROGRAM_OFFSET = 10; | ||||
| static constexpr u32 COMPUTE_OFFSET = 0; | ||||
| 
 | ||||
| ProgramResult GenerateVertexShader(const Device& device, const ShaderSetup& setup) { | ||||
|     const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); | ||||
|  | @ -29,17 +30,15 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config { | |||
| }; | ||||
| 
 | ||||
| )"; | ||||
|     const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a); | ||||
|     ProgramResult program = | ||||
|         Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Vertex, "vertex"); | ||||
| 
 | ||||
|     const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a); | ||||
|     const auto stage = setup.IsDualProgram() ? ProgramType::VertexA : ProgramType::VertexB; | ||||
|     ProgramResult program = Decompile(device, program_ir, stage, "vertex"); | ||||
|     out += program.first; | ||||
| 
 | ||||
|     if (setup.IsDualProgram()) { | ||||
|         const ShaderIR program_ir_b(setup.program.code_b, PROGRAM_OFFSET, setup.program.size_b); | ||||
|         ProgramResult program_b = | ||||
|             Decompile(device, program_ir_b, Maxwell3D::Regs::ShaderStage::Vertex, "vertex_b"); | ||||
| 
 | ||||
|         ProgramResult program_b = Decompile(device, program_ir_b, ProgramType::VertexB, "vertex_b"); | ||||
|         out += program_b.first; | ||||
|     } | ||||
| 
 | ||||
|  | @ -80,9 +79,9 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config { | |||
| }; | ||||
| 
 | ||||
| )"; | ||||
| 
 | ||||
|     const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a); | ||||
|     ProgramResult program = | ||||
|         Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Geometry, "geometry"); | ||||
|     ProgramResult program = Decompile(device, program_ir, ProgramType::Geometry, "geometry"); | ||||
|     out += program.first; | ||||
| 
 | ||||
|     out += R"( | ||||
|  | @ -116,9 +115,7 @@ layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config { | |||
| 
 | ||||
| )"; | ||||
|     const ShaderIR program_ir(setup.program.code, PROGRAM_OFFSET, setup.program.size_a); | ||||
|     ProgramResult program = | ||||
|         Decompile(device, program_ir, Maxwell3D::Regs::ShaderStage::Fragment, "fragment"); | ||||
| 
 | ||||
|     ProgramResult program = Decompile(device, program_ir, ProgramType::Fragment, "fragment"); | ||||
|     out += program.first; | ||||
| 
 | ||||
|     out += R"( | ||||
|  | @ -130,4 +127,22 @@ void main() { | |||
|     return {std::move(out), std::move(program.second)}; | ||||
| } | ||||
| 
 | ||||
| ProgramResult GenerateComputeShader(const Device& device, const ShaderSetup& setup) { | ||||
|     const std::string id = fmt::format("{:016x}", setup.program.unique_identifier); | ||||
| 
 | ||||
|     std::string out = "// Shader Unique Id: CS" + id + "\n\n"; | ||||
|     out += GetCommonDeclarations(); | ||||
| 
 | ||||
|     const ShaderIR program_ir(setup.program.code, COMPUTE_OFFSET, setup.program.size_a); | ||||
|     ProgramResult program = Decompile(device, program_ir, ProgramType::Compute, "compute"); | ||||
|     out += program.first; | ||||
| 
 | ||||
|     out += R"( | ||||
| void main() { | ||||
|     execute_compute(); | ||||
| } | ||||
| )"; | ||||
|     return {std::move(out), std::move(program.second)}; | ||||
| } | ||||
| 
 | ||||
| } // namespace OpenGL::GLShader
 | ||||
|  |  | |||
|  | @ -54,4 +54,7 @@ ProgramResult GenerateGeometryShader(const Device& device, const ShaderSetup& se | |||
| /// Generates the GLSL fragment shader program source code for the given FS program
 | ||||
| ProgramResult GenerateFragmentShader(const Device& device, const ShaderSetup& setup); | ||||
| 
 | ||||
| /// Generates the GLSL compute shader program source code for the given CS program
 | ||||
| ProgramResult GenerateComputeShader(const Device& device, const ShaderSetup& setup); | ||||
| 
 | ||||
| } // namespace OpenGL::GLShader
 | ||||
|  |  | |||
|  | @ -10,21 +10,25 @@ | |||
| 
 | ||||
| namespace OpenGL::GLShader { | ||||
| 
 | ||||
| GLuint LoadShader(const char* source, GLenum type) { | ||||
|     const char* debug_type; | ||||
| namespace { | ||||
| const char* GetStageDebugName(GLenum type) { | ||||
|     switch (type) { | ||||
|     case GL_VERTEX_SHADER: | ||||
|         debug_type = "vertex"; | ||||
|         break; | ||||
|         return "vertex"; | ||||
|     case GL_GEOMETRY_SHADER: | ||||
|         debug_type = "geometry"; | ||||
|         break; | ||||
|         return "geometry"; | ||||
|     case GL_FRAGMENT_SHADER: | ||||
|         debug_type = "fragment"; | ||||
|         break; | ||||
|     default: | ||||
|         UNREACHABLE(); | ||||
|         return "fragment"; | ||||
|     case GL_COMPUTE_SHADER: | ||||
|         return "compute"; | ||||
|     } | ||||
|     UNIMPLEMENTED(); | ||||
|     return "unknown"; | ||||
| } | ||||
| } // Anonymous namespace
 | ||||
| 
 | ||||
| GLuint LoadShader(const char* source, GLenum type) { | ||||
|     const char* debug_type = GetStageDebugName(type); | ||||
|     const GLuint shader_id = glCreateShader(type); | ||||
|     glShaderSource(shader_id, 1, &source, nullptr); | ||||
|     LOG_DEBUG(Render_OpenGL, "Compiling {} shader...", debug_type); | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 bunnei
						bunnei