forked from eden-emu/eden
		
	shader: Initial OpenGL implementation
This commit is contained in:
		
							parent
							
								
									850b08a16c
								
							
						
					
					
						commit
						d621e96d0d
					
				
					 38 changed files with 1427 additions and 705 deletions
				
			
		|  | @ -355,6 +355,10 @@ U32 IREmitter::WorkgroupIdZ() { | |||
|     return U32{CompositeExtract(Inst(Opcode::WorkgroupId), 2)}; | ||||
| } | ||||
| 
 | ||||
| Value IREmitter::LocalInvocationId() { | ||||
|     return Inst(Opcode::LocalInvocationId); | ||||
| } | ||||
| 
 | ||||
| U32 IREmitter::LocalInvocationIdX() { | ||||
|     return U32{CompositeExtract(Inst(Opcode::LocalInvocationId), 0)}; | ||||
| } | ||||
|  |  | |||
|  | @ -95,6 +95,7 @@ public: | |||
|     [[nodiscard]] U32 WorkgroupIdY(); | ||||
|     [[nodiscard]] U32 WorkgroupIdZ(); | ||||
| 
 | ||||
|     [[nodiscard]] Value LocalInvocationId(); | ||||
|     [[nodiscard]] U32 LocalInvocationIdX(); | ||||
|     [[nodiscard]] U32 LocalInvocationIdY(); | ||||
|     [[nodiscard]] U32 LocalInvocationIdZ(); | ||||
|  |  | |||
|  | @ -120,6 +120,13 @@ enum class SpecialRegister : u64 { | |||
|     case SpecialRegister::SR_INVOCATION_INFO: | ||||
|         // LOG_WARNING(..., "SR_INVOCATION_INFO is stubbed");
 | ||||
|         return ir.Imm32(0x00ff'0000); | ||||
|     case SpecialRegister::SR_TID: { | ||||
|         const IR::Value tid{ir.LocalInvocationId()}; | ||||
|         return ir.BitFieldInsert(ir.BitFieldInsert(IR::U32{ir.CompositeExtract(tid, 0)}, | ||||
|                                                    IR::U32{ir.CompositeExtract(tid, 1)}, | ||||
|                                                    ir.Imm32(16), ir.Imm32(8)), | ||||
|                                  IR::U32{ir.CompositeExtract(tid, 2)}, ir.Imm32(26), ir.Imm32(6)); | ||||
|     } | ||||
|     case SpecialRegister::SR_TID_X: | ||||
|         return ir.LocalInvocationIdX(); | ||||
|     case SpecialRegister::SR_TID_Y: | ||||
|  |  | |||
|  | @ -67,10 +67,14 @@ add_library(video_core STATIC | |||
|     renderer_base.h | ||||
|     renderer_opengl/gl_buffer_cache.cpp | ||||
|     renderer_opengl/gl_buffer_cache.h | ||||
|     renderer_opengl/gl_compute_program.cpp | ||||
|     renderer_opengl/gl_compute_program.h | ||||
|     renderer_opengl/gl_device.cpp | ||||
|     renderer_opengl/gl_device.h | ||||
|     renderer_opengl/gl_fence_manager.cpp | ||||
|     renderer_opengl/gl_fence_manager.h | ||||
|     renderer_opengl/gl_graphics_program.cpp | ||||
|     renderer_opengl/gl_graphics_program.h | ||||
|     renderer_opengl/gl_rasterizer.cpp | ||||
|     renderer_opengl/gl_rasterizer.h | ||||
|     renderer_opengl/gl_resource_manager.cpp | ||||
|  |  | |||
|  | @ -70,8 +70,8 @@ class BufferCache { | |||
|         P::HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT; | ||||
|     static constexpr bool NEEDS_BIND_UNIFORM_INDEX = P::NEEDS_BIND_UNIFORM_INDEX; | ||||
|     static constexpr bool NEEDS_BIND_STORAGE_INDEX = P::NEEDS_BIND_STORAGE_INDEX; | ||||
|     static constexpr bool NEEDS_BIND_TEXTURE_BUFFER_INDEX = P::NEEDS_BIND_TEXTURE_BUFFER_INDEX; | ||||
|     static constexpr bool USE_MEMORY_MAPS = P::USE_MEMORY_MAPS; | ||||
|     static constexpr bool SEPARATE_IMAGE_BUFFERS_BINDINGS = P::SEPARATE_IMAGE_BUFFER_BINDINGS; | ||||
| 
 | ||||
|     static constexpr BufferId NULL_BUFFER_ID{0}; | ||||
| 
 | ||||
|  | @ -154,7 +154,7 @@ public: | |||
|     void UnbindGraphicsTextureBuffers(size_t stage); | ||||
| 
 | ||||
|     void BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, GPUVAddr gpu_addr, u32 size, | ||||
|                                    PixelFormat format, bool is_written); | ||||
|                                    PixelFormat format, bool is_written, bool is_image); | ||||
| 
 | ||||
|     void UnbindComputeStorageBuffers(); | ||||
| 
 | ||||
|  | @ -164,7 +164,7 @@ public: | |||
|     void UnbindComputeTextureBuffers(); | ||||
| 
 | ||||
|     void BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, PixelFormat format, | ||||
|                                   bool is_written); | ||||
|                                   bool is_written, bool is_image); | ||||
| 
 | ||||
|     void FlushCachedWrites(); | ||||
| 
 | ||||
|  | @ -197,6 +197,7 @@ public: | |||
|     [[nodiscard]] bool IsRegionCpuModified(VAddr addr, size_t size); | ||||
| 
 | ||||
|     std::mutex mutex; | ||||
|     Runtime& runtime; | ||||
| 
 | ||||
| private: | ||||
|     template <typename Func> | ||||
|  | @ -366,7 +367,6 @@ private: | |||
|     Tegra::Engines::KeplerCompute& kepler_compute; | ||||
|     Tegra::MemoryManager& gpu_memory; | ||||
|     Core::Memory::Memory& cpu_memory; | ||||
|     Runtime& runtime; | ||||
| 
 | ||||
|     SlotVector<Buffer> slot_buffers; | ||||
|     DelayedDestructionRing<Buffer, 8> delayed_destruction_ring; | ||||
|  | @ -394,8 +394,10 @@ private: | |||
| 
 | ||||
|     std::array<u32, NUM_STAGES> enabled_texture_buffers{}; | ||||
|     std::array<u32, NUM_STAGES> written_texture_buffers{}; | ||||
|     std::array<u32, NUM_STAGES> image_texture_buffers{}; | ||||
|     u32 enabled_compute_texture_buffers = 0; | ||||
|     u32 written_compute_texture_buffers = 0; | ||||
|     u32 image_compute_texture_buffers = 0; | ||||
| 
 | ||||
|     std::array<u32, NUM_STAGES> fast_bound_uniform_buffers{}; | ||||
| 
 | ||||
|  | @ -431,8 +433,8 @@ BufferCache<P>::BufferCache(VideoCore::RasterizerInterface& rasterizer_, | |||
|                             Tegra::Engines::KeplerCompute& kepler_compute_, | ||||
|                             Tegra::MemoryManager& gpu_memory_, Core::Memory::Memory& cpu_memory_, | ||||
|                             Runtime& runtime_) | ||||
|     : rasterizer{rasterizer_}, maxwell3d{maxwell3d_}, kepler_compute{kepler_compute_}, | ||||
|       gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_}, runtime{runtime_} { | ||||
|     : runtime{runtime_}, rasterizer{rasterizer_}, maxwell3d{maxwell3d_}, | ||||
|       kepler_compute{kepler_compute_}, gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_} { | ||||
|     // Ensure the first slot is used for the null buffer
 | ||||
|     void(slot_buffers.insert(runtime, NullBufferParams{})); | ||||
|     deletion_iterator = slot_buffers.end(); | ||||
|  | @ -703,13 +705,18 @@ template <class P> | |||
| void BufferCache<P>::UnbindGraphicsTextureBuffers(size_t stage) { | ||||
|     enabled_texture_buffers[stage] = 0; | ||||
|     written_texture_buffers[stage] = 0; | ||||
|     image_texture_buffers[stage] = 0; | ||||
| } | ||||
| 
 | ||||
| template <class P> | ||||
| void BufferCache<P>::BindGraphicsTextureBuffer(size_t stage, size_t tbo_index, GPUVAddr gpu_addr, | ||||
|                                                u32 size, PixelFormat format, bool is_written) { | ||||
|                                                u32 size, PixelFormat format, bool is_written, | ||||
|                                                bool is_image) { | ||||
|     enabled_texture_buffers[stage] |= 1U << tbo_index; | ||||
|     written_texture_buffers[stage] |= (is_written ? 1U : 0U) << tbo_index; | ||||
|     if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { | ||||
|         image_texture_buffers[stage] |= (is_image ? 1U : 0U) << tbo_index; | ||||
|     } | ||||
|     texture_buffers[stage][tbo_index] = GetTextureBufferBinding(gpu_addr, size, format); | ||||
| } | ||||
| 
 | ||||
|  | @ -717,6 +724,7 @@ template <class P> | |||
| void BufferCache<P>::UnbindComputeStorageBuffers() { | ||||
|     enabled_compute_storage_buffers = 0; | ||||
|     written_compute_storage_buffers = 0; | ||||
|     image_compute_texture_buffers = 0; | ||||
| } | ||||
| 
 | ||||
| template <class P> | ||||
|  | @ -737,13 +745,17 @@ template <class P> | |||
| void BufferCache<P>::UnbindComputeTextureBuffers() { | ||||
|     enabled_compute_texture_buffers = 0; | ||||
|     written_compute_texture_buffers = 0; | ||||
|     image_compute_texture_buffers = 0; | ||||
| } | ||||
| 
 | ||||
| template <class P> | ||||
| void BufferCache<P>::BindComputeTextureBuffer(size_t tbo_index, GPUVAddr gpu_addr, u32 size, | ||||
|                                               PixelFormat format, bool is_written) { | ||||
|                                               PixelFormat format, bool is_written, bool is_image) { | ||||
|     enabled_compute_texture_buffers |= 1U << tbo_index; | ||||
|     written_compute_texture_buffers |= (is_written ? 1U : 0U) << tbo_index; | ||||
|     if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { | ||||
|         image_compute_texture_buffers |= (is_image ? 1U : 0U) << tbo_index; | ||||
|     } | ||||
|     compute_texture_buffers[tbo_index] = GetTextureBufferBinding(gpu_addr, size, format); | ||||
| } | ||||
| 
 | ||||
|  | @ -1057,7 +1069,6 @@ void BufferCache<P>::BindHostGraphicsStorageBuffers(size_t stage) { | |||
| 
 | ||||
| template <class P> | ||||
| void BufferCache<P>::BindHostGraphicsTextureBuffers(size_t stage) { | ||||
|     u32 binding_index = 0; | ||||
|     ForEachEnabledBit(enabled_texture_buffers[stage], [&](u32 index) { | ||||
|         const TextureBufferBinding& binding = texture_buffers[stage][index]; | ||||
|         Buffer& buffer = slot_buffers[binding.buffer_id]; | ||||
|  | @ -1066,9 +1077,12 @@ void BufferCache<P>::BindHostGraphicsTextureBuffers(size_t stage) { | |||
| 
 | ||||
|         const u32 offset = buffer.Offset(binding.cpu_addr); | ||||
|         const PixelFormat format = binding.format; | ||||
|         if constexpr (NEEDS_BIND_TEXTURE_BUFFER_INDEX) { | ||||
|             runtime.BindTextureBuffer(binding_index, buffer, offset, size, format); | ||||
|             ++binding_index; | ||||
|         if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { | ||||
|             if (((image_texture_buffers[stage] >> index) & 1) != 0) { | ||||
|                 runtime.BindImageBuffer(buffer, offset, size, format); | ||||
|             } else { | ||||
|                 runtime.BindTextureBuffer(buffer, offset, size, format); | ||||
|             } | ||||
|         } else { | ||||
|             runtime.BindTextureBuffer(buffer, offset, size, format); | ||||
|         } | ||||
|  | @ -1139,7 +1153,6 @@ void BufferCache<P>::BindHostComputeStorageBuffers() { | |||
| 
 | ||||
| template <class P> | ||||
| void BufferCache<P>::BindHostComputeTextureBuffers() { | ||||
|     u32 binding_index = 0; | ||||
|     ForEachEnabledBit(enabled_compute_texture_buffers, [&](u32 index) { | ||||
|         const TextureBufferBinding& binding = compute_texture_buffers[index]; | ||||
|         Buffer& buffer = slot_buffers[binding.buffer_id]; | ||||
|  | @ -1148,9 +1161,12 @@ void BufferCache<P>::BindHostComputeTextureBuffers() { | |||
| 
 | ||||
|         const u32 offset = buffer.Offset(binding.cpu_addr); | ||||
|         const PixelFormat format = binding.format; | ||||
|         if constexpr (NEEDS_BIND_TEXTURE_BUFFER_INDEX) { | ||||
|             runtime.BindTextureBuffer(binding_index, buffer, offset, size, format); | ||||
|             ++binding_index; | ||||
|         if constexpr (SEPARATE_IMAGE_BUFFERS_BINDINGS) { | ||||
|             if (((image_compute_texture_buffers >> index) & 1) != 0) { | ||||
|                 runtime.BindImageBuffer(buffer, offset, size, format); | ||||
|             } else { | ||||
|                 runtime.BindTextureBuffer(buffer, offset, size, format); | ||||
|             } | ||||
|         } else { | ||||
|             runtime.BindTextureBuffer(buffer, offset, size, format); | ||||
|         } | ||||
|  | @ -1339,11 +1355,10 @@ void BufferCache<P>::UpdateComputeStorageBuffers() { | |||
|     ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) { | ||||
|         // Resolve buffer
 | ||||
|         Binding& binding = compute_storage_buffers[index]; | ||||
|         const BufferId buffer_id = FindBuffer(binding.cpu_addr, binding.size); | ||||
|         binding.buffer_id = buffer_id; | ||||
|         binding.buffer_id = FindBuffer(binding.cpu_addr, binding.size); | ||||
|         // Mark as written if needed
 | ||||
|         if (((written_compute_storage_buffers >> index) & 1) != 0) { | ||||
|             MarkWrittenBuffer(buffer_id, binding.cpu_addr, binding.size); | ||||
|             MarkWrittenBuffer(binding.buffer_id, binding.cpu_addr, binding.size); | ||||
|         } | ||||
|     }); | ||||
| } | ||||
|  |  | |||
|  | @ -2,14 +2,18 @@ | |||
| // Licensed under GPLv2 or any later version
 | ||||
| // Refer to the license.txt file included.
 | ||||
| 
 | ||||
| #include <algorithm> | ||||
| #include <span> | ||||
| 
 | ||||
| #include "video_core/buffer_cache/buffer_cache.h" | ||||
| #include "video_core/renderer_opengl/gl_buffer_cache.h" | ||||
| #include "video_core/renderer_opengl/gl_device.h" | ||||
| #include "video_core/renderer_opengl/maxwell_to_gl.h" | ||||
| 
 | ||||
| namespace OpenGL { | ||||
| namespace { | ||||
| using VideoCore::Surface::PixelFormat; | ||||
| 
 | ||||
| struct BindlessSSBO { | ||||
|     GLuint64EXT address; | ||||
|     GLsizei length; | ||||
|  | @ -62,6 +66,26 @@ void Buffer::MakeResident(GLenum access) noexcept { | |||
|     glMakeNamedBufferResidentNV(buffer.handle, access); | ||||
| } | ||||
| 
 | ||||
| GLuint Buffer::View(u32 offset, u32 size, PixelFormat format) { | ||||
|     const auto it{std::ranges::find_if(views, [offset, size, format](const BufferView& view) { | ||||
|         return offset == view.offset && size == view.size && format == view.format; | ||||
|     })}; | ||||
|     if (it != views.end()) { | ||||
|         return it->texture.handle; | ||||
|     } | ||||
|     OGLTexture texture; | ||||
|     texture.Create(GL_TEXTURE_BUFFER); | ||||
|     const GLenum gl_format{MaxwellToGL::GetFormatTuple(format).internal_format}; | ||||
|     glTextureBufferRange(texture.handle, gl_format, buffer.handle, offset, size); | ||||
|     views.push_back({ | ||||
|         .offset = offset, | ||||
|         .size = size, | ||||
|         .format = format, | ||||
|         .texture = std::move(texture), | ||||
|     }); | ||||
|     return views.back().texture.handle; | ||||
| } | ||||
| 
 | ||||
| BufferCacheRuntime::BufferCacheRuntime(const Device& device_) | ||||
|     : device{device_}, has_fast_buffer_sub_data{device.HasFastBufferSubData()}, | ||||
|       use_assembly_shaders{device.UseAssemblyShaders()}, | ||||
|  | @ -144,7 +168,7 @@ void BufferCacheRuntime::BindUniformBuffer(size_t stage, u32 binding_index, Buff | |||
|         glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0, | ||||
|                             static_cast<GLsizeiptr>(size)); | ||||
|     } else { | ||||
|         const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer; | ||||
|         const GLuint base_binding = graphics_base_uniform_bindings[stage]; | ||||
|         const GLuint binding = base_binding + binding_index; | ||||
|         glBindBufferRange(GL_UNIFORM_BUFFER, binding, buffer.Handle(), | ||||
|                           static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size)); | ||||
|  | @ -181,7 +205,7 @@ void BufferCacheRuntime::BindStorageBuffer(size_t stage, u32 binding_index, Buff | |||
|         glProgramLocalParametersI4uivNV(PROGRAM_LUT[stage], binding_index, 1, | ||||
|                                         reinterpret_cast<const GLuint*>(&ssbo)); | ||||
|     } else { | ||||
|         const GLuint base_binding = device.GetBaseBindings(stage).shader_storage_buffer; | ||||
|         const GLuint base_binding = graphics_base_storage_bindings[stage]; | ||||
|         const GLuint binding = base_binding + binding_index; | ||||
|         glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, buffer.Handle(), | ||||
|                           static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size)); | ||||
|  | @ -213,4 +237,13 @@ void BufferCacheRuntime::BindTransformFeedbackBuffer(u32 index, Buffer& buffer, | |||
|                       static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size)); | ||||
| } | ||||
| 
 | ||||
| void BufferCacheRuntime::BindTextureBuffer(Buffer& buffer, u32 offset, u32 size, | ||||
|                                            PixelFormat format) { | ||||
|     *texture_handles++ = buffer.View(offset, size, format); | ||||
| } | ||||
| 
 | ||||
| void BufferCacheRuntime::BindImageBuffer(Buffer& buffer, u32 offset, u32 size, PixelFormat format) { | ||||
|     *image_handles++ = buffer.View(offset, size, format); | ||||
| } | ||||
| 
 | ||||
| } // namespace OpenGL
 | ||||
|  |  | |||
|  | @ -32,6 +32,8 @@ public: | |||
| 
 | ||||
|     void MakeResident(GLenum access) noexcept; | ||||
| 
 | ||||
|     [[nodiscard]] GLuint View(u32 offset, u32 size, VideoCore::Surface::PixelFormat format); | ||||
| 
 | ||||
|     [[nodiscard]] GLuint64EXT HostGpuAddr() const noexcept { | ||||
|         return address; | ||||
|     } | ||||
|  | @ -41,9 +43,17 @@ public: | |||
|     } | ||||
| 
 | ||||
| private: | ||||
|     struct BufferView { | ||||
|         u32 offset; | ||||
|         u32 size; | ||||
|         VideoCore::Surface::PixelFormat format; | ||||
|         OGLTexture texture; | ||||
|     }; | ||||
| 
 | ||||
|     GLuint64EXT address = 0; | ||||
|     OGLBuffer buffer; | ||||
|     GLenum current_residency_access = GL_NONE; | ||||
|     std::vector<BufferView> views; | ||||
| }; | ||||
| 
 | ||||
| class BufferCacheRuntime { | ||||
|  | @ -75,13 +85,19 @@ public: | |||
| 
 | ||||
|     void BindTransformFeedbackBuffer(u32 index, Buffer& buffer, u32 offset, u32 size); | ||||
| 
 | ||||
|     void BindTextureBuffer(Buffer& buffer, u32 offset, u32 size, | ||||
|                            VideoCore::Surface::PixelFormat format); | ||||
| 
 | ||||
|     void BindImageBuffer(Buffer& buffer, u32 offset, u32 size, | ||||
|                          VideoCore::Surface::PixelFormat format); | ||||
| 
 | ||||
|     void BindFastUniformBuffer(size_t stage, u32 binding_index, u32 size) { | ||||
|         if (use_assembly_shaders) { | ||||
|             const GLuint handle = fast_uniforms[stage][binding_index].handle; | ||||
|             const GLsizeiptr gl_size = static_cast<GLsizeiptr>(size); | ||||
|             glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0, gl_size); | ||||
|         } else { | ||||
|             const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer; | ||||
|             const GLuint base_binding = graphics_base_uniform_bindings[stage]; | ||||
|             const GLuint binding = base_binding + binding_index; | ||||
|             glBindBufferRange(GL_UNIFORM_BUFFER, binding, | ||||
|                               fast_uniforms[stage][binding_index].handle, 0, | ||||
|  | @ -103,7 +119,7 @@ public: | |||
| 
 | ||||
|     std::span<u8> BindMappedUniformBuffer(size_t stage, u32 binding_index, u32 size) noexcept { | ||||
|         const auto [mapped_span, offset] = stream_buffer->Request(static_cast<size_t>(size)); | ||||
|         const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer; | ||||
|         const GLuint base_binding = graphics_base_uniform_bindings[stage]; | ||||
|         const GLuint binding = base_binding + binding_index; | ||||
|         glBindBufferRange(GL_UNIFORM_BUFFER, binding, stream_buffer->Handle(), | ||||
|                           static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size)); | ||||
|  | @ -118,6 +134,19 @@ public: | |||
|         return has_fast_buffer_sub_data; | ||||
|     } | ||||
| 
 | ||||
|     void SetBaseUniformBindings(const std::array<GLuint, 5>& bindings) { | ||||
|         graphics_base_uniform_bindings = bindings; | ||||
|     } | ||||
| 
 | ||||
|     void SetBaseStorageBindings(const std::array<GLuint, 5>& bindings) { | ||||
|         graphics_base_storage_bindings = bindings; | ||||
|     } | ||||
| 
 | ||||
|     void SetImagePointers(GLuint* texture_handles_, GLuint* image_handles_) { | ||||
|         texture_handles = texture_handles_; | ||||
|         image_handles = image_handles_; | ||||
|     } | ||||
| 
 | ||||
| private: | ||||
|     static constexpr std::array PABO_LUT{ | ||||
|         GL_VERTEX_PROGRAM_PARAMETER_BUFFER_NV,          GL_TESS_CONTROL_PROGRAM_PARAMETER_BUFFER_NV, | ||||
|  | @ -133,6 +162,11 @@ private: | |||
| 
 | ||||
|     u32 max_attributes = 0; | ||||
| 
 | ||||
|     std::array<GLuint, 5> graphics_base_uniform_bindings{}; | ||||
|     std::array<GLuint, 5> graphics_base_storage_bindings{}; | ||||
|     GLuint* texture_handles = nullptr; | ||||
|     GLuint* image_handles = nullptr; | ||||
| 
 | ||||
|     std::optional<StreamBuffer> stream_buffer; | ||||
| 
 | ||||
|     std::array<std::array<OGLBuffer, VideoCommon::NUM_GRAPHICS_UNIFORM_BUFFERS>, | ||||
|  | @ -155,8 +189,8 @@ struct BufferCacheParams { | |||
|     static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = true; | ||||
|     static constexpr bool NEEDS_BIND_UNIFORM_INDEX = true; | ||||
|     static constexpr bool NEEDS_BIND_STORAGE_INDEX = true; | ||||
|     static constexpr bool NEEDS_BIND_TEXTURE_BUFFER_INDEX = true; | ||||
|     static constexpr bool USE_MEMORY_MAPS = false; | ||||
|     static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = true; | ||||
| }; | ||||
| 
 | ||||
| using BufferCache = VideoCommon::BufferCache<BufferCacheParams>; | ||||
|  |  | |||
							
								
								
									
										178
									
								
								src/video_core/renderer_opengl/gl_compute_program.cpp
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										178
									
								
								src/video_core/renderer_opengl/gl_compute_program.cpp
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,178 @@ | |||
| // Copyright 2021 yuzu Emulator Project
 | ||||
| // Licensed under GPLv2 or any later version
 | ||||
| // Refer to the license.txt file included.
 | ||||
| 
 | ||||
| #include <cstring> | ||||
| 
 | ||||
| #include "common/cityhash.h" | ||||
| #include "video_core/renderer_opengl/gl_compute_program.h" | ||||
| #include "video_core/renderer_opengl/gl_shader_manager.h" | ||||
| 
 | ||||
| namespace OpenGL { | ||||
| 
 | ||||
| using Shader::ImageBufferDescriptor; | ||||
| using Tegra::Texture::TexturePair; | ||||
| using VideoCommon::ImageId; | ||||
| 
 | ||||
| constexpr u32 MAX_TEXTURES = 64; | ||||
| constexpr u32 MAX_IMAGES = 16; | ||||
| 
 | ||||
| size_t ComputeProgramKey::Hash() const noexcept { | ||||
|     return static_cast<size_t>( | ||||
|         Common::CityHash64(reinterpret_cast<const char*>(this), sizeof *this)); | ||||
| } | ||||
| 
 | ||||
| bool ComputeProgramKey::operator==(const ComputeProgramKey& rhs) const noexcept { | ||||
|     return std::memcmp(this, &rhs, sizeof *this) == 0; | ||||
| } | ||||
| 
 | ||||
| ComputeProgram::ComputeProgram(TextureCache& texture_cache_, BufferCache& buffer_cache_, | ||||
|                                Tegra::MemoryManager& gpu_memory_, | ||||
|                                Tegra::Engines::KeplerCompute& kepler_compute_, | ||||
|                                ProgramManager& program_manager_, OGLProgram program_, | ||||
|                                const Shader::Info& info_) | ||||
|     : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_}, | ||||
|       kepler_compute{kepler_compute_}, | ||||
|       program_manager{program_manager_}, program{std::move(program_)}, info{info_} { | ||||
|     for (const auto& desc : info.texture_buffer_descriptors) { | ||||
|         num_texture_buffers += desc.count; | ||||
|     } | ||||
|     for (const auto& desc : info.image_buffer_descriptors) { | ||||
|         num_image_buffers += desc.count; | ||||
|     } | ||||
|     u32 num_textures = num_texture_buffers; | ||||
|     for (const auto& desc : info.texture_descriptors) { | ||||
|         num_textures += desc.count; | ||||
|     } | ||||
|     ASSERT(num_textures <= MAX_TEXTURES); | ||||
| 
 | ||||
|     u32 num_images = num_image_buffers; | ||||
|     for (const auto& desc : info.image_descriptors) { | ||||
|         num_images += desc.count; | ||||
|     } | ||||
|     ASSERT(num_images <= MAX_IMAGES); | ||||
| } | ||||
| 
 | ||||
| void ComputeProgram::Configure() { | ||||
|     buffer_cache.SetEnabledComputeUniformBuffers(info.constant_buffer_mask); | ||||
|     buffer_cache.UnbindComputeStorageBuffers(); | ||||
|     size_t ssbo_index{}; | ||||
|     for (const auto& desc : info.storage_buffers_descriptors) { | ||||
|         ASSERT(desc.count == 1); | ||||
|         buffer_cache.BindComputeStorageBuffer(ssbo_index, desc.cbuf_index, desc.cbuf_offset, | ||||
|                                               desc.is_written); | ||||
|         ++ssbo_index; | ||||
|     } | ||||
|     texture_cache.SynchronizeComputeDescriptors(); | ||||
| 
 | ||||
|     std::array<ImageViewId, MAX_TEXTURES + MAX_IMAGES> image_view_ids; | ||||
|     boost::container::static_vector<u32, MAX_TEXTURES + MAX_IMAGES> image_view_indices; | ||||
|     std::array<GLuint, MAX_TEXTURES> samplers; | ||||
|     std::array<GLuint, MAX_TEXTURES> textures; | ||||
|     std::array<GLuint, MAX_IMAGES> images; | ||||
|     GLsizei sampler_binding{}; | ||||
|     GLsizei texture_binding{}; | ||||
|     GLsizei image_binding{}; | ||||
| 
 | ||||
|     const auto& qmd{kepler_compute.launch_description}; | ||||
|     const auto& cbufs{qmd.const_buffer_config}; | ||||
|     const bool via_header_index{qmd.linked_tsc != 0}; | ||||
|     const auto read_handle{[&](const auto& desc, u32 index) { | ||||
|         ASSERT(((qmd.const_buffer_enable_mask >> desc.cbuf_index) & 1) != 0); | ||||
|         const u32 index_offset{index << desc.size_shift}; | ||||
|         const u32 offset{desc.cbuf_offset + index_offset}; | ||||
|         const GPUVAddr addr{cbufs[desc.cbuf_index].Address() + offset}; | ||||
|         if constexpr (std::is_same_v<decltype(desc), const Shader::TextureDescriptor&> || | ||||
|                       std::is_same_v<decltype(desc), const Shader::TextureBufferDescriptor&>) { | ||||
|             if (desc.has_secondary) { | ||||
|                 ASSERT(((qmd.const_buffer_enable_mask >> desc.secondary_cbuf_index) & 1) != 0); | ||||
|                 const u32 secondary_offset{desc.secondary_cbuf_offset + index_offset}; | ||||
|                 const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].Address() + | ||||
|                                              secondary_offset}; | ||||
|                 const u32 lhs_raw{gpu_memory.Read<u32>(addr)}; | ||||
|                 const u32 rhs_raw{gpu_memory.Read<u32>(separate_addr)}; | ||||
|                 return TexturePair(lhs_raw | rhs_raw, via_header_index); | ||||
|             } | ||||
|         } | ||||
|         return TexturePair(gpu_memory.Read<u32>(addr), via_header_index); | ||||
|     }}; | ||||
|     const auto add_image{[&](const auto& desc) { | ||||
|         for (u32 index = 0; index < desc.count; ++index) { | ||||
|             const auto handle{read_handle(desc, index)}; | ||||
|             image_view_indices.push_back(handle.first); | ||||
|         } | ||||
|     }}; | ||||
|     for (const auto& desc : info.texture_buffer_descriptors) { | ||||
|         for (u32 index = 0; index < desc.count; ++index) { | ||||
|             const auto handle{read_handle(desc, index)}; | ||||
|             image_view_indices.push_back(handle.first); | ||||
|             samplers[sampler_binding++] = 0; | ||||
|         } | ||||
|     } | ||||
|     std::ranges::for_each(info.image_buffer_descriptors, add_image); | ||||
|     for (const auto& desc : info.texture_descriptors) { | ||||
|         for (u32 index = 0; index < desc.count; ++index) { | ||||
|             const auto handle{read_handle(desc, index)}; | ||||
|             image_view_indices.push_back(handle.first); | ||||
| 
 | ||||
|             Sampler* const sampler = texture_cache.GetComputeSampler(handle.second); | ||||
|             samplers[sampler_binding++] = sampler->Handle(); | ||||
|         } | ||||
|     } | ||||
|     std::ranges::for_each(info.image_descriptors, add_image); | ||||
| 
 | ||||
|     const std::span indices_span(image_view_indices.data(), image_view_indices.size()); | ||||
|     texture_cache.FillComputeImageViews(indices_span, image_view_ids); | ||||
| 
 | ||||
|     buffer_cache.UnbindComputeTextureBuffers(); | ||||
|     size_t texbuf_index{}; | ||||
|     const auto add_buffer{[&](const auto& desc) { | ||||
|         constexpr bool is_image = std::is_same_v<decltype(desc), const ImageBufferDescriptor&>; | ||||
|         for (u32 i = 0; i < desc.count; ++i) { | ||||
|             bool is_written{false}; | ||||
|             if constexpr (is_image) { | ||||
|                 is_written = desc.is_written; | ||||
|             } | ||||
|             ImageView& image_view{texture_cache.GetImageView(image_view_ids[texbuf_index])}; | ||||
|             buffer_cache.BindComputeTextureBuffer(texbuf_index, image_view.GpuAddr(), | ||||
|                                                   image_view.BufferSize(), image_view.format, | ||||
|                                                   is_written, is_image); | ||||
|             ++texbuf_index; | ||||
|         } | ||||
|     }}; | ||||
|     std::ranges::for_each(info.texture_buffer_descriptors, add_buffer); | ||||
|     std::ranges::for_each(info.image_buffer_descriptors, add_buffer); | ||||
| 
 | ||||
|     buffer_cache.UpdateComputeBuffers(); | ||||
| 
 | ||||
|     buffer_cache.runtime.SetImagePointers(textures.data(), images.data()); | ||||
|     buffer_cache.BindHostComputeBuffers(); | ||||
| 
 | ||||
|     const ImageId* views_it{image_view_ids.data() + num_texture_buffers + num_image_buffers}; | ||||
|     texture_binding += num_texture_buffers; | ||||
|     image_binding += num_image_buffers; | ||||
| 
 | ||||
|     for (const auto& desc : info.texture_descriptors) { | ||||
|         for (u32 index = 0; index < desc.count; ++index) { | ||||
|             ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; | ||||
|             textures[texture_binding++] = image_view.Handle(desc.type); | ||||
|         } | ||||
|     } | ||||
|     for (const auto& desc : info.image_descriptors) { | ||||
|         for (u32 index = 0; index < desc.count; ++index) { | ||||
|             ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; | ||||
|             images[image_binding++] = image_view.Handle(desc.type); | ||||
|         } | ||||
|     } | ||||
|     if (texture_binding != 0) { | ||||
|         ASSERT(texture_binding == sampler_binding); | ||||
|         glBindTextures(0, texture_binding, textures.data()); | ||||
|         glBindSamplers(0, sampler_binding, samplers.data()); | ||||
|     } | ||||
|     if (image_binding != 0) { | ||||
|         glBindImageTextures(0, image_binding, images.data()); | ||||
|     } | ||||
|     program_manager.BindProgram(program.handle); | ||||
| } | ||||
| 
 | ||||
| } // namespace OpenGL
 | ||||
							
								
								
									
										83
									
								
								src/video_core/renderer_opengl/gl_compute_program.h
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										83
									
								
								src/video_core/renderer_opengl/gl_compute_program.h
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,83 @@ | |||
| // Copyright 2021 yuzu Emulator Project
 | ||||
| // Licensed under GPLv2 or any later version
 | ||||
| // Refer to the license.txt file included.
 | ||||
| 
 | ||||
| #pragma once | ||||
| 
 | ||||
| #include <array> | ||||
| #include <type_traits> | ||||
| #include <utility> | ||||
| 
 | ||||
| #include "common/common_types.h" | ||||
| #include "shader_recompiler/shader_info.h" | ||||
| #include "video_core/renderer_opengl/gl_buffer_cache.h" | ||||
| #include "video_core/renderer_opengl/gl_resource_manager.h" | ||||
| #include "video_core/renderer_opengl/gl_texture_cache.h" | ||||
| 
 | ||||
| namespace Tegra { | ||||
| class MemoryManager; | ||||
| } | ||||
| 
 | ||||
| namespace Tegra::Engines { | ||||
| class KeplerCompute; | ||||
| } | ||||
| 
 | ||||
| namespace Shader { | ||||
| struct Info; | ||||
| } | ||||
| 
 | ||||
| namespace OpenGL { | ||||
| 
 | ||||
| class ProgramManager; | ||||
| 
 | ||||
| struct ComputeProgramKey { | ||||
|     u64 unique_hash; | ||||
|     u32 shared_memory_size; | ||||
|     std::array<u32, 3> workgroup_size; | ||||
| 
 | ||||
|     size_t Hash() const noexcept; | ||||
| 
 | ||||
|     bool operator==(const ComputeProgramKey&) const noexcept; | ||||
| 
 | ||||
|     bool operator!=(const ComputeProgramKey& rhs) const noexcept { | ||||
|         return !operator==(rhs); | ||||
|     } | ||||
| }; | ||||
| static_assert(std::has_unique_object_representations_v<ComputeProgramKey>); | ||||
| static_assert(std::is_trivially_copyable_v<ComputeProgramKey>); | ||||
| static_assert(std::is_trivially_constructible_v<ComputeProgramKey>); | ||||
| 
 | ||||
| class ComputeProgram { | ||||
| public: | ||||
|     explicit ComputeProgram(TextureCache& texture_cache_, BufferCache& buffer_cache_, | ||||
|                             Tegra::MemoryManager& gpu_memory_, | ||||
|                             Tegra::Engines::KeplerCompute& kepler_compute_, | ||||
|                             ProgramManager& program_manager_, OGLProgram program_, | ||||
|                             const Shader::Info& info_); | ||||
| 
 | ||||
|     void Configure(); | ||||
| 
 | ||||
| private: | ||||
|     TextureCache& texture_cache; | ||||
|     BufferCache& buffer_cache; | ||||
|     Tegra::MemoryManager& gpu_memory; | ||||
|     Tegra::Engines::KeplerCompute& kepler_compute; | ||||
|     ProgramManager& program_manager; | ||||
| 
 | ||||
|     OGLProgram program; | ||||
|     Shader::Info info; | ||||
| 
 | ||||
|     u32 num_texture_buffers{}; | ||||
|     u32 num_image_buffers{}; | ||||
| }; | ||||
| 
 | ||||
| } // namespace OpenGL
 | ||||
| 
 | ||||
| namespace std { | ||||
| template <> | ||||
| struct hash<OpenGL::ComputeProgramKey> { | ||||
|     size_t operator()(const OpenGL::ComputeProgramKey& k) const noexcept { | ||||
|         return k.Hash(); | ||||
|     } | ||||
| }; | ||||
| } // namespace std
 | ||||
|  | @ -22,34 +22,11 @@ | |||
| 
 | ||||
| namespace OpenGL { | ||||
| namespace { | ||||
| // One uniform block is reserved for emulation purposes
 | ||||
| constexpr u32 ReservedUniformBlocks = 1; | ||||
| 
 | ||||
| constexpr u32 NumStages = 5; | ||||
| 
 | ||||
| constexpr std::array LIMIT_UBOS = { | ||||
|     GL_MAX_VERTEX_UNIFORM_BLOCKS,          GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS, | ||||
|     GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS, GL_MAX_GEOMETRY_UNIFORM_BLOCKS, | ||||
|     GL_MAX_FRAGMENT_UNIFORM_BLOCKS,        GL_MAX_COMPUTE_UNIFORM_BLOCKS, | ||||
| }; | ||||
| constexpr std::array LIMIT_SSBOS = { | ||||
|     GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS,          GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS, | ||||
|     GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS, GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS, | ||||
|     GL_MAX_FRAGMENT_SHADER_STORAGE_BLOCKS,        GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS, | ||||
| }; | ||||
| constexpr std::array LIMIT_SAMPLERS = { | ||||
|     GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS, | ||||
|     GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS, | ||||
|     GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS, | ||||
|     GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS, | ||||
|     GL_MAX_TEXTURE_IMAGE_UNITS, | ||||
|     GL_MAX_COMPUTE_TEXTURE_IMAGE_UNITS, | ||||
| }; | ||||
| constexpr std::array LIMIT_IMAGES = { | ||||
|     GL_MAX_VERTEX_IMAGE_UNIFORMS,          GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS, | ||||
|     GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS, GL_MAX_GEOMETRY_IMAGE_UNIFORMS, | ||||
|     GL_MAX_FRAGMENT_IMAGE_UNIFORMS,        GL_MAX_COMPUTE_IMAGE_UNIFORMS, | ||||
| }; | ||||
| 
 | ||||
| template <typename T> | ||||
| T GetInteger(GLenum pname) { | ||||
|  | @ -82,15 +59,6 @@ bool HasExtension(std::span<const std::string_view> extensions, std::string_view | |||
|     return std::ranges::find(extensions, extension) != extensions.end(); | ||||
| } | ||||
| 
 | ||||
| u32 Extract(u32& base, u32& num, u32 amount, std::optional<GLenum> limit = {}) { | ||||
|     ASSERT(num >= amount); | ||||
|     if (limit) { | ||||
|         amount = std::min(amount, GetInteger<u32>(*limit)); | ||||
|     } | ||||
|     num -= amount; | ||||
|     return std::exchange(base, base + amount); | ||||
| } | ||||
| 
 | ||||
| std::array<u32, Tegra::Engines::MaxShaderTypes> BuildMaxUniformBuffers() noexcept { | ||||
|     std::array<u32, Tegra::Engines::MaxShaderTypes> max; | ||||
|     std::ranges::transform(LIMIT_UBOS, max.begin(), | ||||
|  | @ -98,62 +66,6 @@ std::array<u32, Tegra::Engines::MaxShaderTypes> BuildMaxUniformBuffers() noexcep | |||
|     return max; | ||||
| } | ||||
| 
 | ||||
| std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> BuildBaseBindings() noexcept { | ||||
|     std::array<Device::BaseBindings, Tegra::Engines::MaxShaderTypes> bindings; | ||||
| 
 | ||||
|     static constexpr std::array<std::size_t, 5> stage_swizzle{0, 1, 2, 3, 4}; | ||||
|     const u32 total_ubos = GetInteger<u32>(GL_MAX_UNIFORM_BUFFER_BINDINGS); | ||||
|     const u32 total_ssbos = GetInteger<u32>(GL_MAX_SHADER_STORAGE_BUFFER_BINDINGS); | ||||
|     const u32 total_samplers = GetInteger<u32>(GL_MAX_COMBINED_TEXTURE_IMAGE_UNITS); | ||||
| 
 | ||||
|     u32 num_ubos = total_ubos - ReservedUniformBlocks; | ||||
|     u32 num_ssbos = total_ssbos; | ||||
|     u32 num_samplers = total_samplers; | ||||
| 
 | ||||
|     u32 base_ubo = ReservedUniformBlocks; | ||||
|     u32 base_ssbo = 0; | ||||
|     u32 base_samplers = 0; | ||||
| 
 | ||||
|     for (std::size_t i = 0; i < NumStages; ++i) { | ||||
|         const std::size_t stage = stage_swizzle[i]; | ||||
|         bindings[stage] = { | ||||
|             Extract(base_ubo, num_ubos, total_ubos / NumStages, LIMIT_UBOS[stage]), | ||||
|             Extract(base_ssbo, num_ssbos, total_ssbos / NumStages, LIMIT_SSBOS[stage]), | ||||
|             Extract(base_samplers, num_samplers, total_samplers / NumStages, | ||||
|                     LIMIT_SAMPLERS[stage])}; | ||||
|     } | ||||
| 
 | ||||
|     u32 num_images = GetInteger<u32>(GL_MAX_IMAGE_UNITS); | ||||
|     u32 base_images = 0; | ||||
| 
 | ||||
|     // GL_MAX_IMAGE_UNITS is guaranteed by the spec to have a minimum value of 8.
 | ||||
|     // Due to the limitation of GL_MAX_IMAGE_UNITS, reserve at least 4 image bindings on the
 | ||||
|     // fragment stage, and at least 1 for the rest of the stages.
 | ||||
|     // So far games are observed to use 1 image binding on vertex and 4 on fragment stages.
 | ||||
| 
 | ||||
|     // Reserve at least 4 image bindings on the fragment stage.
 | ||||
|     bindings[4].image = | ||||
|         Extract(base_images, num_images, std::max(4U, num_images / NumStages), LIMIT_IMAGES[4]); | ||||
| 
 | ||||
|     // This is guaranteed to be at least 1.
 | ||||
|     const u32 total_extracted_images = num_images / (NumStages - 1); | ||||
| 
 | ||||
|     // Reserve the other image bindings.
 | ||||
|     for (std::size_t i = 0; i < NumStages; ++i) { | ||||
|         const std::size_t stage = stage_swizzle[i]; | ||||
|         if (stage == 4) { | ||||
|             continue; | ||||
|         } | ||||
|         bindings[stage].image = | ||||
|             Extract(base_images, num_images, total_extracted_images, LIMIT_IMAGES[stage]); | ||||
|     } | ||||
| 
 | ||||
|     // Compute doesn't care about any of this.
 | ||||
|     bindings[5] = {0, 0, 0, 0}; | ||||
| 
 | ||||
|     return bindings; | ||||
| } | ||||
| 
 | ||||
| bool IsASTCSupported() { | ||||
|     static constexpr std::array targets = {GL_TEXTURE_2D, GL_TEXTURE_2D_ARRAY}; | ||||
|     static constexpr std::array formats = { | ||||
|  | @ -225,7 +137,6 @@ Device::Device() { | |||
|     } | ||||
| 
 | ||||
|     max_uniform_buffers = BuildMaxUniformBuffers(); | ||||
|     base_bindings = BuildBaseBindings(); | ||||
|     uniform_buffer_alignment = GetInteger<size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); | ||||
|     shader_storage_alignment = GetInteger<size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); | ||||
|     max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS); | ||||
|  |  | |||
|  | @ -12,13 +12,6 @@ namespace OpenGL { | |||
| 
 | ||||
| class Device { | ||||
| public: | ||||
|     struct BaseBindings { | ||||
|         u32 uniform_buffer{}; | ||||
|         u32 shader_storage_buffer{}; | ||||
|         u32 sampler{}; | ||||
|         u32 image{}; | ||||
|     }; | ||||
| 
 | ||||
|     explicit Device(); | ||||
|     explicit Device(std::nullptr_t); | ||||
| 
 | ||||
|  | @ -28,14 +21,6 @@ public: | |||
|         return max_uniform_buffers[static_cast<std::size_t>(shader_type)]; | ||||
|     } | ||||
| 
 | ||||
|     const BaseBindings& GetBaseBindings(std::size_t stage_index) const noexcept { | ||||
|         return base_bindings[stage_index]; | ||||
|     } | ||||
| 
 | ||||
|     const BaseBindings& GetBaseBindings(Tegra::Engines::ShaderType shader_type) const noexcept { | ||||
|         return GetBaseBindings(static_cast<std::size_t>(shader_type)); | ||||
|     } | ||||
| 
 | ||||
|     size_t GetUniformBufferAlignment() const { | ||||
|         return uniform_buffer_alignment; | ||||
|     } | ||||
|  | @ -134,7 +119,6 @@ private: | |||
| 
 | ||||
|     std::string vendor_name; | ||||
|     std::array<u32, Tegra::Engines::MaxShaderTypes> max_uniform_buffers{}; | ||||
|     std::array<BaseBindings, Tegra::Engines::MaxShaderTypes> base_bindings{}; | ||||
|     size_t uniform_buffer_alignment{}; | ||||
|     size_t shader_storage_alignment{}; | ||||
|     u32 max_vertex_attributes{}; | ||||
|  |  | |||
							
								
								
									
										296
									
								
								src/video_core/renderer_opengl/gl_graphics_program.cpp
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										296
									
								
								src/video_core/renderer_opengl/gl_graphics_program.cpp
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,296 @@ | |||
| // Copyright 2021 yuzu Emulator Project
 | ||||
| // Licensed under GPLv2 or any later version
 | ||||
| // Refer to the license.txt file included.
 | ||||
| 
 | ||||
| #include <cstring> | ||||
| 
 | ||||
| #include "common/cityhash.h" | ||||
| #include "shader_recompiler/shader_info.h" | ||||
| #include "video_core/renderer_opengl/gl_graphics_program.h" | ||||
| #include "video_core/renderer_opengl/gl_shader_manager.h" | ||||
| #include "video_core/renderer_opengl/gl_state_tracker.h" | ||||
| #include "video_core/texture_cache/texture_cache.h" | ||||
| 
 | ||||
| namespace OpenGL { | ||||
| 
 | ||||
| using Shader::ImageBufferDescriptor; | ||||
| using Tegra::Texture::TexturePair; | ||||
| using VideoCommon::ImageId; | ||||
| 
 | ||||
| constexpr u32 MAX_TEXTURES = 64; | ||||
| constexpr u32 MAX_IMAGES = 8; | ||||
| 
 | ||||
| size_t GraphicsProgramKey::Hash() const noexcept { | ||||
|     return static_cast<size_t>(Common::CityHash64(reinterpret_cast<const char*>(this), Size())); | ||||
| } | ||||
| 
 | ||||
| bool GraphicsProgramKey::operator==(const GraphicsProgramKey& rhs) const noexcept { | ||||
|     return std::memcmp(this, &rhs, Size()) == 0; | ||||
| } | ||||
| 
 | ||||
| GraphicsProgram::GraphicsProgram(TextureCache& texture_cache_, BufferCache& buffer_cache_, | ||||
|                                  Tegra::MemoryManager& gpu_memory_, | ||||
|                                  Tegra::Engines::Maxwell3D& maxwell3d_, | ||||
|                                  ProgramManager& program_manager_, StateTracker& state_tracker_, | ||||
|                                  OGLProgram program_, | ||||
|                                  const std::array<const Shader::Info*, 5>& infos) | ||||
|     : texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, | ||||
|       gpu_memory{gpu_memory_}, maxwell3d{maxwell3d_}, program_manager{program_manager_}, | ||||
|       state_tracker{state_tracker_}, program{std::move(program_)} { | ||||
|     std::ranges::transform(infos, stage_infos.begin(), | ||||
|                            [](const Shader::Info* info) { return info ? *info : Shader::Info{}; }); | ||||
| 
 | ||||
|     u32 num_textures{}; | ||||
|     u32 num_images{}; | ||||
|     for (size_t stage = 0; stage < base_uniform_bindings.size() - 1; ++stage) { | ||||
|         const auto& info{stage_infos[stage]}; | ||||
|         base_uniform_bindings[stage + 1] = base_uniform_bindings[stage]; | ||||
|         base_storage_bindings[stage + 1] = base_storage_bindings[stage]; | ||||
|         for (const auto& desc : info.constant_buffer_descriptors) { | ||||
|             base_uniform_bindings[stage + 1] += desc.count; | ||||
|         } | ||||
|         for (const auto& desc : info.storage_buffers_descriptors) { | ||||
|             base_storage_bindings[stage + 1] += desc.count; | ||||
|         } | ||||
|         for (const auto& desc : info.texture_buffer_descriptors) { | ||||
|             num_texture_buffers[stage] += desc.count; | ||||
|             num_textures += desc.count; | ||||
|         } | ||||
|         for (const auto& desc : info.image_buffer_descriptors) { | ||||
|             num_image_buffers[stage] += desc.count; | ||||
|             num_images += desc.count; | ||||
|         } | ||||
|         for (const auto& desc : info.texture_descriptors) { | ||||
|             num_textures += desc.count; | ||||
|         } | ||||
|         for (const auto& desc : info.image_descriptors) { | ||||
|             num_images += desc.count; | ||||
|         } | ||||
|     } | ||||
|     ASSERT(num_textures <= MAX_TEXTURES); | ||||
|     ASSERT(num_images <= MAX_IMAGES); | ||||
| } | ||||
| 
 | ||||
| struct Spec { | ||||
|     static constexpr std::array<bool, 5> enabled_stages{true, true, true, true, true}; | ||||
|     static constexpr bool has_storage_buffers = true; | ||||
|     static constexpr bool has_texture_buffers = true; | ||||
|     static constexpr bool has_image_buffers = true; | ||||
|     static constexpr bool has_images = true; | ||||
| }; | ||||
| 
 | ||||
| void GraphicsProgram::Configure(bool is_indexed) { | ||||
|     std::array<ImageId, MAX_TEXTURES + MAX_IMAGES> image_view_ids; | ||||
|     std::array<u32, MAX_TEXTURES + MAX_IMAGES> image_view_indices; | ||||
|     std::array<GLuint, MAX_TEXTURES> samplers; | ||||
|     size_t image_view_index{}; | ||||
|     GLsizei sampler_binding{}; | ||||
| 
 | ||||
|     texture_cache.SynchronizeGraphicsDescriptors(); | ||||
| 
 | ||||
|     buffer_cache.runtime.SetBaseUniformBindings(base_uniform_bindings); | ||||
|     buffer_cache.runtime.SetBaseStorageBindings(base_storage_bindings); | ||||
| 
 | ||||
|     const auto& regs{maxwell3d.regs}; | ||||
|     const bool via_header_index{regs.sampler_index == Maxwell::SamplerIndex::ViaHeaderIndex}; | ||||
|     const auto config_stage{[&](size_t stage) { | ||||
|         const Shader::Info& info{stage_infos[stage]}; | ||||
|         buffer_cache.SetEnabledUniformBuffers(stage, info.constant_buffer_mask); | ||||
|         buffer_cache.UnbindGraphicsStorageBuffers(stage); | ||||
|         if constexpr (Spec::has_storage_buffers) { | ||||
|             size_t ssbo_index{}; | ||||
|             for (const auto& desc : info.storage_buffers_descriptors) { | ||||
|                 ASSERT(desc.count == 1); | ||||
|                 buffer_cache.BindGraphicsStorageBuffer(stage, ssbo_index, desc.cbuf_index, | ||||
|                                                        desc.cbuf_offset, desc.is_written); | ||||
|                 ++ssbo_index; | ||||
|             } | ||||
|         } | ||||
|         const auto& cbufs{maxwell3d.state.shader_stages[stage].const_buffers}; | ||||
|         const auto read_handle{[&](const auto& desc, u32 index) { | ||||
|             ASSERT(cbufs[desc.cbuf_index].enabled); | ||||
|             const u32 index_offset{index << desc.size_shift}; | ||||
|             const u32 offset{desc.cbuf_offset + index_offset}; | ||||
|             const GPUVAddr addr{cbufs[desc.cbuf_index].address + offset}; | ||||
|             if constexpr (std::is_same_v<decltype(desc), const Shader::TextureDescriptor&> || | ||||
|                           std::is_same_v<decltype(desc), const Shader::TextureBufferDescriptor&>) { | ||||
|                 if (desc.has_secondary) { | ||||
|                     ASSERT(cbufs[desc.secondary_cbuf_index].enabled); | ||||
|                     const u32 second_offset{desc.secondary_cbuf_offset + index_offset}; | ||||
|                     const GPUVAddr separate_addr{cbufs[desc.secondary_cbuf_index].address + | ||||
|                                                  second_offset}; | ||||
|                     const u32 lhs_raw{gpu_memory.Read<u32>(addr)}; | ||||
|                     const u32 rhs_raw{gpu_memory.Read<u32>(separate_addr)}; | ||||
|                     const u32 raw{lhs_raw | rhs_raw}; | ||||
|                     return TexturePair(raw, via_header_index); | ||||
|                 } | ||||
|             } | ||||
|             return TexturePair(gpu_memory.Read<u32>(addr), via_header_index); | ||||
|         }}; | ||||
|         const auto add_image{[&](const auto& desc) { | ||||
|             for (u32 index = 0; index < desc.count; ++index) { | ||||
|                 const auto handle{read_handle(desc, index)}; | ||||
|                 image_view_indices[image_view_index++] = handle.first; | ||||
|             } | ||||
|         }}; | ||||
|         if constexpr (Spec::has_texture_buffers) { | ||||
|             for (const auto& desc : info.texture_buffer_descriptors) { | ||||
|                 for (u32 index = 0; index < desc.count; ++index) { | ||||
|                     const auto handle{read_handle(desc, index)}; | ||||
|                     image_view_indices[image_view_index++] = handle.first; | ||||
|                     samplers[sampler_binding++] = 0; | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|         if constexpr (Spec::has_image_buffers) { | ||||
|             for (const auto& desc : info.image_buffer_descriptors) { | ||||
|                 add_image(desc); | ||||
|             } | ||||
|         } | ||||
|         for (const auto& desc : info.texture_descriptors) { | ||||
|             for (u32 index = 0; index < desc.count; ++index) { | ||||
|                 const auto handle{read_handle(desc, index)}; | ||||
|                 image_view_indices[image_view_index++] = handle.first; | ||||
| 
 | ||||
|                 Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.second)}; | ||||
|                 samplers[sampler_binding++] = sampler->Handle(); | ||||
|             } | ||||
|         } | ||||
|         if constexpr (Spec::has_images) { | ||||
|             for (const auto& desc : info.image_descriptors) { | ||||
|                 add_image(desc); | ||||
|             } | ||||
|         } | ||||
|     }}; | ||||
|     if constexpr (Spec::enabled_stages[0]) { | ||||
|         config_stage(0); | ||||
|     } | ||||
|     if constexpr (Spec::enabled_stages[1]) { | ||||
|         config_stage(1); | ||||
|     } | ||||
|     if constexpr (Spec::enabled_stages[2]) { | ||||
|         config_stage(2); | ||||
|     } | ||||
|     if constexpr (Spec::enabled_stages[3]) { | ||||
|         config_stage(3); | ||||
|     } | ||||
|     if constexpr (Spec::enabled_stages[4]) { | ||||
|         config_stage(4); | ||||
|     } | ||||
|     const std::span indices_span(image_view_indices.data(), image_view_index); | ||||
|     texture_cache.FillGraphicsImageViews(indices_span, image_view_ids); | ||||
| 
 | ||||
|     ImageId* texture_buffer_index{image_view_ids.data()}; | ||||
|     const auto bind_stage_info{[&](size_t stage) { | ||||
|         size_t index{}; | ||||
|         const auto add_buffer{[&](const auto& desc) { | ||||
|             constexpr bool is_image = std::is_same_v<decltype(desc), const ImageBufferDescriptor&>; | ||||
|             for (u32 i = 0; i < desc.count; ++i) { | ||||
|                 bool is_written{false}; | ||||
|                 if constexpr (is_image) { | ||||
|                     is_written = desc.is_written; | ||||
|                 } | ||||
|                 ImageView& image_view{texture_cache.GetImageView(*texture_buffer_index)}; | ||||
|                 buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(), | ||||
|                                                        image_view.BufferSize(), image_view.format, | ||||
|                                                        is_written, is_image); | ||||
|                 ++index; | ||||
|                 ++texture_buffer_index; | ||||
|             } | ||||
|         }}; | ||||
|         const Shader::Info& info{stage_infos[stage]}; | ||||
|         buffer_cache.UnbindGraphicsTextureBuffers(stage); | ||||
| 
 | ||||
|         if constexpr (Spec::has_texture_buffers) { | ||||
|             for (const auto& desc : info.texture_buffer_descriptors) { | ||||
|                 add_buffer(desc); | ||||
|             } | ||||
|         } | ||||
|         if constexpr (Spec::has_image_buffers) { | ||||
|             for (const auto& desc : info.image_buffer_descriptors) { | ||||
|                 add_buffer(desc); | ||||
|             } | ||||
|         } | ||||
|         for (const auto& desc : info.texture_descriptors) { | ||||
|             texture_buffer_index += desc.count; | ||||
|         } | ||||
|         if constexpr (Spec::has_images) { | ||||
|             for (const auto& desc : info.image_descriptors) { | ||||
|                 texture_buffer_index += desc.count; | ||||
|             } | ||||
|         } | ||||
|     }}; | ||||
|     if constexpr (Spec::enabled_stages[0]) { | ||||
|         bind_stage_info(0); | ||||
|     } | ||||
|     if constexpr (Spec::enabled_stages[1]) { | ||||
|         bind_stage_info(1); | ||||
|     } | ||||
|     if constexpr (Spec::enabled_stages[2]) { | ||||
|         bind_stage_info(2); | ||||
|     } | ||||
|     if constexpr (Spec::enabled_stages[3]) { | ||||
|         bind_stage_info(3); | ||||
|     } | ||||
|     if constexpr (Spec::enabled_stages[4]) { | ||||
|         bind_stage_info(4); | ||||
|     } | ||||
|     buffer_cache.UpdateGraphicsBuffers(is_indexed); | ||||
|     buffer_cache.BindHostGeometryBuffers(is_indexed); | ||||
| 
 | ||||
|     const ImageId* views_it{image_view_ids.data()}; | ||||
|     GLsizei texture_binding = 0; | ||||
|     GLsizei image_binding = 0; | ||||
|     std::array<GLuint, MAX_TEXTURES> textures; | ||||
|     std::array<GLuint, MAX_IMAGES> images; | ||||
|     const auto prepare_stage{[&](size_t stage) { | ||||
|         buffer_cache.runtime.SetImagePointers(&textures[texture_binding], &images[image_binding]); | ||||
|         buffer_cache.BindHostStageBuffers(stage); | ||||
| 
 | ||||
|         texture_binding += num_texture_buffers[stage]; | ||||
|         image_binding += num_image_buffers[stage]; | ||||
| 
 | ||||
|         const auto& info{stage_infos[stage]}; | ||||
|         for (const auto& desc : info.texture_descriptors) { | ||||
|             for (u32 index = 0; index < desc.count; ++index) { | ||||
|                 ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; | ||||
|                 textures[texture_binding++] = image_view.Handle(desc.type); | ||||
|             } | ||||
|         } | ||||
|         for (const auto& desc : info.image_descriptors) { | ||||
|             for (u32 index = 0; index < desc.count; ++index) { | ||||
|                 ImageView& image_view{texture_cache.GetImageView(*(views_it++))}; | ||||
|                 images[image_binding++] = image_view.Handle(desc.type); | ||||
|             } | ||||
|         } | ||||
|     }}; | ||||
|     if constexpr (Spec::enabled_stages[0]) { | ||||
|         prepare_stage(0); | ||||
|     } | ||||
|     if constexpr (Spec::enabled_stages[1]) { | ||||
|         prepare_stage(1); | ||||
|     } | ||||
|     if constexpr (Spec::enabled_stages[2]) { | ||||
|         prepare_stage(2); | ||||
|     } | ||||
|     if constexpr (Spec::enabled_stages[3]) { | ||||
|         prepare_stage(3); | ||||
|     } | ||||
|     if constexpr (Spec::enabled_stages[4]) { | ||||
|         prepare_stage(4); | ||||
|     } | ||||
|     if (texture_binding != 0) { | ||||
|         ASSERT(texture_binding == sampler_binding); | ||||
|         glBindTextures(0, texture_binding, textures.data()); | ||||
|         glBindSamplers(0, sampler_binding, samplers.data()); | ||||
|     } | ||||
|     if (image_binding != 0) { | ||||
|         glBindImageTextures(0, image_binding, images.data()); | ||||
|     } | ||||
|     texture_cache.UpdateRenderTargets(false); | ||||
| 
 | ||||
|     state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); | ||||
|     program_manager.BindProgram(program.handle); | ||||
| } | ||||
| 
 | ||||
| } // namespace OpenGL
 | ||||
							
								
								
									
										105
									
								
								src/video_core/renderer_opengl/gl_graphics_program.h
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										105
									
								
								src/video_core/renderer_opengl/gl_graphics_program.h
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,105 @@ | |||
| // Copyright 2021 yuzu Emulator Project
 | ||||
| // Licensed under GPLv2 or any later version
 | ||||
| // Refer to the license.txt file included.
 | ||||
| 
 | ||||
| #pragma once | ||||
| 
 | ||||
| #include <array> | ||||
| #include <type_traits> | ||||
| #include <utility> | ||||
| 
 | ||||
| #include "common/bit_field.h" | ||||
| #include "common/common_types.h" | ||||
| #include "shader_recompiler/shader_info.h" | ||||
| #include "video_core/engines/maxwell_3d.h" | ||||
| #include "video_core/memory_manager.h" | ||||
| #include "video_core/renderer_opengl/gl_buffer_cache.h" | ||||
| #include "video_core/renderer_opengl/gl_resource_manager.h" | ||||
| #include "video_core/renderer_opengl/gl_texture_cache.h" | ||||
| 
 | ||||
| namespace OpenGL { | ||||
| 
 | ||||
| class ProgramManager; | ||||
| 
 | ||||
| using Maxwell = Tegra::Engines::Maxwell3D::Regs; | ||||
| 
 | ||||
| struct GraphicsProgramKey { | ||||
|     struct TransformFeedbackState { | ||||
|         struct Layout { | ||||
|             u32 stream; | ||||
|             u32 varying_count; | ||||
|             u32 stride; | ||||
|         }; | ||||
|         std::array<Layout, Maxwell::NumTransformFeedbackBuffers> layouts; | ||||
|         std::array<std::array<u8, 128>, Maxwell::NumTransformFeedbackBuffers> varyings; | ||||
|     }; | ||||
| 
 | ||||
|     std::array<u64, 6> unique_hashes; | ||||
|     union { | ||||
|         u32 raw; | ||||
|         BitField<0, 1, u32> xfb_enabled; | ||||
|         BitField<1, 1, u32> early_z; | ||||
|         BitField<2, 4, Maxwell::PrimitiveTopology> gs_input_topology; | ||||
|         BitField<6, 2, Maxwell::TessellationPrimitive> tessellation_primitive; | ||||
|         BitField<8, 2, Maxwell::TessellationSpacing> tessellation_spacing; | ||||
|         BitField<10, 1, u32> tessellation_clockwise; | ||||
|     }; | ||||
|     std::array<u32, 3> padding; | ||||
|     TransformFeedbackState xfb_state; | ||||
| 
 | ||||
|     size_t Hash() const noexcept; | ||||
| 
 | ||||
|     bool operator==(const GraphicsProgramKey&) const noexcept; | ||||
| 
 | ||||
|     bool operator!=(const GraphicsProgramKey& rhs) const noexcept { | ||||
|         return !operator==(rhs); | ||||
|     } | ||||
| 
 | ||||
|     [[nodiscard]] size_t Size() const noexcept { | ||||
|         if (xfb_enabled != 0) { | ||||
|             return sizeof(GraphicsProgramKey); | ||||
|         } else { | ||||
|             return offsetof(GraphicsProgramKey, padding); | ||||
|         } | ||||
|     } | ||||
| }; | ||||
| static_assert(std::has_unique_object_representations_v<GraphicsProgramKey>); | ||||
| static_assert(std::is_trivially_copyable_v<GraphicsProgramKey>); | ||||
| static_assert(std::is_trivially_constructible_v<GraphicsProgramKey>); | ||||
| 
 | ||||
| class GraphicsProgram { | ||||
| public: | ||||
|     explicit GraphicsProgram(TextureCache& texture_cache_, BufferCache& buffer_cache_, | ||||
|                              Tegra::MemoryManager& gpu_memory_, | ||||
|                              Tegra::Engines::Maxwell3D& maxwell3d_, | ||||
|                              ProgramManager& program_manager_, StateTracker& state_tracker_, | ||||
|                              OGLProgram program_, const std::array<const Shader::Info*, 5>& infos); | ||||
| 
 | ||||
|     void Configure(bool is_indexed); | ||||
| 
 | ||||
| private: | ||||
|     TextureCache& texture_cache; | ||||
|     BufferCache& buffer_cache; | ||||
|     Tegra::MemoryManager& gpu_memory; | ||||
|     Tegra::Engines::Maxwell3D& maxwell3d; | ||||
|     ProgramManager& program_manager; | ||||
|     StateTracker& state_tracker; | ||||
| 
 | ||||
|     OGLProgram program; | ||||
|     std::array<Shader::Info, 5> stage_infos{}; | ||||
|     std::array<u32, 5> base_uniform_bindings{}; | ||||
|     std::array<u32, 5> base_storage_bindings{}; | ||||
|     std::array<u32, 5> num_texture_buffers{}; | ||||
|     std::array<u32, 5> num_image_buffers{}; | ||||
| }; | ||||
| 
 | ||||
| } // namespace OpenGL
 | ||||
| 
 | ||||
| namespace std { | ||||
| template <> | ||||
| struct hash<OpenGL::GraphicsProgramKey> { | ||||
|     size_t operator()(const OpenGL::GraphicsProgramKey& k) const noexcept { | ||||
|         return k.Hash(); | ||||
|     } | ||||
| }; | ||||
| } // namespace std
 | ||||
|  | @ -98,7 +98,8 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra | |||
|       texture_cache(texture_cache_runtime, *this, maxwell3d, kepler_compute, gpu_memory), | ||||
|       buffer_cache_runtime(device), | ||||
|       buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime), | ||||
|       shader_cache(*this, emu_window_, gpu, maxwell3d, kepler_compute, gpu_memory, device), | ||||
|       shader_cache(*this, emu_window_, maxwell3d, kepler_compute, gpu_memory, device, texture_cache, | ||||
|                    buffer_cache, program_manager, state_tracker), | ||||
|       query_cache(*this, maxwell3d, gpu_memory), accelerate_dma(buffer_cache), | ||||
|       fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache) {} | ||||
| 
 | ||||
|  | @ -246,12 +247,10 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { | |||
| 
 | ||||
|     SyncState(); | ||||
| 
 | ||||
|     // Setup shaders and their used resources.
 | ||||
|     std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; | ||||
|     GraphicsProgram* const program{shader_cache.CurrentGraphicsProgram()}; | ||||
| 
 | ||||
|     texture_cache.UpdateRenderTargets(false); | ||||
|     state_tracker.BindFramebuffer(texture_cache.GetFramebuffer()->Handle()); | ||||
|     program_manager.BindGraphicsPipeline(); | ||||
|     std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; | ||||
|     program->Configure(is_indexed); | ||||
| 
 | ||||
|     const GLenum primitive_mode = MaxwellToGL::PrimitiveTopology(maxwell3d.regs.draw.topology); | ||||
|     BeginTransformFeedback(primitive_mode); | ||||
|  | @ -293,7 +292,6 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { | |||
|                                               num_instances, base_instance); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     EndTransformFeedback(); | ||||
| 
 | ||||
|     ++num_queued_commands; | ||||
|  | @ -302,7 +300,14 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) { | |||
| } | ||||
| 
 | ||||
| void RasterizerOpenGL::DispatchCompute() { | ||||
|     UNREACHABLE_MSG("Not implemented"); | ||||
|     ComputeProgram* const program{shader_cache.CurrentComputeProgram()}; | ||||
|     if (!program) { | ||||
|         return; | ||||
|     } | ||||
|     program->Configure(); | ||||
|     const auto& qmd{kepler_compute.launch_description}; | ||||
|     glDispatchCompute(qmd.grid_dim_x, qmd.grid_dim_y, qmd.grid_dim_z); | ||||
|     ++num_queued_commands; | ||||
| } | ||||
| 
 | ||||
| void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) { | ||||
|  | @ -515,7 +520,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, | |||
|     // ASSERT_MSG(image_view->size.width == config.width, "Framebuffer width is different");
 | ||||
|     // ASSERT_MSG(image_view->size.height == config.height, "Framebuffer height is different");
 | ||||
| 
 | ||||
|     screen_info.display_texture = image_view->Handle(ImageViewType::e2D); | ||||
|     screen_info.display_texture = image_view->Handle(Shader::TextureType::Color2D); | ||||
|     screen_info.display_srgb = VideoCore::Surface::IsPixelFormatSRGB(image_view->format); | ||||
|     return true; | ||||
| } | ||||
|  |  | |||
|  | @ -16,6 +16,11 @@ | |||
| #include "common/scope_exit.h" | ||||
| #include "core/core.h" | ||||
| #include "core/frontend/emu_window.h" | ||||
| #include "shader_recompiler/backend/spirv/emit_spirv.h" | ||||
| #include "shader_recompiler/frontend/ir/program.h" | ||||
| #include "shader_recompiler/frontend/maxwell/control_flow.h" | ||||
| #include "shader_recompiler/frontend/maxwell/program.h" | ||||
| #include "shader_recompiler/profile.h" | ||||
| #include "video_core/engines/kepler_compute.h" | ||||
| #include "video_core/engines/maxwell_3d.h" | ||||
| #include "video_core/engines/shader_type.h" | ||||
|  | @ -25,17 +30,281 @@ | |||
| #include "video_core/renderer_opengl/gl_shader_cache.h" | ||||
| #include "video_core/renderer_opengl/gl_state_tracker.h" | ||||
| #include "video_core/shader_cache.h" | ||||
| #include "video_core/shader_environment.h" | ||||
| #include "video_core/shader_notify.h" | ||||
| 
 | ||||
| namespace OpenGL { | ||||
| namespace { | ||||
| // FIXME: Move this somewhere else
 | ||||
| const Shader::Profile profile{ | ||||
|     .supported_spirv = 0x00010000, | ||||
| 
 | ||||
|     .unified_descriptor_binding = false, | ||||
|     .support_descriptor_aliasing = false, | ||||
|     .support_int8 = false, | ||||
|     .support_int16 = false, | ||||
|     .support_vertex_instance_id = true, | ||||
|     .support_float_controls = false, | ||||
|     .support_separate_denorm_behavior = false, | ||||
|     .support_separate_rounding_mode = false, | ||||
|     .support_fp16_denorm_preserve = false, | ||||
|     .support_fp32_denorm_preserve = false, | ||||
|     .support_fp16_denorm_flush = false, | ||||
|     .support_fp32_denorm_flush = false, | ||||
|     .support_fp16_signed_zero_nan_preserve = false, | ||||
|     .support_fp32_signed_zero_nan_preserve = false, | ||||
|     .support_fp64_signed_zero_nan_preserve = false, | ||||
|     .support_explicit_workgroup_layout = false, | ||||
|     .support_vote = true, | ||||
|     .support_viewport_index_layer_non_geometry = true, | ||||
|     .support_viewport_mask = true, | ||||
|     .support_typeless_image_loads = true, | ||||
|     .support_demote_to_helper_invocation = false, | ||||
|     .warp_size_potentially_larger_than_guest = true, | ||||
|     .support_int64_atomics = false, | ||||
|     .lower_left_origin_mode = true, | ||||
| 
 | ||||
|     .has_broken_spirv_clamp = true, | ||||
|     .has_broken_unsigned_image_offsets = true, | ||||
|     .has_broken_signed_operations = true, | ||||
|     .ignore_nan_fp_comparisons = true, | ||||
| 
 | ||||
|     .generic_input_types = {}, | ||||
|     .convert_depth_mode = false, | ||||
|     .force_early_z = false, | ||||
| 
 | ||||
|     .tess_primitive = {}, | ||||
|     .tess_spacing = {}, | ||||
|     .tess_clockwise = false, | ||||
| 
 | ||||
|     .input_topology = Shader::InputTopology::Triangles, | ||||
| 
 | ||||
|     .fixed_state_point_size = std::nullopt, | ||||
| 
 | ||||
|     .alpha_test_func = Shader::CompareFunction::Always, | ||||
|     .alpha_test_reference = 0.0f, | ||||
| 
 | ||||
|     .y_negate = false, | ||||
| 
 | ||||
|     .xfb_varyings = {}, | ||||
| }; | ||||
| 
 | ||||
| using Shader::Backend::SPIRV::EmitSPIRV; | ||||
| using Shader::Maxwell::TranslateProgram; | ||||
| using VideoCommon::ComputeEnvironment; | ||||
| using VideoCommon::GraphicsEnvironment; | ||||
| 
 | ||||
| template <typename Container> | ||||
| auto MakeSpan(Container& container) { | ||||
|     return std::span(container.data(), container.size()); | ||||
| } | ||||
| 
 | ||||
| void AddShader(GLenum stage, GLuint program, std::span<const u32> code) { | ||||
|     OGLShader shader; | ||||
|     shader.handle = glCreateShader(stage); | ||||
| 
 | ||||
|     glShaderBinary(1, &shader.handle, GL_SHADER_BINARY_FORMAT_SPIR_V_ARB, code.data(), | ||||
|                    static_cast<GLsizei>(code.size_bytes())); | ||||
|     glSpecializeShader(shader.handle, "main", 0, nullptr, nullptr); | ||||
|     glAttachShader(program, shader.handle); | ||||
|     if (!Settings::values.renderer_debug) { | ||||
|         return; | ||||
|     } | ||||
|     GLint shader_status{}; | ||||
|     glGetShaderiv(shader.handle, GL_COMPILE_STATUS, &shader_status); | ||||
|     if (shader_status == GL_FALSE) { | ||||
|         LOG_ERROR(Render_OpenGL, "Failed to build shader"); | ||||
|     } | ||||
|     GLint log_length{}; | ||||
|     glGetShaderiv(shader.handle, GL_INFO_LOG_LENGTH, &log_length); | ||||
|     if (log_length == 0) { | ||||
|         return; | ||||
|     } | ||||
|     std::string log(log_length, 0); | ||||
|     glGetShaderInfoLog(shader.handle, log_length, nullptr, log.data()); | ||||
|     if (shader_status == GL_FALSE) { | ||||
|         LOG_ERROR(Render_OpenGL, "{}", log); | ||||
|     } else { | ||||
|         LOG_WARNING(Render_OpenGL, "{}", log); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void LinkProgram(GLuint program) { | ||||
|     glLinkProgram(program); | ||||
|     if (!Settings::values.renderer_debug) { | ||||
|         return; | ||||
|     } | ||||
|     GLint link_status{}; | ||||
|     glGetProgramiv(program, GL_LINK_STATUS, &link_status); | ||||
| 
 | ||||
|     GLint log_length{}; | ||||
|     glGetProgramiv(program, GL_INFO_LOG_LENGTH, &log_length); | ||||
|     if (log_length == 0) { | ||||
|         return; | ||||
|     } | ||||
|     std::string log(log_length, 0); | ||||
|     glGetProgramInfoLog(program, log_length, nullptr, log.data()); | ||||
|     if (link_status == GL_FALSE) { | ||||
|         LOG_ERROR(Render_OpenGL, "{}", log); | ||||
|     } else { | ||||
|         LOG_WARNING(Render_OpenGL, "{}", log); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| GLenum Stage(size_t stage_index) { | ||||
|     switch (stage_index) { | ||||
|     case 0: | ||||
|         return GL_VERTEX_SHADER; | ||||
|     case 1: | ||||
|         return GL_TESS_CONTROL_SHADER; | ||||
|     case 2: | ||||
|         return GL_TESS_EVALUATION_SHADER; | ||||
|     case 3: | ||||
|         return GL_GEOMETRY_SHADER; | ||||
|     case 4: | ||||
|         return GL_FRAGMENT_SHADER; | ||||
|     } | ||||
|     UNREACHABLE_MSG("{}", stage_index); | ||||
|     return GL_NONE; | ||||
| } | ||||
| } // Anonymous namespace
 | ||||
| 
 | ||||
| ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_, | ||||
|                          Tegra::GPU& gpu_, Tegra::Engines::Maxwell3D& maxwell3d_, | ||||
|                          Tegra::Engines::Maxwell3D& maxwell3d_, | ||||
|                          Tegra::Engines::KeplerCompute& kepler_compute_, | ||||
|                          Tegra::MemoryManager& gpu_memory_, const Device& device_) | ||||
|                          Tegra::MemoryManager& gpu_memory_, const Device& device_, | ||||
|                          TextureCache& texture_cache_, BufferCache& buffer_cache_, | ||||
|                          ProgramManager& program_manager_, StateTracker& state_tracker_) | ||||
|     : VideoCommon::ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_}, | ||||
|       emu_window{emu_window_}, gpu{gpu_}, device{device_} {} | ||||
|       emu_window{emu_window_}, device{device_}, texture_cache{texture_cache_}, | ||||
|       buffer_cache{buffer_cache_}, program_manager{program_manager_}, state_tracker{ | ||||
|                                                                           state_tracker_} {} | ||||
| 
 | ||||
| ShaderCache::~ShaderCache() = default; | ||||
| 
 | ||||
| GraphicsProgram* ShaderCache::CurrentGraphicsProgram() { | ||||
|     if (!RefreshStages(graphics_key.unique_hashes)) { | ||||
|         return nullptr; | ||||
|     } | ||||
|     const auto& regs{maxwell3d.regs}; | ||||
|     graphics_key.raw = 0; | ||||
|     graphics_key.early_z.Assign(regs.force_early_fragment_tests != 0 ? 1 : 0); | ||||
|     graphics_key.gs_input_topology.Assign(graphics_key.unique_hashes[4] != 0 | ||||
|                                               ? regs.draw.topology.Value() | ||||
|                                               : Maxwell::PrimitiveTopology{}); | ||||
|     graphics_key.tessellation_primitive.Assign(regs.tess_mode.prim.Value()); | ||||
|     graphics_key.tessellation_spacing.Assign(regs.tess_mode.spacing.Value()); | ||||
|     graphics_key.tessellation_clockwise.Assign(regs.tess_mode.cw.Value()); | ||||
| 
 | ||||
|     const auto [pair, is_new]{graphics_cache.try_emplace(graphics_key)}; | ||||
|     auto& program{pair->second}; | ||||
|     if (is_new) { | ||||
|         program = CreateGraphicsProgram(); | ||||
|     } | ||||
|     return program.get(); | ||||
| } | ||||
| 
 | ||||
| ComputeProgram* ShaderCache::CurrentComputeProgram() { | ||||
|     const VideoCommon::ShaderInfo* const shader{ComputeShader()}; | ||||
|     if (!shader) { | ||||
|         return nullptr; | ||||
|     } | ||||
|     const auto& qmd{kepler_compute.launch_description}; | ||||
|     const ComputeProgramKey key{ | ||||
|         .unique_hash = shader->unique_hash, | ||||
|         .shared_memory_size = qmd.shared_alloc, | ||||
|         .workgroup_size{qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z}, | ||||
|     }; | ||||
|     const auto [pair, is_new]{compute_cache.try_emplace(key)}; | ||||
|     auto& pipeline{pair->second}; | ||||
|     if (!is_new) { | ||||
|         return pipeline.get(); | ||||
|     } | ||||
|     pipeline = CreateComputeProgram(key, shader); | ||||
|     return pipeline.get(); | ||||
| } | ||||
| 
 | ||||
| std::unique_ptr<GraphicsProgram> ShaderCache::CreateGraphicsProgram() { | ||||
|     GraphicsEnvironments environments; | ||||
|     GetGraphicsEnvironments(environments, graphics_key.unique_hashes); | ||||
| 
 | ||||
|     main_pools.ReleaseContents(); | ||||
|     return CreateGraphicsProgram(main_pools, graphics_key, environments.Span(), true); | ||||
| } | ||||
| 
 | ||||
| std::unique_ptr<GraphicsProgram> ShaderCache::CreateGraphicsProgram( | ||||
|     ShaderPools& pools, const GraphicsProgramKey& key, std::span<Shader::Environment* const> envs, | ||||
|     bool build_in_parallel) { | ||||
|     LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash()); | ||||
|     size_t env_index{0}; | ||||
|     std::array<Shader::IR::Program, Maxwell::MaxShaderProgram> programs; | ||||
|     for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { | ||||
|         if (key.unique_hashes[index] == 0) { | ||||
|             continue; | ||||
|         } | ||||
|         Shader::Environment& env{*envs[env_index]}; | ||||
|         ++env_index; | ||||
| 
 | ||||
|         const u32 cfg_offset{static_cast<u32>(env.StartAddress() + sizeof(Shader::ProgramHeader))}; | ||||
|         Shader::Maxwell::Flow::CFG cfg(env, pools.flow_block, cfg_offset); | ||||
|         programs[index] = TranslateProgram(pools.inst, pools.block, env, cfg); | ||||
|     } | ||||
|     std::array<const Shader::Info*, Maxwell::MaxShaderStage> infos{}; | ||||
| 
 | ||||
|     OGLProgram gl_program; | ||||
|     gl_program.handle = glCreateProgram(); | ||||
| 
 | ||||
|     Shader::Backend::SPIRV::Bindings binding; | ||||
|     for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { | ||||
|         if (key.unique_hashes[index] == 0) { | ||||
|             continue; | ||||
|         } | ||||
|         UNIMPLEMENTED_IF(index == 0); | ||||
| 
 | ||||
|         Shader::IR::Program& program{programs[index]}; | ||||
|         const size_t stage_index{index - 1}; | ||||
|         infos[stage_index] = &program.info; | ||||
| 
 | ||||
|         const std::vector<u32> code{EmitSPIRV(profile, program, binding)}; | ||||
|         FILE* file = fopen("D:\\shader.spv", "wb"); | ||||
|         fwrite(code.data(), 4, code.size(), file); | ||||
|         fclose(file); | ||||
|         AddShader(Stage(stage_index), gl_program.handle, code); | ||||
|     } | ||||
|     LinkProgram(gl_program.handle); | ||||
| 
 | ||||
|     return std::make_unique<GraphicsProgram>(texture_cache, buffer_cache, gpu_memory, maxwell3d, | ||||
|                                              program_manager, state_tracker, std::move(gl_program), | ||||
|                                              infos); | ||||
| } | ||||
| 
 | ||||
| std::unique_ptr<ComputeProgram> ShaderCache::CreateComputeProgram( | ||||
|     const ComputeProgramKey& key, const VideoCommon::ShaderInfo* shader) { | ||||
|     const GPUVAddr program_base{kepler_compute.regs.code_loc.Address()}; | ||||
|     const auto& qmd{kepler_compute.launch_description}; | ||||
|     ComputeEnvironment env{kepler_compute, gpu_memory, program_base, qmd.program_start}; | ||||
|     env.SetCachedSize(shader->size_bytes); | ||||
| 
 | ||||
|     main_pools.ReleaseContents(); | ||||
|     return CreateComputeProgram(main_pools, key, env, true); | ||||
| } | ||||
| 
 | ||||
| std::unique_ptr<ComputeProgram> ShaderCache::CreateComputeProgram(ShaderPools& pools, | ||||
|                                                                   const ComputeProgramKey& key, | ||||
|                                                                   Shader::Environment& env, | ||||
|                                                                   bool build_in_parallel) { | ||||
|     LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash()); | ||||
| 
 | ||||
|     Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()}; | ||||
|     Shader::IR::Program program{TranslateProgram(pools.inst, pools.block, env, cfg)}; | ||||
|     Shader::Backend::SPIRV::Bindings binding; | ||||
|     const std::vector<u32> code{EmitSPIRV(profile, program, binding)}; | ||||
|     OGLProgram gl_program; | ||||
|     gl_program.handle = glCreateProgram(); | ||||
|     AddShader(GL_COMPUTE_SHADER, gl_program.handle, code); | ||||
|     LinkProgram(gl_program.handle); | ||||
|     return std::make_unique<ComputeProgram>(texture_cache, buffer_cache, gpu_memory, kepler_compute, | ||||
|                                             program_manager, std::move(gl_program), program.info); | ||||
| } | ||||
| 
 | ||||
| } // namespace OpenGL
 | ||||
|  |  | |||
|  | @ -5,20 +5,18 @@ | |||
| #pragma once | ||||
| 
 | ||||
| #include <array> | ||||
| #include <atomic> | ||||
| #include <bitset> | ||||
| #include <memory> | ||||
| #include <string> | ||||
| #include <tuple> | ||||
| #include <unordered_map> | ||||
| #include <unordered_set> | ||||
| #include <vector> | ||||
| 
 | ||||
| #include <glad/glad.h> | ||||
| 
 | ||||
| #include "common/common_types.h" | ||||
| #include "shader_recompiler/frontend/ir/basic_block.h" | ||||
| #include "shader_recompiler/frontend/ir/value.h" | ||||
| #include "shader_recompiler/frontend/maxwell/control_flow.h" | ||||
| #include "shader_recompiler/object_pool.h" | ||||
| #include "video_core/engines/shader_type.h" | ||||
| #include "video_core/renderer_opengl/gl_resource_manager.h" | ||||
| #include "video_core/renderer_opengl/gl_compute_program.h" | ||||
| #include "video_core/renderer_opengl/gl_graphics_program.h" | ||||
| #include "video_core/shader_cache.h" | ||||
| 
 | ||||
| namespace Tegra { | ||||
|  | @ -32,64 +30,62 @@ class EmuWindow; | |||
| namespace OpenGL { | ||||
| 
 | ||||
| class Device; | ||||
| class ProgramManager; | ||||
| class RasterizerOpenGL; | ||||
| 
 | ||||
| using Maxwell = Tegra::Engines::Maxwell3D::Regs; | ||||
| 
 | ||||
| struct GraphicsProgramKey { | ||||
|     struct TransformFeedbackState { | ||||
|         struct Layout { | ||||
|             u32 stream; | ||||
|             u32 varying_count; | ||||
|             u32 stride; | ||||
|         }; | ||||
|         std::array<Layout, Maxwell::NumTransformFeedbackBuffers> layouts; | ||||
|         std::array<std::array<u8, 128>, Maxwell::NumTransformFeedbackBuffers> varyings; | ||||
|     }; | ||||
| 
 | ||||
|     std::array<u64, 6> unique_hashes; | ||||
|     std::array<u8, Maxwell::NumRenderTargets> color_formats; | ||||
|     union { | ||||
|         u32 raw; | ||||
|         BitField<0, 1, u32> xfb_enabled; | ||||
|         BitField<1, 1, u32> early_z; | ||||
|         BitField<2, 4, Maxwell::PrimitiveTopology> gs_input_topology; | ||||
|         BitField<6, 2, u32> tessellation_primitive; | ||||
|         BitField<8, 2, u32> tessellation_spacing; | ||||
|         BitField<10, 1, u32> tessellation_clockwise; | ||||
|     }; | ||||
|     u32 padding; | ||||
|     TransformFeedbackState xfb_state; | ||||
| 
 | ||||
|     [[nodiscard]] size_t Size() const noexcept { | ||||
|         if (xfb_enabled != 0) { | ||||
|             return sizeof(GraphicsProgramKey); | ||||
|         } else { | ||||
|             return offsetof(GraphicsProgramKey, padding); | ||||
| struct ShaderPools { | ||||
|     void ReleaseContents() { | ||||
|         flow_block.ReleaseContents(); | ||||
|         block.ReleaseContents(); | ||||
|         inst.ReleaseContents(); | ||||
|     } | ||||
|     } | ||||
| }; | ||||
| static_assert(std::has_unique_object_representations_v<GraphicsProgramKey>); | ||||
| static_assert(std::is_trivially_copyable_v<GraphicsProgramKey>); | ||||
| static_assert(std::is_trivially_constructible_v<GraphicsProgramKey>); | ||||
| 
 | ||||
| class GraphicsProgram { | ||||
| public: | ||||
| private: | ||||
|     Shader::ObjectPool<Shader::IR::Inst> inst; | ||||
|     Shader::ObjectPool<Shader::IR::Block> block; | ||||
|     Shader::ObjectPool<Shader::Maxwell::Flow::Block> flow_block; | ||||
| }; | ||||
| 
 | ||||
| class ShaderCache : public VideoCommon::ShaderCache { | ||||
| public: | ||||
|     explicit ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_, | ||||
|                          Tegra::GPU& gpu_, Tegra::Engines::Maxwell3D& maxwell3d_, | ||||
|                          Tegra::Engines::Maxwell3D& maxwell3d_, | ||||
|                          Tegra::Engines::KeplerCompute& kepler_compute_, | ||||
|                          Tegra::MemoryManager& gpu_memory_, const Device& device_); | ||||
|                          Tegra::MemoryManager& gpu_memory_, const Device& device_, | ||||
|                          TextureCache& texture_cache_, BufferCache& buffer_cache_, | ||||
|                          ProgramManager& program_manager_, StateTracker& state_tracker_); | ||||
|     ~ShaderCache(); | ||||
| 
 | ||||
|     [[nodiscard]] GraphicsProgram* CurrentGraphicsProgram(); | ||||
| 
 | ||||
|     [[nodiscard]] ComputeProgram* CurrentComputeProgram(); | ||||
| 
 | ||||
| private: | ||||
|     std::unique_ptr<GraphicsProgram> CreateGraphicsProgram(); | ||||
| 
 | ||||
|     std::unique_ptr<GraphicsProgram> CreateGraphicsProgram( | ||||
|         ShaderPools& pools, const GraphicsProgramKey& key, | ||||
|         std::span<Shader::Environment* const> envs, bool build_in_parallel); | ||||
| 
 | ||||
|     std::unique_ptr<ComputeProgram> CreateComputeProgram(const ComputeProgramKey& key, | ||||
|                                                          const VideoCommon::ShaderInfo* shader); | ||||
| 
 | ||||
|     std::unique_ptr<ComputeProgram> CreateComputeProgram(ShaderPools& pools, | ||||
|                                                          const ComputeProgramKey& key, | ||||
|                                                          Shader::Environment& env, | ||||
|                                                          bool build_in_parallel); | ||||
| 
 | ||||
|     Core::Frontend::EmuWindow& emu_window; | ||||
|     Tegra::GPU& gpu; | ||||
|     const Device& device; | ||||
|     TextureCache& texture_cache; | ||||
|     BufferCache& buffer_cache; | ||||
|     ProgramManager& program_manager; | ||||
|     StateTracker& state_tracker; | ||||
| 
 | ||||
|     GraphicsProgramKey graphics_key{}; | ||||
| 
 | ||||
|     ShaderPools main_pools; | ||||
|     std::unordered_map<GraphicsProgramKey, std::unique_ptr<GraphicsProgram>> graphics_cache; | ||||
|     std::unordered_map<ComputeProgramKey, std::unique_ptr<ComputeProgram>> compute_cache; | ||||
| }; | ||||
| 
 | ||||
| } // namespace OpenGL
 | ||||
|  |  | |||
|  | @ -1,149 +1,3 @@ | |||
| // Copyright 2018 yuzu Emulator Project
 | ||||
| // Licensed under GPLv2 or any later version
 | ||||
| // Refer to the license.txt file included.
 | ||||
| 
 | ||||
| #include <glad/glad.h> | ||||
| 
 | ||||
| #include "common/common_types.h" | ||||
| #include "video_core/engines/maxwell_3d.h" | ||||
| #include "video_core/renderer_opengl/gl_device.h" | ||||
| #include "video_core/renderer_opengl/gl_shader_manager.h" | ||||
| 
 | ||||
| namespace OpenGL { | ||||
| 
 | ||||
| namespace { | ||||
| 
 | ||||
| void BindProgram(GLenum stage, GLuint current, GLuint old, bool& enabled) { | ||||
|     if (current == old) { | ||||
|         return; | ||||
|     } | ||||
|     if (current == 0) { | ||||
|         if (enabled) { | ||||
|             enabled = false; | ||||
|             glDisable(stage); | ||||
|         } | ||||
|         return; | ||||
|     } | ||||
|     if (!enabled) { | ||||
|         enabled = true; | ||||
|         glEnable(stage); | ||||
|     } | ||||
|     glBindProgramARB(stage, current); | ||||
| } | ||||
| 
 | ||||
| } // Anonymous namespace
 | ||||
| 
 | ||||
| ProgramManager::ProgramManager(const Device& device) | ||||
|     : use_assembly_programs{device.UseAssemblyShaders()} { | ||||
|     if (use_assembly_programs) { | ||||
|         glEnable(GL_COMPUTE_PROGRAM_NV); | ||||
|     } else { | ||||
|         graphics_pipeline.Create(); | ||||
|         glBindProgramPipeline(graphics_pipeline.handle); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| ProgramManager::~ProgramManager() = default; | ||||
| 
 | ||||
| void ProgramManager::BindCompute(GLuint program) { | ||||
|     if (use_assembly_programs) { | ||||
|         glBindProgramARB(GL_COMPUTE_PROGRAM_NV, program); | ||||
|     } else { | ||||
|         is_graphics_bound = false; | ||||
|         glUseProgram(program); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void ProgramManager::BindGraphicsPipeline() { | ||||
|     if (!use_assembly_programs) { | ||||
|         UpdateSourcePrograms(); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void ProgramManager::BindHostPipeline(GLuint pipeline) { | ||||
|     if (use_assembly_programs) { | ||||
|         if (geometry_enabled) { | ||||
|             geometry_enabled = false; | ||||
|             old_state.geometry = 0; | ||||
|             glDisable(GL_GEOMETRY_PROGRAM_NV); | ||||
|         } | ||||
|     } else { | ||||
|         if (!is_graphics_bound) { | ||||
|             glUseProgram(0); | ||||
|         } | ||||
|     } | ||||
|     glBindProgramPipeline(pipeline); | ||||
| } | ||||
| 
 | ||||
| void ProgramManager::RestoreGuestPipeline() { | ||||
|     if (use_assembly_programs) { | ||||
|         glBindProgramPipeline(0); | ||||
|     } else { | ||||
|         glBindProgramPipeline(graphics_pipeline.handle); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void ProgramManager::BindHostCompute(GLuint program) { | ||||
|     if (use_assembly_programs) { | ||||
|         glDisable(GL_COMPUTE_PROGRAM_NV); | ||||
|     } | ||||
|     glUseProgram(program); | ||||
|     is_graphics_bound = false; | ||||
| } | ||||
| 
 | ||||
| void ProgramManager::RestoreGuestCompute() { | ||||
|     if (use_assembly_programs) { | ||||
|         glEnable(GL_COMPUTE_PROGRAM_NV); | ||||
|         glUseProgram(0); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void ProgramManager::UseVertexShader(GLuint program) { | ||||
|     if (use_assembly_programs) { | ||||
|         BindProgram(GL_VERTEX_PROGRAM_NV, program, current_state.vertex, vertex_enabled); | ||||
|     } | ||||
|     current_state.vertex = program; | ||||
| } | ||||
| 
 | ||||
| void ProgramManager::UseGeometryShader(GLuint program) { | ||||
|     if (use_assembly_programs) { | ||||
|         BindProgram(GL_GEOMETRY_PROGRAM_NV, program, current_state.vertex, geometry_enabled); | ||||
|     } | ||||
|     current_state.geometry = program; | ||||
| } | ||||
| 
 | ||||
| void ProgramManager::UseFragmentShader(GLuint program) { | ||||
|     if (use_assembly_programs) { | ||||
|         BindProgram(GL_FRAGMENT_PROGRAM_NV, program, current_state.vertex, fragment_enabled); | ||||
|     } | ||||
|     current_state.fragment = program; | ||||
| } | ||||
| 
 | ||||
| void ProgramManager::UpdateSourcePrograms() { | ||||
|     if (!is_graphics_bound) { | ||||
|         is_graphics_bound = true; | ||||
|         glUseProgram(0); | ||||
|     } | ||||
| 
 | ||||
|     const GLuint handle = graphics_pipeline.handle; | ||||
|     const auto update_state = [handle](GLenum stage, GLuint current, GLuint old) { | ||||
|         if (current == old) { | ||||
|             return; | ||||
|         } | ||||
|         glUseProgramStages(handle, stage, current); | ||||
|     }; | ||||
|     update_state(GL_VERTEX_SHADER_BIT, current_state.vertex, old_state.vertex); | ||||
|     update_state(GL_GEOMETRY_SHADER_BIT, current_state.geometry, old_state.geometry); | ||||
|     update_state(GL_FRAGMENT_SHADER_BIT, current_state.fragment, old_state.fragment); | ||||
| 
 | ||||
|     old_state = current_state; | ||||
| } | ||||
| 
 | ||||
| void MaxwellUniformData::SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell) { | ||||
|     const auto& regs = maxwell.regs; | ||||
| 
 | ||||
|     // Y_NEGATE controls what value S2R returns for the Y_DIRECTION system value.
 | ||||
|     y_direction = regs.screen_y_control.y_negate == 0 ? 1.0f : -1.0f; | ||||
| } | ||||
| 
 | ||||
| } // namespace OpenGL
 | ||||
|  |  | |||
|  | @ -4,79 +4,24 @@ | |||
| 
 | ||||
| #pragma once | ||||
| 
 | ||||
| #include <cstddef> | ||||
| 
 | ||||
| #include <glad/glad.h> | ||||
| 
 | ||||
| #include "video_core/renderer_opengl/gl_resource_manager.h" | ||||
| #include "video_core/renderer_opengl/maxwell_to_gl.h" | ||||
| 
 | ||||
| namespace OpenGL { | ||||
| 
 | ||||
| class Device; | ||||
| 
 | ||||
| /// Uniform structure for the Uniform Buffer Object, all vectors must be 16-byte aligned
 | ||||
| /// @note Always keep a vec4 at the end. The GL spec is not clear whether the alignment at
 | ||||
| ///       the end of a uniform block is included in UNIFORM_BLOCK_DATA_SIZE or not.
 | ||||
| ///       Not following that rule will cause problems on some AMD drivers.
 | ||||
| struct alignas(16) MaxwellUniformData { | ||||
|     void SetFromRegs(const Tegra::Engines::Maxwell3D& maxwell); | ||||
| 
 | ||||
|     GLfloat y_direction; | ||||
| }; | ||||
| static_assert(sizeof(MaxwellUniformData) == 16, "MaxwellUniformData structure size is incorrect"); | ||||
| static_assert(sizeof(MaxwellUniformData) < 16384, | ||||
|               "MaxwellUniformData structure must be less than 16kb as per the OpenGL spec"); | ||||
| 
 | ||||
| class ProgramManager { | ||||
| public: | ||||
|     explicit ProgramManager(const Device& device); | ||||
|     ~ProgramManager(); | ||||
|     void BindProgram(GLuint program) { | ||||
|         if (bound_program == program) { | ||||
|             return; | ||||
|         } | ||||
|         bound_program = program; | ||||
|         glUseProgram(program); | ||||
|     } | ||||
| 
 | ||||
|     /// Binds a compute program
 | ||||
|     void BindCompute(GLuint program); | ||||
| 
 | ||||
|     /// Updates bound programs.
 | ||||
|     void BindGraphicsPipeline(); | ||||
| 
 | ||||
|     /// Binds an OpenGL pipeline object unsynchronized with the guest state.
 | ||||
|     void BindHostPipeline(GLuint pipeline); | ||||
| 
 | ||||
|     /// Rewinds BindHostPipeline state changes.
 | ||||
|     void RestoreGuestPipeline(); | ||||
| 
 | ||||
|     /// Binds an OpenGL GLSL program object unsynchronized with the guest state.
 | ||||
|     void BindHostCompute(GLuint program); | ||||
| 
 | ||||
|     /// Rewinds BindHostCompute state changes.
 | ||||
|     void RestoreGuestCompute(); | ||||
| 
 | ||||
|     void UseVertexShader(GLuint program); | ||||
|     void UseGeometryShader(GLuint program); | ||||
|     void UseFragmentShader(GLuint program); | ||||
|     void RestoreGuestCompute() {} | ||||
| 
 | ||||
| private: | ||||
|     struct PipelineState { | ||||
|         GLuint vertex = 0; | ||||
|         GLuint geometry = 0; | ||||
|         GLuint fragment = 0; | ||||
|     }; | ||||
| 
 | ||||
|     /// Update GLSL programs.
 | ||||
|     void UpdateSourcePrograms(); | ||||
| 
 | ||||
|     OGLPipeline graphics_pipeline; | ||||
| 
 | ||||
|     PipelineState current_state; | ||||
|     PipelineState old_state; | ||||
| 
 | ||||
|     bool use_assembly_programs = false; | ||||
| 
 | ||||
|     bool is_graphics_bound = true; | ||||
| 
 | ||||
|     bool vertex_enabled = false; | ||||
|     bool geometry_enabled = false; | ||||
|     bool fragment_enabled = false; | ||||
|     GLuint bound_program = 0; | ||||
| }; | ||||
| 
 | ||||
| } // namespace OpenGL
 | ||||
|  |  | |||
|  | @ -24,9 +24,7 @@ | |||
| #include "video_core/textures/decoders.h" | ||||
| 
 | ||||
| namespace OpenGL { | ||||
| 
 | ||||
| namespace { | ||||
| 
 | ||||
| using Tegra::Texture::SwizzleSource; | ||||
| using Tegra::Texture::TextureMipmapFilter; | ||||
| using Tegra::Texture::TextureType; | ||||
|  | @ -59,107 +57,6 @@ struct CopyRegion { | |||
|     GLsizei depth; | ||||
| }; | ||||
| 
 | ||||
| struct FormatTuple { | ||||
|     GLenum internal_format; | ||||
|     GLenum format = GL_NONE; | ||||
|     GLenum type = GL_NONE; | ||||
| }; | ||||
| 
 | ||||
| constexpr std::array<FormatTuple, MaxPixelFormat> FORMAT_TABLE = {{ | ||||
|     {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV},                 // A8B8G8R8_UNORM
 | ||||
|     {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE},                               // A8B8G8R8_SNORM
 | ||||
|     {GL_RGBA8I, GL_RGBA_INTEGER, GL_BYTE},                            // A8B8G8R8_SINT
 | ||||
|     {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE},                  // A8B8G8R8_UINT
 | ||||
|     {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5},                     // R5G6B5_UNORM
 | ||||
|     {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV},                 // B5G6R5_UNORM
 | ||||
|     {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV},             // A1R5G5B5_UNORM
 | ||||
|     {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV},           // A2B10G10R10_UNORM
 | ||||
|     {GL_RGB10_A2UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UINT
 | ||||
|     {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV},             // A1B5G5R5_UNORM
 | ||||
|     {GL_R8, GL_RED, GL_UNSIGNED_BYTE},                                // R8_UNORM
 | ||||
|     {GL_R8_SNORM, GL_RED, GL_BYTE},                                   // R8_SNORM
 | ||||
|     {GL_R8I, GL_RED_INTEGER, GL_BYTE},                                // R8_SINT
 | ||||
|     {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE},                      // R8_UINT
 | ||||
|     {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT},                             // R16G16B16A16_FLOAT
 | ||||
|     {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT},                          // R16G16B16A16_UNORM
 | ||||
|     {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT},                             // R16G16B16A16_SNORM
 | ||||
|     {GL_RGBA16I, GL_RGBA_INTEGER, GL_SHORT},                          // R16G16B16A16_SINT
 | ||||
|     {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT},                // R16G16B16A16_UINT
 | ||||
|     {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV},     // B10G11R11_FLOAT
 | ||||
|     {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT},                  // R32G32B32A32_UINT
 | ||||
|     {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT},                               // BC1_RGBA_UNORM
 | ||||
|     {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT},                               // BC2_UNORM
 | ||||
|     {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT},                               // BC3_UNORM
 | ||||
|     {GL_COMPRESSED_RED_RGTC1},                                        // BC4_UNORM
 | ||||
|     {GL_COMPRESSED_SIGNED_RED_RGTC1},                                 // BC4_SNORM
 | ||||
|     {GL_COMPRESSED_RG_RGTC2},                                         // BC5_UNORM
 | ||||
|     {GL_COMPRESSED_SIGNED_RG_RGTC2},                                  // BC5_SNORM
 | ||||
|     {GL_COMPRESSED_RGBA_BPTC_UNORM},                                  // BC7_UNORM
 | ||||
|     {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT},                          // BC6H_UFLOAT
 | ||||
|     {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT},                            // BC6H_SFLOAT
 | ||||
|     {GL_COMPRESSED_RGBA_ASTC_4x4_KHR},                                // ASTC_2D_4X4_UNORM
 | ||||
|     {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE},                            // B8G8R8A8_UNORM
 | ||||
|     {GL_RGBA32F, GL_RGBA, GL_FLOAT},                                  // R32G32B32A32_FLOAT
 | ||||
|     {GL_RGBA32I, GL_RGBA_INTEGER, GL_INT},                            // R32G32B32A32_SINT
 | ||||
|     {GL_RG32F, GL_RG, GL_FLOAT},                                      // R32G32_FLOAT
 | ||||
|     {GL_RG32I, GL_RG_INTEGER, GL_INT},                                // R32G32_SINT
 | ||||
|     {GL_R32F, GL_RED, GL_FLOAT},                                      // R32_FLOAT
 | ||||
|     {GL_R16F, GL_RED, GL_HALF_FLOAT},                                 // R16_FLOAT
 | ||||
|     {GL_R16, GL_RED, GL_UNSIGNED_SHORT},                              // R16_UNORM
 | ||||
|     {GL_R16_SNORM, GL_RED, GL_SHORT},                                 // R16_SNORM
 | ||||
|     {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT},                    // R16_UINT
 | ||||
|     {GL_R16I, GL_RED_INTEGER, GL_SHORT},                              // R16_SINT
 | ||||
|     {GL_RG16, GL_RG, GL_UNSIGNED_SHORT},                              // R16G16_UNORM
 | ||||
|     {GL_RG16F, GL_RG, GL_HALF_FLOAT},                                 // R16G16_FLOAT
 | ||||
|     {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT},                    // R16G16_UINT
 | ||||
|     {GL_RG16I, GL_RG_INTEGER, GL_SHORT},                              // R16G16_SINT
 | ||||
|     {GL_RG16_SNORM, GL_RG, GL_SHORT},                                 // R16G16_SNORM
 | ||||
|     {GL_RGB32F, GL_RGB, GL_FLOAT},                                    // R32G32B32_FLOAT
 | ||||
|     {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV},          // A8B8G8R8_SRGB
 | ||||
|     {GL_RG8, GL_RG, GL_UNSIGNED_BYTE},                                // R8G8_UNORM
 | ||||
|     {GL_RG8_SNORM, GL_RG, GL_BYTE},                                   // R8G8_SNORM
 | ||||
|     {GL_RG8I, GL_RG_INTEGER, GL_BYTE},                                // R8G8_SINT
 | ||||
|     {GL_RG8UI, GL_RG_INTEGER, GL_UNSIGNED_BYTE},                      // R8G8_UINT
 | ||||
|     {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT},                      // R32G32_UINT
 | ||||
|     {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT},                              // R16G16B16X16_FLOAT
 | ||||
|     {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT},                      // R32_UINT
 | ||||
|     {GL_R32I, GL_RED_INTEGER, GL_INT},                                // R32_SINT
 | ||||
|     {GL_COMPRESSED_RGBA_ASTC_8x8_KHR},                                // ASTC_2D_8X8_UNORM
 | ||||
|     {GL_COMPRESSED_RGBA_ASTC_8x5_KHR},                                // ASTC_2D_8X5_UNORM
 | ||||
|     {GL_COMPRESSED_RGBA_ASTC_5x4_KHR},                                // ASTC_2D_5X4_UNORM
 | ||||
|     {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE},                     // B8G8R8A8_SRGB
 | ||||
|     {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT},                         // BC1_RGBA_SRGB
 | ||||
|     {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT},                         // BC2_SRGB
 | ||||
|     {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT},                         // BC3_SRGB
 | ||||
|     {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM},                            // BC7_SRGB
 | ||||
|     {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV},               // A4B4G4R4_UNORM
 | ||||
|     {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR},                        // ASTC_2D_4X4_SRGB
 | ||||
|     {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR},                        // ASTC_2D_8X8_SRGB
 | ||||
|     {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR},                        // ASTC_2D_8X5_SRGB
 | ||||
|     {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR},                        // ASTC_2D_5X4_SRGB
 | ||||
|     {GL_COMPRESSED_RGBA_ASTC_5x5_KHR},                                // ASTC_2D_5X5_UNORM
 | ||||
|     {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR},                        // ASTC_2D_5X5_SRGB
 | ||||
|     {GL_COMPRESSED_RGBA_ASTC_10x8_KHR},                               // ASTC_2D_10X8_UNORM
 | ||||
|     {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR},                       // ASTC_2D_10X8_SRGB
 | ||||
|     {GL_COMPRESSED_RGBA_ASTC_6x6_KHR},                                // ASTC_2D_6X6_UNORM
 | ||||
|     {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR},                        // ASTC_2D_6X6_SRGB
 | ||||
|     {GL_COMPRESSED_RGBA_ASTC_10x10_KHR},                              // ASTC_2D_10X10_UNORM
 | ||||
|     {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR},                      // ASTC_2D_10X10_SRGB
 | ||||
|     {GL_COMPRESSED_RGBA_ASTC_12x12_KHR},                              // ASTC_2D_12X12_UNORM
 | ||||
|     {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR},                      // ASTC_2D_12X12_SRGB
 | ||||
|     {GL_COMPRESSED_RGBA_ASTC_8x6_KHR},                                // ASTC_2D_8X6_UNORM
 | ||||
|     {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR},                        // ASTC_2D_8X6_SRGB
 | ||||
|     {GL_COMPRESSED_RGBA_ASTC_6x5_KHR},                                // ASTC_2D_6X5_UNORM
 | ||||
|     {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR},                        // ASTC_2D_6X5_SRGB
 | ||||
|     {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV},                // E5B9G9R9_FLOAT
 | ||||
|     {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT},            // D32_FLOAT
 | ||||
|     {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT},    // D16_UNORM
 | ||||
|     {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8},    // D24_UNORM_S8_UINT
 | ||||
|     {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8},    // S8_UINT_D24_UNORM
 | ||||
|     {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, | ||||
|      GL_FLOAT_32_UNSIGNED_INT_24_8_REV}, // D32_FLOAT_S8_UINT
 | ||||
| }}; | ||||
| 
 | ||||
| constexpr std::array ACCELERATED_FORMATS{ | ||||
|     GL_RGBA32F,   GL_RGBA16F,   GL_RG32F,    GL_RG16F,        GL_R11F_G11F_B10F, GL_R32F, | ||||
|     GL_R16F,      GL_RGBA32UI,  GL_RGBA16UI, GL_RGB10_A2UI,   GL_RGBA8UI,        GL_RG32UI, | ||||
|  | @ -170,11 +67,6 @@ constexpr std::array ACCELERATED_FORMATS{ | |||
|     GL_RG8_SNORM, GL_R16_SNORM, GL_R8_SNORM, | ||||
| }; | ||||
| 
 | ||||
| const FormatTuple& GetFormatTuple(PixelFormat pixel_format) { | ||||
|     ASSERT(static_cast<size_t>(pixel_format) < FORMAT_TABLE.size()); | ||||
|     return FORMAT_TABLE[static_cast<size_t>(pixel_format)]; | ||||
| } | ||||
| 
 | ||||
| GLenum ImageTarget(const VideoCommon::ImageInfo& info) { | ||||
|     switch (info.type) { | ||||
|     case ImageType::e1D: | ||||
|  | @ -195,26 +87,24 @@ GLenum ImageTarget(const VideoCommon::ImageInfo& info) { | |||
|     return GL_NONE; | ||||
| } | ||||
| 
 | ||||
| GLenum ImageTarget(ImageViewType type, int num_samples = 1) { | ||||
| GLenum ImageTarget(Shader::TextureType type, int num_samples = 1) { | ||||
|     const bool is_multisampled = num_samples > 1; | ||||
|     switch (type) { | ||||
|     case ImageViewType::e1D: | ||||
|     case Shader::TextureType::Color1D: | ||||
|         return GL_TEXTURE_1D; | ||||
|     case ImageViewType::e2D: | ||||
|     case Shader::TextureType::Color2D: | ||||
|         return is_multisampled ? GL_TEXTURE_2D_MULTISAMPLE : GL_TEXTURE_2D; | ||||
|     case ImageViewType::Cube: | ||||
|     case Shader::TextureType::ColorCube: | ||||
|         return GL_TEXTURE_CUBE_MAP; | ||||
|     case ImageViewType::e3D: | ||||
|     case Shader::TextureType::Color3D: | ||||
|         return GL_TEXTURE_3D; | ||||
|     case ImageViewType::e1DArray: | ||||
|     case Shader::TextureType::ColorArray1D: | ||||
|         return GL_TEXTURE_1D_ARRAY; | ||||
|     case ImageViewType::e2DArray: | ||||
|     case Shader::TextureType::ColorArray2D: | ||||
|         return is_multisampled ? GL_TEXTURE_2D_MULTISAMPLE_ARRAY : GL_TEXTURE_2D_ARRAY; | ||||
|     case ImageViewType::CubeArray: | ||||
|     case Shader::TextureType::ColorArrayCube: | ||||
|         return GL_TEXTURE_CUBE_MAP_ARRAY; | ||||
|     case ImageViewType::Rect: | ||||
|         return GL_TEXTURE_RECTANGLE; | ||||
|     case ImageViewType::Buffer: | ||||
|     case Shader::TextureType::Buffer: | ||||
|         return GL_TEXTURE_BUFFER; | ||||
|     } | ||||
|     UNREACHABLE_MSG("Invalid image view type={}", type); | ||||
|  | @ -322,7 +212,7 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4 | |||
|     default: | ||||
|         return false; | ||||
|     } | ||||
|     const GLenum internal_format = GetFormatTuple(info.format).internal_format; | ||||
|     const GLenum internal_format = MaxwellToGL::GetFormatTuple(info.format).internal_format; | ||||
|     const auto& format_info = runtime.FormatInfo(info.type, internal_format); | ||||
|     if (format_info.is_compressed) { | ||||
|         return false; | ||||
|  | @ -414,11 +304,10 @@ void ApplySwizzle(GLuint handle, PixelFormat format, std::array<SwizzleSource, 4 | |||
| 
 | ||||
| void AttachTexture(GLuint fbo, GLenum attachment, const ImageView* image_view) { | ||||
|     if (False(image_view->flags & VideoCommon::ImageViewFlagBits::Slice)) { | ||||
|         const GLuint texture = image_view->DefaultHandle(); | ||||
|         glNamedFramebufferTexture(fbo, attachment, texture, 0); | ||||
|         glNamedFramebufferTexture(fbo, attachment, image_view->DefaultHandle(), 0); | ||||
|         return; | ||||
|     } | ||||
|     const GLuint texture = image_view->Handle(ImageViewType::e3D); | ||||
|     const GLuint texture = image_view->Handle(Shader::TextureType::Color3D); | ||||
|     if (image_view->range.extent.layers > 1) { | ||||
|         // TODO: OpenGL doesn't support rendering to a fixed number of slices
 | ||||
|         glNamedFramebufferTexture(fbo, attachment, texture, 0); | ||||
|  | @ -453,7 +342,7 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager& | |||
|     static constexpr std::array TARGETS{GL_TEXTURE_1D_ARRAY, GL_TEXTURE_2D_ARRAY, GL_TEXTURE_3D}; | ||||
|     for (size_t i = 0; i < TARGETS.size(); ++i) { | ||||
|         const GLenum target = TARGETS[i]; | ||||
|         for (const FormatTuple& tuple : FORMAT_TABLE) { | ||||
|         for (const MaxwellToGL::FormatTuple& tuple : MaxwellToGL::FORMAT_TABLE) { | ||||
|             const GLenum format = tuple.internal_format; | ||||
|             GLint compat_class; | ||||
|             GLint compat_type; | ||||
|  | @ -475,11 +364,9 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager& | |||
|     null_image_1d_array.Create(GL_TEXTURE_1D_ARRAY); | ||||
|     null_image_cube_array.Create(GL_TEXTURE_CUBE_MAP_ARRAY); | ||||
|     null_image_3d.Create(GL_TEXTURE_3D); | ||||
|     null_image_rect.Create(GL_TEXTURE_RECTANGLE); | ||||
|     glTextureStorage2D(null_image_1d_array.handle, 1, GL_R8, 1, 1); | ||||
|     glTextureStorage3D(null_image_cube_array.handle, 1, GL_R8, 1, 1, 6); | ||||
|     glTextureStorage3D(null_image_3d.handle, 1, GL_R8, 1, 1, 1); | ||||
|     glTextureStorage2D(null_image_rect.handle, 1, GL_R8, 1, 1); | ||||
| 
 | ||||
|     std::array<GLuint, 4> new_handles; | ||||
|     glGenTextures(static_cast<GLsizei>(new_handles.size()), new_handles.data()); | ||||
|  | @ -497,28 +384,27 @@ TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager& | |||
|                   GL_R8, 0, 1, 0, 6); | ||||
|     const std::array texture_handles{ | ||||
|         null_image_1d_array.handle,  null_image_cube_array.handle, null_image_3d.handle, | ||||
|         null_image_rect.handle,          null_image_view_1d.handle,    null_image_view_2d.handle, | ||||
|         null_image_view_2d_array.handle, null_image_view_cube.handle, | ||||
|         null_image_view_1d.handle,   null_image_view_2d.handle,    null_image_view_2d_array.handle, | ||||
|         null_image_view_cube.handle, | ||||
|     }; | ||||
|     for (const GLuint handle : texture_handles) { | ||||
|         static constexpr std::array NULL_SWIZZLE{GL_ZERO, GL_ZERO, GL_ZERO, GL_ZERO}; | ||||
|         glTextureParameteriv(handle, GL_TEXTURE_SWIZZLE_RGBA, NULL_SWIZZLE.data()); | ||||
|     } | ||||
|     const auto set_view = [this](ImageViewType type, GLuint handle) { | ||||
|     const auto set_view = [this](Shader::TextureType type, GLuint handle) { | ||||
|         if (device.HasDebuggingToolAttached()) { | ||||
|             const std::string name = fmt::format("NullImage {}", type); | ||||
|             glObjectLabel(GL_TEXTURE, handle, static_cast<GLsizei>(name.size()), name.data()); | ||||
|         } | ||||
|         null_image_views[static_cast<size_t>(type)] = handle; | ||||
|     }; | ||||
|     set_view(ImageViewType::e1D, null_image_view_1d.handle); | ||||
|     set_view(ImageViewType::e2D, null_image_view_2d.handle); | ||||
|     set_view(ImageViewType::Cube, null_image_view_cube.handle); | ||||
|     set_view(ImageViewType::e3D, null_image_3d.handle); | ||||
|     set_view(ImageViewType::e1DArray, null_image_1d_array.handle); | ||||
|     set_view(ImageViewType::e2DArray, null_image_view_2d_array.handle); | ||||
|     set_view(ImageViewType::CubeArray, null_image_cube_array.handle); | ||||
|     set_view(ImageViewType::Rect, null_image_rect.handle); | ||||
|     set_view(Shader::TextureType::Color1D, null_image_view_1d.handle); | ||||
|     set_view(Shader::TextureType::Color2D, null_image_view_2d.handle); | ||||
|     set_view(Shader::TextureType::ColorCube, null_image_view_cube.handle); | ||||
|     set_view(Shader::TextureType::Color3D, null_image_3d.handle); | ||||
|     set_view(Shader::TextureType::ColorArray1D, null_image_1d_array.handle); | ||||
|     set_view(Shader::TextureType::ColorArray2D, null_image_view_2d_array.handle); | ||||
|     set_view(Shader::TextureType::ColorArrayCube, null_image_cube_array.handle); | ||||
| } | ||||
| 
 | ||||
| TextureCacheRuntime::~TextureCacheRuntime() = default; | ||||
|  | @ -710,7 +596,7 @@ Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_, | |||
|         gl_format = GL_RGBA; | ||||
|         gl_type = GL_UNSIGNED_INT_8_8_8_8_REV; | ||||
|     } else { | ||||
|         const auto& tuple = GetFormatTuple(info.format); | ||||
|         const auto& tuple = MaxwellToGL::GetFormatTuple(info.format); | ||||
|         gl_internal_format = tuple.internal_format; | ||||
|         gl_format = tuple.format; | ||||
|         gl_type = tuple.type; | ||||
|  | @ -750,8 +636,7 @@ Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_, | |||
|         glTextureStorage3D(handle, num_levels, gl_internal_format, width, height, depth); | ||||
|         break; | ||||
|     case GL_TEXTURE_BUFFER: | ||||
|         buffer.Create(); | ||||
|         glNamedBufferStorage(buffer.handle, guest_size_bytes, nullptr, 0); | ||||
|         UNREACHABLE(); | ||||
|         break; | ||||
|     default: | ||||
|         UNREACHABLE_MSG("Invalid target=0x{:x}", target); | ||||
|  | @ -789,14 +674,6 @@ void Image::UploadMemory(const ImageBufferMap& map, | |||
|     } | ||||
| } | ||||
| 
 | ||||
| void Image::UploadMemory(const ImageBufferMap& map, | ||||
|                          std::span<const VideoCommon::BufferCopy> copies) { | ||||
|     for (const VideoCommon::BufferCopy& copy : copies) { | ||||
|         glCopyNamedBufferSubData(map.buffer, buffer.handle, copy.src_offset + map.offset, | ||||
|                                  copy.dst_offset, copy.size); | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| void Image::DownloadMemory(ImageBufferMap& map, | ||||
|                            std::span<const VideoCommon::BufferImageCopy> copies) { | ||||
|     glMemoryBarrier(GL_PIXEL_BUFFER_BARRIER_BIT); // TODO: Move this to its own API
 | ||||
|  | @ -958,7 +835,7 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI | |||
|     if (True(image.flags & ImageFlagBits::Converted)) { | ||||
|         internal_format = IsPixelFormatSRGB(info.format) ? GL_SRGB8_ALPHA8 : GL_RGBA8; | ||||
|     } else { | ||||
|         internal_format = GetFormatTuple(format).internal_format; | ||||
|         internal_format = MaxwellToGL::GetFormatTuple(format).internal_format; | ||||
|     } | ||||
|     VideoCommon::SubresourceRange flatten_range = info.range; | ||||
|     std::array<GLuint, 2> handles; | ||||
|  | @ -970,8 +847,8 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI | |||
|         [[fallthrough]]; | ||||
|     case ImageViewType::e1D: | ||||
|         glGenTextures(2, handles.data()); | ||||
|         SetupView(device, image, ImageViewType::e1D, handles[0], info, flatten_range); | ||||
|         SetupView(device, image, ImageViewType::e1DArray, handles[1], info, info.range); | ||||
|         SetupView(device, image, Shader::TextureType::Color1D, handles[0], info, flatten_range); | ||||
|         SetupView(device, image, Shader::TextureType::ColorArray1D, handles[1], info, info.range); | ||||
|         break; | ||||
|     case ImageViewType::e2DArray: | ||||
|         flatten_range.extent.layers = 1; | ||||
|  | @ -985,36 +862,64 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI | |||
|                 .extent = {.levels = 1, .layers = 1}, | ||||
|             }; | ||||
|             glGenTextures(1, handles.data()); | ||||
|             SetupView(device, image, ImageViewType::e3D, handles[0], info, slice_range); | ||||
|             break; | ||||
|         } | ||||
|             SetupView(device, image, Shader::TextureType::Color3D, handles[0], info, slice_range); | ||||
|         } else { | ||||
|             glGenTextures(2, handles.data()); | ||||
|         SetupView(device, image, ImageViewType::e2D, handles[0], info, flatten_range); | ||||
|         SetupView(device, image, ImageViewType::e2DArray, handles[1], info, info.range); | ||||
|             SetupView(device, image, Shader::TextureType::Color2D, handles[0], info, flatten_range); | ||||
|             SetupView(device, image, Shader::TextureType::ColorArray2D, handles[1], info, | ||||
|                       info.range); | ||||
|         } | ||||
|         break; | ||||
|     case ImageViewType::e3D: | ||||
|         glGenTextures(1, handles.data()); | ||||
|         SetupView(device, image, ImageViewType::e3D, handles[0], info, info.range); | ||||
|         SetupView(device, image, Shader::TextureType::Color3D, handles[0], info, info.range); | ||||
|         break; | ||||
|     case ImageViewType::CubeArray: | ||||
|         flatten_range.extent.layers = 6; | ||||
|         [[fallthrough]]; | ||||
|     case ImageViewType::Cube: | ||||
|         glGenTextures(2, handles.data()); | ||||
|         SetupView(device, image, ImageViewType::Cube, handles[0], info, flatten_range); | ||||
|         SetupView(device, image, ImageViewType::CubeArray, handles[1], info, info.range); | ||||
|         SetupView(device, image, Shader::TextureType::ColorCube, handles[0], info, flatten_range); | ||||
|         SetupView(device, image, Shader::TextureType::ColorArrayCube, handles[1], info, info.range); | ||||
|         break; | ||||
|     case ImageViewType::Rect: | ||||
|         glGenTextures(1, handles.data()); | ||||
|         SetupView(device, image, ImageViewType::Rect, handles[0], info, info.range); | ||||
|         UNIMPLEMENTED(); | ||||
|         break; | ||||
|     case ImageViewType::Buffer: | ||||
|         glCreateTextures(GL_TEXTURE_BUFFER, 1, handles.data()); | ||||
|         SetupView(device, image, ImageViewType::Buffer, handles[0], info, info.range); | ||||
|         UNREACHABLE(); | ||||
|         break; | ||||
|     } | ||||
|     default_handle = Handle(info.type); | ||||
|     switch (info.type) { | ||||
|     case ImageViewType::e1D: | ||||
|         default_handle = Handle(Shader::TextureType::Color1D); | ||||
|         break; | ||||
|     case ImageViewType::e1DArray: | ||||
|         default_handle = Handle(Shader::TextureType::ColorArray1D); | ||||
|         break; | ||||
|     case ImageViewType::e2D: | ||||
|         default_handle = Handle(Shader::TextureType::Color2D); | ||||
|         break; | ||||
|     case ImageViewType::e2DArray: | ||||
|         default_handle = Handle(Shader::TextureType::ColorArray2D); | ||||
|         break; | ||||
|     case ImageViewType::e3D: | ||||
|         default_handle = Handle(Shader::TextureType::Color3D); | ||||
|         break; | ||||
|     case ImageViewType::Cube: | ||||
|         default_handle = Handle(Shader::TextureType::ColorCube); | ||||
|         break; | ||||
|     case ImageViewType::CubeArray: | ||||
|         default_handle = Handle(Shader::TextureType::ColorArrayCube); | ||||
|         break; | ||||
|     default: | ||||
|         break; | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, | ||||
|                      const VideoCommon::ImageViewInfo& view_info, GPUVAddr gpu_addr_) | ||||
|     : VideoCommon::ImageViewBase{info, view_info}, gpu_addr{gpu_addr_}, | ||||
|       buffer_size{VideoCommon::CalculateGuestSizeInBytes(info)} {} | ||||
| 
 | ||||
| ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, | ||||
|                      const VideoCommon::ImageViewInfo& view_info) | ||||
|  | @ -1023,14 +928,9 @@ ImageView::ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, | |||
| ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::NullImageParams& params) | ||||
|     : VideoCommon::ImageViewBase{params}, views{runtime.null_image_views} {} | ||||
| 
 | ||||
| void ImageView::SetupView(const Device& device, Image& image, ImageViewType view_type, | ||||
| void ImageView::SetupView(const Device& device, Image& image, Shader::TextureType view_type, | ||||
|                           GLuint handle, const VideoCommon::ImageViewInfo& info, | ||||
|                           VideoCommon::SubresourceRange view_range) { | ||||
|     if (info.type == ImageViewType::Buffer) { | ||||
|         // TODO: Take offset from buffer cache
 | ||||
|         glTextureBufferRange(handle, internal_format, image.buffer.handle, 0, | ||||
|                              image.guest_size_bytes); | ||||
|     } else { | ||||
|     const GLuint parent = image.texture.handle; | ||||
|     const GLenum target = ImageTarget(view_type, image.info.num_samples); | ||||
|     glTextureView(handle, target, parent, internal_format, view_range.base.level, | ||||
|  | @ -1038,9 +938,8 @@ void ImageView::SetupView(const Device& device, Image& image, ImageViewType view | |||
|     if (!info.IsRenderTarget()) { | ||||
|         ApplySwizzle(handle, format, info.Swizzle()); | ||||
|     } | ||||
|     } | ||||
|     if (device.HasDebuggingToolAttached()) { | ||||
|         const std::string name = VideoCommon::Name(*this, view_type); | ||||
|         const std::string name = VideoCommon::Name(*this); | ||||
|         glObjectLabel(GL_TEXTURE, handle, static_cast<GLsizei>(name.size()), name.data()); | ||||
|     } | ||||
|     stored_views.emplace_back().handle = handle; | ||||
|  |  | |||
|  | @ -9,6 +9,7 @@ | |||
| 
 | ||||
| #include <glad/glad.h> | ||||
| 
 | ||||
| #include "shader_recompiler/shader_info.h" | ||||
| #include "video_core/renderer_opengl/gl_resource_manager.h" | ||||
| #include "video_core/renderer_opengl/util_shaders.h" | ||||
| #include "video_core/texture_cache/texture_cache.h" | ||||
|  | @ -127,13 +128,12 @@ private: | |||
|     OGLTexture null_image_1d_array; | ||||
|     OGLTexture null_image_cube_array; | ||||
|     OGLTexture null_image_3d; | ||||
|     OGLTexture null_image_rect; | ||||
|     OGLTextureView null_image_view_1d; | ||||
|     OGLTextureView null_image_view_2d; | ||||
|     OGLTextureView null_image_view_2d_array; | ||||
|     OGLTextureView null_image_view_cube; | ||||
| 
 | ||||
|     std::array<GLuint, VideoCommon::NUM_IMAGE_VIEW_TYPES> null_image_views; | ||||
|     std::array<GLuint, Shader::NUM_TEXTURE_TYPES> null_image_views{}; | ||||
| }; | ||||
| 
 | ||||
| class Image : public VideoCommon::ImageBase { | ||||
|  | @ -154,8 +154,6 @@ public: | |||
|     void UploadMemory(const ImageBufferMap& map, | ||||
|                       std::span<const VideoCommon::BufferImageCopy> copies); | ||||
| 
 | ||||
|     void UploadMemory(const ImageBufferMap& map, std::span<const VideoCommon::BufferCopy> copies); | ||||
| 
 | ||||
|     void DownloadMemory(ImageBufferMap& map, std::span<const VideoCommon::BufferImageCopy> copies); | ||||
| 
 | ||||
|     GLuint StorageHandle() noexcept; | ||||
|  | @ -170,7 +168,6 @@ private: | |||
|     void CopyImageToBuffer(const VideoCommon::BufferImageCopy& copy, size_t buffer_offset); | ||||
| 
 | ||||
|     OGLTexture texture; | ||||
|     OGLBuffer buffer; | ||||
|     OGLTextureView store_view; | ||||
|     GLenum gl_internal_format = GL_NONE; | ||||
|     GLenum gl_format = GL_NONE; | ||||
|  | @ -182,12 +179,14 @@ class ImageView : public VideoCommon::ImageViewBase { | |||
| 
 | ||||
| public: | ||||
|     explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageViewInfo&, ImageId, Image&); | ||||
|     explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo&, | ||||
|                        const VideoCommon::ImageViewInfo&, GPUVAddr); | ||||
|     explicit ImageView(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, | ||||
|                        const VideoCommon::ImageViewInfo& view_info); | ||||
|     explicit ImageView(TextureCacheRuntime&, const VideoCommon::NullImageParams&); | ||||
| 
 | ||||
|     [[nodiscard]] GLuint Handle(ImageViewType query_type) const noexcept { | ||||
|         return views[static_cast<size_t>(query_type)]; | ||||
|     [[nodiscard]] GLuint Handle(Shader::TextureType handle_type) const noexcept { | ||||
|         return views[static_cast<size_t>(handle_type)]; | ||||
|     } | ||||
| 
 | ||||
|     [[nodiscard]] GLuint DefaultHandle() const noexcept { | ||||
|  | @ -198,15 +197,25 @@ public: | |||
|         return internal_format; | ||||
|     } | ||||
| 
 | ||||
|     [[nodiscard]] GPUVAddr GpuAddr() const noexcept { | ||||
|         return gpu_addr; | ||||
|     } | ||||
| 
 | ||||
|     [[nodiscard]] u32 BufferSize() const noexcept { | ||||
|         return buffer_size; | ||||
|     } | ||||
| 
 | ||||
| private: | ||||
|     void SetupView(const Device& device, Image& image, ImageViewType view_type, GLuint handle, | ||||
|     void SetupView(const Device& device, Image& image, Shader::TextureType view_type, GLuint handle, | ||||
|                    const VideoCommon::ImageViewInfo& info, | ||||
|                    VideoCommon::SubresourceRange view_range); | ||||
| 
 | ||||
|     std::array<GLuint, VideoCommon::NUM_IMAGE_VIEW_TYPES> views{}; | ||||
|     std::array<GLuint, Shader::NUM_TEXTURE_TYPES> views{}; | ||||
|     std::vector<OGLTextureView> stored_views; | ||||
|     GLuint default_handle = 0; | ||||
|     GLenum internal_format = GL_NONE; | ||||
|     GLuint default_handle = 0; | ||||
|     GPUVAddr gpu_addr = 0; | ||||
|     u32 buffer_size = 0; | ||||
| }; | ||||
| 
 | ||||
| class ImageAlloc : public VideoCommon::ImageAllocBase {}; | ||||
|  |  | |||
|  | @ -5,12 +5,120 @@ | |||
| #pragma once | ||||
| 
 | ||||
| #include <glad/glad.h> | ||||
| 
 | ||||
| #include "video_core/engines/maxwell_3d.h" | ||||
| #include "video_core/surface.h" | ||||
| 
 | ||||
| namespace OpenGL::MaxwellToGL { | ||||
| 
 | ||||
| using Maxwell = Tegra::Engines::Maxwell3D::Regs; | ||||
| 
 | ||||
| struct FormatTuple { | ||||
|     GLenum internal_format; | ||||
|     GLenum format = GL_NONE; | ||||
|     GLenum type = GL_NONE; | ||||
| }; | ||||
| 
 | ||||
| constexpr std::array<FormatTuple, VideoCore::Surface::MaxPixelFormat> FORMAT_TABLE = {{ | ||||
|     {GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV},                 // A8B8G8R8_UNORM
 | ||||
|     {GL_RGBA8_SNORM, GL_RGBA, GL_BYTE},                               // A8B8G8R8_SNORM
 | ||||
|     {GL_RGBA8I, GL_RGBA_INTEGER, GL_BYTE},                            // A8B8G8R8_SINT
 | ||||
|     {GL_RGBA8UI, GL_RGBA_INTEGER, GL_UNSIGNED_BYTE},                  // A8B8G8R8_UINT
 | ||||
|     {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5},                     // R5G6B5_UNORM
 | ||||
|     {GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5_REV},                 // B5G6R5_UNORM
 | ||||
|     {GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV},             // A1R5G5B5_UNORM
 | ||||
|     {GL_RGB10_A2, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV},           // A2B10G10R10_UNORM
 | ||||
|     {GL_RGB10_A2UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT_2_10_10_10_REV}, // A2B10G10R10_UINT
 | ||||
|     {GL_RGB5_A1, GL_RGBA, GL_UNSIGNED_SHORT_1_5_5_5_REV},             // A1B5G5R5_UNORM
 | ||||
|     {GL_R8, GL_RED, GL_UNSIGNED_BYTE},                                // R8_UNORM
 | ||||
|     {GL_R8_SNORM, GL_RED, GL_BYTE},                                   // R8_SNORM
 | ||||
|     {GL_R8I, GL_RED_INTEGER, GL_BYTE},                                // R8_SINT
 | ||||
|     {GL_R8UI, GL_RED_INTEGER, GL_UNSIGNED_BYTE},                      // R8_UINT
 | ||||
|     {GL_RGBA16F, GL_RGBA, GL_HALF_FLOAT},                             // R16G16B16A16_FLOAT
 | ||||
|     {GL_RGBA16, GL_RGBA, GL_UNSIGNED_SHORT},                          // R16G16B16A16_UNORM
 | ||||
|     {GL_RGBA16_SNORM, GL_RGBA, GL_SHORT},                             // R16G16B16A16_SNORM
 | ||||
|     {GL_RGBA16I, GL_RGBA_INTEGER, GL_SHORT},                          // R16G16B16A16_SINT
 | ||||
|     {GL_RGBA16UI, GL_RGBA_INTEGER, GL_UNSIGNED_SHORT},                // R16G16B16A16_UINT
 | ||||
|     {GL_R11F_G11F_B10F, GL_RGB, GL_UNSIGNED_INT_10F_11F_11F_REV},     // B10G11R11_FLOAT
 | ||||
|     {GL_RGBA32UI, GL_RGBA_INTEGER, GL_UNSIGNED_INT},                  // R32G32B32A32_UINT
 | ||||
|     {GL_COMPRESSED_RGBA_S3TC_DXT1_EXT},                               // BC1_RGBA_UNORM
 | ||||
|     {GL_COMPRESSED_RGBA_S3TC_DXT3_EXT},                               // BC2_UNORM
 | ||||
|     {GL_COMPRESSED_RGBA_S3TC_DXT5_EXT},                               // BC3_UNORM
 | ||||
|     {GL_COMPRESSED_RED_RGTC1},                                        // BC4_UNORM
 | ||||
|     {GL_COMPRESSED_SIGNED_RED_RGTC1},                                 // BC4_SNORM
 | ||||
|     {GL_COMPRESSED_RG_RGTC2},                                         // BC5_UNORM
 | ||||
|     {GL_COMPRESSED_SIGNED_RG_RGTC2},                                  // BC5_SNORM
 | ||||
|     {GL_COMPRESSED_RGBA_BPTC_UNORM},                                  // BC7_UNORM
 | ||||
|     {GL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT},                          // BC6H_UFLOAT
 | ||||
|     {GL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT},                            // BC6H_SFLOAT
 | ||||
|     {GL_COMPRESSED_RGBA_ASTC_4x4_KHR},                                // ASTC_2D_4X4_UNORM
 | ||||
|     {GL_RGBA8, GL_RGBA, GL_UNSIGNED_BYTE},                            // B8G8R8A8_UNORM
 | ||||
|     {GL_RGBA32F, GL_RGBA, GL_FLOAT},                                  // R32G32B32A32_FLOAT
 | ||||
|     {GL_RGBA32I, GL_RGBA_INTEGER, GL_INT},                            // R32G32B32A32_SINT
 | ||||
|     {GL_RG32F, GL_RG, GL_FLOAT},                                      // R32G32_FLOAT
 | ||||
|     {GL_RG32I, GL_RG_INTEGER, GL_INT},                                // R32G32_SINT
 | ||||
|     {GL_R32F, GL_RED, GL_FLOAT},                                      // R32_FLOAT
 | ||||
|     {GL_R16F, GL_RED, GL_HALF_FLOAT},                                 // R16_FLOAT
 | ||||
|     {GL_R16, GL_RED, GL_UNSIGNED_SHORT},                              // R16_UNORM
 | ||||
|     {GL_R16_SNORM, GL_RED, GL_SHORT},                                 // R16_SNORM
 | ||||
|     {GL_R16UI, GL_RED_INTEGER, GL_UNSIGNED_SHORT},                    // R16_UINT
 | ||||
|     {GL_R16I, GL_RED_INTEGER, GL_SHORT},                              // R16_SINT
 | ||||
|     {GL_RG16, GL_RG, GL_UNSIGNED_SHORT},                              // R16G16_UNORM
 | ||||
|     {GL_RG16F, GL_RG, GL_HALF_FLOAT},                                 // R16G16_FLOAT
 | ||||
|     {GL_RG16UI, GL_RG_INTEGER, GL_UNSIGNED_SHORT},                    // R16G16_UINT
 | ||||
|     {GL_RG16I, GL_RG_INTEGER, GL_SHORT},                              // R16G16_SINT
 | ||||
|     {GL_RG16_SNORM, GL_RG, GL_SHORT},                                 // R16G16_SNORM
 | ||||
|     {GL_RGB32F, GL_RGB, GL_FLOAT},                                    // R32G32B32_FLOAT
 | ||||
|     {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV},          // A8B8G8R8_SRGB
 | ||||
|     {GL_RG8, GL_RG, GL_UNSIGNED_BYTE},                                // R8G8_UNORM
 | ||||
|     {GL_RG8_SNORM, GL_RG, GL_BYTE},                                   // R8G8_SNORM
 | ||||
|     {GL_RG8I, GL_RG_INTEGER, GL_BYTE},                                // R8G8_SINT
 | ||||
|     {GL_RG8UI, GL_RG_INTEGER, GL_UNSIGNED_BYTE},                      // R8G8_UINT
 | ||||
|     {GL_RG32UI, GL_RG_INTEGER, GL_UNSIGNED_INT},                      // R32G32_UINT
 | ||||
|     {GL_RGB16F, GL_RGBA, GL_HALF_FLOAT},                              // R16G16B16X16_FLOAT
 | ||||
|     {GL_R32UI, GL_RED_INTEGER, GL_UNSIGNED_INT},                      // R32_UINT
 | ||||
|     {GL_R32I, GL_RED_INTEGER, GL_INT},                                // R32_SINT
 | ||||
|     {GL_COMPRESSED_RGBA_ASTC_8x8_KHR},                                // ASTC_2D_8X8_UNORM
 | ||||
|     {GL_COMPRESSED_RGBA_ASTC_8x5_KHR},                                // ASTC_2D_8X5_UNORM
 | ||||
|     {GL_COMPRESSED_RGBA_ASTC_5x4_KHR},                                // ASTC_2D_5X4_UNORM
 | ||||
|     {GL_SRGB8_ALPHA8, GL_RGBA, GL_UNSIGNED_BYTE},                     // B8G8R8A8_SRGB
 | ||||
|     {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT},                         // BC1_RGBA_SRGB
 | ||||
|     {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT},                         // BC2_SRGB
 | ||||
|     {GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT},                         // BC3_SRGB
 | ||||
|     {GL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM},                            // BC7_SRGB
 | ||||
|     {GL_RGBA4, GL_RGBA, GL_UNSIGNED_SHORT_4_4_4_4_REV},               // A4B4G4R4_UNORM
 | ||||
|     {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR},                        // ASTC_2D_4X4_SRGB
 | ||||
|     {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR},                        // ASTC_2D_8X8_SRGB
 | ||||
|     {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR},                        // ASTC_2D_8X5_SRGB
 | ||||
|     {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR},                        // ASTC_2D_5X4_SRGB
 | ||||
|     {GL_COMPRESSED_RGBA_ASTC_5x5_KHR},                                // ASTC_2D_5X5_UNORM
 | ||||
|     {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR},                        // ASTC_2D_5X5_SRGB
 | ||||
|     {GL_COMPRESSED_RGBA_ASTC_10x8_KHR},                               // ASTC_2D_10X8_UNORM
 | ||||
|     {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR},                       // ASTC_2D_10X8_SRGB
 | ||||
|     {GL_COMPRESSED_RGBA_ASTC_6x6_KHR},                                // ASTC_2D_6X6_UNORM
 | ||||
|     {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR},                        // ASTC_2D_6X6_SRGB
 | ||||
|     {GL_COMPRESSED_RGBA_ASTC_10x10_KHR},                              // ASTC_2D_10X10_UNORM
 | ||||
|     {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR},                      // ASTC_2D_10X10_SRGB
 | ||||
|     {GL_COMPRESSED_RGBA_ASTC_12x12_KHR},                              // ASTC_2D_12X12_UNORM
 | ||||
|     {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR},                      // ASTC_2D_12X12_SRGB
 | ||||
|     {GL_COMPRESSED_RGBA_ASTC_8x6_KHR},                                // ASTC_2D_8X6_UNORM
 | ||||
|     {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR},                        // ASTC_2D_8X6_SRGB
 | ||||
|     {GL_COMPRESSED_RGBA_ASTC_6x5_KHR},                                // ASTC_2D_6X5_UNORM
 | ||||
|     {GL_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR},                        // ASTC_2D_6X5_SRGB
 | ||||
|     {GL_RGB9_E5, GL_RGB, GL_UNSIGNED_INT_5_9_9_9_REV},                // E5B9G9R9_FLOAT
 | ||||
|     {GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_FLOAT},            // D32_FLOAT
 | ||||
|     {GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT},    // D16_UNORM
 | ||||
|     {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8},    // D24_UNORM_S8_UINT
 | ||||
|     {GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8},    // S8_UINT_D24_UNORM
 | ||||
|     {GL_DEPTH32F_STENCIL8, GL_DEPTH_STENCIL, | ||||
|      GL_FLOAT_32_UNSIGNED_INT_24_8_REV}, // D32_FLOAT_S8_UINT
 | ||||
| }}; | ||||
| 
 | ||||
| inline const FormatTuple& GetFormatTuple(VideoCore::Surface::PixelFormat pixel_format) { | ||||
|     ASSERT(static_cast<size_t>(pixel_format) < FORMAT_TABLE.size()); | ||||
|     return FORMAT_TABLE[static_cast<size_t>(pixel_format)]; | ||||
| } | ||||
| 
 | ||||
| inline GLenum VertexFormat(Maxwell::VertexAttribute attrib) { | ||||
|     switch (attrib.type) { | ||||
|     case Maxwell::VertexAttribute::Type::UnsignedNorm: | ||||
|  |  | |||
|  | @ -130,7 +130,6 @@ RendererOpenGL::RendererOpenGL(Core::TelemetrySession& telemetry_session_, | |||
|                                std::unique_ptr<Core::Frontend::GraphicsContext> context_) | ||||
|     : RendererBase{emu_window_, std::move(context_)}, telemetry_session{telemetry_session_}, | ||||
|       emu_window{emu_window_}, cpu_memory{cpu_memory_}, gpu{gpu_}, state_tracker{gpu}, | ||||
|       program_manager{device}, | ||||
|       rasterizer(emu_window, gpu, cpu_memory, device, screen_info, program_manager, state_tracker) { | ||||
|     if (Settings::values.renderer_debug && GLAD_GL_KHR_debug) { | ||||
|         glEnable(GL_DEBUG_OUTPUT); | ||||
|  | @ -236,12 +235,7 @@ void RendererOpenGL::InitOpenGLObjects() { | |||
|     OGLShader fragment_shader; | ||||
|     fragment_shader.Create(HostShaders::OPENGL_PRESENT_FRAG, GL_FRAGMENT_SHADER); | ||||
| 
 | ||||
|     vertex_program.Create(true, false, vertex_shader.handle); | ||||
|     fragment_program.Create(true, false, fragment_shader.handle); | ||||
| 
 | ||||
|     pipeline.Create(); | ||||
|     glUseProgramStages(pipeline.handle, GL_VERTEX_SHADER_BIT, vertex_program.handle); | ||||
|     glUseProgramStages(pipeline.handle, GL_FRAGMENT_SHADER_BIT, fragment_program.handle); | ||||
|     present_program.Create(false, false, vertex_shader.handle, fragment_shader.handle); | ||||
| 
 | ||||
|     // Generate presentation sampler
 | ||||
|     present_sampler.Create(); | ||||
|  | @ -342,8 +336,8 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { | |||
|     // Set projection matrix
 | ||||
|     const std::array ortho_matrix = | ||||
|         MakeOrthographicMatrix(static_cast<float>(layout.width), static_cast<float>(layout.height)); | ||||
|     glProgramUniformMatrix3x2fv(vertex_program.handle, ModelViewMatrixLocation, 1, GL_FALSE, | ||||
|                                 std::data(ortho_matrix)); | ||||
|     program_manager.BindProgram(present_program.handle); | ||||
|     glUniformMatrix3x2fv(ModelViewMatrixLocation, 1, GL_FALSE, ortho_matrix.data()); | ||||
| 
 | ||||
|     const auto& texcoords = screen_info.display_texcoords; | ||||
|     auto left = texcoords.left; | ||||
|  | @ -404,8 +398,6 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { | |||
|     state_tracker.NotifyClipControl(); | ||||
|     state_tracker.NotifyAlphaTest(); | ||||
| 
 | ||||
|     program_manager.BindHostPipeline(pipeline.handle); | ||||
| 
 | ||||
|     state_tracker.ClipControl(GL_LOWER_LEFT, GL_ZERO_TO_ONE); | ||||
|     glEnable(GL_CULL_FACE); | ||||
|     if (screen_info.display_srgb) { | ||||
|  | @ -453,7 +445,8 @@ void RendererOpenGL::DrawScreen(const Layout::FramebufferLayout& layout) { | |||
|     glClear(GL_COLOR_BUFFER_BIT); | ||||
|     glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); | ||||
| 
 | ||||
|     program_manager.RestoreGuestPipeline(); | ||||
|     // TODO
 | ||||
|     // program_manager.RestoreGuestPipeline();
 | ||||
| } | ||||
| 
 | ||||
| void RendererOpenGL::RenderScreenshot() { | ||||
|  |  | |||
|  | @ -12,7 +12,6 @@ | |||
| #include "video_core/renderer_opengl/gl_device.h" | ||||
| #include "video_core/renderer_opengl/gl_rasterizer.h" | ||||
| #include "video_core/renderer_opengl/gl_resource_manager.h" | ||||
| #include "video_core/renderer_opengl/gl_shader_manager.h" | ||||
| #include "video_core/renderer_opengl/gl_state_tracker.h" | ||||
| 
 | ||||
| namespace Core { | ||||
|  | @ -111,9 +110,7 @@ private: | |||
|     // OpenGL object IDs
 | ||||
|     OGLSampler present_sampler; | ||||
|     OGLBuffer vertex_buffer; | ||||
|     OGLProgram vertex_program; | ||||
|     OGLProgram fragment_program; | ||||
|     OGLPipeline pipeline; | ||||
|     OGLProgram present_program; | ||||
|     OGLFramebuffer screenshot_framebuffer; | ||||
| 
 | ||||
|     // GPU address of the vertex buffer
 | ||||
|  |  | |||
|  | @ -16,7 +16,6 @@ | |||
| #include "video_core/host_shaders/opengl_copy_bc4_comp.h" | ||||
| #include "video_core/host_shaders/opengl_copy_bgra_comp.h" | ||||
| #include "video_core/host_shaders/pitch_unswizzle_comp.h" | ||||
| #include "video_core/renderer_opengl/gl_resource_manager.h" | ||||
| #include "video_core/renderer_opengl/gl_shader_manager.h" | ||||
| #include "video_core/renderer_opengl/gl_texture_cache.h" | ||||
| #include "video_core/renderer_opengl/util_shaders.h" | ||||
|  | @ -86,7 +85,7 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map, | |||
|         .width = VideoCore::Surface::DefaultBlockWidth(image.info.format), | ||||
|         .height = VideoCore::Surface::DefaultBlockHeight(image.info.format), | ||||
|     }; | ||||
|     program_manager.BindHostCompute(astc_decoder_program.handle); | ||||
|     program_manager.BindProgram(astc_decoder_program.handle); | ||||
|     glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); | ||||
|     glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_ENC_BUFFER, astc_buffer.handle); | ||||
| 
 | ||||
|  | @ -134,7 +133,7 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, | |||
|     static constexpr GLuint BINDING_INPUT_BUFFER = 1; | ||||
|     static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; | ||||
| 
 | ||||
|     program_manager.BindHostCompute(block_linear_unswizzle_2d_program.handle); | ||||
|     program_manager.BindProgram(block_linear_unswizzle_2d_program.handle); | ||||
|     glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); | ||||
|     glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); | ||||
| 
 | ||||
|  | @ -173,7 +172,7 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, | |||
|     static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; | ||||
| 
 | ||||
|     glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); | ||||
|     program_manager.BindHostCompute(block_linear_unswizzle_3d_program.handle); | ||||
|     program_manager.BindProgram(block_linear_unswizzle_3d_program.handle); | ||||
|     glBindBufferBase(GL_SHADER_STORAGE_BUFFER, BINDING_SWIZZLE_BUFFER, swizzle_table_buffer.handle); | ||||
| 
 | ||||
|     const GLenum store_format = StoreFormat(BytesPerBlock(image.info.format)); | ||||
|  | @ -222,7 +221,7 @@ void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, | |||
|     UNIMPLEMENTED_IF_MSG(!std::has_single_bit(bytes_per_block), | ||||
|                          "Non-power of two images are not implemented"); | ||||
| 
 | ||||
|     program_manager.BindHostCompute(pitch_unswizzle_program.handle); | ||||
|     program_manager.BindProgram(pitch_unswizzle_program.handle); | ||||
|     glFlushMappedNamedBufferRange(map.buffer, map.offset, image.guest_size_bytes); | ||||
|     glUniform2ui(LOC_ORIGIN, 0, 0); | ||||
|     glUniform2i(LOC_DESTINATION, 0, 0); | ||||
|  | @ -250,7 +249,7 @@ void UtilShaders::CopyBC4(Image& dst_image, Image& src_image, std::span<const Im | |||
|     static constexpr GLuint LOC_SRC_OFFSET = 0; | ||||
|     static constexpr GLuint LOC_DST_OFFSET = 1; | ||||
| 
 | ||||
|     program_manager.BindHostCompute(copy_bc4_program.handle); | ||||
|     program_manager.BindProgram(copy_bc4_program.handle); | ||||
| 
 | ||||
|     for (const ImageCopy& copy : copies) { | ||||
|         ASSERT(copy.src_subresource.base_layer == 0); | ||||
|  | @ -286,7 +285,7 @@ void UtilShaders::CopyBGR(Image& dst_image, Image& src_image, | |||
|         break; | ||||
|     case 4: { | ||||
|         // BGRA8 copy
 | ||||
|         program_manager.BindHostCompute(copy_bgra_program.handle); | ||||
|         program_manager.BindProgram(copy_bgra_program.handle); | ||||
|         constexpr GLenum FORMAT = GL_RGBA8; | ||||
|         for (const ImageCopy& copy : copies) { | ||||
|             ASSERT(copy.src_offset == zero_offset); | ||||
|  |  | |||
|  | @ -19,23 +19,6 @@ | |||
| 
 | ||||
| namespace Vulkan { | ||||
| 
 | ||||
| struct TextureHandle { | ||||
|     explicit TextureHandle(u32 data, bool via_header_index) { | ||||
|         [[likely]] if (via_header_index) { | ||||
|             image = data; | ||||
|             sampler = data; | ||||
|         } | ||||
|         else { | ||||
|             const Tegra::Texture::TextureHandle handle{data}; | ||||
|             image = handle.tic_id; | ||||
|             sampler = via_header_index ? image : handle.tsc_id.Value(); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     u32 image; | ||||
|     u32 sampler; | ||||
| }; | ||||
| 
 | ||||
| class DescriptorLayoutBuilder { | ||||
| public: | ||||
|     DescriptorLayoutBuilder(const vk::Device& device_) : device{&device_} {} | ||||
|  |  | |||
|  | @ -140,8 +140,8 @@ struct BufferCacheParams { | |||
|     static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = false; | ||||
|     static constexpr bool NEEDS_BIND_UNIFORM_INDEX = false; | ||||
|     static constexpr bool NEEDS_BIND_STORAGE_INDEX = false; | ||||
|     static constexpr bool NEEDS_BIND_TEXTURE_BUFFER_INDEX = false; | ||||
|     static constexpr bool USE_MEMORY_MAPS = true; | ||||
|     static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = false; | ||||
| }; | ||||
| 
 | ||||
| using BufferCache = VideoCommon::BufferCache<BufferCacheParams>; | ||||
|  |  | |||
|  | @ -18,6 +18,9 @@ | |||
| 
 | ||||
| namespace Vulkan { | ||||
| 
 | ||||
| using Shader::ImageBufferDescriptor; | ||||
| using Tegra::Texture::TexturePair; | ||||
| 
 | ||||
| ComputePipeline::ComputePipeline(const Device& device_, DescriptorPool& descriptor_pool, | ||||
|                                  VKUpdateDescriptorQueue& update_descriptor_queue_, | ||||
|                                  Common::ThreadWorker* thread_worker, const Shader::Info& info_, | ||||
|  | @ -106,25 +109,25 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, | |||
|                                              secondary_offset}; | ||||
|                 const u32 lhs_raw{gpu_memory.Read<u32>(addr)}; | ||||
|                 const u32 rhs_raw{gpu_memory.Read<u32>(separate_addr)}; | ||||
|                 return TextureHandle{lhs_raw | rhs_raw, via_header_index}; | ||||
|                 return TexturePair(lhs_raw | rhs_raw, via_header_index); | ||||
|             } | ||||
|         } | ||||
|         return TextureHandle{gpu_memory.Read<u32>(addr), via_header_index}; | ||||
|         return TexturePair(gpu_memory.Read<u32>(addr), via_header_index); | ||||
|     }}; | ||||
|     const auto add_image{[&](const auto& desc) { | ||||
|         for (u32 index = 0; index < desc.count; ++index) { | ||||
|             const TextureHandle handle{read_handle(desc, index)}; | ||||
|             image_view_indices.push_back(handle.image); | ||||
|             const auto handle{read_handle(desc, index)}; | ||||
|             image_view_indices.push_back(handle.first); | ||||
|         } | ||||
|     }}; | ||||
|     std::ranges::for_each(info.texture_buffer_descriptors, add_image); | ||||
|     std::ranges::for_each(info.image_buffer_descriptors, add_image); | ||||
|     for (const auto& desc : info.texture_descriptors) { | ||||
|         for (u32 index = 0; index < desc.count; ++index) { | ||||
|             const TextureHandle handle{read_handle(desc, index)}; | ||||
|             image_view_indices.push_back(handle.image); | ||||
|             const auto handle{read_handle(desc, index)}; | ||||
|             image_view_indices.push_back(handle.first); | ||||
| 
 | ||||
|             Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler); | ||||
|             Sampler* const sampler = texture_cache.GetComputeSampler(handle.second); | ||||
|             samplers.push_back(sampler->Handle()); | ||||
|         } | ||||
|     } | ||||
|  | @ -137,15 +140,16 @@ void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute, | |||
|     ImageId* texture_buffer_ids{image_view_ids.data()}; | ||||
|     size_t index{}; | ||||
|     const auto add_buffer{[&](const auto& desc) { | ||||
|         constexpr bool is_image = std::is_same_v<decltype(desc), const ImageBufferDescriptor&>; | ||||
|         for (u32 i = 0; i < desc.count; ++i) { | ||||
|             bool is_written{false}; | ||||
|             if constexpr (std::is_same_v<decltype(desc), const Shader::ImageBufferDescriptor&>) { | ||||
|             if constexpr (is_image) { | ||||
|                 is_written = desc.is_written; | ||||
|             } | ||||
|             ImageView& image_view = texture_cache.GetImageView(*texture_buffer_ids); | ||||
|             buffer_cache.BindComputeTextureBuffer(index, image_view.GpuAddr(), | ||||
|                                                   image_view.BufferSize(), image_view.format, | ||||
|                                                   is_written); | ||||
|                                                   is_written, is_image); | ||||
|             ++texture_buffer_ids; | ||||
|             ++index; | ||||
|         } | ||||
|  |  | |||
|  | @ -19,7 +19,7 @@ | |||
| #include "video_core/renderer_vulkan/vk_update_descriptor.h" | ||||
| #include "video_core/vulkan_common/vulkan_device.h" | ||||
| 
 | ||||
| #ifdef _MSC_VER | ||||
| #if defined(_MSC_VER) && defined(NDEBUG) | ||||
| #define LAMBDA_FORCEINLINE [[msvc::forceinline]] | ||||
| #else | ||||
| #define LAMBDA_FORCEINLINE | ||||
|  | @ -30,6 +30,7 @@ namespace { | |||
| using boost::container::small_vector; | ||||
| using boost::container::static_vector; | ||||
| using Shader::ImageBufferDescriptor; | ||||
| using Tegra::Texture::TexturePair; | ||||
| using VideoCore::Surface::PixelFormat; | ||||
| using VideoCore::Surface::PixelFormatFromDepthFormat; | ||||
| using VideoCore::Surface::PixelFormatFromRenderTargetFormat; | ||||
|  | @ -289,15 +290,15 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { | |||
|                     const u32 lhs_raw{gpu_memory.Read<u32>(addr)}; | ||||
|                     const u32 rhs_raw{gpu_memory.Read<u32>(separate_addr)}; | ||||
|                     const u32 raw{lhs_raw | rhs_raw}; | ||||
|                     return TextureHandle{raw, via_header_index}; | ||||
|                     return TexturePair(raw, via_header_index); | ||||
|                 } | ||||
|             } | ||||
|             return TextureHandle{gpu_memory.Read<u32>(addr), via_header_index}; | ||||
|             return TexturePair(gpu_memory.Read<u32>(addr), via_header_index); | ||||
|         }}; | ||||
|         const auto add_image{[&](const auto& desc) { | ||||
|             for (u32 index = 0; index < desc.count; ++index) { | ||||
|                 const TextureHandle handle{read_handle(desc, index)}; | ||||
|                 image_view_indices[image_index++] = handle.image; | ||||
|                 const auto handle{read_handle(desc, index)}; | ||||
|                 image_view_indices[image_index++] = handle.first; | ||||
|             } | ||||
|         }}; | ||||
|         if constexpr (Spec::has_texture_buffers) { | ||||
|  | @ -312,10 +313,10 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { | |||
|         } | ||||
|         for (const auto& desc : info.texture_descriptors) { | ||||
|             for (u32 index = 0; index < desc.count; ++index) { | ||||
|                 const TextureHandle handle{read_handle(desc, index)}; | ||||
|                 image_view_indices[image_index++] = handle.image; | ||||
|                 const auto handle{read_handle(desc, index)}; | ||||
|                 image_view_indices[image_index++] = handle.first; | ||||
| 
 | ||||
|                 Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.sampler)}; | ||||
|                 Sampler* const sampler{texture_cache.GetGraphicsSampler(handle.second)}; | ||||
|                 samplers[sampler_index++] = sampler->Handle(); | ||||
|             } | ||||
|         } | ||||
|  | @ -347,15 +348,16 @@ void GraphicsPipeline::ConfigureImpl(bool is_indexed) { | |||
|     const auto bind_stage_info{[&](size_t stage) LAMBDA_FORCEINLINE { | ||||
|         size_t index{}; | ||||
|         const auto add_buffer{[&](const auto& desc) { | ||||
|             constexpr bool is_image = std::is_same_v<decltype(desc), const ImageBufferDescriptor&>; | ||||
|             for (u32 i = 0; i < desc.count; ++i) { | ||||
|                 bool is_written{false}; | ||||
|                 if constexpr (std::is_same_v<decltype(desc), const ImageBufferDescriptor&>) { | ||||
|                 if constexpr (is_image) { | ||||
|                     is_written = desc.is_written; | ||||
|                 } | ||||
|                 ImageView& image_view{texture_cache.GetImageView(*texture_buffer_index)}; | ||||
|                 buffer_cache.BindGraphicsTextureBuffer(stage, index, image_view.GpuAddr(), | ||||
|                                                        image_view.BufferSize(), image_view.format, | ||||
|                                                        is_written); | ||||
|                                                        is_written, is_image); | ||||
|                 ++index; | ||||
|                 ++texture_buffer_index; | ||||
|             } | ||||
|  |  | |||
|  | @ -342,28 +342,15 @@ std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline( | |||
| } | ||||
| 
 | ||||
| std::unique_ptr<GraphicsPipeline> PipelineCache::CreateGraphicsPipeline() { | ||||
|     GraphicsEnvironments environments; | ||||
|     GetGraphicsEnvironments(environments, graphics_key.unique_hashes); | ||||
| 
 | ||||
|     main_pools.ReleaseContents(); | ||||
| 
 | ||||
|     std::array<GraphicsEnvironment, Maxwell::MaxShaderProgram> graphics_envs; | ||||
|     boost::container::static_vector<Shader::Environment*, Maxwell::MaxShaderProgram> envs; | ||||
| 
 | ||||
|     const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()}; | ||||
|     for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { | ||||
|         if (graphics_key.unique_hashes[index] == 0) { | ||||
|             continue; | ||||
|         } | ||||
|         const auto program{static_cast<Maxwell::ShaderProgram>(index)}; | ||||
|         auto& env{graphics_envs[index]}; | ||||
|         const u32 start_address{maxwell3d.regs.shader_config[index].offset}; | ||||
|         env = GraphicsEnvironment{maxwell3d, gpu_memory, program, base_addr, start_address}; | ||||
|         env.SetCachedSize(shader_infos[index]->size_bytes); | ||||
|         envs.push_back(&env); | ||||
|     } | ||||
|     auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, MakeSpan(envs), true)}; | ||||
|     auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, environments.Span(), true)}; | ||||
|     if (pipeline_cache_filename.empty()) { | ||||
|         return pipeline; | ||||
|     } | ||||
|     serialization_thread.QueueWork([this, key = graphics_key, envs = std::move(graphics_envs)] { | ||||
|     serialization_thread.QueueWork([this, key = graphics_key, envs = std::move(environments.envs)] { | ||||
|         boost::container::static_vector<const GenericEnvironment*, Maxwell::MaxShaderProgram> | ||||
|             env_ptrs; | ||||
|         for (size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { | ||||
|  |  | |||
|  | @ -96,17 +96,6 @@ VkRect2D GetScissorState(const Maxwell& regs, size_t index) { | |||
|     return scissor; | ||||
| } | ||||
| 
 | ||||
| struct TextureHandle { | ||||
|     constexpr TextureHandle(u32 data, bool via_header_index) { | ||||
|         const Tegra::Texture::TextureHandle handle{data}; | ||||
|         image = handle.tic_id; | ||||
|         sampler = via_header_index ? image : handle.tsc_id.Value(); | ||||
|     } | ||||
| 
 | ||||
|     u32 image; | ||||
|     u32 sampler; | ||||
| }; | ||||
| 
 | ||||
| DrawParams MakeDrawParams(const Maxwell& regs, u32 num_instances, bool is_instanced, | ||||
|                           bool is_indexed) { | ||||
|     DrawParams params{ | ||||
|  |  | |||
|  | @ -91,6 +91,23 @@ const ShaderInfo* ShaderCache::ComputeShader() { | |||
|     return MakeShaderInfo(env, *cpu_shader_addr); | ||||
| } | ||||
| 
 | ||||
| void ShaderCache::GetGraphicsEnvironments(GraphicsEnvironments& result, | ||||
|                                           const std::array<u64, NUM_PROGRAMS>& unique_hashes) { | ||||
|     size_t env_index{}; | ||||
|     const GPUVAddr base_addr{maxwell3d.regs.code_address.CodeAddress()}; | ||||
|     for (size_t index = 0; index < NUM_PROGRAMS; ++index) { | ||||
|         if (unique_hashes[index] == 0) { | ||||
|             continue; | ||||
|         } | ||||
|         const auto program{static_cast<Tegra::Engines::Maxwell3D::Regs::ShaderProgram>(index)}; | ||||
|         auto& env{result.envs[index]}; | ||||
|         const u32 start_address{maxwell3d.regs.shader_config[index].offset}; | ||||
|         env = GraphicsEnvironment{maxwell3d, gpu_memory, program, base_addr, start_address}; | ||||
|         env.SetCachedSize(shader_infos[index]->size_bytes); | ||||
|         result.env_ptrs[env_index++] = &env; | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| ShaderInfo* ShaderCache::TryGet(VAddr addr) const { | ||||
|     std::scoped_lock lock{lookup_mutex}; | ||||
| 
 | ||||
|  |  | |||
|  | @ -4,14 +4,18 @@ | |||
| 
 | ||||
| #pragma once | ||||
| 
 | ||||
| #include <algorithm> | ||||
| #include <array> | ||||
| #include <memory> | ||||
| #include <mutex> | ||||
| #include <span> | ||||
| #include <unordered_map> | ||||
| #include <utility> | ||||
| #include <vector> | ||||
| 
 | ||||
| #include "common/common_types.h" | ||||
| #include "video_core/rasterizer_interface.h" | ||||
| #include "video_core/shader_environment.h" | ||||
| 
 | ||||
| namespace Tegra { | ||||
| class MemoryManager; | ||||
|  | @ -30,6 +34,8 @@ class ShaderCache { | |||
|     static constexpr u64 PAGE_BITS = 14; | ||||
|     static constexpr u64 PAGE_SIZE = u64(1) << PAGE_BITS; | ||||
| 
 | ||||
|     static constexpr size_t NUM_PROGRAMS = 6; | ||||
| 
 | ||||
|     struct Entry { | ||||
|         VAddr addr_start; | ||||
|         VAddr addr_end; | ||||
|  | @ -58,6 +64,15 @@ public: | |||
|     void SyncGuestHost(); | ||||
| 
 | ||||
| protected: | ||||
|     struct GraphicsEnvironments { | ||||
|         std::array<GraphicsEnvironment, NUM_PROGRAMS> envs; | ||||
|         std::array<Shader::Environment*, NUM_PROGRAMS> env_ptrs; | ||||
| 
 | ||||
|         std::span<Shader::Environment* const> Span() const noexcept { | ||||
|             return std::span(env_ptrs.begin(), std::ranges::find(env_ptrs, nullptr)); | ||||
|         } | ||||
|     }; | ||||
| 
 | ||||
|     explicit ShaderCache(VideoCore::RasterizerInterface& rasterizer_, | ||||
|                          Tegra::MemoryManager& gpu_memory_, Tegra::Engines::Maxwell3D& maxwell3d_, | ||||
|                          Tegra::Engines::KeplerCompute& kepler_compute_); | ||||
|  | @ -65,17 +80,21 @@ protected: | |||
|     /// @brief Update the hashes and information of shader stages
 | ||||
|     /// @param unique_hashes Shader hashes to store into when a stage is enabled
 | ||||
|     /// @return True no success, false on error
 | ||||
|     bool RefreshStages(std::array<u64, 6>& unique_hashes); | ||||
|     bool RefreshStages(std::array<u64, NUM_PROGRAMS>& unique_hashes); | ||||
| 
 | ||||
|     /// @brief Returns information about the current compute shader
 | ||||
|     /// @return Pointer to a valid shader, nullptr on error
 | ||||
|     const ShaderInfo* ComputeShader(); | ||||
| 
 | ||||
|     /// @brief Collect the current graphics environments
 | ||||
|     void GetGraphicsEnvironments(GraphicsEnvironments& result, | ||||
|                                  const std::array<u64, NUM_PROGRAMS>& unique_hashes); | ||||
| 
 | ||||
|     Tegra::MemoryManager& gpu_memory; | ||||
|     Tegra::Engines::Maxwell3D& maxwell3d; | ||||
|     Tegra::Engines::KeplerCompute& kepler_compute; | ||||
| 
 | ||||
|     std::array<const ShaderInfo*, 6> shader_infos{}; | ||||
|     std::array<const ShaderInfo*, NUM_PROGRAMS> shader_infos{}; | ||||
|     bool last_shaders_valid = false; | ||||
| 
 | ||||
| private: | ||||
|  |  | |||
|  | @ -187,8 +187,8 @@ std::optional<u64> GenericEnvironment::TryFindSize() { | |||
| 
 | ||||
| Shader::TextureType GenericEnvironment::ReadTextureTypeImpl(GPUVAddr tic_addr, u32 tic_limit, | ||||
|                                                             bool via_header_index, u32 raw) { | ||||
|     const TextureHandle handle{raw, via_header_index}; | ||||
|     const GPUVAddr descriptor_addr{tic_addr + handle.image * sizeof(Tegra::Texture::TICEntry)}; | ||||
|     const auto handle{Tegra::Texture::TexturePair(raw, via_header_index)}; | ||||
|     const GPUVAddr descriptor_addr{tic_addr + handle.first * sizeof(Tegra::Texture::TICEntry)}; | ||||
|     Tegra::Texture::TICEntry entry; | ||||
|     gpu_memory->ReadBlock(descriptor_addr, &entry, sizeof(entry)); | ||||
|     const Shader::TextureType result{ConvertType(entry)}; | ||||
|  |  | |||
|  | @ -29,22 +29,6 @@ class Memorymanager; | |||
| 
 | ||||
| namespace VideoCommon { | ||||
| 
 | ||||
| struct TextureHandle { | ||||
|     explicit TextureHandle(u32 data, bool via_header_index) { | ||||
|         if (via_header_index) { | ||||
|             image = data; | ||||
|             sampler = data; | ||||
|         } else { | ||||
|             const Tegra::Texture::TextureHandle handle{data}; | ||||
|             image = handle.tic_id; | ||||
|             sampler = via_header_index ? image : handle.tsc_id.Value(); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     u32 image; | ||||
|     u32 sampler; | ||||
| }; | ||||
| 
 | ||||
| class GenericEnvironment : public Shader::Environment { | ||||
| public: | ||||
|     explicit GenericEnvironment() = default; | ||||
|  |  | |||
|  | @ -43,7 +43,7 @@ std::string Name(const ImageBase& image) { | |||
|     return "Invalid"; | ||||
| } | ||||
| 
 | ||||
| std::string Name(const ImageViewBase& image_view, std::optional<ImageViewType> type) { | ||||
| std::string Name(const ImageViewBase& image_view) { | ||||
|     const u32 width = image_view.size.width; | ||||
|     const u32 height = image_view.size.height; | ||||
|     const u32 depth = image_view.size.depth; | ||||
|  | @ -51,7 +51,7 @@ std::string Name(const ImageViewBase& image_view, std::optional<ImageViewType> t | |||
|     const u32 num_layers = image_view.range.extent.layers; | ||||
| 
 | ||||
|     const std::string level = num_levels > 1 ? fmt::format(":{}", num_levels) : ""; | ||||
|     switch (type.value_or(image_view.type)) { | ||||
|     switch (image_view.type) { | ||||
|     case ImageViewType::e1D: | ||||
|         return fmt::format("ImageView 1D {}{}", width, level); | ||||
|     case ImageViewType::e2D: | ||||
|  |  | |||
|  | @ -255,8 +255,7 @@ struct RenderTargets; | |||
| 
 | ||||
| [[nodiscard]] std::string Name(const ImageBase& image); | ||||
| 
 | ||||
| [[nodiscard]] std::string Name(const ImageViewBase& image_view, | ||||
|                                std::optional<ImageViewType> type = std::nullopt); | ||||
| [[nodiscard]] std::string Name(const ImageViewBase& image_view); | ||||
| 
 | ||||
| [[nodiscard]] std::string Name(const RenderTargets& render_targets); | ||||
| 
 | ||||
|  |  | |||
|  | @ -154,6 +154,15 @@ union TextureHandle { | |||
| }; | ||||
| static_assert(sizeof(TextureHandle) == 4, "TextureHandle has wrong size"); | ||||
| 
 | ||||
| [[nodiscard]] inline std::pair<u32, u32> TexturePair(u32 raw, bool via_header_index) { | ||||
|     if (via_header_index) { | ||||
|         return {raw, raw}; | ||||
|     } else { | ||||
|         const Tegra::Texture::TextureHandle handle{raw}; | ||||
|         return {handle.tic_id, via_header_index ? handle.tic_id : handle.tsc_id}; | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| struct TICEntry { | ||||
|     union { | ||||
|         struct { | ||||
|  |  | |||
|  | @ -282,7 +282,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR | |||
|     VkPhysicalDevice16BitStorageFeaturesKHR bit16_storage{ | ||||
|         .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES_KHR, | ||||
|         .pNext = nullptr, | ||||
|         .storageBuffer16BitAccess = false, | ||||
|         .storageBuffer16BitAccess = true, | ||||
|         .uniformAndStorageBuffer16BitAccess = true, | ||||
|         .storagePushConstant16 = false, | ||||
|         .storageInputOutput16 = false, | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 ReinUsesLisp
						ReinUsesLisp