forked from eden-emu/eden
		
	gl_buffer_cache: Drop interop based parameter buffer workarounds
Sacrifice runtime performance to avoid generating kernel exceptions on Windows caused by our abusive aliasing of interop buffer objects.
This commit is contained in:
		
							parent
							
								
									2b95c137ff
								
							
						
					
					
						commit
						3da87d3f12
					
				
					 3 changed files with 45 additions and 65 deletions
				
			
		|  | @ -91,7 +91,7 @@ class BufferCache { | ||||||
|     }; |     }; | ||||||
| 
 | 
 | ||||||
| public: | public: | ||||||
|     static constexpr size_t SKIP_CACHE_SIZE = 4096; |     static constexpr u32 SKIP_CACHE_SIZE = 4096; | ||||||
| 
 | 
 | ||||||
|     explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_, |     explicit BufferCache(VideoCore::RasterizerInterface& rasterizer_, | ||||||
|                          Tegra::Engines::Maxwell3D& maxwell3d_, |                          Tegra::Engines::Maxwell3D& maxwell3d_, | ||||||
|  | @ -671,7 +671,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 | ||||||
|     const VAddr cpu_addr = binding.cpu_addr; |     const VAddr cpu_addr = binding.cpu_addr; | ||||||
|     const u32 size = binding.size; |     const u32 size = binding.size; | ||||||
|     Buffer& buffer = slot_buffers[binding.buffer_id]; |     Buffer& buffer = slot_buffers[binding.buffer_id]; | ||||||
|     if (size <= runtime.SkipCacheSize() && !buffer.IsRegionGpuModified(cpu_addr, size)) { |     if (size <= SKIP_CACHE_SIZE && !buffer.IsRegionGpuModified(cpu_addr, size)) { | ||||||
|         if constexpr (IS_OPENGL) { |         if constexpr (IS_OPENGL) { | ||||||
|             if (runtime.HasFastBufferSubData()) { |             if (runtime.HasFastBufferSubData()) { | ||||||
|                 // Fast path for Nvidia
 |                 // Fast path for Nvidia
 | ||||||
|  |  | ||||||
|  | @ -36,13 +36,8 @@ Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rast | ||||||
|     buffer.Create(); |     buffer.Create(); | ||||||
|     const std::string name = fmt::format("Buffer 0x{:x}", CpuAddr()); |     const std::string name = fmt::format("Buffer 0x{:x}", CpuAddr()); | ||||||
|     glObjectLabel(GL_BUFFER, buffer.handle, static_cast<GLsizei>(name.size()), name.data()); |     glObjectLabel(GL_BUFFER, buffer.handle, static_cast<GLsizei>(name.size()), name.data()); | ||||||
|     if (runtime.device.UseAssemblyShaders()) { |  | ||||||
|         CreateMemoryObjects(runtime); |  | ||||||
|         glNamedBufferStorageMemEXT(buffer.handle, SizeBytes(), memory_commit.ExportOpenGLHandle(), |  | ||||||
|                                    memory_commit.Offset()); |  | ||||||
|     } else { |  | ||||||
|     glNamedBufferData(buffer.handle, SizeBytes(), nullptr, GL_DYNAMIC_DRAW); |     glNamedBufferData(buffer.handle, SizeBytes(), nullptr, GL_DYNAMIC_DRAW); | ||||||
|     } | 
 | ||||||
|     if (runtime.has_unified_vertex_buffers) { |     if (runtime.has_unified_vertex_buffers) { | ||||||
|         glGetNamedBufferParameterui64vNV(buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &address); |         glGetNamedBufferParameterui64vNV(buffer.handle, GL_BUFFER_GPU_ADDRESS_NV, &address); | ||||||
|     } |     } | ||||||
|  | @ -71,61 +66,33 @@ void Buffer::MakeResident(GLenum access) noexcept { | ||||||
|     glMakeNamedBufferResidentNV(buffer.handle, access); |     glMakeNamedBufferResidentNV(buffer.handle, access); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| GLuint Buffer::SubBuffer(u32 offset) { |  | ||||||
|     if (offset == 0) { |  | ||||||
|         return buffer.handle; |  | ||||||
|     } |  | ||||||
|     for (const auto& [sub_buffer, sub_offset] : subs) { |  | ||||||
|         if (sub_offset == offset) { |  | ||||||
|             return sub_buffer.handle; |  | ||||||
|         } |  | ||||||
|     } |  | ||||||
|     OGLBuffer sub_buffer; |  | ||||||
|     sub_buffer.Create(); |  | ||||||
|     glNamedBufferStorageMemEXT(sub_buffer.handle, SizeBytes() - offset, |  | ||||||
|                                memory_commit.ExportOpenGLHandle(), memory_commit.Offset() + offset); |  | ||||||
|     return subs.emplace_back(std::move(sub_buffer), offset).first.handle; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| void Buffer::CreateMemoryObjects(BufferCacheRuntime& runtime) { |  | ||||||
|     auto& allocator = runtime.vulkan_memory_allocator; |  | ||||||
|     auto& device = runtime.vulkan_device->GetLogical(); |  | ||||||
|     auto vulkan_buffer = device.CreateBuffer(VkBufferCreateInfo{ |  | ||||||
|         .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, |  | ||||||
|         .pNext = nullptr, |  | ||||||
|         .flags = 0, |  | ||||||
|         .size = SizeBytes(), |  | ||||||
|         .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT | |  | ||||||
|                  VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | |  | ||||||
|                  VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | |  | ||||||
|                  VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_INDEX_BUFFER_BIT | |  | ||||||
|                  VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, |  | ||||||
|         .sharingMode = VK_SHARING_MODE_EXCLUSIVE, |  | ||||||
|         .queueFamilyIndexCount = 0, |  | ||||||
|         .pQueueFamilyIndices = nullptr, |  | ||||||
|     }); |  | ||||||
|     const VkMemoryRequirements requirements = device.GetBufferMemoryRequirements(*vulkan_buffer); |  | ||||||
|     memory_commit = allocator->Commit(requirements, Vulkan::MemoryUsage::DeviceLocal); |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| BufferCacheRuntime::BufferCacheRuntime(const Device& device_, const Vulkan::Device* vulkan_device_, | BufferCacheRuntime::BufferCacheRuntime(const Device& device_, const Vulkan::Device* vulkan_device_, | ||||||
|                                        Vulkan::MemoryAllocator* vulkan_memory_allocator_) |                                        Vulkan::MemoryAllocator* vulkan_memory_allocator_) | ||||||
|     : device{device_}, vulkan_device{vulkan_device_}, |     : device{device_}, vulkan_device{vulkan_device_}, | ||||||
|       vulkan_memory_allocator{vulkan_memory_allocator_}, |       vulkan_memory_allocator{vulkan_memory_allocator_}, | ||||||
|       stream_buffer{device.HasFastBufferSubData() ? std::nullopt |       has_fast_buffer_sub_data{device.HasFastBufferSubData()}, | ||||||
|                                                   : std::make_optional<StreamBuffer>()} { |       use_assembly_shaders{device.UseAssemblyShaders()}, | ||||||
|  |       has_unified_vertex_buffers{device.HasVertexBufferUnifiedMemory()}, | ||||||
|  |       stream_buffer{has_fast_buffer_sub_data ? std::nullopt : std::make_optional<StreamBuffer>()} { | ||||||
|     GLint gl_max_attributes; |     GLint gl_max_attributes; | ||||||
|     glGetIntegerv(GL_MAX_VERTEX_ATTRIBS, &gl_max_attributes); |     glGetIntegerv(GL_MAX_VERTEX_ATTRIBS, &gl_max_attributes); | ||||||
|     max_attributes = static_cast<u32>(gl_max_attributes); |     max_attributes = static_cast<u32>(gl_max_attributes); | ||||||
|     use_assembly_shaders = device.UseAssemblyShaders(); |  | ||||||
|     has_unified_vertex_buffers = device.HasVertexBufferUnifiedMemory(); |  | ||||||
| 
 |  | ||||||
|     for (auto& stage_uniforms : fast_uniforms) { |     for (auto& stage_uniforms : fast_uniforms) { | ||||||
|         for (OGLBuffer& buffer : stage_uniforms) { |         for (OGLBuffer& buffer : stage_uniforms) { | ||||||
|             buffer.Create(); |             buffer.Create(); | ||||||
|             glNamedBufferData(buffer.handle, BufferCache::SKIP_CACHE_SIZE, nullptr, GL_STREAM_DRAW); |             glNamedBufferData(buffer.handle, BufferCache::SKIP_CACHE_SIZE, nullptr, GL_STREAM_DRAW); | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
|  |     for (auto& stage_uniforms : copy_uniforms) { | ||||||
|  |         for (OGLBuffer& buffer : stage_uniforms) { | ||||||
|  |             buffer.Create(); | ||||||
|  |             glNamedBufferData(buffer.handle, 0x10'000, nullptr, GL_STREAM_COPY); | ||||||
|  |         } | ||||||
|  |     } | ||||||
|  |     for (OGLBuffer& buffer : copy_compute_uniforms) { | ||||||
|  |         buffer.Create(); | ||||||
|  |         glNamedBufferData(buffer.handle, 0x10'000, nullptr, GL_STREAM_COPY); | ||||||
|  |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer, | void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer, | ||||||
|  | @ -167,8 +134,14 @@ void BufferCacheRuntime::BindVertexBuffer(u32 index, Buffer& buffer, u32 offset, | ||||||
| void BufferCacheRuntime::BindUniformBuffer(size_t stage, u32 binding_index, Buffer& buffer, | void BufferCacheRuntime::BindUniformBuffer(size_t stage, u32 binding_index, Buffer& buffer, | ||||||
|                                            u32 offset, u32 size) { |                                            u32 offset, u32 size) { | ||||||
|     if (use_assembly_shaders) { |     if (use_assembly_shaders) { | ||||||
|         const GLuint sub_buffer = buffer.SubBuffer(offset); |         GLuint handle; | ||||||
|         glBindBufferRangeNV(PABO_LUT[stage], binding_index, sub_buffer, 0, |         if (offset != 0) { | ||||||
|  |             handle = copy_uniforms[stage][binding_index].handle; | ||||||
|  |             glCopyNamedBufferSubData(buffer.Handle(), handle, offset, 0, size); | ||||||
|  |         } else { | ||||||
|  |             handle = buffer.Handle(); | ||||||
|  |         } | ||||||
|  |         glBindBufferRangeNV(PABO_LUT[stage], binding_index, handle, 0, | ||||||
|                             static_cast<GLsizeiptr>(size)); |                             static_cast<GLsizeiptr>(size)); | ||||||
|     } else { |     } else { | ||||||
|         const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer; |         const GLuint base_binding = device.GetBaseBindings(stage).uniform_buffer; | ||||||
|  | @ -181,8 +154,15 @@ void BufferCacheRuntime::BindUniformBuffer(size_t stage, u32 binding_index, Buff | ||||||
| void BufferCacheRuntime::BindComputeUniformBuffer(u32 binding_index, Buffer& buffer, u32 offset, | void BufferCacheRuntime::BindComputeUniformBuffer(u32 binding_index, Buffer& buffer, u32 offset, | ||||||
|                                                   u32 size) { |                                                   u32 size) { | ||||||
|     if (use_assembly_shaders) { |     if (use_assembly_shaders) { | ||||||
|         glBindBufferRangeNV(GL_COMPUTE_PROGRAM_PARAMETER_BUFFER_NV, binding_index, |         GLuint handle; | ||||||
|                             buffer.SubBuffer(offset), 0, static_cast<GLsizeiptr>(size)); |         if (offset != 0) { | ||||||
|  |             handle = copy_compute_uniforms[binding_index].handle; | ||||||
|  |             glCopyNamedBufferSubData(buffer.Handle(), handle, offset, 0, size); | ||||||
|  |         } else { | ||||||
|  |             handle = buffer.Handle(); | ||||||
|  |         } | ||||||
|  |         glBindBufferRangeNV(GL_COMPUTE_PROGRAM_PARAMETER_BUFFER_NV, binding_index, handle, 0, | ||||||
|  |                             static_cast<GLsizeiptr>(size)); | ||||||
|     } else { |     } else { | ||||||
|         glBindBufferRange(GL_UNIFORM_BUFFER, binding_index, buffer.Handle(), |         glBindBufferRange(GL_UNIFORM_BUFFER, binding_index, buffer.Handle(), | ||||||
|                           static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size)); |                           static_cast<GLintptr>(offset), static_cast<GLsizeiptr>(size)); | ||||||
|  |  | ||||||
|  | @ -39,8 +39,6 @@ public: | ||||||
| 
 | 
 | ||||||
|     void MakeResident(GLenum access) noexcept; |     void MakeResident(GLenum access) noexcept; | ||||||
| 
 | 
 | ||||||
|     [[nodiscard]] GLuint SubBuffer(u32 offset); |  | ||||||
| 
 |  | ||||||
|     [[nodiscard]] GLuint64EXT HostGpuAddr() const noexcept { |     [[nodiscard]] GLuint64EXT HostGpuAddr() const noexcept { | ||||||
|         return address; |         return address; | ||||||
|     } |     } | ||||||
|  | @ -50,13 +48,9 @@ public: | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
| private: | private: | ||||||
|     void CreateMemoryObjects(BufferCacheRuntime& runtime); |  | ||||||
| 
 |  | ||||||
|     GLuint64EXT address = 0; |     GLuint64EXT address = 0; | ||||||
|     Vulkan::MemoryCommit memory_commit; |  | ||||||
|     OGLBuffer buffer; |     OGLBuffer buffer; | ||||||
|     GLenum current_residency_access = GL_NONE; |     GLenum current_residency_access = GL_NONE; | ||||||
|     std::vector<std::pair<OGLBuffer, u32>> subs; |  | ||||||
| }; | }; | ||||||
| 
 | 
 | ||||||
| class BufferCacheRuntime { | class BufferCacheRuntime { | ||||||
|  | @ -127,7 +121,7 @@ public: | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     [[nodiscard]] bool HasFastBufferSubData() const noexcept { |     [[nodiscard]] bool HasFastBufferSubData() const noexcept { | ||||||
|         return device.HasFastBufferSubData(); |         return has_fast_buffer_sub_data; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
| private: | private: | ||||||
|  | @ -140,16 +134,22 @@ private: | ||||||
|     const Device& device; |     const Device& device; | ||||||
|     const Vulkan::Device* vulkan_device; |     const Vulkan::Device* vulkan_device; | ||||||
|     Vulkan::MemoryAllocator* vulkan_memory_allocator; |     Vulkan::MemoryAllocator* vulkan_memory_allocator; | ||||||
|     std::optional<StreamBuffer> stream_buffer; | 
 | ||||||
|  |     bool has_fast_buffer_sub_data = false; | ||||||
|  |     bool use_assembly_shaders = false; | ||||||
|  |     bool has_unified_vertex_buffers = false; | ||||||
| 
 | 
 | ||||||
|     u32 max_attributes = 0; |     u32 max_attributes = 0; | ||||||
| 
 | 
 | ||||||
|     bool use_assembly_shaders = false; |     std::optional<StreamBuffer> stream_buffer; | ||||||
|     bool has_unified_vertex_buffers = false; |  | ||||||
| 
 | 
 | ||||||
|     std::array<std::array<OGLBuffer, VideoCommon::NUM_GRAPHICS_UNIFORM_BUFFERS>, |     std::array<std::array<OGLBuffer, VideoCommon::NUM_GRAPHICS_UNIFORM_BUFFERS>, | ||||||
|                VideoCommon::NUM_STAGES> |                VideoCommon::NUM_STAGES> | ||||||
|         fast_uniforms; |         fast_uniforms; | ||||||
|  |     std::array<std::array<OGLBuffer, VideoCommon::NUM_GRAPHICS_UNIFORM_BUFFERS>, | ||||||
|  |                VideoCommon::NUM_STAGES> | ||||||
|  |         copy_uniforms; | ||||||
|  |     std::array<OGLBuffer, VideoCommon::NUM_COMPUTE_UNIFORM_BUFFERS> copy_compute_uniforms; | ||||||
| 
 | 
 | ||||||
|     u32 index_buffer_offset = 0; |     u32 index_buffer_offset = 0; | ||||||
| }; | }; | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 ReinUsesLisp
						ReinUsesLisp