From 6544cdd4466797d575284178d2d4d839ecdce474 Mon Sep 17 00:00:00 2001 From: wildcard Date: Wed, 8 Oct 2025 18:58:20 +0200 Subject: [PATCH] [Vulkan] Introduce ring buffers for uniform buffers Create three ring buffers and rotate between them each frame to avoid GPU/CPU conflicts. BindMappedUniformBuffer first tries to allocate from the current ring buffer and falls back to the staging pool only if the allocation does not fit. Note to testers: please test the performance, since this is primarily a performance optimization, and also look for visual bugs. --- .../renderer_vulkan/vk_buffer_cache.cpp | 42 +++++++++++++++++++ .../renderer_vulkan/vk_buffer_cache.h | 29 ++++++++++++- 2 files changed, 69 insertions(+), 2 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 55565e3d79..399b61f0a4 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -337,6 +337,11 @@ BufferCacheRuntime::BufferCacheRuntime(const Device& device_, MemoryAllocator& m uint8_pass = std::make_unique(device, scheduler, descriptor_pool, staging_pool, compute_pass_descriptor_queue); } + const u32 ubo_align = static_cast( + device.GetUniformBufferAlignment() /* if you have it */ + ); + // add the ability to change the size in settings in future + uniform_ring.Init(device, memory_allocator, 8 * 1024 * 1024 /* 8 MiB */, ubo_align ? 
ubo_align : 256); quad_array_index_buffer = std::make_shared(device_, memory_allocator_, scheduler_, staging_pool_); quad_strip_index_buffer = std::make_shared(device_, memory_allocator_, @@ -355,6 +360,42 @@ void BufferCacheRuntime::FreeDeferredStagingBuffer(StagingBufferRef& ref) { staging_pool.FreeDeferred(ref); } +void BufferCacheRuntime::UniformRing::Init(const Device& device, + MemoryAllocator& alloc, + u64 bytes, u32 alignment) { /* Creates NUM_FRAMES buffers of 'bytes' bytes each through alloc.CreateBuffer with MemoryUsage::Upload and caches each buffer's Mapped().data() pointer, then records the capacity and alignment (256 when 'alignment' is 0) and resets head and current_frame to 0. NOTE(review): the 'device' parameter is not read in this body. */ + for (size_t i = 0; i < NUM_FRAMES; ++i) { + VkBufferCreateInfo ci{ + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .pNext = nullptr, + .flags = 0, + .size = bytes, + .usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .queueFamilyIndexCount = 0, + .pQueueFamilyIndices = nullptr, + }; + buffers[i] = alloc.CreateBuffer(ci, MemoryUsage::Upload); + mapped[i] = buffers[i].Mapped().data(); + } + size = bytes; + align = alignment ? alignment : 256; + head = 0; + current_frame = 0; +} + +std::span BufferCacheRuntime::UniformRing::Alloc(u32 bytes, u32& out_offset) { /* Reserves 'bytes' bytes from the current frame's buffer. head is rounded up to 'align'; if the rounded offset plus 'bytes' exceeds 'size', an empty span is returned and neither head nor out_offset is modified. Otherwise out_offset receives the aligned offset, head moves to the end of the reservation, and the returned span covers that range of mapped[current_frame]. A request with bytes == 0 that fits also produces a zero-length span. */ + const u64 aligned = Common::AlignUp(head, static_cast(align)); + u64 end = aligned + bytes; + + if (end > size) { + return {}; // Fallback to staging pool + } + + out_offset = static_cast(aligned); + head = end; + return {mapped[current_frame] + out_offset, bytes}; +} + u64 BufferCacheRuntime::GetDeviceLocalMemory() const { return device.GetDeviceLocalMemory(); } @@ -375,6 +416,7 @@ void BufferCacheRuntime::TickFrame(Common::SlotVector& slot_buffers) noe for (auto it = slot_buffers.begin(); it != slot_buffers.end(); it++) { it->ResetUsageTracking(); } + uniform_ring.BeginFrame(); } void BufferCacheRuntime::Finish() { diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index efe960258c..185259a6db 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h @@ -124,8 +124,15 @@ public: 
void BindTransformFeedbackBuffers(VideoCommon::HostBindings& bindings); - std::span BindMappedUniformBuffer([[maybe_unused]] size_t stage, - [[maybe_unused]] u32 binding_index, u32 size) { + std::span BindMappedUniformBuffer([[maybe_unused]] size_t /*stage*/, + [[maybe_unused]] u32 /*binding_index*/, + u32 size) { /* Tries uniform_ring.Alloc first; when it yields a non-empty span, the current frame's ring buffer is bound at the returned offset and that span is returned. Any empty result falls through to the staging_pool.Request path below. */ + u32 offset = 0; + if (auto span = uniform_ring.Alloc(size, offset); !span.empty()) { + BindBuffer(*uniform_ring.buffers[uniform_ring.current_frame], offset, size); + return span; + } + // Fallback for giant requests const StagingBufferRef ref = staging_pool.Request(size, MemoryUsage::Upload); BindBuffer(ref.buffer, static_cast(ref.offset), size); return ref.mapped_span; @@ -153,6 +160,24 @@ private: void ReserveNullBuffer(); vk::Buffer CreateNullBuffer(); + struct UniformRing { /* Set of NUM_FRAMES buffers with one shared write cursor: 'buffers' and 'mapped' are indexed by frame, 'size' is the byte capacity stored by Init, 'head' is the next unreserved offset in the buffer selected by 'current_frame', and 'align' is the offset alignment applied by Alloc. */ + static constexpr size_t NUM_FRAMES = 3; + std::array buffers{}; + std::array mapped{}; + u64 size = 0; + u64 head = 0; + u32 align = 256; + size_t current_frame = 0; + + void Init(const Device& device, MemoryAllocator& alloc, u64 bytes, u32 alignment); + void BeginFrame() { /* Selects the next buffer (wrapping after NUM_FRAMES) and rewinds head to 0. NOTE(review): nothing here waits for earlier GPU use of the buffer being reselected - confirm that at most NUM_FRAMES frames are ever in flight. */ + current_frame = (current_frame + 1) % NUM_FRAMES; + head = 0; + } + std::span Alloc(u32 bytes, u32& out_offset); + }; + UniformRing uniform_ring; + const Device& device; MemoryAllocator& memory_allocator; Scheduler& scheduler;