From 460c7829cf3aabdd601f1f8ac3ea8b3fa4e9f7e5 Mon Sep 17 00:00:00 2001 From: wildcard Date: Thu, 25 Sep 2025 12:23:24 +0200 Subject: [PATCH] [Vulkan] speed up bank lookup with LRU cache, batch growth to 32 Adds a tiny 8-entry LRU cache (the entry count should be tested to find the optimal number). Increases batch growth to 32 (should be tested to determine the difference between 16 and 32). --- .../renderer_vulkan/vk_descriptor_pool.cpp | 76 +++++++++++++++++-- .../renderer_vulkan/vk_descriptor_pool.h | 11 ++- 2 files changed, 79 insertions(+), 8 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp index 600003953d..53e36f8129 100644 --- a/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp +++ b/src/video_core/renderer_vulkan/vk_descriptor_pool.cpp @@ -20,7 +20,7 @@ namespace Vulkan { // Prefer small grow rates to avoid saturating the descriptor pool with barely used pipelines -constexpr size_t SETS_GROW_RATE = 16; +constexpr size_t SETS_GROW_RATE = 32; //test difference between 16 and 32 constexpr s32 SCORE_THRESHOLD = 3; struct DescriptorBank { @@ -29,9 +29,12 @@ struct DescriptorBank { }; bool DescriptorBankInfo::IsSuperset(const DescriptorBankInfo& subset) const noexcept { - return uniform_buffers >= subset.uniform_buffers && storage_buffers >= subset.storage_buffers && - texture_buffers >= subset.texture_buffers && image_buffers >= subset.image_buffers && - textures >= subset.textures && images >= subset.image_buffers; + return uniform_buffers >= subset.uniform_buffers && + storage_buffers >= subset.storage_buffers && + texture_buffers >= subset.texture_buffers && + image_buffers >= subset.image_buffers && + textures >= subset.textures && + images >= subset.images; } template @@ -45,6 +48,19 @@ static u32 Accumulate(const Descriptors& descriptors) { static DescriptorBankInfo MakeBankInfo(std::span infos) { DescriptorBankInfo bank; + if (infos.size() == 1) { + const auto& info = infos.front(); + const 
auto acc = [](const auto& ds){ u32 c=0; for (const auto& d: ds) c+=d.count; return c; }; + bank.uniform_buffers += acc(info.constant_buffer_descriptors); + bank.storage_buffers += acc(info.storage_buffers_descriptors); + bank.texture_buffers += acc(info.texture_buffer_descriptors); + bank.image_buffers += acc(info.image_buffer_descriptors); + bank.textures += acc(info.texture_descriptors); + bank.images += acc(info.image_descriptors); + bank.score = bank.uniform_buffers + bank.storage_buffers + bank.texture_buffers + + bank.image_buffers + bank.textures + bank.images; + return bank; + } for (const Shader::Info& info : infos) { bank.uniform_buffers += Accumulate(info.constant_buffer_descriptors); bank.storage_buffers += Accumulate(info.storage_buffers_descriptors); @@ -102,13 +118,22 @@ void DescriptorAllocator::Allocate(size_t begin, size_t end) { } vk::DescriptorSets DescriptorAllocator::AllocateDescriptors(size_t count) { - const std::vector layouts(count, layout); + std::array stack{}; + const VkDescriptorSetLayout* p_layouts = nullptr; + std::vector heap; + if (count <= stack.size()) { + stack.fill(layout); + p_layouts = stack.data(); + } else { + heap.assign(count, layout); + p_layouts = heap.data(); + } VkDescriptorSetAllocateInfo allocate_info{ .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, .pNext = nullptr, .descriptorPool = *bank->pools.back(), .descriptorSetCount = static_cast(count), - .pSetLayouts = layouts.data(), + .pSetLayouts = p_layouts, }; vk::DescriptorSets new_sets = bank->pools.back().Allocate(allocate_info); if (!new_sets.IsOutOfPoolMemory()) { @@ -146,21 +171,58 @@ DescriptorAllocator DescriptorPool::Allocator(VkDescriptorSetLayout layout, } DescriptorBank& DescriptorPool::Bank(const DescriptorBankInfo& reqs) { + { + std::scoped_lock lk(cache_mutex); + DescriptorBank* best = nullptr; u64 best_stamp = 0; + for (const auto& e : cache_) { + if (!e.bank) continue; + if (std::abs(e.info.score - reqs.score) < SCORE_THRESHOLD && 
e.info.IsSuperset(reqs)) { + if (e.stamp >= best_stamp) { best_stamp = e.stamp; best = e.bank; } + } + } + if (best) return *best; + } std::shared_lock read_lock{banks_mutex}; const auto it = std::ranges::find_if(bank_infos, [&reqs](const DescriptorBankInfo& bank) { return std::abs(bank.score - reqs.score) < SCORE_THRESHOLD && bank.IsSuperset(reqs); }); if (it != bank_infos.end()) { - return *banks[std::distance(bank_infos.begin(), it)].get(); + DescriptorBank& found = *banks[std::distance(bank_infos.begin(), it)].get(); + read_lock.unlock(); + // update cache + std::scoped_lock lk(cache_mutex); + size_t victim = 0; u64 oldest = UINT64_MAX; + for (size_t i=0;i()); bank.info = reqs; AllocatePool(device, bank); + // update cache + { + std::scoped_lock lk(cache_mutex); + size_t victim = 0; u64 oldest = UINT64_MAX; + for (size_t i=0;i #include #include - +#include +#include #include "shader_recompiler/shader_info.h" #include "video_core/renderer_vulkan/vk_resource_pool.h" #include "video_core/vulkan_common/vulkan_wrapper.h" @@ -75,6 +76,14 @@ public: private: DescriptorBank& Bank(const DescriptorBankInfo& reqs); + struct CacheEntry { + DescriptorBankInfo info{}; + DescriptorBank* bank{nullptr}; + u64 stamp{0}; + }; + std::mutex cache_mutex{}; + std::array cache_{}; //test and then adjust + u64 cache_tick_{0}; const Device& device; MasterSemaphore& master_semaphore;