[Vulkan] speed up bank lookup with LRU cache, batch growth to 32
Some checks failed
eden-license / license-header (pull_request) Failing after 18s

Adds a tiny 8-entry LRU cache (the optimal number of entries still needs to be tested).
Increases the batch growth rate to 32 (the difference between 16 and 32 still needs to be tested).
This commit is contained in:
wildcard 2025-09-25 12:23:24 +02:00
parent 42280f34d6
commit 460c7829cf
2 changed files with 79 additions and 8 deletions

View file

@ -20,7 +20,7 @@
namespace Vulkan {
// Prefer small grow rates to avoid saturating the descriptor pool with barely used pipelines
// NOTE(review): SETS_GROW_RATE is defined twice below. This looks like a diff view whose +/-
// markers were stripped, showing the previous value (16) followed by the new value (32) - confirm.
constexpr size_t SETS_GROW_RATE = 16;
constexpr size_t SETS_GROW_RATE = 32; // TODO: benchmark 16 against 32 before settling on a value
// DescriptorPool::Bank() only considers a bank when |bank.score - reqs.score| < SCORE_THRESHOLD.
constexpr s32 SCORE_THRESHOLD = 3;
struct DescriptorBank {
@ -29,9 +29,12 @@ struct DescriptorBank {
};
// Reports whether this bank info covers `subset`: the six descriptor counts (uniform_buffers,
// storage_buffers, texture_buffers, image_buffers, textures, images) are compared with >=.
// NOTE(review): two return statements follow. This looks like a diff view whose +/- markers were
// stripped. The first return compares `images` against `subset.image_buffers`; the second
// compares `images` against `subset.images`. Presumably the second is the post-change version
// and the first is the removed line - confirm against the repository.
bool DescriptorBankInfo::IsSuperset(const DescriptorBankInfo& subset) const noexcept {
return uniform_buffers >= subset.uniform_buffers && storage_buffers >= subset.storage_buffers &&
texture_buffers >= subset.texture_buffers && image_buffers >= subset.image_buffers &&
textures >= subset.textures && images >= subset.image_buffers;
return uniform_buffers >= subset.uniform_buffers &&
storage_buffers >= subset.storage_buffers &&
texture_buffers >= subset.texture_buffers &&
image_buffers >= subset.image_buffers &&
textures >= subset.textures &&
images >= subset.images;
}
template <typename Descriptors>
@ -45,6 +48,19 @@ static u32 Accumulate(const Descriptors& descriptors) {
static DescriptorBankInfo MakeBankInfo(std::span<const Shader::Info> infos) {
DescriptorBankInfo bank;
if (infos.size() == 1) {
const auto& info = infos.front();
const auto acc = [](const auto& ds){ u32 c=0; for (const auto& d: ds) c+=d.count; return c; };
bank.uniform_buffers += acc(info.constant_buffer_descriptors);
bank.storage_buffers += acc(info.storage_buffers_descriptors);
bank.texture_buffers += acc(info.texture_buffer_descriptors);
bank.image_buffers += acc(info.image_buffer_descriptors);
bank.textures += acc(info.texture_descriptors);
bank.images += acc(info.image_descriptors);
bank.score = bank.uniform_buffers + bank.storage_buffers + bank.texture_buffers +
bank.image_buffers + bank.textures + bank.images;
return bank;
}
for (const Shader::Info& info : infos) {
bank.uniform_buffers += Accumulate(info.constant_buffer_descriptors);
bank.storage_buffers += Accumulate(info.storage_buffers_descriptors);
@ -102,13 +118,22 @@ void DescriptorAllocator::Allocate(size_t begin, size_t end) {
}
vk::DescriptorSets DescriptorAllocator::AllocateDescriptors(size_t count) {
const std::vector<VkDescriptorSetLayout> layouts(count, layout);
std::array<VkDescriptorSetLayout, 64> stack{};
const VkDescriptorSetLayout* p_layouts = nullptr;
std::vector<VkDescriptorSetLayout> heap;
if (count <= stack.size()) {
stack.fill(layout);
p_layouts = stack.data();
} else {
heap.assign(count, layout);
p_layouts = heap.data();
}
VkDescriptorSetAllocateInfo allocate_info{
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
.pNext = nullptr,
.descriptorPool = *bank->pools.back(),
.descriptorSetCount = static_cast<u32>(count),
.pSetLayouts = layouts.data(),
.pSetLayouts = p_layouts,
};
vk::DescriptorSets new_sets = bank->pools.back().Allocate(allocate_info);
if (!new_sets.IsOutOfPoolMemory()) {
@ -146,21 +171,58 @@ DescriptorAllocator DescriptorPool::Allocator(VkDescriptorSetLayout layout,
}
// Returns a bank that satisfies `reqs`: its score differs from reqs.score by less than
// SCORE_THRESHOLD and its info IsSuperset(reqs). If no such bank exists, a new one is appended
// to `banks` / `bank_infos` and AllocatePool() is called for it.
//
// Lookup order:
//   1. cache_ (under cache_mutex),
//   2. bank_infos under a std::shared_lock on banks_mutex,
//   3. bank_infos again under a std::unique_lock on banks_mutex, then insertion.
// Every path that finds or creates a bank outside the cache writes it into a cache slot.
//
// NOTE(review): the cache returns the matching entry with the highest stamp, while the scans
// over bank_infos return the first match; the two can pick different banks for the same reqs.
// NOTE(review): cache_ stores raw pointers to objects owned by `banks`; confirm that nothing
// removes banks without also clearing cache_.
DescriptorBank& DescriptorPool::Bank(const DescriptorBankInfo& reqs) {
{
// Fast path: scan the cache and keep the matching entry with the highest stamp.
// NOTE(review): a hit does not update the entry's stamp, so the eviction below follows
// insertion order rather than last use.
std::scoped_lock lk(cache_mutex);
DescriptorBank* best = nullptr; u64 best_stamp = 0;
for (const auto& e : cache_) {
// Slots that were never written still hold a null bank pointer.
if (!e.bank) continue;
if (std::abs(e.info.score - reqs.score) < SCORE_THRESHOLD && e.info.IsSuperset(reqs)) {
if (e.stamp >= best_stamp) { best_stamp = e.stamp; best = e.bank; }
}
}
if (best) return *best;
}
// Cache miss: search the known banks while holding only a shared lock.
std::shared_lock read_lock{banks_mutex};
const auto it = std::ranges::find_if(bank_infos, [&reqs](const DescriptorBankInfo& bank) {
return std::abs(bank.score - reqs.score) < SCORE_THRESHOLD && bank.IsSuperset(reqs);
});
if (it != bank_infos.end()) {
// NOTE(review): this span looks like a diff view with +/- markers stripped. The next line
// appears to be the removed pre-change return and the lines after it its replacement - confirm.
return *banks[std::distance(bank_infos.begin(), it)].get();
// bank_infos and banks are indexed in parallel, so the position in one selects the other.
DescriptorBank& found = *banks[std::distance(bank_infos.begin(), it)].get();
// The shared lock is released before cache_mutex is taken.
read_lock.unlock();
// Store the bank in the cache, overwriting the slot with the smallest stamp
// (the first such slot on ties; never-written slots have stamp 0).
std::scoped_lock lk(cache_mutex);
size_t victim = 0; u64 oldest = UINT64_MAX;
for (size_t i=0;i<cache_.size();++i) if (cache_[i].stamp < oldest) { oldest = cache_[i].stamp; victim = i; }
cache_[victim] = CacheEntry{found.info, &found, ++cache_tick_};
return found;
}
read_lock.unlock();
// The shared lock was dropped above, so the search is repeated under the exclusive lock
// before inserting (presumably to catch a bank added by another thread in between).
std::unique_lock write_lock{banks_mutex};
auto it2 = std::ranges::find_if(bank_infos, [&reqs](const DescriptorBankInfo& bank) {
return std::abs(bank.score - reqs.score) < SCORE_THRESHOLD && bank.IsSuperset(reqs);
});
if (it2 != bank_infos.end()) {
DescriptorBank& found = *banks[std::distance(bank_infos.begin(), it2)].get();
// Same cache insertion as above; here cache_mutex is taken while write_lock is still held.
std::scoped_lock lk(cache_mutex);
size_t victim = 0; u64 oldest = UINT64_MAX;
for (size_t i=0;i<cache_.size();++i) if (cache_[i].stamp < oldest) { oldest = cache_[i].stamp; victim = i; }
cache_[victim] = CacheEntry{found.info, &found, ++cache_tick_};
return found;
}
// No existing bank matches: append a new one (info and bank at the same index) and
// allocate its first pool.
bank_infos.push_back(reqs);
auto& bank = *banks.emplace_back(std::make_unique<DescriptorBank>());
bank.info = reqs;
AllocatePool(device, bank);
// Store the new bank in the cache, overwriting the slot with the smallest stamp.
// NOTE(review): this eviction scan is written out three times in this function.
{
std::scoped_lock lk(cache_mutex);
size_t victim = 0; u64 oldest = UINT64_MAX;
for (size_t i=0;i<cache_.size();++i) if (cache_[i].stamp < oldest) { oldest = cache_[i].stamp; victim = i; }
cache_[victim] = CacheEntry{bank.info, &bank, ++cache_tick_};
}
return bank;
}

View file

@ -6,7 +6,8 @@
#include <shared_mutex>
#include <span>
#include <vector>
#include <array>
#include <mutex>
#include "shader_recompiler/shader_info.h"
#include "video_core/renderer_vulkan/vk_resource_pool.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
@ -75,6 +76,14 @@ public:
private:
DescriptorBank& Bank(const DescriptorBankInfo& reqs);
// One slot of the bank lookup cache consulted first by Bank().
struct CacheEntry {
// Copy of the cached bank's info; Bank() matches requests against it.
DescriptorBankInfo info{};
// Non-owning pointer to the cached bank; nullptr marks a slot that was never written.
DescriptorBank* bank{nullptr};
// Value of cache_tick_ when the slot was written; Bank() overwrites the smallest stamp first.
u64 stamp{0};
};
// Locked by Bank() around every read or write of cache_ and cache_tick_.
std::mutex cache_mutex{};
std::array<CacheEntry, 8> cache_{}; // TODO: the size 8 is untested; benchmark and adjust
// Incremented each time Bank() writes a cache slot; the new value becomes that slot's stamp.
u64 cache_tick_{0};
const Device& device;
MasterSemaphore& master_semaphore;