[Vulkan] Introduce Ring buffers for UB
Create three ring buffers and rotate between them each frame to avoid GPU/CPU conflicts. BindMappedUniformBuffer first tries to allocate from the ring buffer and falls back to the staging pool only if the allocation is too large to fit. Note to testers: please test performance, since this is primarily a performance optimization, and also look for visual bugs.
This commit is contained in:
parent
b6241e4148
commit
6544cdd446
2 changed files with 69 additions and 2 deletions
|
@ -337,6 +337,11 @@ BufferCacheRuntime::BufferCacheRuntime(const Device& device_, MemoryAllocator& m
|
|||
uint8_pass = std::make_unique<Uint8Pass>(device, scheduler, descriptor_pool, staging_pool,
|
||||
compute_pass_descriptor_queue);
|
||||
}
|
||||
const u32 ubo_align = static_cast<u32>(
|
||||
device.GetUniformBufferAlignment() /* if you have it */
|
||||
);
|
||||
// TODO: Make the uniform ring size configurable through the settings.
|
||||
uniform_ring.Init(device, memory_allocator, 8 * 1024 * 1024 /* 8 MiB */, ubo_align ? ubo_align : 256);
|
||||
quad_array_index_buffer = std::make_shared<QuadArrayIndexBuffer>(device_, memory_allocator_,
|
||||
scheduler_, staging_pool_);
|
||||
quad_strip_index_buffer = std::make_shared<QuadStripIndexBuffer>(device_, memory_allocator_,
|
||||
|
@ -355,6 +360,42 @@ void BufferCacheRuntime::FreeDeferredStagingBuffer(StagingBufferRef& ref) {
|
|||
staging_pool.FreeDeferred(ref);
|
||||
}
|
||||
|
||||
void BufferCacheRuntime::UniformRing::Init(const Device& device,
|
||||
MemoryAllocator& alloc,
|
||||
u64 bytes, u32 alignment) {
|
||||
for (size_t i = 0; i < NUM_FRAMES; ++i) {
|
||||
VkBufferCreateInfo ci{
|
||||
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.flags = 0,
|
||||
.size = bytes,
|
||||
.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
|
||||
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
|
||||
.queueFamilyIndexCount = 0,
|
||||
.pQueueFamilyIndices = nullptr,
|
||||
};
|
||||
buffers[i] = alloc.CreateBuffer(ci, MemoryUsage::Upload);
|
||||
mapped[i] = buffers[i].Mapped().data();
|
||||
}
|
||||
size = bytes;
|
||||
align = alignment ? alignment : 256;
|
||||
head = 0;
|
||||
current_frame = 0;
|
||||
}
|
||||
|
||||
/// Sub-allocates `bytes` bytes from the buffer of the current frame.
///
/// The allocation starts at `head` rounded up to `align`. On success `head` is advanced past
/// the new range and `out_offset` receives the aligned start offset within the buffer.
///
/// @param bytes      Number of bytes requested.
/// @param out_offset Written only on success; left untouched when the request does not fit.
/// @return Span over the mapped memory of the allocated range, or an empty span when the
///         aligned range would end beyond `size` (the caller is expected to fall back to the
///         staging pool).
std::span<u8> BufferCacheRuntime::UniformRing::Alloc(u32 bytes, u32& out_offset) {
    const u64 range_begin = Common::AlignUp(head, static_cast<u64>(align));
    const u64 range_end = range_begin + bytes;

    if (range_end <= size) {
        out_offset = static_cast<u32>(range_begin);
        head = range_end;
        return {mapped[current_frame] + out_offset, bytes};
    }

    return {}; // Fallback to staging pool
}
|
||||
|
||||
/// Returns the value reported by Device::GetDeviceLocalMemory() for the bound device.
u64 BufferCacheRuntime::GetDeviceLocalMemory() const {
    const u64 device_local_memory = device.GetDeviceLocalMemory();
    return device_local_memory;
}
|
||||
|
@ -375,6 +416,7 @@ void BufferCacheRuntime::TickFrame(Common::SlotVector<Buffer>& slot_buffers) noe
|
|||
for (auto it = slot_buffers.begin(); it != slot_buffers.end(); it++) {
|
||||
it->ResetUsageTracking();
|
||||
}
|
||||
uniform_ring.BeginFrame();
|
||||
}
|
||||
|
||||
void BufferCacheRuntime::Finish() {
|
||||
|
|
|
@ -124,8 +124,15 @@ public:
|
|||
|
||||
void BindTransformFeedbackBuffers(VideoCommon::HostBindings<Buffer>& bindings);
|
||||
|
||||
std::span<u8> BindMappedUniformBuffer([[maybe_unused]] size_t stage,
|
||||
[[maybe_unused]] u32 binding_index, u32 size) {
|
||||
std::span<u8> BindMappedUniformBuffer([[maybe_unused]] size_t /*stage*/,
|
||||
[[maybe_unused]] u32 /*binding_index*/,
|
||||
u32 size) {
|
||||
u32 offset = 0;
|
||||
if (auto span = uniform_ring.Alloc(size, offset); !span.empty()) {
|
||||
BindBuffer(*uniform_ring.buffers[uniform_ring.current_frame], offset, size);
|
||||
return span;
|
||||
}
|
||||
// Fallback for giant requests
|
||||
const StagingBufferRef ref = staging_pool.Request(size, MemoryUsage::Upload);
|
||||
BindBuffer(ref.buffer, static_cast<u32>(ref.offset), size);
|
||||
return ref.mapped_span;
|
||||
|
@ -153,6 +160,24 @@ private:
|
|||
void ReserveNullBuffer();
|
||||
vk::Buffer CreateNullBuffer();
|
||||
|
||||
struct UniformRing {
|
||||
static constexpr size_t NUM_FRAMES = 3;
|
||||
std::array<vk::Buffer, NUM_FRAMES> buffers{};
|
||||
std::array<u8*, NUM_FRAMES> mapped{};
|
||||
u64 size = 0;
|
||||
u64 head = 0;
|
||||
u32 align = 256;
|
||||
size_t current_frame = 0;
|
||||
|
||||
void Init(const Device& device, MemoryAllocator& alloc, u64 bytes, u32 alignment);
|
||||
void BeginFrame() {
|
||||
current_frame = (current_frame + 1) % NUM_FRAMES;
|
||||
head = 0;
|
||||
}
|
||||
std::span<u8> Alloc(u32 bytes, u32& out_offset);
|
||||
};
|
||||
UniformRing uniform_ring;
|
||||
|
||||
const Device& device;
|
||||
MemoryAllocator& memory_allocator;
|
||||
Scheduler& scheduler;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue