[vk] Experimental Descriptor Changes
Some checks failed
eden-license / license-header (pull_request) Failing after 21s

This commit is contained in:
Ribbit 2025-10-05 21:37:04 -07:00
parent bc1d093fe9
commit 9fbb776dc6
5 changed files with 60 additions and 15 deletions

View file

@ -321,7 +321,7 @@ std::pair<VkBuffer, VkDeviceSize> Uint8Pass::Assemble(u32 num_vertices, VkBuffer
const u32 staging_size = static_cast<u32>(num_vertices * sizeof(u16));
const auto staging = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal);
compute_pass_descriptor_queue.Acquire();
compute_pass_descriptor_queue.Acquire(2);
compute_pass_descriptor_queue.AddBuffer(src_buffer, src_offset, num_vertices);
compute_pass_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size);
const void* const descriptor_data{compute_pass_descriptor_queue.UpdateData()};
@ -379,7 +379,7 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble(
const std::size_t staging_size = num_tri_vertices * sizeof(u32);
const auto staging = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal);
compute_pass_descriptor_queue.Acquire();
compute_pass_descriptor_queue.Acquire(2);
compute_pass_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size);
compute_pass_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size);
const void* const descriptor_data{compute_pass_descriptor_queue.UpdateData()};
@ -420,7 +420,7 @@ void ConditionalRenderingResolvePass::Resolve(VkBuffer dst_buffer, VkBuffer src_
u32 src_offset, bool compare_to_zero) {
const size_t compare_size = compare_to_zero ? 8 : 24;
compute_pass_descriptor_queue.Acquire();
compute_pass_descriptor_queue.Acquire(2);
compute_pass_descriptor_queue.AddBuffer(src_buffer, src_offset, compare_size);
compute_pass_descriptor_queue.AddBuffer(dst_buffer, 0, sizeof(u32));
const void* const descriptor_data{compute_pass_descriptor_queue.UpdateData()};
@ -476,7 +476,7 @@ void QueriesPrefixScanPass::Run(VkBuffer accumulation_buffer, VkBuffer dst_buffe
static constexpr size_t DISPATCH_SIZE = 2048U;
size_t runs_to_do = std::min<size_t>(current_runs, DISPATCH_SIZE);
current_runs -= runs_to_do;
compute_pass_descriptor_queue.Acquire();
compute_pass_descriptor_queue.Acquire(3);
compute_pass_descriptor_queue.AddBuffer(src_buffer, 0, number_of_sums * sizeof(u64));
compute_pass_descriptor_queue.AddBuffer(dst_buffer, 0, number_of_sums * sizeof(u64));
compute_pass_descriptor_queue.AddBuffer(accumulation_buffer, 0, sizeof(u64));
@ -583,7 +583,7 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map,
const u32 num_dispatches_y = Common::DivCeil(swizzle.num_tiles.height, 8U);
const u32 num_dispatches_z = image.info.resources.layers;
compute_pass_descriptor_queue.Acquire();
compute_pass_descriptor_queue.Acquire(2);
compute_pass_descriptor_queue.AddBuffer(map.buffer, input_offset,
image.guest_size_bytes - swizzle.buffer_offset);
compute_pass_descriptor_queue.AddImage(image.StorageImageView(swizzle.level));
@ -688,7 +688,7 @@ void MSAACopyPass::CopyImage(Image& dst_image, Image& src_image,
ASSERT(copy.dst_subresource.base_layer == 0);
ASSERT(copy.dst_subresource.num_layers == 1);
compute_pass_descriptor_queue.Acquire();
compute_pass_descriptor_queue.Acquire(2);
compute_pass_descriptor_queue.AddImage(
src_image.StorageImageView(copy.src_subresource.base_level));
compute_pass_descriptor_queue.AddImage(

View file

@ -102,7 +102,16 @@ ComputePipeline::ComputePipeline(const Device& device_, vk::PipelineCache& pipel
void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute,
Tegra::MemoryManager& gpu_memory, Scheduler& scheduler,
BufferCache& buffer_cache, TextureCache& texture_cache) {
guest_descriptor_queue.Acquire();
{
size_t required_entries = 0;
required_entries += Shader::NumDescriptors(info.constant_buffer_descriptors);
required_entries += Shader::NumDescriptors(info.storage_buffers_descriptors);
required_entries += Shader::NumDescriptors(info.texture_buffer_descriptors);
required_entries += Shader::NumDescriptors(info.image_buffer_descriptors);
required_entries += Shader::NumDescriptors(info.texture_descriptors);
required_entries += Shader::NumDescriptors(info.image_descriptors);
guest_descriptor_queue.Acquire(required_entries);
}
buffer_cache.SetComputeUniformBufferState(info.constant_buffer_mask, &uniform_buffer_sizes);
buffer_cache.UnbindComputeStorageBuffers();

View file

@ -460,7 +460,25 @@ bool GraphicsPipeline::ConfigureImpl(bool is_indexed) {
buffer_cache.UpdateGraphicsBuffers(is_indexed);
buffer_cache.BindHostGeometryBuffers(is_indexed);
guest_descriptor_queue.Acquire();
// Compute exact number of descriptor entries required for this draw.
size_t required_entries = 0;
// Totals the descriptor counts reported by Shader::NumDescriptors for every
// descriptor category of a single shader stage. The leading size_t{0} keeps
// each addition in size_t, whatever NumDescriptors returns.
const auto acc_descriptors = [](const Shader::Info& info) -> size_t {
    return size_t{0} + Shader::NumDescriptors(info.constant_buffer_descriptors) +
           Shader::NumDescriptors(info.storage_buffers_descriptors) +
           Shader::NumDescriptors(info.texture_buffer_descriptors) +
           Shader::NumDescriptors(info.image_buffer_descriptors) +
           Shader::NumDescriptors(info.texture_descriptors) +
           Shader::NumDescriptors(info.image_descriptors);
};
if constexpr (Spec::enabled_stages[0]) required_entries += acc_descriptors(stage_infos[0]);
if constexpr (Spec::enabled_stages[1]) required_entries += acc_descriptors(stage_infos[1]);
if constexpr (Spec::enabled_stages[2]) required_entries += acc_descriptors(stage_infos[2]);
if constexpr (Spec::enabled_stages[3]) required_entries += acc_descriptors(stage_infos[3]);
if constexpr (Spec::enabled_stages[4]) required_entries += acc_descriptors(stage_infos[4]);
guest_descriptor_queue.Acquire(required_entries);
RescalingPushConstant rescaling;
RenderAreaPushConstant render_area;

View file

@ -28,17 +28,21 @@ void UpdateDescriptorQueue::TickFrame() {
payload_cursor = payload_start;
}
void UpdateDescriptorQueue::Acquire() {
// Minimum number of entries required.
// This is the maximum number of entries a single draw call might use.
static constexpr size_t MIN_ENTRIES = 0x400;
if (std::distance(payload_start, payload_cursor) + MIN_ENTRIES >= FRAME_PAYLOAD_SIZE) {
LOG_WARNING(Render_Vulkan, "Payload overflow, waiting for worker thread");
/// Prepares the queue for a new batch of descriptor writes.
///
/// @param required_entries Number of payload entries the caller is about to add.
///
/// If the entries already written since payload_start plus the requested amount
/// would exceed FRAME_PAYLOAD_SIZE, a warning is logged, the scheduler worker is
/// waited on, and the cursor is rewound to payload_start. In every case
/// upload_start ends up pointing at the current cursor.
void UpdateDescriptorQueue::Acquire(size_t required_entries) {
    const auto entries_in_use =
        static_cast<size_t>(std::distance(payload_start, payload_cursor));
    const bool request_fits = entries_in_use + required_entries <= FRAME_PAYLOAD_SIZE;
    if (!request_fits) {
        LOG_WARNING(Render_Vulkan, "Descriptor payload near overflow (used={} req={}), waiting",
                    entries_in_use, required_entries);
        // NOTE(review): presumably waiting guarantees nothing still reads the
        // region being rewound over -- confirm against the scheduler.
        scheduler.WaitWorker();
        payload_cursor = payload_start;
    }
    // Writes for this batch begin wherever the cursor currently sits.
    upload_start = payload_cursor;
}
/// Legacy overload for callers that do not pass an entry count: forwards to
/// Acquire(size_t) with a fixed, conservative reservation of 0x400 entries.
void UpdateDescriptorQueue::Acquire() {
    constexpr size_t legacy_reservation = 0x400;
    Acquire(legacy_reservation);
}
} // namespace Vulkan

View file

@ -4,6 +4,7 @@
#pragma once
#include <array>
#include "common/assert.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
@ -41,6 +42,11 @@ public:
void TickFrame();
// Ensure the queue has at least 'required_entries' free slots for this draw/dispatch.
// Prefer using this overload to avoid underestimations that can cause overflows.
void Acquire(size_t required_entries);
// Legacy fallback that reserves a conservative number of entries.
void Acquire();
const DescriptorUpdateEntry* UpdateData() const noexcept {
@ -48,6 +54,8 @@ public:
}
void AddSampledImage(VkImageView image_view, VkSampler sampler) {
ASSERT(static_cast<size_t>(std::distance(payload_start, payload_cursor)) <
FRAME_PAYLOAD_SIZE);
*(payload_cursor++) = VkDescriptorImageInfo{
.sampler = sampler,
.imageView = image_view,
@ -56,6 +64,8 @@ public:
}
void AddImage(VkImageView image_view) {
ASSERT(static_cast<size_t>(std::distance(payload_start, payload_cursor)) <
FRAME_PAYLOAD_SIZE);
*(payload_cursor++) = VkDescriptorImageInfo{
.sampler = VK_NULL_HANDLE,
.imageView = image_view,
@ -64,6 +74,8 @@ public:
}
void AddBuffer(VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size) {
ASSERT(static_cast<size_t>(std::distance(payload_start, payload_cursor)) <
FRAME_PAYLOAD_SIZE);
*(payload_cursor++) = VkDescriptorBufferInfo{
.buffer = buffer,
.offset = offset,
@ -72,6 +84,8 @@ public:
}
void AddTexelBuffer(VkBufferView texel_buffer) {
ASSERT(static_cast<size_t>(std::distance(payload_start, payload_cursor)) <
FRAME_PAYLOAD_SIZE);
*(payload_cursor++) = texel_buffer;
}