[vk] Experimental Descriptor Changes
Some checks failed
eden-license / license-header (pull_request) Failing after 21s
This commit is contained in:
parent
bc1d093fe9
commit
9fbb776dc6
5 changed files with 60 additions and 15 deletions
|
@ -321,7 +321,7 @@ std::pair<VkBuffer, VkDeviceSize> Uint8Pass::Assemble(u32 num_vertices, VkBuffer
|
|||
const u32 staging_size = static_cast<u32>(num_vertices * sizeof(u16));
|
||||
const auto staging = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal);
|
||||
|
||||
compute_pass_descriptor_queue.Acquire();
|
||||
compute_pass_descriptor_queue.Acquire(2);
|
||||
compute_pass_descriptor_queue.AddBuffer(src_buffer, src_offset, num_vertices);
|
||||
compute_pass_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size);
|
||||
const void* const descriptor_data{compute_pass_descriptor_queue.UpdateData()};
|
||||
|
@ -379,7 +379,7 @@ std::pair<VkBuffer, VkDeviceSize> QuadIndexedPass::Assemble(
|
|||
const std::size_t staging_size = num_tri_vertices * sizeof(u32);
|
||||
const auto staging = staging_buffer_pool.Request(staging_size, MemoryUsage::DeviceLocal);
|
||||
|
||||
compute_pass_descriptor_queue.Acquire();
|
||||
compute_pass_descriptor_queue.Acquire(2);
|
||||
compute_pass_descriptor_queue.AddBuffer(src_buffer, src_offset, input_size);
|
||||
compute_pass_descriptor_queue.AddBuffer(staging.buffer, staging.offset, staging_size);
|
||||
const void* const descriptor_data{compute_pass_descriptor_queue.UpdateData()};
|
||||
|
@ -420,7 +420,7 @@ void ConditionalRenderingResolvePass::Resolve(VkBuffer dst_buffer, VkBuffer src_
|
|||
u32 src_offset, bool compare_to_zero) {
|
||||
const size_t compare_size = compare_to_zero ? 8 : 24;
|
||||
|
||||
compute_pass_descriptor_queue.Acquire();
|
||||
compute_pass_descriptor_queue.Acquire(2);
|
||||
compute_pass_descriptor_queue.AddBuffer(src_buffer, src_offset, compare_size);
|
||||
compute_pass_descriptor_queue.AddBuffer(dst_buffer, 0, sizeof(u32));
|
||||
const void* const descriptor_data{compute_pass_descriptor_queue.UpdateData()};
|
||||
|
@ -476,7 +476,7 @@ void QueriesPrefixScanPass::Run(VkBuffer accumulation_buffer, VkBuffer dst_buffe
|
|||
static constexpr size_t DISPATCH_SIZE = 2048U;
|
||||
size_t runs_to_do = std::min<size_t>(current_runs, DISPATCH_SIZE);
|
||||
current_runs -= runs_to_do;
|
||||
compute_pass_descriptor_queue.Acquire();
|
||||
compute_pass_descriptor_queue.Acquire(3);
|
||||
compute_pass_descriptor_queue.AddBuffer(src_buffer, 0, number_of_sums * sizeof(u64));
|
||||
compute_pass_descriptor_queue.AddBuffer(dst_buffer, 0, number_of_sums * sizeof(u64));
|
||||
compute_pass_descriptor_queue.AddBuffer(accumulation_buffer, 0, sizeof(u64));
|
||||
|
@ -583,7 +583,7 @@ void ASTCDecoderPass::Assemble(Image& image, const StagingBufferRef& map,
|
|||
const u32 num_dispatches_y = Common::DivCeil(swizzle.num_tiles.height, 8U);
|
||||
const u32 num_dispatches_z = image.info.resources.layers;
|
||||
|
||||
compute_pass_descriptor_queue.Acquire();
|
||||
compute_pass_descriptor_queue.Acquire(2);
|
||||
compute_pass_descriptor_queue.AddBuffer(map.buffer, input_offset,
|
||||
image.guest_size_bytes - swizzle.buffer_offset);
|
||||
compute_pass_descriptor_queue.AddImage(image.StorageImageView(swizzle.level));
|
||||
|
@ -688,7 +688,7 @@ void MSAACopyPass::CopyImage(Image& dst_image, Image& src_image,
|
|||
ASSERT(copy.dst_subresource.base_layer == 0);
|
||||
ASSERT(copy.dst_subresource.num_layers == 1);
|
||||
|
||||
compute_pass_descriptor_queue.Acquire();
|
||||
compute_pass_descriptor_queue.Acquire(2);
|
||||
compute_pass_descriptor_queue.AddImage(
|
||||
src_image.StorageImageView(copy.src_subresource.base_level));
|
||||
compute_pass_descriptor_queue.AddImage(
|
||||
|
|
|
@ -102,7 +102,16 @@ ComputePipeline::ComputePipeline(const Device& device_, vk::PipelineCache& pipel
|
|||
void ComputePipeline::Configure(Tegra::Engines::KeplerCompute& kepler_compute,
|
||||
Tegra::MemoryManager& gpu_memory, Scheduler& scheduler,
|
||||
BufferCache& buffer_cache, TextureCache& texture_cache) {
|
||||
guest_descriptor_queue.Acquire();
|
||||
{
|
||||
size_t required_entries = 0;
|
||||
required_entries += Shader::NumDescriptors(info.constant_buffer_descriptors);
|
||||
required_entries += Shader::NumDescriptors(info.storage_buffers_descriptors);
|
||||
required_entries += Shader::NumDescriptors(info.texture_buffer_descriptors);
|
||||
required_entries += Shader::NumDescriptors(info.image_buffer_descriptors);
|
||||
required_entries += Shader::NumDescriptors(info.texture_descriptors);
|
||||
required_entries += Shader::NumDescriptors(info.image_descriptors);
|
||||
guest_descriptor_queue.Acquire(required_entries);
|
||||
}
|
||||
|
||||
buffer_cache.SetComputeUniformBufferState(info.constant_buffer_mask, &uniform_buffer_sizes);
|
||||
buffer_cache.UnbindComputeStorageBuffers();
|
||||
|
|
|
@ -460,7 +460,25 @@ bool GraphicsPipeline::ConfigureImpl(bool is_indexed) {
|
|||
buffer_cache.UpdateGraphicsBuffers(is_indexed);
|
||||
buffer_cache.BindHostGeometryBuffers(is_indexed);
|
||||
|
||||
guest_descriptor_queue.Acquire();
|
||||
// Compute exact number of descriptor entries required for this draw.
|
||||
size_t required_entries = 0;
|
||||
const auto acc_descriptors = [](const Shader::Info& info) -> size_t {
|
||||
size_t n = 0;
|
||||
n += Shader::NumDescriptors(info.constant_buffer_descriptors);
|
||||
n += Shader::NumDescriptors(info.storage_buffers_descriptors);
|
||||
n += Shader::NumDescriptors(info.texture_buffer_descriptors);
|
||||
n += Shader::NumDescriptors(info.image_buffer_descriptors);
|
||||
n += Shader::NumDescriptors(info.texture_descriptors);
|
||||
n += Shader::NumDescriptors(info.image_descriptors);
|
||||
return n;
|
||||
};
|
||||
if constexpr (Spec::enabled_stages[0]) required_entries += acc_descriptors(stage_infos[0]);
|
||||
if constexpr (Spec::enabled_stages[1]) required_entries += acc_descriptors(stage_infos[1]);
|
||||
if constexpr (Spec::enabled_stages[2]) required_entries += acc_descriptors(stage_infos[2]);
|
||||
if constexpr (Spec::enabled_stages[3]) required_entries += acc_descriptors(stage_infos[3]);
|
||||
if constexpr (Spec::enabled_stages[4]) required_entries += acc_descriptors(stage_infos[4]);
|
||||
|
||||
guest_descriptor_queue.Acquire(required_entries);
|
||||
|
||||
RescalingPushConstant rescaling;
|
||||
RenderAreaPushConstant render_area;
|
||||
|
|
|
@ -28,17 +28,21 @@ void UpdateDescriptorQueue::TickFrame() {
|
|||
payload_cursor = payload_start;
|
||||
}
|
||||
|
||||
void UpdateDescriptorQueue::Acquire() {
|
||||
// Minimum number of entries required.
|
||||
// This is the maximum number of entries a single draw call might use.
|
||||
static constexpr size_t MIN_ENTRIES = 0x400;
|
||||
|
||||
if (std::distance(payload_start, payload_cursor) + MIN_ENTRIES >= FRAME_PAYLOAD_SIZE) {
|
||||
LOG_WARNING(Render_Vulkan, "Payload overflow, waiting for worker thread");
|
||||
void UpdateDescriptorQueue::Acquire(size_t required_entries) {
|
||||
const size_t used = static_cast<size_t>(std::distance(payload_start, payload_cursor));
|
||||
if (used + required_entries > FRAME_PAYLOAD_SIZE) {
|
||||
LOG_WARNING(Render_Vulkan, "Descriptor payload near overflow (used={} req={}), waiting",
|
||||
used, required_entries);
|
||||
scheduler.WaitWorker();
|
||||
payload_cursor = payload_start;
|
||||
}
|
||||
upload_start = payload_cursor;
|
||||
}
|
||||
|
||||
void UpdateDescriptorQueue::Acquire() {
|
||||
// Conservative legacy reservation for backward callers.
|
||||
static constexpr size_t MIN_ENTRIES = 0x400;
|
||||
Acquire(MIN_ENTRIES);
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
||||
|
|
|
@ -4,6 +4,7 @@
|
|||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include "common/assert.h"
|
||||
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
|
@ -41,6 +42,11 @@ public:
|
|||
|
||||
void TickFrame();
|
||||
|
||||
// Ensure the queue has at least 'required_entries' free slots for this draw/dispatch.
|
||||
// Prefer using this overload to avoid underestimations that can cause overflows.
|
||||
void Acquire(size_t required_entries);
|
||||
|
||||
// Legacy fallback that reserves a conservative number of entries.
|
||||
void Acquire();
|
||||
|
||||
const DescriptorUpdateEntry* UpdateData() const noexcept {
|
||||
|
@ -48,6 +54,8 @@ public:
|
|||
}
|
||||
|
||||
void AddSampledImage(VkImageView image_view, VkSampler sampler) {
|
||||
ASSERT(static_cast<size_t>(std::distance(payload_start, payload_cursor)) <
|
||||
FRAME_PAYLOAD_SIZE);
|
||||
*(payload_cursor++) = VkDescriptorImageInfo{
|
||||
.sampler = sampler,
|
||||
.imageView = image_view,
|
||||
|
@ -56,6 +64,8 @@ public:
|
|||
}
|
||||
|
||||
void AddImage(VkImageView image_view) {
|
||||
ASSERT(static_cast<size_t>(std::distance(payload_start, payload_cursor)) <
|
||||
FRAME_PAYLOAD_SIZE);
|
||||
*(payload_cursor++) = VkDescriptorImageInfo{
|
||||
.sampler = VK_NULL_HANDLE,
|
||||
.imageView = image_view,
|
||||
|
@ -64,6 +74,8 @@ public:
|
|||
}
|
||||
|
||||
void AddBuffer(VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size) {
|
||||
ASSERT(static_cast<size_t>(std::distance(payload_start, payload_cursor)) <
|
||||
FRAME_PAYLOAD_SIZE);
|
||||
*(payload_cursor++) = VkDescriptorBufferInfo{
|
||||
.buffer = buffer,
|
||||
.offset = offset,
|
||||
|
@ -72,6 +84,8 @@ public:
|
|||
}
|
||||
|
||||
// Append a texel buffer view to the descriptor update payload.
void AddTexelBuffer(VkBufferView texel_buffer) {
    // Writing at index 'used' requires it to be strictly inside the payload.
    const auto used = static_cast<size_t>(std::distance(payload_start, payload_cursor));
    ASSERT(used < FRAME_PAYLOAD_SIZE);
    *payload_cursor = texel_buffer;
    ++payload_cursor;
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue