[vulkan] Enable Reset Query Pool per spec

Vulkan, by spec, requires that every query be reset before use. For that we now record vkCmdResetQueryPool, which must be called outside the render pass; but once we reset the query we are left outside the render pass, so we must resume it with scheduler.RequestRenderpass(texture_cache.GetFramebuffer());
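For reference, a minimal sketch of the ordering the spec mandates, written against the raw Vulkan C API (the function and parameter names here are placeholders, not code from this commit):

    // Requires <vulkan/vulkan.h>. vkCmdResetQueryPool is illegal inside a
    // render pass instance, so the reset goes before vkCmdBeginRenderPass;
    // the query itself is begun and ended inside the pass around the work
    // being measured.
    void RecordQueriedDraw(VkCommandBuffer cmd, VkQueryPool pool, uint32_t slot,
                           const VkRenderPassBeginInfo& rp_begin) {
        vkCmdResetQueryPool(cmd, pool, slot, 1);  // outside the render pass
        vkCmdBeginRenderPass(cmd, &rp_begin, VK_SUBPASS_CONTENTS_INLINE);
        vkCmdBeginQuery(cmd, pool, slot, VK_QUERY_CONTROL_PRECISE_BIT);
        vkCmdDraw(cmd, 3, 1, 0, 0);               // the queried work
        vkCmdEndQuery(cmd, pool, slot);
        vkCmdEndRenderPass(cmd);
    }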
Shinmegumi 2025-08-03 02:28:55 +02:00 committed by crueter
parent 5fb54f5bc2
commit 94ffebe6ea
8 changed files with 110 additions and 83 deletions

View file

@ -502,7 +502,6 @@ bool GraphicsPipeline::ConfigureImpl(bool is_indexed) {
void GraphicsPipeline::ConfigureDraw(const RescalingPushConstant& rescaling,
const RenderAreaPushConstant& render_area) {
scheduler.RequestRenderpass(texture_cache.GetFramebuffer());
if (!is_built.load(std::memory_order::relaxed)) {
// Wait for the pipeline to be built
scheduler.Record([this](vk::CommandBuffer) {

View file

@ -13,7 +13,7 @@
#include <unordered_map>
#include <utility>
#include <vector>
#include "video_core/renderer_vulkan/vk_texture_cache.h"
#include "common/bit_util.h"
#include "common/common_types.h"
#include "video_core/engines/draw_manager.h"
@ -116,11 +116,11 @@ struct HostSyncValues {
class SamplesStreamer : public BaseStreamer {
public:
explicit SamplesStreamer(size_t id_, QueryCacheRuntime& runtime_,
- VideoCore::RasterizerInterface* rasterizer_, const Device& device_,
+ VideoCore::RasterizerInterface* rasterizer_, TextureCache& texture_cache_, const Device& device_,
Scheduler& scheduler_, const MemoryAllocator& memory_allocator_,
ComputePassDescriptorQueue& compute_pass_descriptor_queue,
DescriptorPool& descriptor_pool)
- : BaseStreamer(id_), runtime{runtime_}, rasterizer{rasterizer_}, device{device_},
+ : BaseStreamer(id_), texture_cache{texture_cache_}, runtime{runtime_}, rasterizer{rasterizer_}, device{device_},
scheduler{scheduler_}, memory_allocator{memory_allocator_} {
current_bank = nullptr;
current_query = nullptr;
@ -153,16 +153,33 @@ public:
if (has_started) {
return;
}
ReserveHostQuery();
+ // Ensure outside render pass
+ scheduler.RequestOutsideRenderPassOperationContext();
+ // Reset query pool outside render pass
+ scheduler.Record([query_pool = current_query_pool,
+                   query_index = current_bank_slot](vk::CommandBuffer cmdbuf) {
+     cmdbuf.ResetQueryPool(query_pool, static_cast<u32>(query_index), 1);
+ });
+ // Manually restart the render pass (required for vkCmdClearAttachments, etc.)
+ scheduler.RequestRenderpass(texture_cache.GetFramebuffer());
+ // Begin query inside the newly started render pass
scheduler.Record([query_pool = current_query_pool,
                  query_index = current_bank_slot](vk::CommandBuffer cmdbuf) {
    const bool use_precise = Settings::IsGPULevelHigh();
    cmdbuf.BeginQuery(query_pool, static_cast<u32>(query_index),
                      use_precise ? VK_QUERY_CONTROL_PRECISE_BIT : 0);
});
has_started = true;
}
void PauseCounter() override {
if (!has_started) {
return;
@ -404,7 +421,7 @@ private:
size_slots -= amount;
}
}
+ TextureCache& texture_cache;
template <bool is_ordered, typename Func>
void ApplyBanksWideOp(std::vector<size_t>& queries, Func&& func) {
std::conditional_t<is_ordered, std::map<size_t, std::pair<size_t, size_t>>,
@ -1163,13 +1180,13 @@ struct QueryCacheRuntimeImpl {
const MemoryAllocator& memory_allocator_, Scheduler& scheduler_,
StagingBufferPool& staging_pool_,
ComputePassDescriptorQueue& compute_pass_descriptor_queue,
- DescriptorPool& descriptor_pool)
+ DescriptorPool& descriptor_pool, TextureCache& texture_cache_)
: rasterizer{rasterizer_}, device_memory{device_memory_}, buffer_cache{buffer_cache_},
device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_},
staging_pool{staging_pool_}, guest_streamer(0, runtime),
sample_streamer(static_cast<size_t>(QueryType::ZPassPixelCount64), runtime, rasterizer,
- device, scheduler, memory_allocator, compute_pass_descriptor_queue,
- descriptor_pool),
+ texture_cache_, device, scheduler, memory_allocator,
+ compute_pass_descriptor_queue, descriptor_pool),
tfb_streamer(static_cast<size_t>(QueryType::StreamingByteCount), runtime, device,
scheduler, memory_allocator, staging_pool),
primitives_succeeded_streamer(
@ -1240,10 +1257,10 @@ QueryCacheRuntime::QueryCacheRuntime(VideoCore::RasterizerInterface* rasterizer,
const MemoryAllocator& memory_allocator_,
Scheduler& scheduler_, StagingBufferPool& staging_pool_,
ComputePassDescriptorQueue& compute_pass_descriptor_queue,
- DescriptorPool& descriptor_pool) {
+ DescriptorPool& descriptor_pool, TextureCache& texture_cache_) {
impl = std::make_unique<QueryCacheRuntimeImpl>(
*this, rasterizer, device_memory_, buffer_cache_, device_, memory_allocator_, scheduler_,
- staging_pool_, compute_pass_descriptor_queue, descriptor_pool);
+ staging_pool_, compute_pass_descriptor_queue, descriptor_pool, texture_cache_);
}
void QueryCacheRuntime::Bind3DEngine(Maxwell3D* maxwell3d) {

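The streamer never records Vulkan commands directly; everything goes through the scheduler. Below is an illustrative stand-in for the contract StartCounter relies on, under the assumption that Record defers callbacks and the two Request* calls bracket them with render-pass end/begin. SchedulerSketch and its members are our sketch, not the project's actual Scheduler; in particular, the real RequestRenderpass takes the texture cache's framebuffer rather than a VkRenderPassBeginInfo.

    #include <functional>
    #include <vector>
    #include <vulkan/vulkan.h>

    class SchedulerSketch {
    public:
        // Queues work to be replayed into the command buffer later.
        void Record(std::function<void(VkCommandBuffer)> func) {
            pending.push_back(std::move(func));
        }

        // Closes any open render pass before the next recorded callback runs.
        void RequestOutsideRenderPassOperationContext() {
            if (render_pass_open) {
                Record([](VkCommandBuffer cmd) { vkCmdEndRenderPass(cmd); });
                render_pass_open = false;
            }
        }

        // Re-opens a render pass if none is active.
        void RequestRenderpass(VkRenderPassBeginInfo begin_info) {
            if (!render_pass_open) {
                Record([begin_info](VkCommandBuffer cmd) {
                    vkCmdBeginRenderPass(cmd, &begin_info, VK_SUBPASS_CONTENTS_INLINE);
                });
                render_pass_open = true;
            }
        }

        // Replays every queued callback into the real command buffer.
        void Flush(VkCommandBuffer cmd) {
            for (auto& func : pending) {
                func(cmd);
            }
            pending.clear();
        }

    private:
        std::vector<std::function<void(VkCommandBuffer)>> pending;
        bool render_pass_open = false;
    };

With this shape, the ResetQueryPool callback recorded in StartCounter is guaranteed to land between vkCmdEndRenderPass and vkCmdBeginRenderPass in the final command stream, which is exactly where the spec allows it.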
View file

@ -7,7 +7,7 @@
#include "video_core/query_cache/query_cache_base.h"
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
#include "video_core/renderer_vulkan/vk_texture_cache.h"
namespace VideoCore {
class RasterizerInterface;
}
@ -17,7 +17,6 @@ class StreamerInterface;
}
namespace Vulkan {
class Device;
class Scheduler;
class StagingBufferPool;
@ -32,7 +31,7 @@ public:
const MemoryAllocator& memory_allocator_, Scheduler& scheduler_,
StagingBufferPool& staging_pool_,
ComputePassDescriptorQueue& compute_pass_descriptor_queue,
- DescriptorPool& descriptor_pool);
+ DescriptorPool& descriptor_pool, TextureCache& texture_cache_);
~QueryCacheRuntime();
template <typename SyncValuesType>

View file

@ -189,7 +189,7 @@ RasterizerVulkan::RasterizerVulkan(Core::Frontend::EmuWindow& emu_window_, Tegra
guest_descriptor_queue, compute_pass_descriptor_queue, descriptor_pool),
buffer_cache(device_memory, buffer_cache_runtime),
query_cache_runtime(this, device_memory, buffer_cache, device, memory_allocator, scheduler,
- staging_pool, compute_pass_descriptor_queue, descriptor_pool),
+ staging_pool, compute_pass_descriptor_queue, descriptor_pool, texture_cache),
query_cache(gpu, *this, device_memory, query_cache_runtime),
pipeline_cache(device_memory, device, scheduler, descriptor_pool, guest_descriptor_queue,
render_pass_cache, buffer_cache, texture_cache, gpu.ShaderNotify()),

View file

@ -136,7 +136,6 @@ public:
void BindChannel(Tegra::Control::ChannelState& channel) override;
void ReleaseChannel(s32 channel_id) override;
std::optional<FramebufferTextureInfo> AccelerateDisplay(const Tegra::FramebufferConfig& config,
VAddr framebuffer_addr,
u32 pixel_stride);
@ -147,7 +146,6 @@ private:
0x0100E95004038000ULL, // XC2
0x0100A6301214E000ULL, // FE:Engage
};
static constexpr size_t MAX_TEXTURES = 192;
static constexpr size_t MAX_IMAGES = 48;
static constexpr size_t MAX_IMAGE_VIEWS = MAX_TEXTURES + MAX_IMAGES;

View file

@ -507,58 +507,84 @@ TransformBufferCopies(std::span<const VideoCommon::BufferCopy> copies, size_t bu
return value;
}
}
+ struct RangedBarrierRange {
+     u32 min_mip = std::numeric_limits<u32>::max();
+     u32 max_mip = std::numeric_limits<u32>::min();
+     u32 min_layer = std::numeric_limits<u32>::max();
+     u32 max_layer = std::numeric_limits<u32>::min();
+     void AddLayers(const VkImageSubresourceLayers& layers) {
+         min_mip = std::min(min_mip, layers.mipLevel);
+         max_mip = std::max(max_mip, layers.mipLevel + 1);
+         min_layer = std::min(min_layer, layers.baseArrayLayer);
+         max_layer = std::max(max_layer, layers.baseArrayLayer + layers.layerCount);
+     }
+     VkImageSubresourceRange SubresourceRange(VkImageAspectFlags aspect_mask) const noexcept {
+         return VkImageSubresourceRange{
+             .aspectMask = aspect_mask,
+             .baseMipLevel = min_mip,
+             .levelCount = max_mip - min_mip,
+             .baseArrayLayer = min_layer,
+             .layerCount = max_layer - min_layer,
+         };
+     }
+ };
void CopyBufferToImage(vk::CommandBuffer cmdbuf, VkBuffer src_buffer, VkImage image,
VkImageAspectFlags aspect_mask, bool is_initialized,
std::span<const VkBufferImageCopy> copies) {
static constexpr VkAccessFlags WRITE_ACCESS_FLAGS =
VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
static constexpr VkAccessFlags READ_ACCESS_FLAGS = VK_ACCESS_SHADER_READ_BIT |
VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT;
+ // Compute exact mip/layer range being written to
+ RangedBarrierRange range;
+ for (const auto& region : copies) {
+     range.AddLayers(region.imageSubresource);
+ }
+ const VkImageSubresourceRange subresource_range = range.SubresourceRange(aspect_mask);
const VkImageMemoryBarrier read_barrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = WRITE_ACCESS_FLAGS,
.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
.oldLayout = is_initialized ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_UNDEFINED,
.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = image,
- .subresourceRange{
-     .aspectMask = aspect_mask,
-     .baseMipLevel = 0,
-     .levelCount = VK_REMAINING_MIP_LEVELS,
-     .baseArrayLayer = 0,
-     .layerCount = VK_REMAINING_ARRAY_LAYERS,
- },
+ .subresourceRange = subresource_range,
};
const VkImageMemoryBarrier write_barrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
.dstAccessMask = WRITE_ACCESS_FLAGS | READ_ACCESS_FLAGS,
.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
.newLayout = VK_IMAGE_LAYOUT_GENERAL,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = image,
- .subresourceRange{
-     .aspectMask = aspect_mask,
-     .baseMipLevel = 0,
-     .levelCount = VK_REMAINING_MIP_LEVELS,
-     .baseArrayLayer = 0,
-     .layerCount = VK_REMAINING_ARRAY_LAYERS,
- },
+ .subresourceRange = subresource_range,
};
- cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0,
+ cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT |
+                        VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT |
+                        VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0,
                        read_barrier);
cmdbuf.CopyBufferToImage(src_buffer, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, copies);
// TODO: Move this to another API
- cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0,
-                        write_barrier);
+ cmdbuf.PipelineBarrier(
+     VK_PIPELINE_STAGE_TRANSFER_BIT,
+     VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT |
+         VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT |
+         VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
+     0, nullptr, nullptr, write_barrier);
}
[[nodiscard]] VkImageBlit MakeImageBlit(const Region2D& dst_region, const Region2D& src_region,
@ -651,29 +677,7 @@ void TryTransformSwizzleIfNeeded(PixelFormat format, std::array<SwizzleSource, 4
}
}
- struct RangedBarrierRange {
-     u32 min_mip = std::numeric_limits<u32>::max();
-     u32 max_mip = std::numeric_limits<u32>::min();
-     u32 min_layer = std::numeric_limits<u32>::max();
-     u32 max_layer = std::numeric_limits<u32>::min();
-     void AddLayers(const VkImageSubresourceLayers& layers) {
-         min_mip = std::min(min_mip, layers.mipLevel);
-         max_mip = std::max(max_mip, layers.mipLevel + 1);
-         min_layer = std::min(min_layer, layers.baseArrayLayer);
-         max_layer = std::max(max_layer, layers.baseArrayLayer + layers.layerCount);
-     }
-     VkImageSubresourceRange SubresourceRange(VkImageAspectFlags aspect_mask) const noexcept {
-         return VkImageSubresourceRange{
-             .aspectMask = aspect_mask,
-             .baseMipLevel = min_mip,
-             .levelCount = max_mip - min_mip,
-             .baseArrayLayer = min_layer,
-             .layerCount = max_layer - min_layer,
-         };
-     }
- };
[[nodiscard]] VkFormat Format(Shader::ImageFormat format) {
switch (format) {
@ -1457,12 +1461,18 @@ void TextureCacheRuntime::CopyImage(Image& dst, Image& src,
.subresourceRange = dst_range.SubresourceRange(aspect_mask),
},
};
- cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
-                        0, {}, {}, pre_barriers);
+ cmdbuf.PipelineBarrier(
+     VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT |
+         VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT,
+     VK_PIPELINE_STAGE_TRANSFER_BIT,
+     0, nullptr, nullptr, pre_barriers);
cmdbuf.CopyImage(src_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst_image,
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, vk_copies);
- cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
-                        0, {}, {}, post_barriers);
+ cmdbuf.PipelineBarrier(
+     VK_PIPELINE_STAGE_TRANSFER_BIT,
+     VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT |
+         VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT,
+     0, nullptr, nullptr, post_barriers);
});
}
@ -2352,7 +2362,7 @@ void TextureCacheRuntime::TransitionImageLayout(Image& image) {
};
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([barrier](vk::CommandBuffer cmdbuf) {
- cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+ cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
                         VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, barrier);
});
}
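Every barrier change in this file follows the same two-part pattern: the stage masks narrow from VK_PIPELINE_STAGE_ALL_COMMANDS_BIT to the stages that can actually produce or consume the image (late fragment tests, color attachment output, compute), and the subresource range narrows from VK_REMAINING_MIP_LEVELS / VK_REMAINING_ARRAY_LAYERS to the exact mips and layers touched, as computed by RangedBarrierRange. In raw Vulkan the acquire side of CopyBufferToImage now amounts to the following sketch (the helper name is ours, and read_barrier is assumed to be filled as above):

    #include <vulkan/vulkan.h>

    // Wait only on the stages that can write an image in this engine,
    // instead of stalling ALL_COMMANDS, before the transfer consumes it.
    void AcquireForTransfer(VkCommandBuffer cmd, const VkImageMemoryBarrier& read_barrier) {
        vkCmdPipelineBarrier(cmd,
                             VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT |
                                 VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT |
                                 VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, // producers
                             VK_PIPELINE_STAGE_TRANSFER_BIT,           // consumer
                             0,                                        // no dependency flags
                             0, nullptr,                               // no global memory barriers
                             0, nullptr,                               // no buffer barriers
                             1, &read_barrier);                        // one image barrier
    }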

View file

@ -120,6 +120,7 @@ void Load(VkDevice device, DeviceDispatch& dld) noexcept {
X(vkCmdEndConditionalRenderingEXT);
X(vkCmdEndQuery);
X(vkCmdEndRenderPass);
+ X(vkCmdResetQueryPool);
X(vkCmdEndTransformFeedbackEXT);
X(vkCmdEndDebugUtilsLabelEXT);
X(vkCmdFillBuffer);

View file

@ -219,6 +219,7 @@ struct DeviceDispatch : InstanceDispatch {
PFN_vkCmdEndConditionalRenderingEXT vkCmdEndConditionalRenderingEXT{};
PFN_vkCmdEndDebugUtilsLabelEXT vkCmdEndDebugUtilsLabelEXT{};
PFN_vkCmdEndQuery vkCmdEndQuery{};
+ PFN_vkCmdResetQueryPool vkCmdResetQueryPool{};
PFN_vkCmdEndRenderPass vkCmdEndRenderPass{};
PFN_vkCmdEndTransformFeedbackEXT vkCmdEndTransformFeedbackEXT{};
PFN_vkCmdFillBuffer vkCmdFillBuffer{};
@ -1137,7 +1138,9 @@ public:
VkCommandBuffer operator*() const noexcept {
return handle;
}
+ void ResetQueryPool(VkQueryPool query_pool, uint32_t first, uint32_t count) const noexcept {
+     dld->vkCmdResetQueryPool(handle, query_pool, first, count);
+ }
void Begin(const VkCommandBufferBeginInfo& begin_info) const {
Check(dld->vkBeginCommandBuffer(handle, &begin_info));
}
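For context on the two wrapper changes above: the X() macro expands each name into a vkGetDeviceProcAddr lookup that fills the dispatch table once, so wrappers such as vk::CommandBuffer::ResetQueryPool cost a single indirect call afterwards. A hedged sketch of that idiom follows; DeviceDispatchSketch and the macro body are assumptions based on the common pattern, not this file's exact definition:

    #include <vulkan/vulkan.h>

    // Minimal stand-in for the project's DeviceDispatch table
    // (the real struct holds hundreds of entry points).
    struct DeviceDispatchSketch {
        PFN_vkCmdResetQueryPool vkCmdResetQueryPool{};
    };

    // Resolve each entry point once and cache the function pointer.
    #define X(name) dld.name = reinterpret_cast<PFN_##name>(vkGetDeviceProcAddr(device, #name))

    void LoadSketch(VkDevice device, DeviceDispatchSketch& dld) noexcept {
        X(vkCmdResetQueryPool); // the entry point this commit starts loading
    }

    #undef X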