[vulkan] Align transform feedback, sample count query and conditional rendering commands with render pass commands.

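Fixed a query cache leak: when a query report's final value was never synchronized, the query is now logged and queued for unregistration instead of hitting an assert and staying registered.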
This commit is contained in:
weakboson 2025-07-26 23:04:35 +08:00
parent 78c138d35f
commit e18c7ba41d
4 changed files with 83 additions and 86 deletions

View file

@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later // SPDX-License-Identifier: GPL-3.0-or-later
@ -115,8 +118,8 @@ struct QueryCacheBase<Traits>::QueryCacheBaseImpl {
QueryCacheBaseImpl(QueryCacheBase<Traits>* owner_, VideoCore::RasterizerInterface& rasterizer_, QueryCacheBaseImpl(QueryCacheBase<Traits>* owner_, VideoCore::RasterizerInterface& rasterizer_,
Tegra::MaxwellDeviceMemoryManager& device_memory_, RuntimeType& runtime_, Tegra::MaxwellDeviceMemoryManager& device_memory_, RuntimeType& runtime_,
Tegra::GPU& gpu_) Tegra::GPU& gpu_)
: owner{owner_}, rasterizer{rasterizer_}, : owner{owner_}, rasterizer{rasterizer_}, device_memory{device_memory_}, runtime{runtime_},
device_memory{device_memory_}, runtime{runtime_}, gpu{gpu_} { gpu{gpu_} {
streamer_mask = 0; streamer_mask = 0;
for (size_t i = 0; i < static_cast<size_t>(QueryType::MaxQueryTypes); i++) { for (size_t i = 0; i < static_cast<size_t>(QueryType::MaxQueryTypes); i++) {
streamers[i] = runtime.GetStreamerInterface(static_cast<QueryType>(i)); streamers[i] = runtime.GetStreamerInterface(static_cast<QueryType>(i));
@ -267,7 +270,11 @@ void QueryCacheBase<Traits>::CounterReport(GPUVAddr addr, QueryType counter_type
return; return;
} }
if (False(query_base->flags & QueryFlagBits::IsFinalValueSynced)) [[unlikely]] { if (False(query_base->flags & QueryFlagBits::IsFinalValueSynced)) [[unlikely]] {
ASSERT(false); LOG_ERROR(HW_GPU,
"Query report value not synchronized. Consider increasing GPU accuracy.");
if (!is_synced) [[likely]] {
impl->pending_unregister.push_back(query_location);
}
return; return;
} }
query_base->value += streamer->GetAmendValue(); query_base->value += streamer->GetAmendValue();
@ -370,8 +377,6 @@ void QueryCacheBase<Traits>::NotifySegment(bool resume) {
if (resume) { if (resume) {
impl->runtime.ResumeHostConditionalRendering(); impl->runtime.ResumeHostConditionalRendering();
} else { } else {
CounterClose(VideoCommon::QueryType::ZPassPixelCount64);
CounterClose(VideoCommon::QueryType::StreamingByteCount);
impl->runtime.PauseHostConditionalRendering(); impl->runtime.PauseHostConditionalRendering();
} }
} }

View file

@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later // SPDX-License-Identifier: GPL-3.0-or-later
@ -1161,10 +1164,9 @@ struct QueryCacheRuntimeImpl {
StagingBufferPool& staging_pool_, StagingBufferPool& staging_pool_,
ComputePassDescriptorQueue& compute_pass_descriptor_queue, ComputePassDescriptorQueue& compute_pass_descriptor_queue,
DescriptorPool& descriptor_pool) DescriptorPool& descriptor_pool)
: rasterizer{rasterizer_}, device_memory{device_memory_}, : rasterizer{rasterizer_}, device_memory{device_memory_}, buffer_cache{buffer_cache_},
buffer_cache{buffer_cache_}, device{device_}, device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_},
memory_allocator{memory_allocator_}, scheduler{scheduler_}, staging_pool{staging_pool_}, staging_pool{staging_pool_}, guest_streamer(0, runtime),
guest_streamer(0, runtime),
sample_streamer(static_cast<size_t>(QueryType::ZPassPixelCount64), runtime, rasterizer, sample_streamer(static_cast<size_t>(QueryType::ZPassPixelCount64), runtime, rasterizer,
device, scheduler, memory_allocator, compute_pass_descriptor_queue, device, scheduler, memory_allocator, compute_pass_descriptor_queue,
descriptor_pool), descriptor_pool),
@ -1300,9 +1302,11 @@ void QueryCacheRuntime::HostConditionalRenderingCompareValueImpl(VideoCommon::Lo
if (impl->hcr_is_set) { if (impl->hcr_is_set) {
if (impl->hcr_setup.buffer == impl->hcr_buffer && if (impl->hcr_setup.buffer == impl->hcr_buffer &&
impl->hcr_setup.offset == impl->hcr_offset) { impl->hcr_setup.offset == impl->hcr_offset) {
ResumeHostConditionalRendering();
return; return;
} }
}
bool was_running = impl->is_hcr_running;
if (was_running) {
PauseHostConditionalRendering(); PauseHostConditionalRendering();
} }
impl->hcr_setup.buffer = impl->hcr_buffer; impl->hcr_setup.buffer = impl->hcr_buffer;
@ -1310,7 +1314,9 @@ void QueryCacheRuntime::HostConditionalRenderingCompareValueImpl(VideoCommon::Lo
impl->hcr_setup.flags = is_equal ? VK_CONDITIONAL_RENDERING_INVERTED_BIT_EXT : 0; impl->hcr_setup.flags = is_equal ? VK_CONDITIONAL_RENDERING_INVERTED_BIT_EXT : 0;
impl->hcr_is_set = true; impl->hcr_is_set = true;
impl->is_hcr_running = false; impl->is_hcr_running = false;
ResumeHostConditionalRendering(); if (was_running) {
ResumeHostConditionalRendering();
}
} }
void QueryCacheRuntime::HostConditionalRenderingCompareBCImpl(DAddr address, bool is_equal) { void QueryCacheRuntime::HostConditionalRenderingCompareBCImpl(DAddr address, bool is_equal) {
@ -1325,7 +1331,8 @@ void QueryCacheRuntime::HostConditionalRenderingCompareBCImpl(DAddr address, boo
to_resolve = buffer->Handle(); to_resolve = buffer->Handle();
to_resolve_offset = static_cast<u32>(offset); to_resolve_offset = static_cast<u32>(offset);
} }
if (impl->is_hcr_running) { bool was_running = impl->is_hcr_running;
if (was_running) {
PauseHostConditionalRendering(); PauseHostConditionalRendering();
} }
impl->conditional_resolve_pass->Resolve(*impl->hcr_resolve_buffer, to_resolve, impl->conditional_resolve_pass->Resolve(*impl->hcr_resolve_buffer, to_resolve,
@ -1335,7 +1342,9 @@ void QueryCacheRuntime::HostConditionalRenderingCompareBCImpl(DAddr address, boo
impl->hcr_setup.flags = is_equal ? 0 : VK_CONDITIONAL_RENDERING_INVERTED_BIT_EXT; impl->hcr_setup.flags = is_equal ? 0 : VK_CONDITIONAL_RENDERING_INVERTED_BIT_EXT;
impl->hcr_is_set = true; impl->hcr_is_set = true;
impl->is_hcr_running = false; impl->is_hcr_running = false;
ResumeHostConditionalRendering(); if (was_running) {
ResumeHostConditionalRendering();
}
} }
bool QueryCacheRuntime::HostConditionalRenderingCompareValue(VideoCommon::LookupData object_1, bool QueryCacheRuntime::HostConditionalRenderingCompareValue(VideoCommon::LookupData object_1,

View file

@ -217,8 +217,6 @@ void RasterizerVulkan::PrepareDraw(bool is_indexed, Func&& draw_func) {
FlushWork(); FlushWork();
gpu_memory->FlushCaching(); gpu_memory->FlushCaching();
query_cache.NotifySegment(true);
GraphicsPipeline* const pipeline{pipeline_cache.CurrentGraphicsPipeline()}; GraphicsPipeline* const pipeline{pipeline_cache.CurrentGraphicsPipeline()};
if (!pipeline) { if (!pipeline) {
return; return;
@ -232,9 +230,13 @@ void RasterizerVulkan::PrepareDraw(bool is_indexed, Func&& draw_func) {
UpdateDynamicStates(); UpdateDynamicStates();
HandleTransformFeedback(); HandleTransformFeedback();
query_cache.NotifySegment(true);
query_cache.CounterEnable(VideoCommon::QueryType::ZPassPixelCount64, query_cache.CounterEnable(VideoCommon::QueryType::ZPassPixelCount64,
maxwell3d->regs.zpass_pixel_count_enable); maxwell3d->regs.zpass_pixel_count_enable);
draw_func(); draw_func();
query_cache.CounterEnable(VideoCommon::QueryType::StreamingByteCount, false);
} }
void RasterizerVulkan::Draw(bool is_indexed, u32 instance_count) { void RasterizerVulkan::Draw(bool is_indexed, u32 instance_count) {
@ -311,8 +313,6 @@ void RasterizerVulkan::DrawTexture() {
}; };
FlushWork(); FlushWork();
query_cache.NotifySegment(true);
std::scoped_lock l{texture_cache.mutex}; std::scoped_lock l{texture_cache.mutex};
texture_cache.SynchronizeGraphicsDescriptors(); texture_cache.SynchronizeGraphicsDescriptors();
texture_cache.UpdateRenderTargets(false); texture_cache.UpdateRenderTargets(false);
@ -359,10 +359,6 @@ void RasterizerVulkan::Clear(u32 layer_count) {
FlushWork(); FlushWork();
gpu_memory->FlushCaching(); gpu_memory->FlushCaching();
query_cache.NotifySegment(true);
query_cache.CounterEnable(VideoCommon::QueryType::ZPassPixelCount64,
maxwell3d->regs.zpass_pixel_count_enable);
auto& regs = maxwell3d->regs; auto& regs = maxwell3d->regs;
const bool use_color = regs.clear_surface.R || regs.clear_surface.G || regs.clear_surface.B || const bool use_color = regs.clear_surface.R || regs.clear_surface.G || regs.clear_surface.B ||
regs.clear_surface.A; regs.clear_surface.A;
@ -378,6 +374,10 @@ void RasterizerVulkan::Clear(u32 layer_count) {
const VkExtent2D render_area = framebuffer->RenderArea(); const VkExtent2D render_area = framebuffer->RenderArea();
scheduler.RequestRenderpass(framebuffer); scheduler.RequestRenderpass(framebuffer);
query_cache.NotifySegment(true);
query_cache.CounterEnable(VideoCommon::QueryType::ZPassPixelCount64,
maxwell3d->regs.zpass_pixel_count_enable);
u32 up_scale = 1; u32 up_scale = 1;
u32 down_shift = 0; u32 down_shift = 0;
if (texture_cache.IsRescaling()) { if (texture_cache.IsRescaling()) {
@ -824,6 +824,7 @@ std::optional<FramebufferTextureInfo> RasterizerVulkan::AccelerateDisplay(
if (!image_view) { if (!image_view) {
return {}; return {};
} }
query_cache.NotifySegment(false); query_cache.NotifySegment(false);
const auto& resolution = Settings::values.resolution_info; const auto& resolution = Settings::values.resolution_info;
@ -939,16 +940,16 @@ void RasterizerVulkan::UpdateDynamicStates() {
const u8 dynamic_state = Settings::values.dyna_state.GetValue(); const u8 dynamic_state = Settings::values.dyna_state.GetValue();
auto features = DynamicFeatures{ auto features = DynamicFeatures{
.has_extended_dynamic_state = device.IsExtExtendedDynamicStateSupported() .has_extended_dynamic_state =
&& dynamic_state > 0, device.IsExtExtendedDynamicStateSupported() && dynamic_state > 0,
.has_extended_dynamic_state_2 = device.IsExtExtendedDynamicState2Supported() .has_extended_dynamic_state_2 =
&& dynamic_state > 1, device.IsExtExtendedDynamicState2Supported() && dynamic_state > 1,
.has_extended_dynamic_state_2_extra = device.IsExtExtendedDynamicState2ExtrasSupported() .has_extended_dynamic_state_2_extra =
&& dynamic_state > 1, device.IsExtExtendedDynamicState2ExtrasSupported() && dynamic_state > 1,
.has_extended_dynamic_state_3_blend = device.IsExtExtendedDynamicState3BlendingSupported() .has_extended_dynamic_state_3_blend =
&& dynamic_state > 2, device.IsExtExtendedDynamicState3BlendingSupported() && dynamic_state > 2,
.has_extended_dynamic_state_3_enables = device.IsExtExtendedDynamicState3EnablesSupported() .has_extended_dynamic_state_3_enables =
&& dynamic_state > 2, device.IsExtExtendedDynamicState3EnablesSupported() && dynamic_state > 2,
.has_dynamic_vertex_input = device.IsExtVertexInputDynamicStateSupported(), .has_dynamic_vertex_input = device.IsExtVertexInputDynamicStateSupported(),
}; };
@ -973,16 +974,12 @@ void RasterizerVulkan::UpdateDynamicStates() {
if (features.has_extended_dynamic_state_3_enables) { if (features.has_extended_dynamic_state_3_enables) {
using namespace Tegra::Engines; using namespace Tegra::Engines;
if (device.GetDriverID() == VkDriverIdKHR::VK_DRIVER_ID_AMD_OPEN_SOURCE if (device.GetDriverID() == VkDriverIdKHR::VK_DRIVER_ID_AMD_OPEN_SOURCE ||
|| device.GetDriverID() == VkDriverIdKHR::VK_DRIVER_ID_AMD_PROPRIETARY) { device.GetDriverID() == VkDriverIdKHR::VK_DRIVER_ID_AMD_PROPRIETARY) {
struct In struct In {
{
const Maxwell3D::Regs::VertexAttribute::Type d; const Maxwell3D::Regs::VertexAttribute::Type d;
In(Maxwell3D::Regs::VertexAttribute::Type n) In(Maxwell3D::Regs::VertexAttribute::Type n) : d(n) {}
: d(n) bool operator()(Maxwell3D::Regs::VertexAttribute n) const {
{}
bool operator()(Maxwell3D::Regs::VertexAttribute n) const
{
return n.type == d; return n.type == d;
} }
}; };
@ -1133,36 +1130,36 @@ void RasterizerVulkan::UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs) {
if (is_d24 && !device.SupportsD24DepthBuffer()) { if (is_d24 && !device.SupportsD24DepthBuffer()) {
static constexpr const size_t length = sizeof(NEEDS_D24) / sizeof(NEEDS_D24[0]); static constexpr const size_t length = sizeof(NEEDS_D24) / sizeof(NEEDS_D24[0]);
static constexpr const u64 *start = NEEDS_D24; static constexpr const u64* start = NEEDS_D24;
static constexpr const u64 *end = NEEDS_D24 + length; static constexpr const u64* end = NEEDS_D24 + length;
const u64 *it = std::find(start, end, program_id); const u64* it = std::find(start, end, program_id);
if (it != end) { if (it != end) {
// the base formulas can be obtained from here: // the base formulas can be obtained from here:
// https://docs.microsoft.com/en-us/windows/win32/direct3d11/d3d10-graphics-programming-guide-output-merger-stage-depth-bias // https://docs.microsoft.com/en-us/windows/win32/direct3d11/d3d10-graphics-programming-guide-output-merger-stage-depth-bias
const double rescale_factor = static_cast<double>(1ULL << (32 - 24)) const double rescale_factor =
/ (static_cast<double>(0x1.ep+127)); static_cast<double>(1ULL << (32 - 24)) / (static_cast<double>(0x1.ep+127));
units = static_cast<float>(static_cast<double>(units) * rescale_factor); units = static_cast<float>(static_cast<double>(units) * rescale_factor);
} }
} }
scheduler.Record( scheduler.Record([constant = units, clamp = regs.depth_bias_clamp,
[constant = units, clamp = regs.depth_bias_clamp, factor = regs.slope_scale_depth_bias, this]( factor = regs.slope_scale_depth_bias, this](vk::CommandBuffer cmdbuf) {
vk::CommandBuffer cmdbuf) { if (device.IsExtDepthBiasControlSupported()) {
if (device.IsExtDepthBiasControlSupported()) { static VkDepthBiasRepresentationInfoEXT bias_info{
static VkDepthBiasRepresentationInfoEXT bias_info{ .sType = VK_STRUCTURE_TYPE_DEPTH_BIAS_REPRESENTATION_INFO_EXT,
.sType = VK_STRUCTURE_TYPE_DEPTH_BIAS_REPRESENTATION_INFO_EXT, .pNext = nullptr,
.pNext = nullptr, .depthBiasRepresentation =
.depthBiasRepresentation = VK_DEPTH_BIAS_REPRESENTATION_LEAST_REPRESENTABLE_VALUE_FORCE_UNORM_EXT, VK_DEPTH_BIAS_REPRESENTATION_LEAST_REPRESENTABLE_VALUE_FORCE_UNORM_EXT,
.depthBiasExact = VK_FALSE, .depthBiasExact = VK_FALSE,
}; };
cmdbuf.SetDepthBias(constant, clamp, factor, &bias_info); cmdbuf.SetDepthBias(constant, clamp, factor, &bias_info);
} else { } else {
cmdbuf.SetDepthBias(constant, clamp, factor); cmdbuf.SetDepthBias(constant, clamp, factor);
} }
}); });
} }
void RasterizerVulkan::UpdateBlendConstants(Tegra::Engines::Maxwell3D::Regs& regs) { void RasterizerVulkan::UpdateBlendConstants(Tegra::Engines::Maxwell3D::Regs& regs) {
@ -1344,21 +1341,19 @@ void RasterizerVulkan::UpdateRasterizerDiscardEnable(Tegra::Engines::Maxwell3D::
}); });
} }
void RasterizerVulkan::UpdateConservativeRasterizationMode(Tegra::Engines::Maxwell3D::Regs& regs) void RasterizerVulkan::UpdateConservativeRasterizationMode(Tegra::Engines::Maxwell3D::Regs& regs) {
{
if (!state_tracker.TouchConservativeRasterizationMode()) { if (!state_tracker.TouchConservativeRasterizationMode()) {
return; return;
} }
scheduler.Record([enable = regs.conservative_raster_enable](vk::CommandBuffer cmdbuf) { scheduler.Record([enable = regs.conservative_raster_enable](vk::CommandBuffer cmdbuf) {
cmdbuf.SetConservativeRasterizationModeEXT( cmdbuf.SetConservativeRasterizationModeEXT(
enable ? VK_CONSERVATIVE_RASTERIZATION_MODE_UNDERESTIMATE_EXT enable ? VK_CONSERVATIVE_RASTERIZATION_MODE_OVERESTIMATE_EXT
: VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT); : VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT);
}); });
} }
void RasterizerVulkan::UpdateLineStippleEnable(Tegra::Engines::Maxwell3D::Regs& regs) void RasterizerVulkan::UpdateLineStippleEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
{
if (!state_tracker.TouchLineStippleEnable()) { if (!state_tracker.TouchLineStippleEnable()) {
return; return;
} }
@ -1368,8 +1363,7 @@ void RasterizerVulkan::UpdateLineStippleEnable(Tegra::Engines::Maxwell3D::Regs&
}); });
} }
void RasterizerVulkan::UpdateLineRasterizationMode(Tegra::Engines::Maxwell3D::Regs& regs) void RasterizerVulkan::UpdateLineRasterizationMode(Tegra::Engines::Maxwell3D::Regs& regs) {
{
// if (!state_tracker.TouchLi()) { // if (!state_tracker.TouchLi()) {
// return; // return;
// } // }

View file

@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project // SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later // SPDX-License-Identifier: GPL-2.0-or-later
@ -257,16 +260,6 @@ u64 Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_se
void Scheduler::AllocateNewContext() { void Scheduler::AllocateNewContext() {
// Enable counters once again. These are disabled when a command buffer is finished. // Enable counters once again. These are disabled when a command buffer is finished.
if (query_cache) {
#if ANDROID
if (Settings::IsGPULevelHigh()) {
// This is problematic on Android, disable on GPU Normal.
query_cache->NotifySegment(true);
}
#else
query_cache->NotifySegment(true);
#endif
}
} }
void Scheduler::InvalidateState() { void Scheduler::InvalidateState() {
@ -276,15 +269,7 @@ void Scheduler::InvalidateState() {
} }
void Scheduler::EndPendingOperations() { void Scheduler::EndPendingOperations() {
#if ANDROID query_cache->CounterReset(VideoCommon::QueryType::ZPassPixelCount64);
if (Settings::IsGPULevelHigh()) {
// This is problematic on Android, disable on GPU Normal.
// query_cache->DisableStreams();
}
#else
// query_cache->DisableStreams();
#endif
query_cache->NotifySegment(false);
EndRenderPass(); EndRenderPass();
} }
@ -292,6 +277,10 @@ void Scheduler::EndRenderPass() {
if (!state.renderpass) { if (!state.renderpass) {
return; return;
} }
query_cache->CounterEnable(VideoCommon::QueryType::ZPassPixelCount64, false);
query_cache->NotifySegment(false);
Record([num_images = num_renderpass_images, images = renderpass_images, Record([num_images = num_renderpass_images, images = renderpass_images,
ranges = renderpass_image_ranges](vk::CommandBuffer cmdbuf) { ranges = renderpass_image_ranges](vk::CommandBuffer cmdbuf) {
std::array<VkImageMemoryBarrier, 9> barriers; std::array<VkImageMemoryBarrier, 9> barriers;