From 6d219d968544e9904c6922f0f3ac76da95cb871e Mon Sep 17 00:00:00 2001 From: Ribbit Date: Sun, 5 Oct 2025 15:12:18 -0700 Subject: [PATCH] this is pushing my vk knowledge to its limits --- .../renderer_opengl/gl_texture_cache.h | 3 + .../renderer_vulkan/pipeline_helper.h | 105 +++++++++++++++++- .../renderer_vulkan/vk_graphics_pipeline.cpp | 35 +++--- .../renderer_vulkan/vk_render_pass_cache.cpp | 41 +++++-- .../renderer_vulkan/vk_texture_cache.cpp | 52 +++++++++ .../renderer_vulkan/vk_texture_cache.h | 11 ++ .../renderer_vulkan/vk_update_descriptor.h | 17 +++ src/video_core/texture_cache/texture_cache.h | 6 + .../texture_cache/texture_cache_base.h | 3 + .../vulkan_common/vulkan_device.cpp | 6 + 10 files changed, 251 insertions(+), 28 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index fcb81c1774..f8bf1a7e1e 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -155,6 +155,9 @@ public: void SetFeedbackLoopRequest(u8 color_mask, bool depth, bool supported); FeedbackLoopRequest ConsumeFeedbackLoopRequest(); + const FeedbackLoopRequest& PeekFeedbackLoopRequest() const noexcept { + return pending_feedback_request; + } bool SupportsAttachmentFeedbackLoopFormat(VideoCore::Surface::PixelFormat format, bool is_depth) const; private: diff --git a/src/video_core/renderer_vulkan/pipeline_helper.h b/src/video_core/renderer_vulkan/pipeline_helper.h index 910e07a606..694ec6d2b6 100644 --- a/src/video_core/renderer_vulkan/pipeline_helper.h +++ b/src/video_core/renderer_vulkan/pipeline_helper.h @@ -4,6 +4,7 @@ #pragma once #include +#include #include @@ -19,6 +20,62 @@ namespace Vulkan { using Shader::Backend::SPIRV::NUM_TEXTURE_AND_IMAGE_SCALING_WORDS; +namespace detail { + +inline VkImageAspectFlags AttachmentAspectMask(VideoCore::Surface::PixelFormat format) { + switch (VideoCore::Surface::GetFormatType(format)) { + case VideoCore::Surface::SurfaceType::ColorTexture: + return VK_IMAGE_ASPECT_COLOR_BIT; + case VideoCore::Surface::SurfaceType::Depth: + return VK_IMAGE_ASPECT_DEPTH_BIT; + case VideoCore::Surface::SurfaceType::Stencil: + return VK_IMAGE_ASPECT_STENCIL_BIT; + case VideoCore::Surface::SurfaceType::DepthStencil: + return VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT; + default: + return 0; + } +} + +inline VkImageSubresourceRange NormalizeImageViewRange(const ImageView& image_view) { + const VkImageAspectFlags aspect_mask = AttachmentAspectMask(image_view.format); + VkImageSubresourceRange range{ + .aspectMask = aspect_mask, + .baseMipLevel = static_cast(image_view.range.base.level), + .levelCount = static_cast(image_view.range.extent.levels), + .baseArrayLayer = static_cast(image_view.range.base.layer), + .layerCount = static_cast(image_view.range.extent.layers), + }; + if ((image_view.flags & VideoCommon::ImageViewFlagBits::Slice) != VideoCommon::ImageViewFlagBits{}) { + range.baseArrayLayer = 0; + range.layerCount = 1; + } + return range; +} + +inline bool SubresourceRangeIntersects(const VkImageSubresourceRange& lhs, + const VkImageSubresourceRange& rhs) { + if ((lhs.aspectMask & rhs.aspectMask) == 0) { + return false; + } + const auto range_end = [](u32 base, u32 count, u32 remaining_value) -> u32 { + return count == remaining_value ? std::numeric_limits::max() : base + count; + }; + const u32 lhs_level_end = range_end(lhs.baseMipLevel, lhs.levelCount, VK_REMAINING_MIP_LEVELS); + const u32 rhs_level_end = range_end(rhs.baseMipLevel, rhs.levelCount, VK_REMAINING_MIP_LEVELS); + if (lhs_level_end <= rhs.baseMipLevel || rhs_level_end <= lhs.baseMipLevel) { + return false; + } + const u32 lhs_layer_end = range_end(lhs.baseArrayLayer, lhs.layerCount, VK_REMAINING_ARRAY_LAYERS); + const u32 rhs_layer_end = range_end(rhs.baseArrayLayer, rhs.layerCount, VK_REMAINING_ARRAY_LAYERS); + if (lhs_layer_end <= rhs.baseArrayLayer || rhs_layer_end <= lhs.baseArrayLayer) { + return false; + } + return true; +} + +} // namespace detail + class DescriptorLayoutBuilder { public: DescriptorLayoutBuilder(const Device& device_) : device{&device_} {} @@ -180,6 +237,48 @@ inline void PushImageDescriptors(TextureCache& texture_cache, const Shader::Info& info, RescalingPushConstant& rescaling, const VideoCommon::SamplerId*& samplers, const VideoCommon::ImageViewInOut*& views) { + const Framebuffer* framebuffer = texture_cache.GetFramebuffer(); + const auto& feedback_req = texture_cache.PeekFeedbackLoopRequest(); + + const auto choose_layout = [&](const ImageView& image_view) -> VkImageLayout { + if (!feedback_req.active || !feedback_req.supported || framebuffer == nullptr) { + return VK_IMAGE_LAYOUT_GENERAL; + } + const VkImage descriptor_image = image_view.ImageHandle(); + if (descriptor_image == VK_NULL_HANDLE) { + return VK_IMAGE_LAYOUT_GENERAL; + } + + const VkImageSubresourceRange view_range = detail::NormalizeImageViewRange(image_view); + + for (u32 slot = 0; slot < VideoCommon::NUM_RT; ++slot) { + if (((feedback_req.color_mask >> slot) & 1u) == 0) { + continue; + } + if (framebuffer->ColorImage(slot) != descriptor_image) { + continue; + } + const VkImageSubresourceRange* attachment_range = framebuffer->ColorSubresourceRange(slot); + if (attachment_range == nullptr) { + continue; + } + if (detail::SubresourceRangeIntersects(view_range, *attachment_range)) { + return VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT; + } + } + if (feedback_req.depth) { + const VkImage depth_image = framebuffer->DepthStencilImage(); + if (depth_image == descriptor_image) { + const VkImageSubresourceRange* attachment_range = + framebuffer->DepthStencilSubresourceRange(); + if (attachment_range != nullptr && + detail::SubresourceRangeIntersects(view_range, *attachment_range)) { + return VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT; + } + } + } + return VK_IMAGE_LAYOUT_GENERAL; + }; const u32 num_texture_buffers = Shader::NumDescriptors(info.texture_buffer_descriptors); const u32 num_image_buffers = Shader::NumDescriptors(info.image_buffer_descriptors); views += num_texture_buffers; @@ -195,7 +294,8 @@ inline void PushImageDescriptors(TextureCache& texture_cache, !image_view.SupportsAnisotropy()}; const VkSampler vk_sampler{use_fallback_sampler ? sampler.HandleWithDefaultAnisotropy() : sampler.Handle()}; - guest_descriptor_queue.AddSampledImage(vk_image_view, vk_sampler); + const VkImageLayout layout = choose_layout(image_view); + guest_descriptor_queue.AddSampledImage(vk_image_view, vk_sampler, layout); rescaling.PushTexture(texture_cache.IsRescaling(image_view)); } } @@ -206,7 +306,8 @@ inline void PushImageDescriptors(TextureCache& texture_cache, texture_cache.MarkModification(image_view.image_id); } const VkImageView vk_image_view{image_view.StorageView(desc.type, desc.format)}; - guest_descriptor_queue.AddImage(vk_image_view); + const VkImageLayout layout = VK_IMAGE_LAYOUT_GENERAL; // Storage images must remain in GENERAL layout + guest_descriptor_queue.AddImage(vk_image_view, layout); rescaling.PushImage(texture_cache.IsRescaling(image_view)); } } diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 33064dcfe0..9dd13a4eea 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -135,17 +135,6 @@ RenderPassKey MakeRenderPassKey(const FixedPipelineState& state) { return key; } -size_t NumAttachments(const FixedPipelineState& state) { - size_t num{}; - for (size_t index = 0; index < Maxwell::NumRenderTargets; ++index) { - const auto format{static_cast(state.color_formats[index])}; - if (format != Tegra::RenderTargetFormat::NONE) { - num = index + 1; - } - } - return num; -} - template bool Passes(const std::array& modules, const std::array& stage_infos) { @@ -460,6 +449,10 @@ bool GraphicsPipeline::ConfigureImpl(bool is_indexed) { buffer_cache.UpdateGraphicsBuffers(is_indexed); buffer_cache.BindHostGeometryBuffers(is_indexed); + // Ensure framebuffer and feedback-loop state are ready before writing descriptors + texture_cache.UpdateRenderTargets(false); + texture_cache.CheckFeedbackLoop(views); + guest_descriptor_queue.Acquire(); RescalingPushConstant rescaling; @@ -492,8 +485,6 @@ bool GraphicsPipeline::ConfigureImpl(bool is_indexed) { if constexpr (Spec::enabled_stages[4]) { prepare_stage(4); } - texture_cache.UpdateRenderTargets(false); - texture_cache.CheckFeedbackLoop(views); ConfigureDraw(rescaling, render_area); return true; @@ -769,14 +760,28 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) { LOG_WARNING(Render_Vulkan, "Depth bounds is enabled but not supported"); } static_vector cb_attachments; - const size_t num_attachments{NumAttachments(key.state)}; - for (size_t index = 0; index < num_attachments; ++index) { + for (size_t index = 0; index < Maxwell::NumRenderTargets; ++index) { static constexpr std::array mask_table{ VK_COLOR_COMPONENT_R_BIT, VK_COLOR_COMPONENT_G_BIT, VK_COLOR_COMPONENT_B_BIT, VK_COLOR_COMPONENT_A_BIT, }; + const auto format{ + static_cast(key.state.color_formats[index])}; + if (format == Tegra::RenderTargetFormat::NONE) { + cb_attachments.push_back({ + .blendEnable = VK_FALSE, + .srcColorBlendFactor = VK_BLEND_FACTOR_ONE, + .dstColorBlendFactor = VK_BLEND_FACTOR_ZERO, + .colorBlendOp = VK_BLEND_OP_ADD, + .srcAlphaBlendFactor = VK_BLEND_FACTOR_ONE, + .dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO, + .alphaBlendOp = VK_BLEND_OP_ADD, + .colorWriteMask = 0, + }); + continue; + } const auto& blend{key.state.attachments[index]}; const std::array mask{blend.Mask()}; VkColorComponentFlags write_mask{}; diff --git a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp index 56fcb8df03..7e58186191 100644 --- a/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_render_pass_cache.cpp @@ -116,14 +116,13 @@ VkRenderPass RenderPassCache::Get(const RenderPassKey& key) { .preserveAttachmentCount = 0, .pPreserveAttachments = nullptr, }; - VkDependencyFlags dependency_flags = VK_DEPENDENCY_BY_REGION_BIT; - if (device->IsAttachmentFeedbackLoopLayoutSupported() && - (key.color_feedback_mask != 0 || key.depth_feedback)) { - dependency_flags |= VK_DEPENDENCY_FEEDBACK_LOOP_BIT_EXT; - } - const VkSubpassDependency dependency{ - .srcSubpass = 0, // Current subpass - .dstSubpass = 0, // Same subpass (self-dependency) + const bool feedback_enabled = device->IsAttachmentFeedbackLoopLayoutSupported() && + (key.color_feedback_mask != 0 || key.depth_feedback); + + std::array dependencies{ + VkSubpassDependency{ + .srcSubpass = 0, + .dstSubpass = 0, .srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT, @@ -131,8 +130,28 @@ VkRenderPass RenderPassCache::Get(const RenderPassKey& key) { .srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, .dstAccessMask = VK_ACCESS_SHADER_READ_BIT, - .dependencyFlags = dependency_flags + .dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT | + (feedback_enabled ? VK_DEPENDENCY_FEEDBACK_LOOP_BIT_EXT : 0u), + }, + VkSubpassDependency{} }; + u32 dependency_count = 1; + if (feedback_enabled) { + dependencies[1] = VkSubpassDependency{ + .srcSubpass = 0, + .dstSubpass = 0, + .srcStageMask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, + .dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | + VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | + VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT, + .srcAccessMask = VK_ACCESS_SHADER_READ_BIT, + .dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, + .dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT | VK_DEPENDENCY_FEEDBACK_LOOP_BIT_EXT, + }; + dependency_count = 2; + } + pair->second = device->GetLogical().CreateRenderPass({ .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, .pNext = nullptr, @@ -141,8 +160,8 @@ VkRenderPass RenderPassCache::Get(const RenderPassKey& key) { .pAttachments = descriptions.empty() ? nullptr : descriptions.data(), .subpassCount = 1, .pSubpasses = &subpass, - .dependencyCount = 1, - .pDependencies = &dependency, + .dependencyCount = dependency_count, + .pDependencies = dependencies.data(), }); return *pair->second; } diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index b5433008dd..3c69bf2195 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -6,6 +6,7 @@ #include #include +#include #include #include #include @@ -2330,6 +2331,7 @@ void Framebuffer::CreateFramebuffer(TextureCacheRuntime& runtime, s32 num_layers = 1; is_rescaled = is_rescaled_; + rt_map.fill(std::numeric_limits::max()); const auto& resolution = runtime.resolution; u32 width = (std::numeric_limits::max)(); @@ -2404,6 +2406,56 @@ VkRenderPass Framebuffer::RenderPass(std::uint8_t color_feedback_mask, bool dept return render_pass_cache->Get(key); } +VkImage Framebuffer::ColorImage(size_t slot) const noexcept { + if (slot >= NUM_RT) { + return VK_NULL_HANDLE; + } + const size_t mapped_index = rt_map[slot]; + if (mapped_index >= num_images) { + return VK_NULL_HANDLE; + } + if ((image_ranges[mapped_index].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) == 0) { + return VK_NULL_HANDLE; + } + return images[mapped_index]; +} + +VkImage Framebuffer::DepthStencilImage() const noexcept { + if (!has_depth && !has_stencil) { + return VK_NULL_HANDLE; + } + const size_t mapped_index = num_color_buffers; + if (mapped_index >= num_images) { + return VK_NULL_HANDLE; + } + return images[mapped_index]; +} + +const VkImageSubresourceRange* Framebuffer::ColorSubresourceRange(size_t slot) const noexcept { + if (slot >= NUM_RT) { + return nullptr; + } + const size_t mapped_index = rt_map[slot]; + if (mapped_index == std::numeric_limits::max()) { + return nullptr; + } + if (mapped_index >= num_color_buffers || mapped_index >= num_images) { + return nullptr; + } + return &image_ranges[mapped_index]; +} + +const VkImageSubresourceRange* Framebuffer::DepthStencilSubresourceRange() const noexcept { + if (!has_depth && !has_stencil) { + return nullptr; + } + const size_t mapped_index = num_color_buffers; + if (mapped_index >= num_images) { + return nullptr; + } + return &image_ranges[mapped_index]; +} + void TextureCacheRuntime::AccelerateImageUpload( Image& image, const StagingBufferRef& map, std::span swizzles) { diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index eb13556cb4..2a85ca64cf 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -121,6 +121,9 @@ public: void BarrierFeedbackLoop(); void SetFeedbackLoopRequest(u8 color_mask, bool depth, bool supported); FeedbackLoopRequest ConsumeFeedbackLoopRequest(); + const FeedbackLoopRequest& PeekFeedbackLoopRequest() const noexcept { + return pending_feedback_request; + } bool IsFormatDitherable(VideoCore::Surface::PixelFormat format); bool IsFormatScalable(VideoCore::Surface::PixelFormat format); @@ -390,6 +393,13 @@ public: return is_rescaled; } + [[nodiscard]] VkImage ColorImage(size_t slot) const noexcept; + + [[nodiscard]] VkImage DepthStencilImage() const noexcept; + + [[nodiscard]] const VkImageSubresourceRange* ColorSubresourceRange(size_t slot) const noexcept; + [[nodiscard]] const VkImageSubresourceRange* DepthStencilSubresourceRange() const noexcept; + private: vk::Framebuffer framebuffer; VkRenderPass renderpass{}; @@ -427,3 +437,4 @@ struct TextureCacheParams { using TextureCache = VideoCommon::TextureCache; } // namespace Vulkan + diff --git a/src/video_core/renderer_vulkan/vk_update_descriptor.h b/src/video_core/renderer_vulkan/vk_update_descriptor.h index 82fce298da..2ffbba2e4b 100644 --- a/src/video_core/renderer_vulkan/vk_update_descriptor.h +++ b/src/video_core/renderer_vulkan/vk_update_descriptor.h @@ -55,6 +55,15 @@ public: }; } + // Overload allowing explicit image layout + void AddSampledImage(VkImageView image_view, VkSampler sampler, VkImageLayout layout) { + *(payload_cursor++) = VkDescriptorImageInfo{ + .sampler = sampler, + .imageView = image_view, + .imageLayout = layout, + }; + } + void AddImage(VkImageView image_view) { *(payload_cursor++) = VkDescriptorImageInfo{ .sampler = VK_NULL_HANDLE, @@ -63,6 +72,14 @@ public: }; } + void AddImage(VkImageView image_view, VkImageLayout layout) { + *(payload_cursor++) = VkDescriptorImageInfo{ + .sampler = VK_NULL_HANDLE, + .imageView = image_view, + .imageLayout = layout, + }; + } + void AddBuffer(VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size) { *(payload_cursor++) = VkDescriptorBufferInfo{ .buffer = buffer, diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 72f9d39581..1a4a5febeb 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -267,6 +267,11 @@ void TextureCache

::CheckFeedbackLoop(std::span views) { runtime.SetFeedbackLoopRequest(color_mask, depth_feedback, supported); } +template +const typename P::Runtime::FeedbackLoopRequest& TextureCache

::PeekFeedbackLoopRequest() const noexcept { + return runtime.PeekFeedbackLoopRequest(); +} + template typename P::Sampler* TextureCache

::GetGraphicsSampler(u32 index) { return &slot_samplers[GetGraphicsSamplerId(index)]; @@ -2575,3 +2580,4 @@ void TextureCache

::OnGPUASRegister([[maybe_unused]] size_t map_id) { } } // namespace VideoCommon + diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index 63d4b4ae7c..f2a049e584 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -159,6 +159,9 @@ public: /// Handle feedback loops during draws. void CheckFeedbackLoop(std::span views); + /// Peek current pending attachment feedback loop request (without consuming it) + [[nodiscard]] const typename P::Runtime::FeedbackLoopRequest& PeekFeedbackLoopRequest() const noexcept; + /// Get the sampler from the graphics descriptor table in the specified index Sampler* GetGraphicsSampler(u32 index); diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index aae4c2cd9d..388be42fd0 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -1359,6 +1359,12 @@ void Device::RemoveUnsuitableExtensions() { if (extensions.attachment_feedback_loop_layout) { supports_attachment_feedback_loop_layout = features.attachment_feedback_loop_layout.attachmentFeedbackLoopLayout; + if (!supports_attachment_feedback_loop_layout) { + LOG_WARNING(Render_Vulkan, + "VK_EXT_attachment_feedback_loop_layout advertised without reported feature support; disabling"); + } + features.attachment_feedback_loop_layout.attachmentFeedbackLoopLayout = + supports_attachment_feedback_loop_layout ? VK_TRUE : VK_FALSE; extensions.attachment_feedback_loop_layout = supports_attachment_feedback_loop_layout; }