This change is pushing my Vulkan knowledge to its limits.
Some checks failed
eden-license / license-header (pull_request) Failing after 22s

This commit is contained in:
Ribbit 2025-10-05 15:12:18 -07:00
parent 6125c4f9d9
commit 6d219d9685
10 changed files with 251 additions and 28 deletions

View file

@ -155,6 +155,9 @@ public:
void SetFeedbackLoopRequest(u8 color_mask, bool depth, bool supported);
FeedbackLoopRequest ConsumeFeedbackLoopRequest();
/// Returns the pending feedback-loop request without consuming it.
/// Unlike ConsumeFeedbackLoopRequest(), this leaves the request intact so it
/// can be inspected multiple times during descriptor setup.
const FeedbackLoopRequest& PeekFeedbackLoopRequest() const noexcept {
return pending_feedback_request;
}
bool SupportsAttachmentFeedbackLoopFormat(VideoCore::Surface::PixelFormat format, bool is_depth) const;
private:

View file

@ -4,6 +4,7 @@
#pragma once
#include <cstddef>
#include <limits>
#include <boost/container/small_vector.hpp>
@ -19,6 +20,62 @@ namespace Vulkan {
using Shader::Backend::SPIRV::NUM_TEXTURE_AND_IMAGE_SCALING_WORDS;
namespace detail {
/// Maps a guest pixel format onto the Vulkan image aspect flags that its
/// attachment would use. Unknown surface types yield an empty mask.
inline VkImageAspectFlags AttachmentAspectMask(VideoCore::Surface::PixelFormat format) {
    using VideoCore::Surface::SurfaceType;
    const SurfaceType surface_type = VideoCore::Surface::GetFormatType(format);
    if (surface_type == SurfaceType::ColorTexture) {
        return VK_IMAGE_ASPECT_COLOR_BIT;
    }
    if (surface_type == SurfaceType::Depth) {
        return VK_IMAGE_ASPECT_DEPTH_BIT;
    }
    if (surface_type == SurfaceType::Stencil) {
        return VK_IMAGE_ASPECT_STENCIL_BIT;
    }
    if (surface_type == SurfaceType::DepthStencil) {
        return VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
    }
    return 0;
}
/// Builds a VkImageSubresourceRange describing the subresources an image view
/// covers. Slice views are collapsed to a single layer starting at zero,
/// matching how they are bound as attachments.
inline VkImageSubresourceRange NormalizeImageViewRange(const ImageView& image_view) {
    const bool is_slice =
        (image_view.flags & VideoCommon::ImageViewFlagBits::Slice) != VideoCommon::ImageViewFlagBits{};
    // Slice views always address exactly one layer, regardless of the range
    // recorded on the view itself.
    const u32 base_layer = is_slice ? 0u : static_cast<u32>(image_view.range.base.layer);
    const u32 num_layers = is_slice ? 1u : static_cast<u32>(image_view.range.extent.layers);
    return VkImageSubresourceRange{
        .aspectMask = AttachmentAspectMask(image_view.format),
        .baseMipLevel = static_cast<u32>(image_view.range.base.level),
        .levelCount = static_cast<u32>(image_view.range.extent.levels),
        .baseArrayLayer = base_layer,
        .layerCount = num_layers,
    };
}
inline bool SubresourceRangeIntersects(const VkImageSubresourceRange& lhs,
const VkImageSubresourceRange& rhs) {
if ((lhs.aspectMask & rhs.aspectMask) == 0) {
return false;
}
const auto range_end = [](u32 base, u32 count, u32 remaining_value) -> u32 {
return count == remaining_value ? std::numeric_limits<u32>::max() : base + count;
};
const u32 lhs_level_end = range_end(lhs.baseMipLevel, lhs.levelCount, VK_REMAINING_MIP_LEVELS);
const u32 rhs_level_end = range_end(rhs.baseMipLevel, rhs.levelCount, VK_REMAINING_MIP_LEVELS);
if (lhs_level_end <= rhs.baseMipLevel || rhs_level_end <= lhs.baseMipLevel) {
return false;
}
const u32 lhs_layer_end = range_end(lhs.baseArrayLayer, lhs.layerCount, VK_REMAINING_ARRAY_LAYERS);
const u32 rhs_layer_end = range_end(rhs.baseArrayLayer, rhs.layerCount, VK_REMAINING_ARRAY_LAYERS);
if (lhs_layer_end <= rhs.baseArrayLayer || rhs_layer_end <= lhs.baseArrayLayer) {
return false;
}
return true;
}
} // namespace detail
class DescriptorLayoutBuilder {
public:
DescriptorLayoutBuilder(const Device& device_) : device{&device_} {}
@ -180,6 +237,48 @@ inline void PushImageDescriptors(TextureCache& texture_cache,
const Shader::Info& info, RescalingPushConstant& rescaling,
const VideoCommon::SamplerId*& samplers,
const VideoCommon::ImageViewInOut*& views) {
const Framebuffer* framebuffer = texture_cache.GetFramebuffer();
const auto& feedback_req = texture_cache.PeekFeedbackLoopRequest();
const auto choose_layout = [&](const ImageView& image_view) -> VkImageLayout {
if (!feedback_req.active || !feedback_req.supported || framebuffer == nullptr) {
return VK_IMAGE_LAYOUT_GENERAL;
}
const VkImage descriptor_image = image_view.ImageHandle();
if (descriptor_image == VK_NULL_HANDLE) {
return VK_IMAGE_LAYOUT_GENERAL;
}
const VkImageSubresourceRange view_range = detail::NormalizeImageViewRange(image_view);
for (u32 slot = 0; slot < VideoCommon::NUM_RT; ++slot) {
if (((feedback_req.color_mask >> slot) & 1u) == 0) {
continue;
}
if (framebuffer->ColorImage(slot) != descriptor_image) {
continue;
}
const VkImageSubresourceRange* attachment_range = framebuffer->ColorSubresourceRange(slot);
if (attachment_range == nullptr) {
continue;
}
if (detail::SubresourceRangeIntersects(view_range, *attachment_range)) {
return VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT;
}
}
if (feedback_req.depth) {
const VkImage depth_image = framebuffer->DepthStencilImage();
if (depth_image == descriptor_image) {
const VkImageSubresourceRange* attachment_range =
framebuffer->DepthStencilSubresourceRange();
if (attachment_range != nullptr &&
detail::SubresourceRangeIntersects(view_range, *attachment_range)) {
return VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT;
}
}
}
return VK_IMAGE_LAYOUT_GENERAL;
};
const u32 num_texture_buffers = Shader::NumDescriptors(info.texture_buffer_descriptors);
const u32 num_image_buffers = Shader::NumDescriptors(info.image_buffer_descriptors);
views += num_texture_buffers;
@ -195,7 +294,8 @@ inline void PushImageDescriptors(TextureCache& texture_cache,
!image_view.SupportsAnisotropy()};
const VkSampler vk_sampler{use_fallback_sampler ? sampler.HandleWithDefaultAnisotropy()
: sampler.Handle()};
guest_descriptor_queue.AddSampledImage(vk_image_view, vk_sampler);
const VkImageLayout layout = choose_layout(image_view);
guest_descriptor_queue.AddSampledImage(vk_image_view, vk_sampler, layout);
rescaling.PushTexture(texture_cache.IsRescaling(image_view));
}
}
@ -206,7 +306,8 @@ inline void PushImageDescriptors(TextureCache& texture_cache,
texture_cache.MarkModification(image_view.image_id);
}
const VkImageView vk_image_view{image_view.StorageView(desc.type, desc.format)};
guest_descriptor_queue.AddImage(vk_image_view);
const VkImageLayout layout = VK_IMAGE_LAYOUT_GENERAL; // Storage images must remain in GENERAL layout
guest_descriptor_queue.AddImage(vk_image_view, layout);
rescaling.PushImage(texture_cache.IsRescaling(image_view));
}
}

View file

@ -135,17 +135,6 @@ RenderPassKey MakeRenderPassKey(const FixedPipelineState& state) {
return key;
}
/// Returns one past the index of the highest render target with a non-NONE
/// format, i.e. the number of color attachment slots the pipeline must cover.
size_t NumAttachments(const FixedPipelineState& state) {
    // Scan from the top down; the first populated slot determines the count.
    for (size_t rt = Maxwell::NumRenderTargets; rt > 0; --rt) {
        const auto fmt{static_cast<Tegra::RenderTargetFormat>(state.color_formats[rt - 1])};
        if (fmt != Tegra::RenderTargetFormat::NONE) {
            return rt;
        }
    }
    return 0;
}
template <typename Spec>
bool Passes(const std::array<vk::ShaderModule, NUM_STAGES>& modules,
const std::array<Shader::Info, NUM_STAGES>& stage_infos) {
@ -460,6 +449,10 @@ bool GraphicsPipeline::ConfigureImpl(bool is_indexed) {
buffer_cache.UpdateGraphicsBuffers(is_indexed);
buffer_cache.BindHostGeometryBuffers(is_indexed);
// Ensure framebuffer and feedback-loop state are ready before writing descriptors
texture_cache.UpdateRenderTargets(false);
texture_cache.CheckFeedbackLoop(views);
guest_descriptor_queue.Acquire();
RescalingPushConstant rescaling;
@ -492,8 +485,6 @@ bool GraphicsPipeline::ConfigureImpl(bool is_indexed) {
if constexpr (Spec::enabled_stages[4]) {
prepare_stage(4);
}
texture_cache.UpdateRenderTargets(false);
texture_cache.CheckFeedbackLoop(views);
ConfigureDraw(rescaling, render_area);
return true;
@ -769,14 +760,28 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
LOG_WARNING(Render_Vulkan, "Depth bounds is enabled but not supported");
}
static_vector<VkPipelineColorBlendAttachmentState, Maxwell::NumRenderTargets> cb_attachments;
const size_t num_attachments{NumAttachments(key.state)};
for (size_t index = 0; index < num_attachments; ++index) {
for (size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
static constexpr std::array mask_table{
VK_COLOR_COMPONENT_R_BIT,
VK_COLOR_COMPONENT_G_BIT,
VK_COLOR_COMPONENT_B_BIT,
VK_COLOR_COMPONENT_A_BIT,
};
const auto format{
static_cast<Tegra::RenderTargetFormat>(key.state.color_formats[index])};
if (format == Tegra::RenderTargetFormat::NONE) {
cb_attachments.push_back({
.blendEnable = VK_FALSE,
.srcColorBlendFactor = VK_BLEND_FACTOR_ONE,
.dstColorBlendFactor = VK_BLEND_FACTOR_ZERO,
.colorBlendOp = VK_BLEND_OP_ADD,
.srcAlphaBlendFactor = VK_BLEND_FACTOR_ONE,
.dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO,
.alphaBlendOp = VK_BLEND_OP_ADD,
.colorWriteMask = 0,
});
continue;
}
const auto& blend{key.state.attachments[index]};
const std::array mask{blend.Mask()};
VkColorComponentFlags write_mask{};

View file

@ -116,14 +116,13 @@ VkRenderPass RenderPassCache::Get(const RenderPassKey& key) {
.preserveAttachmentCount = 0,
.pPreserveAttachments = nullptr,
};
VkDependencyFlags dependency_flags = VK_DEPENDENCY_BY_REGION_BIT;
if (device->IsAttachmentFeedbackLoopLayoutSupported() &&
(key.color_feedback_mask != 0 || key.depth_feedback)) {
dependency_flags |= VK_DEPENDENCY_FEEDBACK_LOOP_BIT_EXT;
}
const VkSubpassDependency dependency{
.srcSubpass = 0, // Current subpass
.dstSubpass = 0, // Same subpass (self-dependency)
const bool feedback_enabled = device->IsAttachmentFeedbackLoopLayoutSupported() &&
(key.color_feedback_mask != 0 || key.depth_feedback);
std::array<VkSubpassDependency, 2> dependencies{
VkSubpassDependency{
.srcSubpass = 0,
.dstSubpass = 0,
.srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT |
VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT,
@ -131,8 +130,28 @@ VkRenderPass RenderPassCache::Get(const RenderPassKey& key) {
.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
.dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
.dependencyFlags = dependency_flags
.dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT |
(feedback_enabled ? VK_DEPENDENCY_FEEDBACK_LOOP_BIT_EXT : 0u),
},
VkSubpassDependency{}
};
u32 dependency_count = 1;
if (feedback_enabled) {
dependencies[1] = VkSubpassDependency{
.srcSubpass = 0,
.dstSubpass = 0,
.srcStageMask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
.dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT |
VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT,
.srcAccessMask = VK_ACCESS_SHADER_READ_BIT,
.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
.dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT | VK_DEPENDENCY_FEEDBACK_LOOP_BIT_EXT,
};
dependency_count = 2;
}
pair->second = device->GetLogical().CreateRenderPass({
.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
.pNext = nullptr,
@ -141,8 +160,8 @@ VkRenderPass RenderPassCache::Get(const RenderPassKey& key) {
.pAttachments = descriptions.empty() ? nullptr : descriptions.data(),
.subpassCount = 1,
.pSubpasses = &subpass,
.dependencyCount = 1,
.pDependencies = &dependency,
.dependencyCount = dependency_count,
.pDependencies = dependencies.data(),
});
return *pair->second;
}

View file

@ -6,6 +6,7 @@
#include <algorithm>
#include <array>
#include <limits>
#include <span>
#include <memory>
#include <vector>
@ -2330,6 +2331,7 @@ void Framebuffer::CreateFramebuffer(TextureCacheRuntime& runtime,
s32 num_layers = 1;
is_rescaled = is_rescaled_;
rt_map.fill(std::numeric_limits<size_t>::max());
const auto& resolution = runtime.resolution;
u32 width = (std::numeric_limits<u32>::max)();
@ -2404,6 +2406,56 @@ VkRenderPass Framebuffer::RenderPass(std::uint8_t color_feedback_mask, bool dept
return render_pass_cache->Get(key);
}
/// Returns the VkImage bound as color attachment for the given render-target
/// slot, or VK_NULL_HANDLE when the slot is unmapped, out of range, or the
/// mapped image does not carry the color aspect.
VkImage Framebuffer::ColorImage(size_t slot) const noexcept {
    if (slot >= NUM_RT) {
        return VK_NULL_HANDLE;
    }
    // Unmapped slots hold size_t max in rt_map, which fails the bounds check.
    const size_t index = rt_map[slot];
    if (index >= num_images) {
        return VK_NULL_HANDLE;
    }
    const bool is_color = (image_ranges[index].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) != 0;
    return is_color ? images[index] : VK_NULL_HANDLE;
}
/// Returns the VkImage used as the depth/stencil attachment, or
/// VK_NULL_HANDLE when the framebuffer has no depth or stencil buffer.
VkImage Framebuffer::DepthStencilImage() const noexcept {
    if (has_depth || has_stencil) {
        // The depth/stencil image, when present, is stored immediately after
        // the color attachments in the images array.
        const size_t index = num_color_buffers;
        if (index < num_images) {
            return images[index];
        }
    }
    return VK_NULL_HANDLE;
}
/// Returns the subresource range of the color attachment mapped to the given
/// render-target slot, or nullptr when the slot is unmapped or out of range.
const VkImageSubresourceRange* Framebuffer::ColorSubresourceRange(size_t slot) const noexcept {
    if (slot >= NUM_RT) {
        return nullptr;
    }
    const size_t index = rt_map[slot];
    // size_t max marks an unmapped slot; it also fails the bounds checks, but
    // the explicit sentinel comparison keeps the intent visible.
    const bool mapped = index != std::numeric_limits<size_t>::max() &&
                        index < num_color_buffers && index < num_images;
    return mapped ? &image_ranges[index] : nullptr;
}
/// Returns the subresource range of the depth/stencil attachment, or nullptr
/// when the framebuffer has no depth or stencil buffer.
const VkImageSubresourceRange* Framebuffer::DepthStencilSubresourceRange() const noexcept {
    if (has_depth || has_stencil) {
        // Depth/stencil data lives right after the color attachments.
        const size_t index = num_color_buffers;
        if (index < num_images) {
            return &image_ranges[index];
        }
    }
    return nullptr;
}
void TextureCacheRuntime::AccelerateImageUpload(
Image& image, const StagingBufferRef& map,
std::span<const VideoCommon::SwizzleParameters> swizzles) {

View file

@ -121,6 +121,9 @@ public:
void BarrierFeedbackLoop();
void SetFeedbackLoopRequest(u8 color_mask, bool depth, bool supported);
FeedbackLoopRequest ConsumeFeedbackLoopRequest();
/// Returns the pending feedback-loop request without consuming it, so callers
/// can inspect the request repeatedly while descriptors are being written.
const FeedbackLoopRequest& PeekFeedbackLoopRequest() const noexcept {
return pending_feedback_request;
}
bool IsFormatDitherable(VideoCore::Surface::PixelFormat format);
bool IsFormatScalable(VideoCore::Surface::PixelFormat format);
@ -390,6 +393,13 @@ public:
return is_rescaled;
}
[[nodiscard]] VkImage ColorImage(size_t slot) const noexcept;
[[nodiscard]] VkImage DepthStencilImage() const noexcept;
[[nodiscard]] const VkImageSubresourceRange* ColorSubresourceRange(size_t slot) const noexcept;
[[nodiscard]] const VkImageSubresourceRange* DepthStencilSubresourceRange() const noexcept;
private:
vk::Framebuffer framebuffer;
VkRenderPass renderpass{};
@ -427,3 +437,4 @@ struct TextureCacheParams {
using TextureCache = VideoCommon::TextureCache<TextureCacheParams>;
} // namespace Vulkan

View file

@ -55,6 +55,15 @@ public:
};
}
/// Appends a combined image/sampler descriptor using an explicitly supplied
/// image layout instead of the default one.
void AddSampledImage(VkImageView image_view, VkSampler sampler, VkImageLayout layout) {
    VkDescriptorImageInfo& info = *payload_cursor;
    info.sampler = sampler;
    info.imageView = image_view;
    info.imageLayout = layout;
    ++payload_cursor;
}
void AddImage(VkImageView image_view) {
*(payload_cursor++) = VkDescriptorImageInfo{
.sampler = VK_NULL_HANDLE,
@ -63,6 +72,14 @@ public:
};
}
/// Appends a storage-image descriptor (no sampler) with an explicitly
/// supplied image layout.
void AddImage(VkImageView image_view, VkImageLayout layout) {
    VkDescriptorImageInfo& info = *payload_cursor;
    info.sampler = VK_NULL_HANDLE;
    info.imageView = image_view;
    info.imageLayout = layout;
    ++payload_cursor;
}
void AddBuffer(VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size) {
*(payload_cursor++) = VkDescriptorBufferInfo{
.buffer = buffer,

View file

@ -267,6 +267,11 @@ void TextureCache<P>::CheckFeedbackLoop(std::span<const ImageViewInOut> views) {
runtime.SetFeedbackLoopRequest(color_mask, depth_feedback, supported);
}
/// Forwards to the runtime's non-consuming accessor for the pending
/// attachment feedback-loop request.
template <class P>
const typename P::Runtime::FeedbackLoopRequest& TextureCache<P>::PeekFeedbackLoopRequest() const noexcept {
return runtime.PeekFeedbackLoopRequest();
}
template <class P>
typename P::Sampler* TextureCache<P>::GetGraphicsSampler(u32 index) {
return &slot_samplers[GetGraphicsSamplerId(index)];
@ -2575,3 +2580,4 @@ void TextureCache<P>::OnGPUASRegister([[maybe_unused]] size_t map_id) {
}
} // namespace VideoCommon

View file

@ -159,6 +159,9 @@ public:
/// Handle feedback loops during draws.
void CheckFeedbackLoop(std::span<const ImageViewInOut> views);
/// Peek current pending attachment feedback loop request (without consuming it)
[[nodiscard]] const typename P::Runtime::FeedbackLoopRequest& PeekFeedbackLoopRequest() const noexcept;
/// Get the sampler from the graphics descriptor table in the specified index
Sampler* GetGraphicsSampler(u32 index);

View file

@ -1359,6 +1359,12 @@ void Device::RemoveUnsuitableExtensions() {
if (extensions.attachment_feedback_loop_layout) {
supports_attachment_feedback_loop_layout =
features.attachment_feedback_loop_layout.attachmentFeedbackLoopLayout;
if (!supports_attachment_feedback_loop_layout) {
LOG_WARNING(Render_Vulkan,
"VK_EXT_attachment_feedback_loop_layout advertised without reported feature support; disabling");
}
features.attachment_feedback_loop_layout.attachmentFeedbackLoopLayout =
supports_attachment_feedback_loop_layout ? VK_TRUE : VK_FALSE;
extensions.attachment_feedback_loop_layout =
supports_attachment_feedback_loop_layout;
}