WIP: [vk] Implement Attachment Feedback Loop Layout #2676

Closed
Ribbit wants to merge 2 commits from Ribbit/ribbitvulkanadditions:attachmentloop into master
24 changed files with 685 additions and 126 deletions

View file

@ -673,6 +673,23 @@ void TextureCacheRuntime::InsertUploadMemoryBarrier() {
glMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT | GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
}
/// Records a pending attachment feedback loop request, replacing any previous one.
/// The request counts as active when at least one color bit is set or depth is requested.
void TextureCacheRuntime::SetFeedbackLoopRequest(u8 color_mask, bool depth, bool supported) {
    const bool any_attachment_requested = (color_mask != 0) || depth;
    pending_feedback_request = FeedbackLoopRequest{
        .active = any_attachment_requested,
        .color_mask = color_mask,
        .depth = depth,
        .supported = supported,
    };
}
/// Returns the pending feedback loop request and resets the stored one to its default
/// (inactive) state, so a request is handed out only once.
TextureCacheRuntime::FeedbackLoopRequest TextureCacheRuntime::ConsumeFeedbackLoopRequest() {
    const FeedbackLoopRequest consumed{pending_feedback_request};
    pending_feedback_request = FeedbackLoopRequest{};
    return consumed;
}
bool TextureCacheRuntime::SupportsAttachmentFeedbackLoopFormat(VideoCore::Surface::PixelFormat, bool) const {
return true;
}
FormatProperties TextureCacheRuntime::FormatInfo(ImageType type, GLenum internal_format) const {
switch (type) {
case ImageType::e1D:

View file

@ -15,6 +15,7 @@
#include "video_core/renderer_opengl/util_shaders.h"
#include "video_core/texture_cache/image_view_base.h"
#include "video_core/texture_cache/texture_cache_base.h"
#include "video_core/surface.h"
namespace Settings {
struct ResolutionScalingInfo;
@ -65,6 +66,13 @@ class TextureCacheRuntime {
friend Sampler;
public:
/// Describes an attachment feedback loop request. A default-constructed value is an
/// inactive request with every field cleared.
struct FeedbackLoopRequest {
    bool active = false;    ///< True when a feedback loop was requested.
    u8 color_mask = 0;      ///< Requested color attachments (presumably one bit per slot).
    bool depth = false;     ///< True when the depth attachment takes part in the loop.
    bool supported = false; ///< Support flag supplied by whoever raised the request.
};
explicit TextureCacheRuntime(const Device& device, ProgramManager& program_manager,
StateTracker& state_tracker,
StagingBufferPool& staging_buffer_pool);
@ -145,6 +153,13 @@ public:
// OpenGL does not require a barrier for attachment feedback loops.
}
/// Stores a pending feedback loop request built from the given color mask, depth flag and
/// support flag, overwriting any request that was already pending.
void SetFeedbackLoopRequest(u8 color_mask, bool depth, bool supported);
/// Returns the pending request and resets the stored one to its default (inactive) state.
FeedbackLoopRequest ConsumeFeedbackLoopRequest();
/// Read-only access to the pending request; unlike ConsumeFeedbackLoopRequest it does not
/// reset it.
const FeedbackLoopRequest& PeekFeedbackLoopRequest() const noexcept {
return pending_feedback_request;
}
/// Reports whether the given format can be used in an attachment feedback loop.
bool SupportsAttachmentFeedbackLoopFormat(VideoCore::Surface::PixelFormat format, bool is_depth) const;
private:
const Device& device;
StateTracker& state_tracker;
@ -170,6 +185,7 @@ private:
std::array<OGLFramebuffer, 4> rescale_read_fbos;
const Settings::ResolutionScalingInfo& resolution;
u64 device_access_memory;
FeedbackLoopRequest pending_feedback_request{};
};
class Image : public VideoCommon::ImageBase {

View file

@ -418,7 +418,7 @@ void TransitionImageLayout(vk::CommandBuffer& cmdbuf, VkImage image, VkImageLayo
}
void BeginRenderPass(vk::CommandBuffer& cmdbuf, const Framebuffer* framebuffer) {
const VkRenderPass render_pass = framebuffer->RenderPass();
const VkRenderPass render_pass = framebuffer->RenderPass(0, false);
const VkFramebuffer framebuffer_handle = framebuffer->Handle();
const VkExtent2D render_area = framebuffer->RenderArea();
const VkRenderPassBeginInfo renderpass_bi{
@ -490,13 +490,13 @@ void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, VkImageView
Tegra::Engines::Fermi2D::Operation operation) {
const bool is_linear = filter == Tegra::Engines::Fermi2D::Filter::Bilinear;
const BlitImagePipelineKey key{
.renderpass = dst_framebuffer->RenderPass(),
.renderpass = dst_framebuffer->RenderPass(0, false),
.operation = operation,
};
const VkPipelineLayout layout = *one_texture_pipeline_layout;
const VkSampler sampler = is_linear ? *linear_sampler : *nearest_sampler;
const VkPipeline pipeline = FindOrEmplaceColorPipeline(key);
scheduler.RequestRenderpass(dst_framebuffer);
scheduler.RequestRenderpass(dst_framebuffer, 0, false);
scheduler.Record([this, dst_region, src_region, pipeline, layout, sampler,
src_view](vk::CommandBuffer cmdbuf) {
// TODO: Barriers
@ -516,7 +516,7 @@ void BlitImageHelper::BlitColor(const Framebuffer* dst_framebuffer, VkImageView
const Region2D& dst_region, const Region2D& src_region,
const Extent3D& src_size) {
const BlitImagePipelineKey key{
.renderpass = dst_framebuffer->RenderPass(),
.renderpass = dst_framebuffer->RenderPass(0, false),
.operation = Tegra::Engines::Fermi2D::Operation::SrcCopy,
};
const VkPipelineLayout layout = *one_texture_pipeline_layout;
@ -548,13 +548,13 @@ void BlitImageHelper::BlitDepthStencil(const Framebuffer* dst_framebuffer,
ASSERT(filter == Tegra::Engines::Fermi2D::Filter::Point);
ASSERT(operation == Tegra::Engines::Fermi2D::Operation::SrcCopy);
const BlitImagePipelineKey key{
.renderpass = dst_framebuffer->RenderPass(),
.renderpass = dst_framebuffer->RenderPass(0, false),
.operation = operation,
};
const VkPipelineLayout layout = *two_textures_pipeline_layout;
const VkSampler sampler = *nearest_sampler;
const VkPipeline pipeline = FindOrEmplaceDepthStencilPipeline(key);
scheduler.RequestRenderpass(dst_framebuffer);
scheduler.RequestRenderpass(dst_framebuffer, 0, false);
scheduler.Record([dst_region, src_region, pipeline, layout, sampler, src_depth_view,
src_stencil_view, this](vk::CommandBuffer cmdbuf) {
// TODO: Barriers
@ -572,59 +572,59 @@ void BlitImageHelper::BlitDepthStencil(const Framebuffer* dst_framebuffer,
void BlitImageHelper::ConvertD32ToR32(const Framebuffer* dst_framebuffer,
const ImageView& src_image_view) {
ConvertDepthToColorPipeline(convert_d32_to_r32_pipeline, dst_framebuffer->RenderPass());
ConvertDepthToColorPipeline(convert_d32_to_r32_pipeline, dst_framebuffer->RenderPass(0, false));
Convert(*convert_d32_to_r32_pipeline, dst_framebuffer, src_image_view);
}
void BlitImageHelper::ConvertR32ToD32(const Framebuffer* dst_framebuffer,
const ImageView& src_image_view) {
ConvertColorToDepthPipeline(convert_r32_to_d32_pipeline, dst_framebuffer->RenderPass());
ConvertColorToDepthPipeline(convert_r32_to_d32_pipeline, dst_framebuffer->RenderPass(0, false));
Convert(*convert_r32_to_d32_pipeline, dst_framebuffer, src_image_view);
}
void BlitImageHelper::ConvertD16ToR16(const Framebuffer* dst_framebuffer,
const ImageView& src_image_view) {
ConvertDepthToColorPipeline(convert_d16_to_r16_pipeline, dst_framebuffer->RenderPass());
ConvertDepthToColorPipeline(convert_d16_to_r16_pipeline, dst_framebuffer->RenderPass(0, false));
Convert(*convert_d16_to_r16_pipeline, dst_framebuffer, src_image_view);
}
void BlitImageHelper::ConvertR16ToD16(const Framebuffer* dst_framebuffer,
const ImageView& src_image_view) {
ConvertColorToDepthPipeline(convert_r16_to_d16_pipeline, dst_framebuffer->RenderPass());
ConvertColorToDepthPipeline(convert_r16_to_d16_pipeline, dst_framebuffer->RenderPass(0, false));
Convert(*convert_r16_to_d16_pipeline, dst_framebuffer, src_image_view);
}
void BlitImageHelper::ConvertABGR8ToD24S8(const Framebuffer* dst_framebuffer,
const ImageView& src_image_view) {
ConvertPipelineDepthTargetEx(convert_abgr8_to_d24s8_pipeline, dst_framebuffer->RenderPass(),
ConvertPipelineDepthTargetEx(convert_abgr8_to_d24s8_pipeline, dst_framebuffer->RenderPass(0, false),
convert_abgr8_to_d24s8_frag);
Convert(*convert_abgr8_to_d24s8_pipeline, dst_framebuffer, src_image_view);
}
void BlitImageHelper::ConvertABGR8ToD32F(const Framebuffer* dst_framebuffer,
const ImageView& src_image_view) {
ConvertPipelineDepthTargetEx(convert_abgr8_to_d32f_pipeline, dst_framebuffer->RenderPass(),
ConvertPipelineDepthTargetEx(convert_abgr8_to_d32f_pipeline, dst_framebuffer->RenderPass(0, false),
convert_abgr8_to_d32f_frag);
Convert(*convert_abgr8_to_d32f_pipeline, dst_framebuffer, src_image_view);
}
void BlitImageHelper::ConvertD32FToABGR8(const Framebuffer* dst_framebuffer,
ImageView& src_image_view) {
ConvertPipelineColorTargetEx(convert_d32f_to_abgr8_pipeline, dst_framebuffer->RenderPass(),
ConvertPipelineColorTargetEx(convert_d32f_to_abgr8_pipeline, dst_framebuffer->RenderPass(0, false),
convert_d32f_to_abgr8_frag);
ConvertDepthStencil(*convert_d32f_to_abgr8_pipeline, dst_framebuffer, src_image_view);
}
void BlitImageHelper::ConvertD24S8ToABGR8(const Framebuffer* dst_framebuffer,
ImageView& src_image_view) {
ConvertPipelineColorTargetEx(convert_d24s8_to_abgr8_pipeline, dst_framebuffer->RenderPass(),
ConvertPipelineColorTargetEx(convert_d24s8_to_abgr8_pipeline, dst_framebuffer->RenderPass(0, false),
convert_d24s8_to_abgr8_frag);
ConvertDepthStencil(*convert_d24s8_to_abgr8_pipeline, dst_framebuffer, src_image_view);
}
void BlitImageHelper::ConvertS8D24ToABGR8(const Framebuffer* dst_framebuffer,
ImageView& src_image_view) {
ConvertPipelineColorTargetEx(convert_s8d24_to_abgr8_pipeline, dst_framebuffer->RenderPass(),
ConvertPipelineColorTargetEx(convert_s8d24_to_abgr8_pipeline, dst_framebuffer->RenderPass(0, false),
convert_s8d24_to_abgr8_frag);
ConvertDepthStencil(*convert_s8d24_to_abgr8_pipeline, dst_framebuffer, src_image_view);
}
@ -632,7 +632,7 @@ void BlitImageHelper::ConvertS8D24ToABGR8(const Framebuffer* dst_framebuffer,
void BlitImageHelper::ConvertABGR8SRGBToD24S8(const Framebuffer* dst_framebuffer,
const ImageView& src_image_view) {
ConvertPipelineDepthTargetEx(convert_abgr8_srgb_to_d24s8_pipeline,
dst_framebuffer->RenderPass(),
dst_framebuffer->RenderPass(0, false),
convert_abgr8_srgb_to_d24s8_frag);
Convert(*convert_abgr8_srgb_to_d24s8_pipeline, dst_framebuffer, src_image_view);
}
@ -641,12 +641,12 @@ void BlitImageHelper::ClearColor(const Framebuffer* dst_framebuffer, u8 color_ma
const std::array<f32, 4>& clear_color,
const Region2D& dst_region) {
const BlitImagePipelineKey key{
.renderpass = dst_framebuffer->RenderPass(),
.renderpass = dst_framebuffer->RenderPass(0, false),
.operation = Tegra::Engines::Fermi2D::Operation::BlendPremult,
};
const VkPipeline pipeline = FindOrEmplaceClearColorPipeline(key);
const VkPipelineLayout layout = *clear_color_pipeline_layout;
scheduler.RequestRenderpass(dst_framebuffer);
scheduler.RequestRenderpass(dst_framebuffer, 0, false);
scheduler.Record(
[pipeline, layout, color_mask, clear_color, dst_region](vk::CommandBuffer cmdbuf) {
cmdbuf.BindPipeline(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
@ -665,7 +665,7 @@ void BlitImageHelper::ClearDepthStencil(const Framebuffer* dst_framebuffer, bool
f32 clear_depth, u8 stencil_mask, u32 stencil_ref,
u32 stencil_compare_mask, const Region2D& dst_region) {
const BlitDepthStencilPipelineKey key{
.renderpass = dst_framebuffer->RenderPass(),
.renderpass = dst_framebuffer->RenderPass(0, false),
.depth_clear = depth_clear,
.stencil_mask = stencil_mask,
.stencil_compare_mask = stencil_compare_mask,
@ -673,7 +673,7 @@ void BlitImageHelper::ClearDepthStencil(const Framebuffer* dst_framebuffer, bool
};
const VkPipeline pipeline = FindOrEmplaceClearStencilPipeline(key);
const VkPipelineLayout layout = *clear_color_pipeline_layout;
scheduler.RequestRenderpass(dst_framebuffer);
scheduler.RequestRenderpass(dst_framebuffer, 0, false);
scheduler.Record([pipeline, layout, clear_depth, dst_region](vk::CommandBuffer cmdbuf) {
constexpr std::array blend_constants{0.0f, 0.0f, 0.0f, 0.0f};
cmdbuf.SetBlendConstants(blend_constants.data());
@ -692,7 +692,7 @@ void BlitImageHelper::Convert(VkPipeline pipeline, const Framebuffer* dst_frameb
const VkSampler sampler = *nearest_sampler;
const VkExtent2D extent = GetConversionExtent(src_image_view);
scheduler.RequestRenderpass(dst_framebuffer);
scheduler.RequestRenderpass(dst_framebuffer, 0, false);
scheduler.Record([pipeline, layout, sampler, src_view, extent, this](vk::CommandBuffer cmdbuf) {
const VkOffset2D offset{
.x = 0,
@ -737,7 +737,7 @@ void BlitImageHelper::ConvertDepthStencil(VkPipeline pipeline, const Framebuffer
const VkSampler sampler = *nearest_sampler;
const VkExtent2D extent = GetConversionExtent(src_image_view);
scheduler.RequestRenderpass(dst_framebuffer);
scheduler.RequestRenderpass(dst_framebuffer, 0, false);
scheduler.Record([pipeline, layout, sampler, src_depth_view, src_stencil_view, extent,
this](vk::CommandBuffer cmdbuf) {
const VkOffset2D offset{
@ -1108,7 +1108,7 @@ void BlitImageHelper::ConvertPipeline(vk::Pipeline& pipeline, VkRenderPass rende
void BlitImageHelper::ConvertRGBAtoGBRA(const Framebuffer* dst_framebuffer,
const ImageView& src_image_view) {
ConvertPipeline(convert_rgba_to_bgra_pipeline,
dst_framebuffer->RenderPass(),
dst_framebuffer->RenderPass(0, false),
false);
Convert(*convert_rgba_to_bgra_pipeline, dst_framebuffer, src_image_view);
}
@ -1116,7 +1116,7 @@ void BlitImageHelper::ConvertRGBAtoGBRA(const Framebuffer* dst_framebuffer,
void BlitImageHelper::ConvertYUV420toRGB(const Framebuffer* dst_framebuffer,
const ImageView& src_image_view) {
ConvertPipeline(convert_yuv420_to_rgb_pipeline,
dst_framebuffer->RenderPass(),
dst_framebuffer->RenderPass(0, false),
false);
Convert(*convert_yuv420_to_rgb_pipeline, dst_framebuffer, src_image_view);
}
@ -1124,7 +1124,7 @@ void BlitImageHelper::ConvertYUV420toRGB(const Framebuffer* dst_framebuffer,
void BlitImageHelper::ConvertRGBtoYUV420(const Framebuffer* dst_framebuffer,
const ImageView& src_image_view) {
ConvertPipeline(convert_rgb_to_yuv420_pipeline,
dst_framebuffer->RenderPass(),
dst_framebuffer->RenderPass(0, false),
false);
Convert(*convert_rgb_to_yuv420_pipeline, dst_framebuffer, src_image_view);
}
@ -1132,7 +1132,7 @@ void BlitImageHelper::ConvertRGBtoYUV420(const Framebuffer* dst_framebuffer,
void BlitImageHelper::ConvertBC7toRGBA8(const Framebuffer* dst_framebuffer,
const ImageView& src_image_view) {
ConvertPipeline(convert_bc7_to_rgba8_pipeline,
dst_framebuffer->RenderPass(),
dst_framebuffer->RenderPass(0, false),
false);
Convert(*convert_bc7_to_rgba8_pipeline, dst_framebuffer, src_image_view);
}
@ -1140,7 +1140,7 @@ void BlitImageHelper::ConvertBC7toRGBA8(const Framebuffer* dst_framebuffer,
void BlitImageHelper::ConvertASTCHDRtoRGBA16F(const Framebuffer* dst_framebuffer,
const ImageView& src_image_view) {
ConvertPipeline(convert_astc_hdr_to_rgba16f_pipeline,
dst_framebuffer->RenderPass(),
dst_framebuffer->RenderPass(0, false),
false);
Convert(*convert_astc_hdr_to_rgba16f_pipeline, dst_framebuffer, src_image_view);
}
@ -1148,7 +1148,7 @@ void BlitImageHelper::ConvertASTCHDRtoRGBA16F(const Framebuffer* dst_framebuffer
void BlitImageHelper::ConvertRGBA16FtoRGBA8(const Framebuffer* dst_framebuffer,
const ImageView& src_image_view) {
ConvertPipeline(convert_rgba16f_to_rgba8_pipeline,
dst_framebuffer->RenderPass(),
dst_framebuffer->RenderPass(0, false),
false);
Convert(*convert_rgba16f_to_rgba8_pipeline, dst_framebuffer, src_image_view);
}
@ -1156,7 +1156,7 @@ void BlitImageHelper::ConvertRGBA16FtoRGBA8(const Framebuffer* dst_framebuffer,
void BlitImageHelper::ApplyDitherTemporal(const Framebuffer* dst_framebuffer,
const ImageView& src_image_view) {
ConvertPipeline(dither_temporal_pipeline,
dst_framebuffer->RenderPass(),
dst_framebuffer->RenderPass(0, false),
false);
Convert(*dither_temporal_pipeline, dst_framebuffer, src_image_view);
}
@ -1164,7 +1164,7 @@ void BlitImageHelper::ApplyDitherTemporal(const Framebuffer* dst_framebuffer,
void BlitImageHelper::ApplyDynamicResolutionScale(const Framebuffer* dst_framebuffer,
const ImageView& src_image_view) {
ConvertPipeline(dynamic_resolution_scale_pipeline,
dst_framebuffer->RenderPass(),
dst_framebuffer->RenderPass(0, false),
false);
Convert(*dynamic_resolution_scale_pipeline, dst_framebuffer, src_image_view);
}

View file

@ -214,6 +214,8 @@ struct FixedPipelineState {
BitField<15, 1, u32> alpha_to_coverage_enabled;
BitField<16, 1, u32> alpha_to_one_enabled;
BitField<17, 3, Tegra::Engines::Maxwell3D::EngineHint> app_stage;
BitField<20, 8, u32> color_feedback_mask;
BitField<28, 1, u32> depth_feedback;
};
std::array<u8, Maxwell::NumRenderTargets> color_formats;
@ -241,6 +243,19 @@ struct FixedPipelineState {
u32 line_stipple_factor;
u32 line_stipple_pattern;
/// Stores the attachment feedback state in the pipeline state bit fields: the 8-bit color
/// mask and a single depth bit.
void SetAttachmentFeedback(u8 mask, bool depth) noexcept {
    const u32 depth_bit = depth ? 1u : 0u;
    depth_feedback.Assign(depth_bit);
    color_feedback_mask.Assign(mask);
}
/// Returns the stored color feedback mask narrowed to its 8-bit width.
[[nodiscard]] u8 AttachmentFeedbackMask() const noexcept {
    const auto stored_mask = color_feedback_mask.Value();
    return static_cast<u8>(stored_mask);
}
/// Returns true when the depth feedback bit is set.
[[nodiscard]] bool HasDepthAttachmentFeedback() const noexcept {
    const bool depth_bit_set = depth_feedback.Value() != 0;
    return depth_bit_set;
}
void Refresh(Tegra::Engines::Maxwell3D& maxwell3d, DynamicFeatures& features);
size_t Hash() const noexcept;

View file

@ -4,6 +4,7 @@
#pragma once
#include <cstddef>
#include <limits>
#include <boost/container/small_vector.hpp>
@ -19,6 +20,62 @@ namespace Vulkan {
using Shader::Backend::SPIRV::NUM_TEXTURE_AND_IMAGE_SCALING_WORDS;
namespace detail {
/// Maps a pixel format to the Vulkan image aspect bits of its surface type.
/// Surface types other than color, depth, stencil and depth-stencil yield an empty mask.
inline VkImageAspectFlags AttachmentAspectMask(VideoCore::Surface::PixelFormat format) {
    using VideoCore::Surface::SurfaceType;
    const SurfaceType surface_type = VideoCore::Surface::GetFormatType(format);
    if (surface_type == SurfaceType::ColorTexture) {
        return VK_IMAGE_ASPECT_COLOR_BIT;
    }
    if (surface_type == SurfaceType::Depth) {
        return VK_IMAGE_ASPECT_DEPTH_BIT;
    }
    if (surface_type == SurfaceType::Stencil) {
        return VK_IMAGE_ASPECT_STENCIL_BIT;
    }
    if (surface_type == SurfaceType::DepthStencil) {
        return VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
    }
    return 0;
}
/// Builds the Vulkan subresource range covered by an image view.
/// Mip levels and array layers come from the view's range; views flagged as Slice are
/// reported as a single layer starting at layer 0.
inline VkImageSubresourceRange NormalizeImageViewRange(const ImageView& image_view) {
    const bool is_slice = (image_view.flags & VideoCommon::ImageViewFlagBits::Slice) !=
                          VideoCommon::ImageViewFlagBits{};
    const u32 first_layer = is_slice ? 0u : static_cast<u32>(image_view.range.base.layer);
    const u32 num_layers = is_slice ? 1u : static_cast<u32>(image_view.range.extent.layers);
    return VkImageSubresourceRange{
        .aspectMask = AttachmentAspectMask(image_view.format),
        .baseMipLevel = static_cast<u32>(image_view.range.base.level),
        .levelCount = static_cast<u32>(image_view.range.extent.levels),
        .baseArrayLayer = first_layer,
        .layerCount = num_layers,
    };
}
inline bool SubresourceRangeIntersects(const VkImageSubresourceRange& lhs,
const VkImageSubresourceRange& rhs) {
if ((lhs.aspectMask & rhs.aspectMask) == 0) {
return false;
}
const auto range_end = [](u32 base, u32 count, u32 remaining_value) -> u32 {
return count == remaining_value ? std::numeric_limits<u32>::max() : base + count;
};
const u32 lhs_level_end = range_end(lhs.baseMipLevel, lhs.levelCount, VK_REMAINING_MIP_LEVELS);
const u32 rhs_level_end = range_end(rhs.baseMipLevel, rhs.levelCount, VK_REMAINING_MIP_LEVELS);
if (lhs_level_end <= rhs.baseMipLevel || rhs_level_end <= lhs.baseMipLevel) {
return false;
}
const u32 lhs_layer_end = range_end(lhs.baseArrayLayer, lhs.layerCount, VK_REMAINING_ARRAY_LAYERS);
const u32 rhs_layer_end = range_end(rhs.baseArrayLayer, rhs.layerCount, VK_REMAINING_ARRAY_LAYERS);
if (lhs_layer_end <= rhs.baseArrayLayer || rhs_layer_end <= lhs.baseArrayLayer) {
return false;
}
return true;
}
} // namespace detail
class DescriptorLayoutBuilder {
public:
DescriptorLayoutBuilder(const Device& device_) : device{&device_} {}
@ -180,6 +237,48 @@ inline void PushImageDescriptors(TextureCache& texture_cache,
const Shader::Info& info, RescalingPushConstant& rescaling,
const VideoCommon::SamplerId*& samplers,
const VideoCommon::ImageViewInOut*& views) {
const Framebuffer* framebuffer = texture_cache.GetFramebuffer();
const auto& feedback_req = texture_cache.PeekFeedbackLoopRequest();
const auto choose_layout = [&](const ImageView& image_view) -> VkImageLayout {
if (!feedback_req.active || !feedback_req.supported || framebuffer == nullptr) {
return VK_IMAGE_LAYOUT_GENERAL;
}
const VkImage descriptor_image = image_view.ImageHandle();
if (descriptor_image == VK_NULL_HANDLE) {
return VK_IMAGE_LAYOUT_GENERAL;
}
const VkImageSubresourceRange view_range = detail::NormalizeImageViewRange(image_view);
for (u32 slot = 0; slot < VideoCommon::NUM_RT; ++slot) {
if (((feedback_req.color_mask >> slot) & 1u) == 0) {
continue;
}
if (framebuffer->ColorImage(slot) != descriptor_image) {
continue;
}
const VkImageSubresourceRange* attachment_range = framebuffer->ColorSubresourceRange(slot);
if (attachment_range == nullptr) {
continue;
}
if (detail::SubresourceRangeIntersects(view_range, *attachment_range)) {
return VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT;
}
}
if (feedback_req.depth) {
const VkImage depth_image = framebuffer->DepthStencilImage();
if (depth_image == descriptor_image) {
const VkImageSubresourceRange* attachment_range =
framebuffer->DepthStencilSubresourceRange();
if (attachment_range != nullptr &&
detail::SubresourceRangeIntersects(view_range, *attachment_range)) {
return VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT;
}
}
}
return VK_IMAGE_LAYOUT_GENERAL;
};
const u32 num_texture_buffers = Shader::NumDescriptors(info.texture_buffer_descriptors);
const u32 num_image_buffers = Shader::NumDescriptors(info.image_buffer_descriptors);
views += num_texture_buffers;
@ -195,7 +294,8 @@ inline void PushImageDescriptors(TextureCache& texture_cache,
!image_view.SupportsAnisotropy()};
const VkSampler vk_sampler{use_fallback_sampler ? sampler.HandleWithDefaultAnisotropy()
: sampler.Handle()};
guest_descriptor_queue.AddSampledImage(vk_image_view, vk_sampler);
const VkImageLayout layout = choose_layout(image_view);
guest_descriptor_queue.AddSampledImage(vk_image_view, vk_sampler, layout);
rescaling.PushTexture(texture_cache.IsRescaling(image_view));
}
}
@ -206,7 +306,8 @@ inline void PushImageDescriptors(TextureCache& texture_cache,
texture_cache.MarkModification(image_view.image_id);
}
const VkImageView vk_image_view{image_view.StorageView(desc.type, desc.format)};
guest_descriptor_queue.AddImage(vk_image_view);
const VkImageLayout layout = VK_IMAGE_LAYOUT_GENERAL; // Storage images must remain in GENERAL layout
guest_descriptor_queue.AddImage(vk_image_view, layout);
rescaling.PushImage(texture_cache.IsRescaling(image_view));
}
}

View file

@ -135,17 +135,6 @@ RenderPassKey MakeRenderPassKey(const FixedPipelineState& state) {
return key;
}
size_t NumAttachments(const FixedPipelineState& state) {
size_t num{};
for (size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
const auto format{static_cast<Tegra::RenderTargetFormat>(state.color_formats[index])};
if (format != Tegra::RenderTargetFormat::NONE) {
num = index + 1;
}
}
return num;
}
template <typename Spec>
bool Passes(const std::array<vk::ShaderModule, NUM_STAGES>& modules,
const std::array<Shader::Info, NUM_STAGES>& stage_infos) {
@ -460,6 +449,10 @@ bool GraphicsPipeline::ConfigureImpl(bool is_indexed) {
buffer_cache.UpdateGraphicsBuffers(is_indexed);
buffer_cache.BindHostGeometryBuffers(is_indexed);
// Ensure framebuffer and feedback-loop state are ready before writing descriptors
texture_cache.UpdateRenderTargets(false);
texture_cache.CheckFeedbackLoop(views);
guest_descriptor_queue.Acquire();
RescalingPushConstant rescaling;
@ -492,8 +485,6 @@ bool GraphicsPipeline::ConfigureImpl(bool is_indexed) {
if constexpr (Spec::enabled_stages[4]) {
prepare_stage(4);
}
texture_cache.UpdateRenderTargets(false);
texture_cache.CheckFeedbackLoop(views);
ConfigureDraw(rescaling, render_area);
return true;
@ -501,7 +492,6 @@ bool GraphicsPipeline::ConfigureImpl(bool is_indexed) {
void GraphicsPipeline::ConfigureDraw(const RescalingPushConstant& rescaling,
const RenderAreaPushConstant& render_area) {
scheduler.RequestRenderpass(texture_cache.GetFramebuffer());
if (!is_built.load(std::memory_order::relaxed)) {
// Wait for the pipeline to be built
scheduler.Record([this](vk::CommandBuffer) {
@ -770,14 +760,28 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
LOG_WARNING(Render_Vulkan, "Depth bounds is enabled but not supported");
}
static_vector<VkPipelineColorBlendAttachmentState, Maxwell::NumRenderTargets> cb_attachments;
const size_t num_attachments{NumAttachments(key.state)};
for (size_t index = 0; index < num_attachments; ++index) {
for (size_t index = 0; index < Maxwell::NumRenderTargets; ++index) {
static constexpr std::array mask_table{
VK_COLOR_COMPONENT_R_BIT,
VK_COLOR_COMPONENT_G_BIT,
VK_COLOR_COMPONENT_B_BIT,
VK_COLOR_COMPONENT_A_BIT,
};
const auto format{
static_cast<Tegra::RenderTargetFormat>(key.state.color_formats[index])};
if (format == Tegra::RenderTargetFormat::NONE) {
cb_attachments.push_back({
.blendEnable = VK_FALSE,
.srcColorBlendFactor = VK_BLEND_FACTOR_ONE,
.dstColorBlendFactor = VK_BLEND_FACTOR_ZERO,
.colorBlendOp = VK_BLEND_OP_ADD,
.srcAlphaBlendFactor = VK_BLEND_FACTOR_ONE,
.dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO,
.alphaBlendOp = VK_BLEND_OP_ADD,
.colorWriteMask = 0,
});
continue;
}
const auto& blend{key.state.attachments[index]};
const std::array mask{blend.Mask()};
VkColorComponentFlags write_mask{};
@ -901,6 +905,14 @@ void GraphicsPipeline::MakePipeline(VkRenderPass render_pass) {
if (device.IsKhrPipelineExecutablePropertiesEnabled() && Settings::values.renderer_debug.GetValue()) {
flags |= VK_PIPELINE_CREATE_CAPTURE_STATISTICS_BIT_KHR;
}
if (device.IsAttachmentFeedbackLoopLayoutSupported()) {
if (key.state.AttachmentFeedbackMask() != 0) {
flags |= VK_PIPELINE_CREATE_COLOR_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT;
}
if (key.state.HasDepthAttachmentFeedback()) {
flags |= VK_PIPELINE_CREATE_DEPTH_STENCIL_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT;
}
}
pipeline = device.GetLogical().CreateGraphicsPipeline(
{

View file

@ -82,6 +82,10 @@ public:
const std::array<const Shader::Info*, NUM_STAGES>& infos);
// True if this pipeline was created with VK_DYNAMIC_STATE_VERTEX_INPUT_EXT
bool HasDynamicVertexInput() const noexcept { return key.state.dynamic_vertex_input; }
/// Color attachment feedback mask stored in this pipeline's key state.
u8 AttachmentFeedbackMask() const noexcept {
    return key.state.AttachmentFeedbackMask();
}
/// True when this pipeline's key state has depth attachment feedback set.
bool HasDepthAttachmentFeedback() const noexcept {
    return key.state.HasDepthAttachmentFeedback();
}
GraphicsPipeline& operator=(GraphicsPipeline&&) noexcept = delete;
GraphicsPipeline(GraphicsPipeline&&) noexcept = delete;

View file

@ -430,6 +430,11 @@ PipelineCache::~PipelineCache() {
}
}
/// Stores the attachment feedback state to use for subsequent graphics pipeline lookups.
/// Nothing else is touched here; the values are only remembered.
void PipelineCache::SetAttachmentFeedback(u8 color_mask, bool depth_feedback) {
    pending_depth_feedback = depth_feedback;
    pending_feedback_mask = color_mask;
}
GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() {
if (!RefreshStages(graphics_key.unique_hashes)) {
@ -437,6 +442,7 @@ GraphicsPipeline* PipelineCache::CurrentGraphicsPipeline() {
return nullptr;
}
graphics_key.state.Refresh(*maxwell3d, dynamic_features);
graphics_key.state.SetAttachmentFeedback(pending_feedback_mask, pending_depth_feedback);
if (current_pipeline) {
GraphicsPipeline* const next{current_pipeline->Next(graphics_key)};

View file

@ -107,6 +107,7 @@ public:
~PipelineCache();
[[nodiscard]] GraphicsPipeline* CurrentGraphicsPipeline();
void SetAttachmentFeedback(u8 color_mask, bool depth_feedback);
[[nodiscard]] ComputePipeline* CurrentComputePipeline();
@ -154,6 +155,8 @@ private:
GraphicsPipelineCacheKey graphics_key{};
GraphicsPipeline* current_pipeline{};
u8 pending_feedback_mask{};
bool pending_depth_feedback{};
std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<ComputePipeline>> compute_cache;
std::unordered_map<GraphicsPipelineCacheKey, std::unique_ptr<GraphicsPipeline>> graphics_cache;

View file

@ -166,7 +166,7 @@ public:
});
// Manually restart the render pass (required for vkCmdClearAttachments, etc.)
scheduler.RequestRenderpass(texture_cache.GetFramebuffer());
scheduler.RequestRenderpass(texture_cache.GetFramebuffer(), 0, false);
// Begin query inside the newly started render pass
scheduler.Record([query_pool = current_query_pool,

View file

@ -11,6 +11,7 @@
#include "video_core/renderer_vulkan/renderer_vulkan.h"
#include "common/common_types.h"
#include "common/assert.h"
#include "common/logging/log.h"
#include "common/scope_exit.h"
@ -210,26 +211,62 @@ void RasterizerVulkan::PrepareDraw(bool is_indexed, Func&& draw_func) {
FlushWork();
gpu_memory->FlushCaching();
GraphicsPipeline* const pipeline{pipeline_cache.CurrentGraphicsPipeline()};
if (!pipeline) {
return;
TextureCacheRuntime::FeedbackLoopRequest feedback_request{};
while (true) {
GraphicsPipeline* const pipeline{pipeline_cache.CurrentGraphicsPipeline()};
if (!pipeline) {
return;
}
bool configured = false;
{
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
pipeline->SetEngine(maxwell3d, gpu_memory);
configured = pipeline->Configure(is_indexed);
feedback_request = texture_cache.ConsumeFeedbackLoopRequest();
if (feedback_request.active && feedback_request.supported) {
const bool mask_matches = pipeline->AttachmentFeedbackMask() == feedback_request.color_mask;
const bool depth_matches = pipeline->HasDepthAttachmentFeedback() == feedback_request.depth;
if (!configured || !mask_matches || !depth_matches) {
pipeline_cache.SetAttachmentFeedback(feedback_request.color_mask, feedback_request.depth);
continue;
}
pipeline_cache.SetAttachmentFeedback(feedback_request.color_mask, feedback_request.depth);
} else if (feedback_request.active && !feedback_request.supported) {
pipeline_cache.SetAttachmentFeedback(0, false);
LOG_WARNING(Render_Vulkan, "Falling back to feedback loop copy path");
texture_cache_runtime.BarrierFeedbackLoop();
feedback_request = {};
if (!configured) {
return;
}
} else {
pipeline_cache.SetAttachmentFeedback(0, false);
}
if (!configured) {
return;
}
const u8 feedback_mask =
(feedback_request.active && feedback_request.supported) ? feedback_request.color_mask : 0;
const bool depth_feedback =
(feedback_request.active && feedback_request.supported) ? feedback_request.depth : false;
const Framebuffer* const framebuffer = texture_cache.GetFramebuffer();
scheduler.RequestRenderpass(framebuffer, feedback_mask, depth_feedback);
UpdateDynamicStates();
HandleTransformFeedback();
query_cache.NotifySegment(true);
query_cache.CounterEnable(VideoCommon::QueryType::ZPassPixelCount64,
maxwell3d->regs.zpass_pixel_count_enable);
draw_func();
query_cache.CounterEnable(VideoCommon::QueryType::StreamingByteCount, false);
return;
}
}
std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
// update engine as channel may be different.
pipeline->SetEngine(maxwell3d, gpu_memory);
if (!pipeline->Configure(is_indexed))
return;
UpdateDynamicStates();
HandleTransformFeedback();
query_cache.NotifySegment(true);
query_cache.CounterEnable(VideoCommon::QueryType::ZPassPixelCount64,
maxwell3d->regs.zpass_pixel_count_enable);
draw_func();
query_cache.CounterEnable(VideoCommon::QueryType::StreamingByteCount, false);
}
void RasterizerVulkan::Draw(bool is_indexed, u32 instance_count) {
@ -363,7 +400,7 @@ void RasterizerVulkan::Clear(u32 layer_count) {
texture_cache.UpdateRenderTargets(true);
const Framebuffer* const framebuffer = texture_cache.GetFramebuffer();
const VkExtent2D render_area = framebuffer->RenderArea();
scheduler.RequestRenderpass(framebuffer);
scheduler.RequestRenderpass(framebuffer, 0, false);
query_cache.NotifySegment(true);
query_cache.CounterEnable(VideoCommon::QueryType::ZPassPixelCount64,

View file

@ -82,9 +82,11 @@ VkRenderPass RenderPassCache::Get(const RenderPassKey& key) {
for (size_t index = 0; index < key.color_formats.size(); ++index) {
const PixelFormat format{key.color_formats[index]};
const bool is_valid{format != PixelFormat::Invalid};
const bool feedback = (key.color_feedback_mask & (1u << index)) != 0;
references[index] = VkAttachmentReference{
.attachment = is_valid ? num_colors : VK_ATTACHMENT_UNUSED,
.layout = VK_IMAGE_LAYOUT_GENERAL,
.layout = feedback ? VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT
: VK_IMAGE_LAYOUT_GENERAL,
};
if (is_valid) {
descriptions.push_back(AttachmentDescription(*device, format, key.samples));
@ -97,7 +99,8 @@ VkRenderPass RenderPassCache::Get(const RenderPassKey& key) {
if (key.depth_format != PixelFormat::Invalid) {
depth_reference = VkAttachmentReference{
.attachment = num_colors,
.layout = VK_IMAGE_LAYOUT_GENERAL,
.layout = key.depth_feedback ? VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT
: VK_IMAGE_LAYOUT_GENERAL,
};
descriptions.push_back(AttachmentDescription(*device, key.depth_format, key.samples));
}
@ -113,9 +116,13 @@ VkRenderPass RenderPassCache::Get(const RenderPassKey& key) {
.preserveAttachmentCount = 0,
.pPreserveAttachments = nullptr,
};
const VkSubpassDependency dependency{
.srcSubpass = 0, // Current subpass
.dstSubpass = 0, // Same subpass (self-dependency)
const bool feedback_enabled = device->IsAttachmentFeedbackLoopLayoutSupported() &&
(key.color_feedback_mask != 0 || key.depth_feedback);
std::array<VkSubpassDependency, 2> dependencies{
VkSubpassDependency{
.srcSubpass = 0,
.dstSubpass = 0,
.srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT |
VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT,
@ -123,8 +130,28 @@ VkRenderPass RenderPassCache::Get(const RenderPassKey& key) {
.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
.dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
.dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT
.dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT |
(feedback_enabled ? VK_DEPENDENCY_FEEDBACK_LOOP_BIT_EXT : 0u),
},
VkSubpassDependency{}
};
u32 dependency_count = 1;
if (feedback_enabled) {
dependencies[1] = VkSubpassDependency{
.srcSubpass = 0,
.dstSubpass = 0,
.srcStageMask = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
.dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT |
VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT,
.srcAccessMask = VK_ACCESS_SHADER_READ_BIT,
.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
.dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT | VK_DEPENDENCY_FEEDBACK_LOOP_BIT_EXT,
};
dependency_count = 2;
}
pair->second = device->GetLogical().CreateRenderPass({
.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
.pNext = nullptr,
@ -133,8 +160,8 @@ VkRenderPass RenderPassCache::Get(const RenderPassKey& key) {
.pAttachments = descriptions.empty() ? nullptr : descriptions.data(),
.subpassCount = 1,
.pSubpasses = &subpass,
.dependencyCount = 1,
.pDependencies = &dependency,
.dependencyCount = dependency_count,
.pDependencies = dependencies.data(),
});
return *pair->second;
}

View file

@ -3,11 +3,16 @@
#pragma once
#include <array>
#include <cstddef>
#include <cstdint>
#include <mutex>
#include <type_traits>
#include <unordered_map>
#include "video_core/surface.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
#include "common/container_hash.h"
namespace Vulkan {
@ -17,6 +22,8 @@ struct RenderPassKey {
std::array<VideoCore::Surface::PixelFormat, 8> color_formats;
VideoCore::Surface::PixelFormat depth_format;
VkSampleCountFlagBits samples;
std::uint8_t color_feedback_mask{};
bool depth_feedback{};
};
} // namespace Vulkan
@ -24,11 +31,19 @@ struct RenderPassKey {
namespace std {
template <>
struct hash<Vulkan::RenderPassKey> {
[[nodiscard]] size_t operator()(const Vulkan::RenderPassKey& key) const noexcept {
size_t value = static_cast<size_t>(key.depth_format) << 48;
value ^= static_cast<size_t>(key.samples) << 52;
for (size_t i = 0; i < key.color_formats.size(); ++i) {
value ^= static_cast<size_t>(key.color_formats[i]) << (i * 6);
[[nodiscard]] std::size_t operator()(const Vulkan::RenderPassKey& key) const noexcept {
using PixelFormatUnderlying =
std::make_unsigned_t<std::underlying_type_t<VideoCore::Surface::PixelFormat>>;
using SampleCountUnderlying =
std::make_unsigned_t<std::underlying_type_t<VkSampleCountFlagBits>>;
std::size_t value = 0;
Common::HashCombine(value, static_cast<PixelFormatUnderlying>(key.depth_format));
Common::HashCombine(value, static_cast<SampleCountUnderlying>(key.samples));
Common::HashCombine(value, key.color_feedback_mask);
Common::HashCombine(value, static_cast<std::uint8_t>(key.depth_feedback));
for (const auto& format : key.color_formats) {
Common::HashCombine(value, static_cast<PixelFormatUnderlying>(format));
}
return value;
}

View file

@ -89,19 +89,24 @@ void Scheduler::DispatchWork() {
AcquireNewChunk();
}
void Scheduler::RequestRenderpass(const Framebuffer* framebuffer) {
const VkRenderPass renderpass = framebuffer->RenderPass();
void Scheduler::RequestRenderpass(const Framebuffer* framebuffer, std::uint8_t color_feedback_mask,
bool depth_feedback) {
const VkRenderPass renderpass = framebuffer->RenderPass(color_feedback_mask, depth_feedback);
const VkFramebuffer framebuffer_handle = framebuffer->Handle();
const VkExtent2D render_area = framebuffer->RenderArea();
if (renderpass == state.renderpass && framebuffer_handle == state.framebuffer &&
render_area.width == state.render_area.width &&
render_area.height == state.render_area.height) {
render_area.height == state.render_area.height &&
state.renderpass_color_feedback_mask == color_feedback_mask &&
state.renderpass_depth_feedback == depth_feedback) {
return;
}
EndRenderPass();
state.renderpass = renderpass;
state.framebuffer = framebuffer_handle;
state.render_area = render_area;
state.renderpass_color_feedback_mask = color_feedback_mask;
state.renderpass_depth_feedback = depth_feedback;
Record([renderpass, framebuffer_handle, render_area](vk::CommandBuffer cmdbuf) {
const VkRenderPassBeginInfo renderpass_bi{
@ -338,6 +343,8 @@ void Scheduler::EndRenderPass()
});
state.renderpass = nullptr;
state.renderpass_color_feedback_mask = 0;
state.renderpass_depth_feedback = false;
num_renderpass_images = 0;
}

View file

@ -53,7 +53,8 @@ public:
void DispatchWork();
/// Requests to begin a renderpass.
void RequestRenderpass(const Framebuffer* framebuffer);
void RequestRenderpass(const Framebuffer* framebuffer, std::uint8_t color_feedback_mask,
bool depth_feedback);
/// Requests the current execution context to be able to execute operations only allowed outside
/// of a renderpass.
@ -211,6 +212,8 @@ private:
VkRenderPass renderpass = nullptr;
VkFramebuffer framebuffer = nullptr;
VkExtent2D render_area = {0, 0};
std::uint8_t renderpass_color_feedback_mask = 0;
bool renderpass_depth_feedback = false;
GraphicsPipeline* graphics_pipeline = nullptr;
bool is_rescaling = false;
bool rescaling_defined = false;

View file

@ -6,6 +6,7 @@
#include <algorithm>
#include <array>
#include <limits>
#include <span>
#include <memory>
#include <vector>
@ -137,7 +138,7 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
flags |= VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT;
}
const auto [samples_x, samples_y] = VideoCommon::SamplesLog2(info.num_samples);
return VkImageCreateInfo{
VkImageCreateInfo image_ci{
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
.pNext = nullptr,
.flags = flags,
@ -158,6 +159,13 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
.pQueueFamilyIndices = nullptr,
.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
};
if (device.IsAttachmentFeedbackLoopLayoutSupported() && format_info.attachable &&
(image_ci.usage & (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)) != 0) {
if (device.SupportsAttachmentFeedbackLoop(format_info.format, FormatType::Optimal)) {
image_ci.usage |= VK_IMAGE_USAGE_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT;
}
}
return image_ci;
}
[[nodiscard]] vk::Image MakeImage(const Device& device, const MemoryAllocator& allocator,
@ -943,6 +951,19 @@ void TextureCacheRuntime::BarrierFeedbackLoop() {
scheduler.RequestOutsideRenderPassOperationContext();
}
/// Stores the feedback loop state that a later Peek/Consume call will report.
/// @param color_mask Bit mask of color attachments taking part in a feedback loop.
/// @param depth      True when the depth attachment takes part in a feedback loop.
/// @param supported  Caller-provided flag, stored unchanged.
/// The request is marked active whenever the mask is non-zero or the depth flag is set.
void TextureCacheRuntime::SetFeedbackLoopRequest(u8 color_mask, bool depth, bool supported) {
    const bool any_attachment_involved = depth || color_mask != 0;
    pending_feedback_request = FeedbackLoopRequest{
        .active = any_attachment_involved,
        .color_mask = color_mask,
        .depth = depth,
        .supported = supported,
    };
}
/// Returns a copy of the pending feedback loop request and resets the stored request
/// to its default-initialized state.
TextureCacheRuntime::FeedbackLoopRequest TextureCacheRuntime::ConsumeFeedbackLoopRequest() {
    const FeedbackLoopRequest taken = pending_feedback_request;
    pending_feedback_request = FeedbackLoopRequest{};
    return taken;
}
void TextureCacheRuntime::ReinterpretImage(Image& dst, Image& src,
std::span<const VideoCommon::ImageCopy> copies) {
boost::container::small_vector<VkBufferImageCopy, 16> vk_in_copies(copies.size());
@ -1204,7 +1225,7 @@ void TextureCacheRuntime::BlitImage(Framebuffer* dst_framebuffer, ImageView& dst
}
void TextureCacheRuntime::ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view) {
if (!dst->RenderPass()) {
if (!dst->RenderPass(0, false)) {
return;
}
@ -1347,6 +1368,20 @@ VkFormat TextureCacheRuntime::GetSupportedFormat(VkFormat requested_format,
return requested_format;
}
/// Checks whether the Vulkan format chosen for `format` can be used in an attachment
/// feedback loop.
/// @param format   Guest pixel format of the attachment.
/// @param is_depth True for the depth attachment; the sRGB lookup is skipped in that case.
/// @return False when the device reports no feedback loop layout support, when the resolved
///         surface format is not attachable, or when the device rejects the resolved format.
bool TextureCacheRuntime::SupportsAttachmentFeedbackLoopFormat(VideoCore::Surface::PixelFormat format,
                                                               bool is_depth) const {
    if (device.IsAttachmentFeedbackLoopLayoutSupported()) {
        // Only color formats are ever resolved as sRGB.
        const bool use_srgb = is_depth ? false : VideoCore::Surface::IsPixelFormatSRGB(format);
        const auto surface =
            MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, use_srgb, format);
        return surface.attachable &&
               device.SupportsAttachmentFeedbackLoop(surface.format, FormatType::Optimal);
    }
    return false;
}
// Helper functions for format compatibility checks
bool TextureCacheRuntime::IsFormatDitherable(PixelFormat format) {
switch (format) {
@ -2296,6 +2331,7 @@ void Framebuffer::CreateFramebuffer(TextureCacheRuntime& runtime,
s32 num_layers = 1;
is_rescaled = is_rescaled_;
rt_map.fill(std::numeric_limits<size_t>::max());
const auto& resolution = runtime.resolution;
u32 width = (std::numeric_limits<u32>::max)();
@ -2340,6 +2376,8 @@ void Framebuffer::CreateFramebuffer(TextureCacheRuntime& runtime,
}
renderpass_key.samples = samples;
base_key = renderpass_key;
render_pass_cache = &runtime.render_pass_cache;
renderpass = runtime.render_pass_cache.Get(renderpass_key);
render_area.width = (std::min)(render_area.width, width);
render_area.height = (std::min)(render_area.height, height);
@ -2358,6 +2396,66 @@ void Framebuffer::CreateFramebuffer(TextureCacheRuntime& runtime,
});
}
/// Returns the render pass to use with this framebuffer for the requested feedback state.
/// With no feedback requested the stored base render pass is returned; otherwise a copy of
/// the base key is tagged with the feedback flags and looked up in the render pass cache.
/// NOTE(review): this function is noexcept but calls render_pass_cache->Get(); whether that
/// lookup can throw, and whether render_pass_cache can be null here, is not visible from this
/// block — confirm.
VkRenderPass Framebuffer::RenderPass(std::uint8_t color_feedback_mask, bool depth_feedback) const noexcept {
    const bool wants_feedback = depth_feedback || color_feedback_mask != 0;
    if (!wants_feedback) {
        return renderpass;
    }
    RenderPassKey feedback_key = base_key;
    feedback_key.depth_feedback = depth_feedback;
    feedback_key.color_feedback_mask = color_feedback_mask;
    return render_pass_cache->Get(feedback_key);
}
/// Returns the image bound to color render target `slot`, or VK_NULL_HANDLE when the slot is
/// out of range, maps to no stored image, or the stored range lacks the color aspect.
VkImage Framebuffer::ColorImage(size_t slot) const noexcept {
    if (slot < NUM_RT) {
        const size_t image_index = rt_map[slot];
        // The bounds test must come first so image_ranges is never read out of range.
        const bool is_color = image_index < num_images &&
                              (image_ranges[image_index].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) != 0;
        if (is_color) {
            return images[image_index];
        }
    }
    return VK_NULL_HANDLE;
}
/// Returns the image stored at index num_color_buffers when the framebuffer has depth or
/// stencil, or VK_NULL_HANDLE when it has neither or that index is not a stored image.
VkImage Framebuffer::DepthStencilImage() const noexcept {
    const bool has_depth_stencil = has_depth || has_stencil;
    const size_t image_index = num_color_buffers;
    if (has_depth_stencil && image_index < num_images) {
        return images[image_index];
    }
    return VK_NULL_HANDLE;
}
/// Returns the subresource range recorded for color render target `slot`, or nullptr when the
/// slot is out of range, unmapped, or maps outside the stored color images.
const VkImageSubresourceRange* Framebuffer::ColorSubresourceRange(size_t slot) const noexcept {
    if (slot >= NUM_RT) {
        return nullptr;
    }
    const size_t image_index = rt_map[slot];
    const bool is_unmapped = image_index == std::numeric_limits<size_t>::max();
    const bool is_stored_color =
        !is_unmapped && image_index < num_color_buffers && image_index < num_images;
    return is_stored_color ? &image_ranges[image_index] : nullptr;
}
/// Returns the subresource range stored at index num_color_buffers when the framebuffer has
/// depth or stencil, or nullptr when it has neither or that index is not a stored image.
const VkImageSubresourceRange* Framebuffer::DepthStencilSubresourceRange() const noexcept {
    const bool has_depth_stencil = has_depth || has_stencil;
    const size_t image_index = num_color_buffers;
    const bool is_stored = has_depth_stencil && image_index < num_images;
    return is_stored ? &image_ranges[image_index] : nullptr;
}
void TextureCacheRuntime::AccelerateImageUpload(
Image& image, const StagingBufferRef& map,
std::span<const VideoCommon::SwizzleParameters> swizzles) {

View file

@ -4,11 +4,14 @@
#pragma once
#include <span>
#include <cstdint>
#include "common/common_types.h"
#include "video_core/texture_cache/texture_cache_base.h"
#include "shader_recompiler/shader_info.h"
#include "video_core/renderer_vulkan/vk_compute_pass.h"
#include "video_core/renderer_vulkan/vk_render_pass_cache.h"
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
#include "video_core/texture_cache/image_view_base.h"
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
@ -38,6 +41,13 @@ class Scheduler;
class TextureCacheRuntime {
public:
/// Attachment feedback loop state stored by SetFeedbackLoopRequest and handed back by
/// PeekFeedbackLoopRequest / ConsumeFeedbackLoopRequest.
struct FeedbackLoopRequest {
bool active{}; ///< True when color_mask is non-zero or depth is set.
u8 color_mask{}; ///< Bit mask of color attachments involved in the feedback loop.
bool depth{}; ///< True when the depth attachment is involved in the feedback loop.
bool supported{}; ///< Support flag passed by the caller of SetFeedbackLoopRequest.
};
explicit TextureCacheRuntime(const Device& device_, Scheduler& scheduler_,
MemoryAllocator& memory_allocator_,
StagingBufferPool& staging_buffer_pool_,
@ -109,11 +119,17 @@ public:
}
void BarrierFeedbackLoop();
void SetFeedbackLoopRequest(u8 color_mask, bool depth, bool supported);
FeedbackLoopRequest ConsumeFeedbackLoopRequest();
/// Returns a reference to the pending feedback loop request without resetting it.
const FeedbackLoopRequest& PeekFeedbackLoopRequest() const noexcept {
return pending_feedback_request;
}
bool IsFormatDitherable(VideoCore::Surface::PixelFormat format);
bool IsFormatScalable(VideoCore::Surface::PixelFormat format);
VkFormat GetSupportedFormat(VkFormat requested_format, VkFormatFeatureFlags required_features) const;
bool SupportsAttachmentFeedbackLoopFormat(VideoCore::Surface::PixelFormat format, bool is_depth) const;
const Device& device;
Scheduler& scheduler;
@ -126,6 +142,8 @@ public:
const Settings::ResolutionScalingInfo& resolution;
std::array<std::vector<VkFormat>, VideoCore::Surface::MaxPixelFormat> view_formats;
FeedbackLoopRequest pending_feedback_request{};
static constexpr size_t indexing_slots = 8 * sizeof(size_t);
std::array<vk::Buffer, indexing_slots> buffers{};
};
@ -332,9 +350,8 @@ public:
return *framebuffer;
}
[[nodiscard]] VkRenderPass RenderPass() const noexcept {
return renderpass;
}
[[nodiscard]] VkRenderPass RenderPass(std::uint8_t color_feedback_mask,
bool depth_feedback) const noexcept;
[[nodiscard]] VkExtent2D RenderArea() const noexcept {
return render_area;
@ -376,9 +393,18 @@ public:
return is_rescaled;
}
[[nodiscard]] VkImage ColorImage(size_t slot) const noexcept;
[[nodiscard]] VkImage DepthStencilImage() const noexcept;
[[nodiscard]] const VkImageSubresourceRange* ColorSubresourceRange(size_t slot) const noexcept;
[[nodiscard]] const VkImageSubresourceRange* DepthStencilSubresourceRange() const noexcept;
private:
vk::Framebuffer framebuffer;
VkRenderPass renderpass{};
RenderPassKey base_key{};
RenderPassCache* render_pass_cache{};
VkExtent2D render_area{};
VkSampleCountFlagBits samples = VK_SAMPLE_COUNT_1_BIT;
u32 num_color_buffers = 0;
@ -411,3 +437,4 @@ struct TextureCacheParams {
using TextureCache = VideoCommon::TextureCache<TextureCacheParams>;
} // namespace Vulkan

View file

@ -55,6 +55,15 @@ public:
};
}
/// Appends a combined image/sampler descriptor entry that uses the caller-provided image
/// layout, then advances the payload cursor by one entry.
void AddSampledImage(VkImageView image_view, VkSampler sampler, VkImageLayout layout) {
    const auto entry = payload_cursor++;
    *entry = VkDescriptorImageInfo{
        .sampler = sampler,
        .imageView = image_view,
        .imageLayout = layout,
    };
}
void AddImage(VkImageView image_view) {
*(payload_cursor++) = VkDescriptorImageInfo{
.sampler = VK_NULL_HANDLE,
@ -63,6 +72,14 @@ public:
};
}
/// Appends a sampler-less image descriptor entry that uses the caller-provided image layout,
/// then advances the payload cursor by one entry.
void AddImage(VkImageView image_view, VkImageLayout layout) {
    const auto entry = payload_cursor++;
    *entry = VkDescriptorImageInfo{
        .sampler = VK_NULL_HANDLE,
        .imageView = image_view,
        .imageLayout = layout,
    };
}
void AddBuffer(VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size) {
*(payload_cursor++) = VkDescriptorBufferInfo{
.buffer = buffer,

View file

@ -208,41 +208,68 @@ void TextureCache<P>::FillComputeImageViews(std::span<ImageViewInOut> views) {
template <class P>
void TextureCache<P>::CheckFeedbackLoop(std::span<const ImageViewInOut> views) {
if (!Settings::values.barrier_feedback_loops.GetValue()) {
runtime.SetFeedbackLoopRequest(0, false, true);
return;
}
const bool requires_barrier = [&] {
for (const auto& view : views) {
if (!view.id) {
u8 color_mask = 0;
bool depth_feedback = false;
for (const auto& view : views) {
if (!view.id) {
continue;
}
const auto& image_view = slot_image_views[view.id];
for (std::size_t index = 0; index < render_targets.color_buffer_ids.size(); ++index) {
const auto ct_view_id = render_targets.color_buffer_ids[index];
if (!ct_view_id) {
continue;
}
auto& image_view = slot_image_views[view.id];
// Check color targets
for (const auto& ct_view_id : render_targets.color_buffer_ids) {
if (ct_view_id) {
auto& ct_view = slot_image_views[ct_view_id];
if (image_view.image_id == ct_view.image_id) {
return true;
}
}
}
// Check zeta target
if (render_targets.depth_buffer_id) {
auto& zt_view = slot_image_views[render_targets.depth_buffer_id];
if (image_view.image_id == zt_view.image_id) {
return true;
}
const auto& ct_view = slot_image_views[ct_view_id];
if (image_view.image_id == ct_view.image_id) {
color_mask |= static_cast<u8>(1u << index);
}
}
return false;
}();
if (requires_barrier) {
runtime.BarrierFeedbackLoop();
if (render_targets.depth_buffer_id) {
const auto& depth_view = slot_image_views[render_targets.depth_buffer_id];
if (image_view.image_id == depth_view.image_id) {
depth_feedback = true;
}
}
}
if (color_mask == 0 && !depth_feedback) {
runtime.SetFeedbackLoopRequest(0, false, true);
return;
}
bool supported = true;
if (color_mask != 0) {
for (std::size_t index = 0; index < render_targets.color_buffer_ids.size(); ++index) {
if (((color_mask >> index) & 1u) == 0) {
continue;
}
const auto ct_view_id = render_targets.color_buffer_ids[index];
if (!ct_view_id) {
continue;
}
const auto& ct_view = slot_image_views[ct_view_id];
supported &= runtime.SupportsAttachmentFeedbackLoopFormat(ct_view.format, false);
}
}
if (depth_feedback && render_targets.depth_buffer_id) {
const auto& depth_view = slot_image_views[render_targets.depth_buffer_id];
supported &= runtime.SupportsAttachmentFeedbackLoopFormat(depth_view.format, true);
}
runtime.SetFeedbackLoopRequest(color_mask, depth_feedback, supported);
}
/// Forwards to the runtime's PeekFeedbackLoopRequest and returns its result unchanged.
template <class P>
const typename P::Runtime::FeedbackLoopRequest& TextureCache<P>::PeekFeedbackLoopRequest() const noexcept {
return runtime.PeekFeedbackLoopRequest();
}
template <class P>
@ -472,6 +499,11 @@ typename P::Framebuffer* TextureCache<P>::GetFramebuffer() {
return &slot_framebuffers[GetFramebufferId(render_targets)];
}
/// Forwards to the runtime's ConsumeFeedbackLoopRequest and returns its result by value.
template <class P>
typename TextureCache<P>::Runtime::FeedbackLoopRequest TextureCache<P>::ConsumeFeedbackLoopRequest() {
return runtime.ConsumeFeedbackLoopRequest();
}
template <class P>
template <bool has_blacklists>
void TextureCache<P>::FillImageViews(DescriptorTable<TICEntry>& table,
@ -2548,3 +2580,4 @@ void TextureCache<P>::OnGPUASRegister([[maybe_unused]] size_t map_id) {
}
} // namespace VideoCommon

View file

@ -159,6 +159,9 @@ public:
/// Handle feedback loops during draws.
void CheckFeedbackLoop(std::span<const ImageViewInOut> views);
/// Peek current pending attachment feedback loop request (without consuming it)
[[nodiscard]] const typename P::Runtime::FeedbackLoopRequest& PeekFeedbackLoopRequest() const noexcept;
/// Get the sampler from the graphics descriptor table in the specified index
Sampler* GetGraphicsSampler(u32 index);
@ -195,6 +198,9 @@ public:
/// UpdateRenderTargets should be called before this
Framebuffer* GetFramebuffer();
/// Consume pending attachment feedback loop request
typename Runtime::FeedbackLoopRequest ConsumeFeedbackLoopRequest();
/// Mark images in a range as modified from the CPU
void WriteMemory(DAddr cpu_addr, size_t size);

View file

@ -376,10 +376,10 @@ void Device::RemoveExtension(bool& extension, const std::string& extension_name)
loaded_extensions.erase(extension_name);
}
void Device::RemoveExtensionIfUnsuitable(bool is_suitable, const std::string& extension_name) {
if (loaded_extensions.contains(extension_name) && !is_suitable) {
void Device::RemoveExtensionIfUnsuitable(bool& extension, const std::string& extension_name) {
if (!extension && loaded_extensions.contains(extension_name)) {
LOG_WARNING(Render_Vulkan, "Removing unsuitable extension {}", extension_name);
this->RemoveExtension(is_suitable, extension_name);
RemoveExtension(extension, extension_name);
}
}
@ -400,11 +400,11 @@ void Device::RemoveExtensionFeature(bool& extension, Feature& feature,
}
template <typename Feature>
void Device::RemoveExtensionFeatureIfUnsuitable(bool is_suitable, Feature& feature,
void Device::RemoveExtensionFeatureIfUnsuitable(bool& extension, Feature& feature,
const std::string& extension_name) {
if (loaded_extensions.contains(extension_name) && !is_suitable) {
if (!extension && loaded_extensions.contains(extension_name)) {
LOG_WARNING(Render_Vulkan, "Removing features for unsuitable extension {}", extension_name);
this->RemoveExtensionFeature(is_suitable, feature, extension_name);
RemoveExtensionFeature(extension, feature, extension_name);
}
}
@ -886,6 +886,65 @@ bool Device::IsFormatSupported(VkFormat wanted_format, VkFormatFeatureFlags want
return (supported_usage & wanted_usage) == wanted_usage;
}
/// Queries the format properties of `format` and reports whether an attachment feedback loop
/// format feature bit is present for the requested category (linear, optimal or buffer).
/// Returns false immediately when feedback loop layout support is off or the format is
/// VK_FORMAT_UNDEFINED. The first successful query logs a one-time info message.
/// NOTE(review): both feature tests are guarded by `#ifdef`, which only sees preprocessor
/// macros. If the Vulkan headers declare these names as enumerators/constants (or do not
/// declare them at all), both guards are skipped and this function can never return true —
/// confirm against the headers in use.
/// NOTE(review): the one-time log flag is a plain function-local static bool; confirm this is
/// only reached from one thread.
bool Device::SupportsAttachmentFeedbackLoop(VkFormat format, FormatType type) const noexcept {
    const bool can_query =
        supports_attachment_feedback_loop_layout && format != VK_FORMAT_UNDEFINED;
    if (!can_query) {
        return false;
    }

    // Chain the 64-bit feature struct behind the base query struct.
    VkFormatProperties3 extended_props{
        .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_3,
        .pNext = nullptr,
    };
    VkFormatProperties2 base_props{
        .sType = VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_2,
        .pNext = &extended_props,
    };
    physical.GetFormatProperties2(format, base_props);

    // Pick the feature masks matching the requested category; unknown categories keep zero.
    [[maybe_unused]] VkFormatFeatureFlags legacy_features = 0;
    [[maybe_unused]] VkFormatFeatureFlags2 extended_features = 0;
    if (type == FormatType::Linear) {
        legacy_features = base_props.formatProperties.linearTilingFeatures;
        extended_features = extended_props.linearTilingFeatures;
    } else if (type == FormatType::Optimal) {
        legacy_features = base_props.formatProperties.optimalTilingFeatures;
        extended_features = extended_props.optimalTilingFeatures;
    } else if (type == FormatType::Buffer) {
        legacy_features = base_props.formatProperties.bufferFeatures;
        extended_features = extended_props.bufferFeatures;
    }

#ifdef VK_FORMAT_FEATURE_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT
    const bool in_legacy_flags =
        (legacy_features & VK_FORMAT_FEATURE_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT) != 0;
#else
    const bool in_legacy_flags = false;
#endif
#ifdef VK_FORMAT_FEATURE_2_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT
    const bool in_extended_flags =
        (extended_features & VK_FORMAT_FEATURE_2_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT) != 0;
#else
    const bool in_extended_flags = false;
#endif

    if (!in_legacy_flags && !in_extended_flags) {
        return false;
    }

    // Announce success once for the lifetime of the process.
    static bool logged = false;
    if (!logged) {
        logged = true;
        LOG_INFO(Render_Vulkan, "Attachment feedback loop layout successfully enabled");
    }
    return true;
}
std::string Device::GetDriverName() const {
switch (properties.driver.driverID) {
case VK_DRIVER_ID_AMD_PROPRIETARY:
@ -1295,6 +1354,26 @@ void Device::RemoveUnsuitableExtensions() {
features.vertex_input_dynamic_state,
VK_EXT_VERTEX_INPUT_DYNAMIC_STATE_EXTENSION_NAME);
// VK_EXT_attachment_feedback_loop_layout
supports_attachment_feedback_loop_layout = false;
if (extensions.attachment_feedback_loop_layout) {
supports_attachment_feedback_loop_layout =
features.attachment_feedback_loop_layout.attachmentFeedbackLoopLayout;
if (!supports_attachment_feedback_loop_layout) {
LOG_WARNING(Render_Vulkan,
"VK_EXT_attachment_feedback_loop_layout advertised without reported feature support; disabling");
}
features.attachment_feedback_loop_layout.attachmentFeedbackLoopLayout =
supports_attachment_feedback_loop_layout ? VK_TRUE : VK_FALSE;
extensions.attachment_feedback_loop_layout =
supports_attachment_feedback_loop_layout;
}
RemoveExtensionFeatureIfUnsuitable(extensions.attachment_feedback_loop_layout,
features.attachment_feedback_loop_layout,
VK_EXT_ATTACHMENT_FEEDBACK_LOOP_LAYOUT_EXTENSION_NAME);
supports_attachment_feedback_loop_layout =
extensions.attachment_feedback_loop_layout;
// VK_KHR_pipeline_executable_properties
if (Settings::values.renderer_shader_feedback.GetValue()) {
extensions.pipeline_executable_properties =

View file

@ -58,6 +58,8 @@ VK_DEFINE_HANDLE(VmaAllocator)
FEATURE(EXT, Robustness2, ROBUSTNESS_2, robustness2) \
FEATURE(EXT, TransformFeedback, TRANSFORM_FEEDBACK, transform_feedback) \
FEATURE(EXT, VertexInputDynamicState, VERTEX_INPUT_DYNAMIC_STATE, vertex_input_dynamic_state) \
FEATURE(EXT, AttachmentFeedbackLoopLayout, ATTACHMENT_FEEDBACK_LOOP_LAYOUT, \
attachment_feedback_loop_layout) \
FEATURE(KHR, PipelineExecutableProperties, PIPELINE_EXECUTABLE_PROPERTIES, \
pipeline_executable_properties) \
FEATURE(KHR, WorkgroupMemoryExplicitLayout, WORKGROUP_MEMORY_EXPLICIT_LAYOUT, \
@ -515,6 +517,10 @@ public:
}
/// Returns true if attachment feedback loop layout support was detected on the device.
bool IsAttachmentFeedbackLoopLayoutSupported() const {
return supports_attachment_feedback_loop_layout;
}
/// Returns true if the device supports VK_EXT_custom_border_color.
bool IsExtCustomBorderColorSupported() const {
return extensions.custom_border_color;
}
@ -693,6 +699,9 @@ public:
return supports_conditional_barriers;
}
bool SupportsAttachmentFeedbackLoop(VkFormat format,
FormatType type = FormatType::Optimal) const noexcept;
bool SupportsMultiViewport() const {
return features2.features.multiViewport;
}
@ -734,13 +743,13 @@ private:
void RemoveUnsuitableExtensions();
void RemoveExtension(bool& extension, const std::string& extension_name);
void RemoveExtensionIfUnsuitable(bool is_suitable, const std::string& extension_name);
void RemoveExtensionIfUnsuitable(bool& extension, const std::string& extension_name);
template <typename Feature>
void RemoveExtensionFeature(bool& extension, Feature& feature,
const std::string& extension_name);
template <typename Feature>
void RemoveExtensionFeatureIfUnsuitable(bool is_suitable, Feature& feature,
void RemoveExtensionFeatureIfUnsuitable(bool& extension, Feature& feature,
const std::string& extension_name);
/// Sets up queue families.
@ -842,6 +851,7 @@ private:
bool must_emulate_bgr565{}; ///< Emulates BGR565 by swizzling RGB565 format.
bool dynamic_state3_blending{}; ///< Has all blending features of dynamic_state3.
bool dynamic_state3_enables{}; ///< Has all enables features of dynamic_state3.
bool supports_attachment_feedback_loop_layout{}; ///< Has attachment feedback loop layout support
bool supports_conditional_barriers{}; ///< Allows barriers in conditional control flow.
u64 device_access_memory{}; ///< Total size of device local memory in bytes.
u32 sets_per_pool{}; ///< Sets per Description Pool

View file

@ -300,7 +300,8 @@ bool Load(VkInstance instance, InstanceDispatch& dld) noexcept {
return X(vkCreateDevice) && X(vkDestroyDevice) && X(vkDestroyDevice) &&
X(vkEnumerateDeviceExtensionProperties) && X(vkEnumeratePhysicalDevices) &&
X(vkGetDeviceProcAddr) && X(vkGetPhysicalDeviceFormatProperties) &&
X(vkGetDeviceProcAddr) && X(vkGetPhysicalDeviceFormatProperties2) &&
X(vkGetPhysicalDeviceFormatProperties) &&
X(vkGetPhysicalDeviceMemoryProperties) && X(vkGetPhysicalDeviceMemoryProperties2) &&
X(vkGetPhysicalDeviceProperties) && X(vkGetPhysicalDeviceQueueFamilyProperties);
#undef X
@ -905,6 +906,28 @@ VkFormatProperties PhysicalDevice::GetFormatProperties(VkFormat format) const no
return properties;
}
/// Fills `properties` for `format`.
/// Uses vkGetPhysicalDeviceFormatProperties2 when that entry point was loaded. Otherwise it
/// falls back to vkGetPhysicalDeviceFormatProperties for the base struct and zeroes the
/// feature masks of every VkFormatProperties3 found in the pNext chain.
void PhysicalDevice::GetFormatProperties2(VkFormat format, VkFormatProperties2& properties) const noexcept {
    if (const auto query2 = dld->vkGetPhysicalDeviceFormatProperties2; query2 != nullptr) {
        query2(physical_device, format, &properties);
        return;
    }

    dld->vkGetPhysicalDeviceFormatProperties(physical_device, format, &properties.formatProperties);

    // The fallback cannot report extended features, so clear them; an empty chain is a no-op.
    auto* node = reinterpret_cast<VkBaseOutStructure*>(properties.pNext);
    while (node != nullptr) {
        if (node->sType == VK_STRUCTURE_TYPE_FORMAT_PROPERTIES_3) {
            auto* const extended = reinterpret_cast<VkFormatProperties3*>(node);
            extended->linearTilingFeatures = 0;
            extended->optimalTilingFeatures = 0;
            extended->bufferFeatures = 0;
        }
        node = node->pNext;
    }
}
std::vector<VkExtensionProperties> PhysicalDevice::EnumerateDeviceExtensionProperties() const {
u32 num;
dld->vkEnumerateDeviceExtensionProperties(physical_device, nullptr, &num, nullptr);

View file

@ -164,6 +164,7 @@ struct InstanceDispatch {
PFN_vkEnumeratePhysicalDevices vkEnumeratePhysicalDevices{};
PFN_vkGetDeviceProcAddr vkGetDeviceProcAddr{};
PFN_vkGetPhysicalDeviceFeatures2 vkGetPhysicalDeviceFeatures2{};
PFN_vkGetPhysicalDeviceFormatProperties2 vkGetPhysicalDeviceFormatProperties2{};
PFN_vkGetPhysicalDeviceFormatProperties vkGetPhysicalDeviceFormatProperties{};
PFN_vkGetPhysicalDeviceMemoryProperties vkGetPhysicalDeviceMemoryProperties{};
PFN_vkGetPhysicalDeviceMemoryProperties2 vkGetPhysicalDeviceMemoryProperties2{};
@ -1102,6 +1103,8 @@ public:
VkFormatProperties GetFormatProperties(VkFormat) const noexcept;
void GetFormatProperties2(VkFormat, VkFormatProperties2&) const noexcept;
std::vector<VkExtensionProperties> EnumerateDeviceExtensionProperties() const;
std::vector<VkQueueFamilyProperties> GetQueueFamilyProperties() const;