vk_texture_cache: Rewrite MSAA handling with blits and proper barriers

Refactor MSAA texture upload/download using intermediate blit with correct Vulkan barriers and layout handling.
This commit is contained in:
MrPurple666 2025-07-26 03:12:10 -03:00
parent b2b993b665
commit e46b1cd640

View file

@ -1457,7 +1457,7 @@ void TextureCacheRuntime::CopyImageMSAA(Image& dst, Image& src,
if (msaa_copy_pass) {
return msaa_copy_pass->CopyImage(dst, src, copies, msaa_to_non_msaa);
}
UNIMPLEMENTED_MSG("Copying images with different samples is not supported.");
LOG_WARNING(Render_Vulkan, "Copying images with different samples is not supported.");
}
u64 TextureCacheRuntime::GetDeviceLocalMemory() const {
@ -1533,54 +1533,94 @@ void Image::UploadMemory(VkBuffer buffer, VkDeviceSize offset,
// Handle MSAA upload if necessary
/* WARNING, TODO: This code uses some hacks, besides being fundamentally ugly
since tropic didn't want to touch it for a long time, so it needs a rewrite from someone better than me at vulkan.*/
if (info.num_samples > 1 && runtime->CanUploadMSAA()) {
// Only use MSAA copy pass for color formats
// TODO: Depth/stencil formats need special handling
if (aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT) {
if (info.num_samples > 1) {
// Create a temporary non-MSAA image to upload the data first
ImageInfo temp_info = info;
temp_info.num_samples = 1;
// Create image with same usage flags as the target image to avoid validation errors
VkImageCreateInfo image_ci = MakeImageCreateInfo(runtime->device, temp_info);
image_ci.usage = original_image.UsageFlags();
vk::Image temp_image = runtime->memory_allocator.CreateImage(image_ci);
VkImageCreateInfo temp_ci = MakeImageCreateInfo(runtime->device, temp_info);
temp_ci.usage |= VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT;
vk::Image temp_image = runtime->memory_allocator.CreateImage(temp_ci);
// Upload to the temporary non-MSAA image
scheduler->RequestOutsideRenderPassOperationContext();
auto vk_copies = TransformBufferImageCopies(copies, offset, aspect_mask);
const VkBuffer src_buffer = buffer;
const VkImage temp_vk_image = *temp_image;
const VkImageAspectFlags vk_aspect_mask = aspect_mask;
scheduler->Record([src_buffer, temp_vk_image, vk_aspect_mask, vk_copies](vk::CommandBuffer cmdbuf) {
CopyBufferToImage(cmdbuf, src_buffer, temp_vk_image, vk_aspect_mask, false, vk_copies);
});
auto vk_buffer_image_copies = TransformBufferImageCopies(copies, offset, aspect_mask);
// Use MSAACopyPass to convert from non-MSAA to MSAA
std::vector<VideoCommon::ImageCopy> image_copies;
boost::container::small_vector<VkImageBlit, 16> blit_regions;
blit_regions.reserve(copies.size());
for (const auto& copy : copies) {
VideoCommon::ImageCopy image_copy;
image_copy.src_offset = {0, 0, 0}; // Use zero offset for source
image_copy.dst_offset = copy.image_offset;
image_copy.src_subresource = copy.image_subresource;
image_copy.dst_subresource = copy.image_subresource;
image_copy.extent = copy.image_extent;
image_copies.push_back(image_copy);
blit_regions.emplace_back(VkImageBlit{
.srcSubresource = MakeImageSubresourceLayers(copy.image_subresource, aspect_mask),
.srcOffsets = {{copy.image_offset.x, copy.image_offset.y, copy.image_offset.z},
{static_cast<s32>(copy.image_offset.x + copy.image_extent.width),
static_cast<s32>(copy.image_offset.y + copy.image_extent.height),
static_cast<s32>(copy.image_offset.z + copy.image_extent.depth)}},
.dstSubresource = MakeImageSubresourceLayers(copy.image_subresource, aspect_mask),
.dstOffsets = {{copy.image_offset.x, copy.image_offset.y, copy.image_offset.z},
{static_cast<s32>(copy.image_offset.x + copy.image_extent.width),
static_cast<s32>(copy.image_offset.y + copy.image_extent.height),
static_cast<s32>(copy.image_offset.z + copy.image_extent.depth)}},
});
}
// wrapper image for the temporary image
Image temp_wrapper(*runtime, temp_info, 0, 0);
temp_wrapper.original_image = std::move(temp_image);
temp_wrapper.current_image = &Image::original_image;
temp_wrapper.aspect_mask = aspect_mask;
temp_wrapper.initialized = true;
const VkImage dst_vk_image = Handle();
const bool is_initialized = std::exchange(initialized, true);
// Use MSAACopyPass to convert from non-MSAA to MSAA
runtime->msaa_copy_pass->CopyImage(*this, temp_wrapper, image_copies, false);
std::exchange(initialized, true);
return;
}
// For depth/stencil formats, fall back to regular upload
scheduler->RequestOutsideRenderPassOperationContext();
scheduler->Record([=, temp_image = std::move(temp_image)](vk::CommandBuffer cmdbuf) {
// Upload to the temporary non-MSAA image
CopyBufferToImage(cmdbuf, buffer, *temp_image, aspect_mask, false,
vk_buffer_image_copies);
// Transition layouts for blit
const VkAccessFlags src_access_mask =
is_initialized
? (VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT)
: VK_ACCESS_NONE;
const std::array<VkImageMemoryBarrier, 2> pre_blit_barriers{
VkImageMemoryBarrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
.oldLayout = VK_IMAGE_LAYOUT_GENERAL,
.newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
.image = *temp_image,
.subresourceRange = {aspect_mask, 0, VK_REMAINING_MIP_LEVELS, 0,
VK_REMAINING_ARRAY_LAYERS},
},
VkImageMemoryBarrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.srcAccessMask = src_access_mask,
.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
.oldLayout =
is_initialized ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_UNDEFINED,
.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
.image = dst_vk_image,
.subresourceRange = {aspect_mask, 0, VK_REMAINING_MIP_LEVELS, 0,
VK_REMAINING_ARRAY_LAYERS},
}};
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
0, {}, {}, pre_blit_barriers);
// Blit from temporary to MSAA image
cmdbuf.BlitImage(*temp_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst_vk_image,
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, blit_regions,
VK_FILTER_NEAREST);
// Transition destination image to general layout
const VkImageMemoryBarrier post_blit_barrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_SHADER_READ_BIT |
VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
VK_ACCESS_TRANSFER_READ_BIT,
.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
.newLayout = VK_IMAGE_LAYOUT_GENERAL,
.image = dst_vk_image,
.subresourceRange = {aspect_mask, 0, VK_REMAINING_MIP_LEVELS, 0,
VK_REMAINING_ARRAY_LAYERS},
};
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, post_blit_barrier);
});
} else {
// Regular non-MSAA upload
scheduler->RequestOutsideRenderPassOperationContext();
@ -1591,7 +1631,8 @@ void Image::UploadMemory(VkBuffer buffer, VkDeviceSize offset,
const bool is_initialized = std::exchange(initialized, true);
scheduler->Record([src_buffer, vk_image, vk_aspect_mask, is_initialized,
vk_copies](vk::CommandBuffer cmdbuf) {
CopyBufferToImage(cmdbuf, src_buffer, vk_image, vk_aspect_mask, is_initialized, vk_copies);
CopyBufferToImage(cmdbuf, src_buffer, vk_image, vk_aspect_mask, is_initialized,
vk_copies);
});
}
@ -1623,35 +1664,31 @@ void Image::DownloadMemory(std::span<VkBuffer> buffers_span, std::span<size_t> o
}
// RE-USE MSAA UPLOAD CODE BUT NOW FOR DOWNLOAD
if (info.num_samples > 1 && runtime->msaa_copy_pass) {
// TODO: Depth/stencil formats need special handling
if (aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT) {
if (info.num_samples > 1) {
ImageInfo temp_info = info;
temp_info.num_samples = 1;
VkImageCreateInfo image_ci = MakeImageCreateInfo(runtime->device, temp_info);
image_ci.usage = original_image.UsageFlags();
vk::Image temp_image = runtime->memory_allocator.CreateImage(image_ci);
VkImageCreateInfo temp_ci = MakeImageCreateInfo(runtime->device, temp_info);
temp_ci.usage |= VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT;
vk::Image temp_image = runtime->memory_allocator.CreateImage(temp_ci);
Image temp_wrapper(*runtime, temp_info, 0, 0);
temp_wrapper.original_image = std::move(temp_image);
temp_wrapper.current_image = &Image::original_image;
temp_wrapper.aspect_mask = aspect_mask;
temp_wrapper.initialized = true;
std::vector<VideoCommon::ImageCopy> image_copies;
boost::container::small_vector<VkImageBlit, 16> blit_regions;
blit_regions.reserve(copies.size());
for (const auto& copy : copies) {
VideoCommon::ImageCopy image_copy;
image_copy.src_offset = copy.image_offset;
image_copy.dst_offset = copy.image_offset;
image_copy.src_subresource = copy.image_subresource;
image_copy.dst_subresource = copy.image_subresource;
image_copy.extent = copy.image_extent;
image_copies.push_back(image_copy);
blit_regions.emplace_back(VkImageBlit{
.srcSubresource = MakeImageSubresourceLayers(copy.image_subresource, aspect_mask),
.srcOffsets = {{copy.image_offset.x, copy.image_offset.y, copy.image_offset.z},
{static_cast<s32>(copy.image_offset.x + copy.image_extent.width),
static_cast<s32>(copy.image_offset.y + copy.image_extent.height),
static_cast<s32>(copy.image_offset.z + copy.image_extent.depth)}},
.dstSubresource = MakeImageSubresourceLayers(copy.image_subresource, aspect_mask),
.dstOffsets = {{copy.image_offset.x, copy.image_offset.y, copy.image_offset.z},
{static_cast<s32>(copy.image_offset.x + copy.image_extent.width),
static_cast<s32>(copy.image_offset.y + copy.image_extent.height),
static_cast<s32>(copy.image_offset.z + copy.image_extent.depth)}},
});
}
runtime->msaa_copy_pass->CopyImage(temp_wrapper, *this, image_copies, true);
boost::container::small_vector<VkBuffer, 8> buffers_vector{};
boost::container::small_vector<boost::container::small_vector<VkBufferImageCopy, 16>, 8>
vk_copies;
@ -1661,64 +1698,66 @@ void Image::DownloadMemory(std::span<VkBuffer> buffers_span, std::span<size_t> o
TransformBufferImageCopies(copies, offsets_span[index], aspect_mask));
}
const VkImage src_vk_image = Handle();
scheduler->RequestOutsideRenderPassOperationContext();
scheduler->Record([buffers = std::move(buffers_vector), image = *temp_wrapper.original_image,
aspect_mask_ = aspect_mask, vk_copies](vk::CommandBuffer cmdbuf) {
const VkImageMemoryBarrier read_barrier{
scheduler->Record([=, temp_image = std::move(temp_image),
buffers = std::move(buffers_vector)](vk::CommandBuffer cmdbuf) {
const std::array<VkImageMemoryBarrier, 2> pre_blit_barriers{
VkImageMemoryBarrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
.oldLayout = VK_IMAGE_LAYOUT_GENERAL,
.newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = image,
.subresourceRange{
.aspectMask = aspect_mask_,
.baseMipLevel = 0,
.levelCount = VK_REMAINING_MIP_LEVELS,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
.image = src_vk_image,
.subresourceRange = {aspect_mask, 0, VK_REMAINING_MIP_LEVELS, 0,
VK_REMAINING_ARRAY_LAYERS},
},
VkImageMemoryBarrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.srcAccessMask = 0,
.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED,
.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
.image = *temp_image,
.subresourceRange = {aspect_mask, 0, VK_REMAINING_MIP_LEVELS, 0,
VK_REMAINING_ARRAY_LAYERS},
}};
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
VK_PIPELINE_STAGE_TRANSFER_BIT, 0, {}, {}, pre_blit_barriers);
cmdbuf.BlitImage(src_vk_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, *temp_image,
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, blit_regions,
VK_FILTER_NEAREST);
const VkImageMemoryBarrier post_blit_barrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
.newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
.image = *temp_image,
.subresourceRange = {aspect_mask, 0, VK_REMAINING_MIP_LEVELS, 0,
VK_REMAINING_ARRAY_LAYERS},
};
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
0, read_barrier);
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
0, post_blit_barrier);
for (size_t index = 0; index < buffers.size(); index++) {
cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, buffers[index],
vk_copies[index]);
cmdbuf.CopyImageToBuffer(*temp_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
buffers[index], vk_copies[index]);
}
const VkMemoryBarrier memory_write_barrier{
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_HOST_READ_BIT,
};
const VkImageMemoryBarrier image_write_barrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = 0,
.dstAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
.newLayout = VK_IMAGE_LAYOUT_GENERAL,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = image,
.subresourceRange{
.aspectMask = aspect_mask_,
.baseMipLevel = 0,
.levelCount = VK_REMAINING_MIP_LEVELS,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
};
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
0, memory_write_barrier, nullptr, image_write_barrier);
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0,
memory_write_barrier, {}, {});
});
return;
}
} else {
boost::container::small_vector<VkBuffer, 8> buffers_vector{};
boost::container::small_vector<boost::container::small_vector<VkBufferImageCopy, 16>, 8>
@ -1749,12 +1788,12 @@ void Image::DownloadMemory(std::span<VkBuffer> buffers_span, std::span<size_t> o
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
};
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
0, read_barrier);
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
VK_PIPELINE_STAGE_TRANSFER_BIT, 0, read_barrier);
for (size_t index = 0; index < buffers.size(); index++) {
cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, buffers[index],
vk_copies[index]);
cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
buffers[index], vk_copies[index]);
}
const VkMemoryBarrier memory_write_barrier{
@ -1781,8 +1820,9 @@ void Image::DownloadMemory(std::span<VkBuffer> buffers_span, std::span<size_t> o
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
};
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
0, memory_write_barrier, nullptr, image_write_barrier);
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, memory_write_barrier,
nullptr, image_write_barrier);
});
}
@ -1978,6 +2018,11 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
std::ranges::transform(swizzle, swizzle.begin(), ConvertGreenRed);
}
}
if ((image.UsageFlags() & VK_IMAGE_USAGE_STORAGE_BIT) != 0) {
swizzle = {SwizzleSource::R, SwizzleSource::G, SwizzleSource::B, SwizzleSource::A};
}
const auto format_info = MaxwellToVK::SurfaceFormat(*device, FormatType::Optimal, true, format);
const VkImageViewUsageCreateInfo image_view_usage{
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO,