From e46b1cd6408bcd0bed8dcc1c6be9dcc155cc5869 Mon Sep 17 00:00:00 2001 From: MrPurple666 Date: Sat, 26 Jul 2025 03:12:10 -0300 Subject: [PATCH] vk_texture_cache: Rewrite MSAA handling with blits and proper barriers Refactor MSAA texture upload/download using intermediate blit with correct Vulkan barriers and layout handling. --- .../renderer_vulkan/vk_texture_cache.cpp | 319 ++++++++++-------- 1 file changed, 182 insertions(+), 137 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 022a9ae572..7bbb366bee 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -1457,7 +1457,7 @@ void TextureCacheRuntime::CopyImageMSAA(Image& dst, Image& src, if (msaa_copy_pass) { return msaa_copy_pass->CopyImage(dst, src, copies, msaa_to_non_msaa); } - UNIMPLEMENTED_MSG("Copying images with different samples is not supported."); + LOG_WARNING(Render_Vulkan, "Copying images with different samples is not supported."); } u64 TextureCacheRuntime::GetDeviceLocalMemory() const { @@ -1533,54 +1533,94 @@ void Image::UploadMemory(VkBuffer buffer, VkDeviceSize offset, // Handle MSAA upload if necessary /* WARNING, TODO: This code uses some hacks, besides being fundamentally ugly since tropic didn't want to touch it for a long time, so it needs a rewrite from someone better than me at vulkan.*/ - if (info.num_samples > 1 && runtime->CanUploadMSAA()) { - // Only use MSAA copy pass for color formats - // TODO: Depth/stencil formats need special handling - if (aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT) { - // Create a temporary non-MSAA image to upload the data first - ImageInfo temp_info = info; - temp_info.num_samples = 1; + if (info.num_samples > 1) { + // Create a temporary non-MSAA image to upload the data first + ImageInfo temp_info = info; + temp_info.num_samples = 1; + // Create image with same usage flags as the target image to avoid validation errors + VkImageCreateInfo temp_ci = MakeImageCreateInfo(runtime->device, temp_info); + temp_ci.usage |= VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT; + vk::Image temp_image = runtime->memory_allocator.CreateImage(temp_ci); - // Create image with same usage flags as the target image to avoid validation errors - VkImageCreateInfo image_ci = MakeImageCreateInfo(runtime->device, temp_info); - image_ci.usage = original_image.UsageFlags(); - vk::Image temp_image = runtime->memory_allocator.CreateImage(image_ci); + auto vk_buffer_image_copies = TransformBufferImageCopies(copies, offset, aspect_mask); - // Upload to the temporary non-MSAA image - scheduler->RequestOutsideRenderPassOperationContext(); - auto vk_copies = TransformBufferImageCopies(copies, offset, aspect_mask); - const VkBuffer src_buffer = buffer; - const VkImage temp_vk_image = *temp_image; - const VkImageAspectFlags vk_aspect_mask = aspect_mask; - scheduler->Record([src_buffer, temp_vk_image, vk_aspect_mask, vk_copies](vk::CommandBuffer cmdbuf) { - CopyBufferToImage(cmdbuf, src_buffer, temp_vk_image, vk_aspect_mask, false, vk_copies); + boost::container::small_vector blit_regions; + blit_regions.reserve(copies.size()); + for (const auto& copy : copies) { + blit_regions.emplace_back(VkImageBlit{ + .srcSubresource = MakeImageSubresourceLayers(copy.image_subresource, aspect_mask), + .srcOffsets = {{copy.image_offset.x, copy.image_offset.y, copy.image_offset.z}, + {static_cast(copy.image_offset.x + copy.image_extent.width), + static_cast(copy.image_offset.y + copy.image_extent.height), + static_cast(copy.image_offset.z + copy.image_extent.depth)}}, + .dstSubresource = MakeImageSubresourceLayers(copy.image_subresource, aspect_mask), + .dstOffsets = {{copy.image_offset.x, copy.image_offset.y, copy.image_offset.z}, + {static_cast(copy.image_offset.x + copy.image_extent.width), + static_cast(copy.image_offset.y + copy.image_extent.height), + static_cast(copy.image_offset.z + copy.image_extent.depth)}}, }); - - // Use MSAACopyPass to convert from non-MSAA to MSAA - std::vector image_copies; - for (const auto& copy : copies) { - VideoCommon::ImageCopy image_copy; - image_copy.src_offset = {0, 0, 0}; // Use zero offset for source - image_copy.dst_offset = copy.image_offset; - image_copy.src_subresource = copy.image_subresource; - image_copy.dst_subresource = copy.image_subresource; - image_copy.extent = copy.image_extent; - image_copies.push_back(image_copy); - } - - // wrapper image for the temporary image - Image temp_wrapper(*runtime, temp_info, 0, 0); - temp_wrapper.original_image = std::move(temp_image); - temp_wrapper.current_image = &Image::original_image; - temp_wrapper.aspect_mask = aspect_mask; - temp_wrapper.initialized = true; - - // Use MSAACopyPass to convert from non-MSAA to MSAA - runtime->msaa_copy_pass->CopyImage(*this, temp_wrapper, image_copies, false); - std::exchange(initialized, true); - return; } - // For depth/stencil formats, fall back to regular upload + + const VkImage dst_vk_image = Handle(); + const bool is_initialized = std::exchange(initialized, true); + + scheduler->RequestOutsideRenderPassOperationContext(); + scheduler->Record([=, temp_image = std::move(temp_image)](vk::CommandBuffer cmdbuf) { + // Upload to the temporary non-MSAA image + CopyBufferToImage(cmdbuf, buffer, *temp_image, aspect_mask, false, + vk_buffer_image_copies); + + // Transition layouts for blit + const VkAccessFlags src_access_mask = + is_initialized + ? (VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT) + : VK_ACCESS_NONE; + const std::array pre_blit_barriers{ + VkImageMemoryBarrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, + .oldLayout = VK_IMAGE_LAYOUT_GENERAL, + .newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + .image = *temp_image, + .subresourceRange = {aspect_mask, 0, VK_REMAINING_MIP_LEVELS, 0, + VK_REMAINING_ARRAY_LAYERS}, + }, + VkImageMemoryBarrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .srcAccessMask = src_access_mask, + .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .oldLayout = + is_initialized ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_UNDEFINED, + .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + .image = dst_vk_image, + .subresourceRange = {aspect_mask, 0, VK_REMAINING_MIP_LEVELS, 0, + VK_REMAINING_ARRAY_LAYERS}, + }}; + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, + 0, {}, {}, pre_blit_barriers); + + // Blit from temporary to MSAA image + cmdbuf.BlitImage(*temp_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst_vk_image, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, blit_regions, + VK_FILTER_NEAREST); + + // Transition destination image to general layout + const VkImageMemoryBarrier post_blit_barrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | + VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | + VK_ACCESS_TRANSFER_READ_BIT, + .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + .newLayout = VK_IMAGE_LAYOUT_GENERAL, + .image = dst_vk_image, + .subresourceRange = {aspect_mask, 0, VK_REMAINING_MIP_LEVELS, 0, + VK_REMAINING_ARRAY_LAYERS}, + }; + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, post_blit_barrier); + }); } else { // Regular non-MSAA upload scheduler->RequestOutsideRenderPassOperationContext(); @@ -1591,7 +1631,8 @@ void Image::UploadMemory(VkBuffer buffer, VkDeviceSize offset, const bool is_initialized = std::exchange(initialized, true); scheduler->Record([src_buffer, vk_image, vk_aspect_mask, is_initialized, vk_copies](vk::CommandBuffer cmdbuf) { - CopyBufferToImage(cmdbuf, src_buffer, vk_image, vk_aspect_mask, is_initialized, vk_copies); + CopyBufferToImage(cmdbuf, src_buffer, vk_image, vk_aspect_mask, is_initialized, + vk_copies); }); } @@ -1623,102 +1664,100 @@ void Image::DownloadMemory(std::span buffers_span, std::span o } // RE-USE MSAA UPLOAD CODE BUT NOW FOR DOWNLOAD - if (info.num_samples > 1 && runtime->msaa_copy_pass) { - // TODO: Depth/stencil formats need special handling - if (aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT) { - ImageInfo temp_info = info; - temp_info.num_samples = 1; + if (info.num_samples > 1) { + ImageInfo temp_info = info; + temp_info.num_samples = 1; - VkImageCreateInfo image_ci = MakeImageCreateInfo(runtime->device, temp_info); - image_ci.usage = original_image.UsageFlags(); - vk::Image temp_image = runtime->memory_allocator.CreateImage(image_ci); + VkImageCreateInfo temp_ci = MakeImageCreateInfo(runtime->device, temp_info); + temp_ci.usage |= VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT; + vk::Image temp_image = runtime->memory_allocator.CreateImage(temp_ci); - Image temp_wrapper(*runtime, temp_info, 0, 0); - temp_wrapper.original_image = std::move(temp_image); - temp_wrapper.current_image = &Image::original_image; - temp_wrapper.aspect_mask = aspect_mask; - temp_wrapper.initialized = true; + boost::container::small_vector blit_regions; + blit_regions.reserve(copies.size()); + for (const auto& copy : copies) { + blit_regions.emplace_back(VkImageBlit{ + .srcSubresource = MakeImageSubresourceLayers(copy.image_subresource, aspect_mask), + .srcOffsets = {{copy.image_offset.x, copy.image_offset.y, copy.image_offset.z}, + {static_cast(copy.image_offset.x + copy.image_extent.width), + static_cast(copy.image_offset.y + copy.image_extent.height), + static_cast(copy.image_offset.z + copy.image_extent.depth)}}, + .dstSubresource = MakeImageSubresourceLayers(copy.image_subresource, aspect_mask), + .dstOffsets = {{copy.image_offset.x, copy.image_offset.y, copy.image_offset.z}, + {static_cast(copy.image_offset.x + copy.image_extent.width), + static_cast(copy.image_offset.y + copy.image_extent.height), + static_cast(copy.image_offset.z + copy.image_extent.depth)}}, + }); + } - std::vector image_copies; - for (const auto& copy : copies) { - VideoCommon::ImageCopy image_copy; - image_copy.src_offset = copy.image_offset; - image_copy.dst_offset = copy.image_offset; - image_copy.src_subresource = copy.image_subresource; - image_copy.dst_subresource = copy.image_subresource; - image_copy.extent = copy.image_extent; - image_copies.push_back(image_copy); - } + boost::container::small_vector buffers_vector{}; + boost::container::small_vector, 8> + vk_copies; + for (size_t index = 0; index < buffers_span.size(); index++) { + buffers_vector.emplace_back(buffers_span[index]); + vk_copies.emplace_back( + TransformBufferImageCopies(copies, offsets_span[index], aspect_mask)); + } - runtime->msaa_copy_pass->CopyImage(temp_wrapper, *this, image_copies, true); + const VkImage src_vk_image = Handle(); - boost::container::small_vector buffers_vector{}; - boost::container::small_vector, 8> - vk_copies; - for (size_t index = 0; index < buffers_span.size(); index++) { - buffers_vector.emplace_back(buffers_span[index]); - vk_copies.emplace_back( - TransformBufferImageCopies(copies, offsets_span[index], aspect_mask)); - } - - scheduler->RequestOutsideRenderPassOperationContext(); - scheduler->Record([buffers = std::move(buffers_vector), image = *temp_wrapper.original_image, - aspect_mask_ = aspect_mask, vk_copies](vk::CommandBuffer cmdbuf) { - const VkImageMemoryBarrier read_barrier{ + scheduler->RequestOutsideRenderPassOperationContext(); + scheduler->Record([=, temp_image = std::move(temp_image), + buffers = std::move(buffers_vector)](vk::CommandBuffer cmdbuf) { + const std::array pre_blit_barriers{ + VkImageMemoryBarrier{ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .pNext = nullptr, - .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT, + .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, .oldLayout = VK_IMAGE_LAYOUT_GENERAL, .newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = image, - .subresourceRange{ - .aspectMask = aspect_mask_, - .baseMipLevel = 0, - .levelCount = VK_REMAINING_MIP_LEVELS, - .baseArrayLayer = 0, - .layerCount = VK_REMAINING_ARRAY_LAYERS, - }, - }; - cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, - 0, read_barrier); - - for (size_t index = 0; index < buffers.size(); index++) { - cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, buffers[index], - vk_copies[index]); - } - - const VkMemoryBarrier memory_write_barrier{ - .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, - .pNext = nullptr, - .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT, - .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, - }; - const VkImageMemoryBarrier image_write_barrier{ + .image = src_vk_image, + .subresourceRange = {aspect_mask, 0, VK_REMAINING_MIP_LEVELS, 0, + VK_REMAINING_ARRAY_LAYERS}, + }, + VkImageMemoryBarrier{ .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .pNext = nullptr, .srcAccessMask = 0, - .dstAccessMask = VK_ACCESS_MEMORY_WRITE_BIT, - .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, - .newLayout = VK_IMAGE_LAYOUT_GENERAL, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = image, - .subresourceRange{ - .aspectMask = aspect_mask_, - .baseMipLevel = 0, - .levelCount = VK_REMAINING_MIP_LEVELS, - .baseArrayLayer = 0, - .layerCount = VK_REMAINING_ARRAY_LAYERS, - }, - }; - cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, - 0, memory_write_barrier, nullptr, image_write_barrier); - }); - return; - } + .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .oldLayout = VK_IMAGE_LAYOUT_UNDEFINED, + .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + .image = *temp_image, + .subresourceRange = {aspect_mask, 0, VK_REMAINING_MIP_LEVELS, 0, + VK_REMAINING_ARRAY_LAYERS}, + }}; + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, 0, {}, {}, pre_blit_barriers); + + cmdbuf.BlitImage(src_vk_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, *temp_image, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, blit_regions, + VK_FILTER_NEAREST); + + const VkImageMemoryBarrier post_blit_barrier{ + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, + .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + .newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + .image = *temp_image, + .subresourceRange = {aspect_mask, 0, VK_REMAINING_MIP_LEVELS, 0, + VK_REMAINING_ARRAY_LAYERS}, + }; + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, + 0, post_blit_barrier); + + for (size_t index = 0; index < buffers.size(); index++) { + cmdbuf.CopyImageToBuffer(*temp_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + buffers[index], vk_copies[index]); + } + + const VkMemoryBarrier memory_write_barrier{ + .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_HOST_READ_BIT, + }; + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0, + memory_write_barrier, {}, {}); + }); } else { boost::container::small_vector buffers_vector{}; boost::container::small_vector, 8> @@ -1749,12 +1788,12 @@ void Image::DownloadMemory(std::span buffers_span, std::span o .layerCount = VK_REMAINING_ARRAY_LAYERS, }, }; - cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, - 0, read_barrier); + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, 0, read_barrier); for (size_t index = 0; index < buffers.size(); index++) { - cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, buffers[index], - vk_copies[index]); + cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + buffers[index], vk_copies[index]); } const VkMemoryBarrier memory_write_barrier{ @@ -1781,8 +1820,9 @@ void Image::DownloadMemory(std::span buffers_span, std::span o .layerCount = VK_REMAINING_ARRAY_LAYERS, }, }; - cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, - 0, memory_write_barrier, nullptr, image_write_barrier); + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, memory_write_barrier, + nullptr, image_write_barrier); }); } @@ -1978,6 +2018,11 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI std::ranges::transform(swizzle, swizzle.begin(), ConvertGreenRed); } } + + if ((image.UsageFlags() & VK_IMAGE_USAGE_STORAGE_BIT) != 0) { + swizzle = {SwizzleSource::R, SwizzleSource::G, SwizzleSource::B, SwizzleSource::A}; + } + const auto format_info = MaxwellToVK::SurfaceFormat(*device, FormatType::Optimal, true, format); const VkImageViewUsageCreateInfo image_view_usage{ .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO,