diff --git a/externals/xbyak b/externals/xbyak
index 12557954c6..4e44f4614d 160000
--- a/externals/xbyak
+++ b/externals/xbyak
@@ -1 +1 @@
-Subproject commit 12557954c68a780563f9ab9fc24a3a156c96cba1
+Subproject commit 4e44f4614ddbf038f2a6296f5b906d5c72691e0f
diff --git a/src/video_core/query_cache/query_cache.h b/src/video_core/query_cache/query_cache.h
index 08b7790555..6e084cc079 100644
--- a/src/video_core/query_cache/query_cache.h
+++ b/src/video_core/query_cache/query_cache.h
@@ -1,3 +1,6 @@
+// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
 // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
 // SPDX-License-Identifier: GPL-3.0-or-later
 
@@ -115,8 +118,8 @@ struct QueryCacheBase<Traits>::QueryCacheBaseImpl {
     QueryCacheBaseImpl(QueryCacheBase<Traits>* owner_, VideoCore::RasterizerInterface& rasterizer_,
                        Tegra::MaxwellDeviceMemoryManager& device_memory_, RuntimeType& runtime_,
                        Tegra::GPU& gpu_)
-        : owner{owner_}, rasterizer{rasterizer_},
-          device_memory{device_memory_}, runtime{runtime_}, gpu{gpu_} {
+        : owner{owner_}, rasterizer{rasterizer_}, device_memory{device_memory_}, runtime{runtime_},
+          gpu{gpu_} {
         streamer_mask = 0;
         for (size_t i = 0; i < static_cast<size_t>(QueryType::MaxQueryTypes); i++) {
             streamers[i] = runtime.GetStreamerInterface(static_cast<QueryType>(i));
@@ -267,7 +270,11 @@ void QueryCacheBase<Traits>::CounterReport(GPUVAddr addr, QueryType counter_type
         return;
     }
     if (False(query_base->flags & QueryFlagBits::IsFinalValueSynced)) [[unlikely]] {
-        ASSERT(false);
+        LOG_ERROR(HW_GPU,
+                  "Query report value not synchronized. Consider increasing GPU accuracy.");
+        if (!is_synced) [[likely]] {
+            impl->pending_unregister.push_back(query_location);
+        }
         return;
     }
     query_base->value += streamer->GetAmendValue();
@@ -370,8 +377,6 @@ void QueryCacheBase<Traits>::NotifySegment(bool resume) {
    if (resume) {
        impl->runtime.ResumeHostConditionalRendering();
    } else {
-        CounterClose(VideoCommon::QueryType::ZPassPixelCount64);
-        CounterClose(VideoCommon::QueryType::StreamingByteCount);
        impl->runtime.PauseHostConditionalRendering();
    }
 }
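The query_cache.h hunk above replaces a hard ASSERT with an error log and, for asynchronous reports, parks the query in pending_unregister so it gets cleaned up later instead of crashing. A standalone sketch of that recovery pattern; every type and name here is invented for illustration and is not the emulator's:

```cpp
// Sketch only: log and defer cleanup instead of asserting on an unsynced query.
#include <cstdio>
#include <vector>

struct Location {
    int id;
};

struct Query {
    bool synced;
    long value;
};

using PendingList = std::vector<Location>; // stand-in for pending_unregister

// Returns true when the report was applied, false when it was deferred.
bool ReportValue(Query& query, Location where, bool is_synced, PendingList& pending) {
    if (!query.synced) {
        std::fprintf(stderr, "query %d not synchronized, deferring unregister\n", where.id);
        if (!is_synced) {
            pending.push_back(where); // unregistered later, mirroring pending_unregister
        }
        return false;
    }
    query.value += 1; // stand-in for applying the streamer amend value
    return true;
}

int main() {
    PendingList pending;
    Query q{false, 0};
    ReportValue(q, Location{7}, false, pending);
    std::printf("pending entries: %zu\n", pending.size());
}
```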
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp
index 44c06eddf3..1f71bc68c6 100644
--- a/src/video_core/renderer_vulkan/vk_query_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp
@@ -1,3 +1,6 @@
+// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
 // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
 // SPDX-License-Identifier: GPL-3.0-or-later
 
@@ -1161,10 +1164,9 @@ struct QueryCacheRuntimeImpl {
                           StagingBufferPool& staging_pool_,
                           ComputePassDescriptorQueue& compute_pass_descriptor_queue,
                           DescriptorPool& descriptor_pool)
-        : rasterizer{rasterizer_}, device_memory{device_memory_},
-          buffer_cache{buffer_cache_}, device{device_},
-          memory_allocator{memory_allocator_}, scheduler{scheduler_}, staging_pool{staging_pool_},
-          guest_streamer(0, runtime),
+        : rasterizer{rasterizer_}, device_memory{device_memory_}, buffer_cache{buffer_cache_},
+          device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_},
+          staging_pool{staging_pool_}, guest_streamer(0, runtime),
           sample_streamer(static_cast<size_t>(QueryType::ZPassPixelCount64), runtime, rasterizer,
                           device, scheduler, memory_allocator, compute_pass_descriptor_queue,
                           descriptor_pool),
@@ -1300,9 +1302,11 @@ void QueryCacheRuntime::HostConditionalRenderingCompareValueImpl(VideoCommon::Lo
     if (impl->hcr_is_set) {
         if (impl->hcr_setup.buffer == impl->hcr_buffer &&
             impl->hcr_setup.offset == impl->hcr_offset) {
-            ResumeHostConditionalRendering();
             return;
         }
+    }
+    bool was_running = impl->is_hcr_running;
+    if (was_running) {
         PauseHostConditionalRendering();
     }
     impl->hcr_setup.buffer = impl->hcr_buffer;
@@ -1310,7 +1314,9 @@ void QueryCacheRuntime::HostConditionalRenderingCompareValueImpl(VideoCommon::Lo
     impl->hcr_setup.flags = is_equal ? VK_CONDITIONAL_RENDERING_INVERTED_BIT_EXT : 0;
     impl->hcr_is_set = true;
     impl->is_hcr_running = false;
-    ResumeHostConditionalRendering();
+    if (was_running) {
+        ResumeHostConditionalRendering();
+    }
 }
 
 void QueryCacheRuntime::HostConditionalRenderingCompareBCImpl(DAddr address, bool is_equal) {
@@ -1325,7 +1331,8 @@ void QueryCacheRuntime::HostConditionalRenderingCompareBCImpl(DAddr address, boo
         to_resolve = buffer->Handle();
         to_resolve_offset = static_cast<u32>(offset);
     }
-    if (impl->is_hcr_running) {
+    bool was_running = impl->is_hcr_running;
+    if (was_running) {
         PauseHostConditionalRendering();
     }
     impl->conditional_resolve_pass->Resolve(*impl->hcr_resolve_buffer, to_resolve,
@@ -1335,7 +1342,9 @@ void QueryCacheRuntime::HostConditionalRenderingCompareBCImpl(DAddr address, boo
     impl->hcr_setup.flags = is_equal ? 0 : VK_CONDITIONAL_RENDERING_INVERTED_BIT_EXT;
     impl->hcr_is_set = true;
     impl->is_hcr_running = false;
-    ResumeHostConditionalRendering();
+    if (was_running) {
+        ResumeHostConditionalRendering();
+    }
 }
 
 bool QueryCacheRuntime::HostConditionalRenderingCompareValue(VideoCommon::LookupData object_1,
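The recurring change in vk_query_cache.cpp above is an "only resume what you paused" guard: conditional rendering is paused and resumed around the setup update only if it was actually running, rather than being force-resumed unconditionally. A minimal self-contained sketch of the pattern; the Renderer type and its methods are invented stand-ins:

```cpp
// Sketch of the was_running guard, with invented names.
#include <cstdio>

class Renderer {
public:
    void Pause() { running_ = false; std::puts("paused"); }
    void Resume() { running_ = true; std::puts("resumed"); }
    bool IsRunning() const { return running_; }

    void Reconfigure(bool enable_inverted) {
        const bool was_running = IsRunning();
        if (was_running) {
            Pause(); // never pause (or resume) unconditionally
        }
        inverted_ = enable_inverted; // update the conditional-rendering setup
        if (was_running) {
            Resume(); // restart only if conditional rendering was active before
        }
    }

private:
    bool running_ = false;
    bool inverted_ = false;
};

int main() {
    Renderer r;
    r.Reconfigure(true);  // not running: no pause/resume side effects
    r.Resume();
    r.Reconfigure(false); // running: paused and resumed around the update
}
```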
diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
index 92992c1ee7..c803b50e24 100644
--- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp
+++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp
@@ -217,8 +217,6 @@ void RasterizerVulkan::PrepareDraw(bool is_indexed, Func&& draw_func) {
     FlushWork();
     gpu_memory->FlushCaching();
 
-    query_cache.NotifySegment(true);
-
     GraphicsPipeline* const pipeline{pipeline_cache.CurrentGraphicsPipeline()};
     if (!pipeline) {
         return;
@@ -232,9 +230,13 @@ void RasterizerVulkan::PrepareDraw(bool is_indexed, Func&& draw_func) {
     UpdateDynamicStates();
 
     HandleTransformFeedback();
+    query_cache.NotifySegment(true);
     query_cache.CounterEnable(VideoCommon::QueryType::ZPassPixelCount64,
                               maxwell3d->regs.zpass_pixel_count_enable);
+
     draw_func();
+
+    query_cache.CounterEnable(VideoCommon::QueryType::StreamingByteCount, false);
 }
 
 void RasterizerVulkan::Draw(bool is_indexed, u32 instance_count) {
@@ -311,8 +313,6 @@ void RasterizerVulkan::DrawTexture() {
     };
 
     FlushWork();
-    query_cache.NotifySegment(true);
-
     std::scoped_lock l{texture_cache.mutex};
     texture_cache.SynchronizeGraphicsDescriptors();
     texture_cache.UpdateRenderTargets(false);
@@ -359,10 +359,6 @@ void RasterizerVulkan::Clear(u32 layer_count) {
     FlushWork();
     gpu_memory->FlushCaching();
 
-    query_cache.NotifySegment(true);
-    query_cache.CounterEnable(VideoCommon::QueryType::ZPassPixelCount64,
-                              maxwell3d->regs.zpass_pixel_count_enable);
-
     auto& regs = maxwell3d->regs;
     const bool use_color = regs.clear_surface.R || regs.clear_surface.G ||
                            regs.clear_surface.B || regs.clear_surface.A;
@@ -378,6 +374,10 @@ void RasterizerVulkan::Clear(u32 layer_count) {
     const VkExtent2D render_area = framebuffer->RenderArea();
     scheduler.RequestRenderpass(framebuffer);
 
+    query_cache.NotifySegment(true);
+    query_cache.CounterEnable(VideoCommon::QueryType::ZPassPixelCount64,
+                              maxwell3d->regs.zpass_pixel_count_enable);
+
     u32 up_scale = 1;
     u32 down_shift = 0;
     if (texture_cache.IsRescaling()) {
@@ -832,6 +832,7 @@ std::optional<FramebufferTextureInfo> RasterizerVulkan::AccelerateDisplay(
     if (!image_view) {
         return {};
     }
+    query_cache.NotifySegment(false);
 
     const auto& resolution = Settings::values.resolution_info;
 
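The rasterizer hunks above move NotifySegment(true)/CounterEnable from the top of PrepareDraw(), DrawTexture() and Clear() to just before the draw or clear is recorded (after the pipeline and render pass are ready), and close transform-feedback byte counting right after each draw. A condensed view of the new ordering, with hypothetical free functions standing in for the scheduler and query-cache calls:

```cpp
// Sketch of the reordered counter bracket; all functions here are stand-ins.
#include <cstdio>

void SetupPipelineAndRenderPass() { std::puts("pipeline + render pass ready"); }
void NotifySegment(bool begin) { std::printf("segment %s\n", begin ? "begin" : "end"); }
void EnableCounter(const char* name, bool on) { std::printf("%s=%d\n", name, on); }
void Draw() { std::puts("draw"); }

int main() {
    SetupPipelineAndRenderPass();      // previously ran after NotifySegment(true)
    NotifySegment(true);               // open the query segment as late as possible
    EnableCounter("ZPassPixelCount64", true);
    Draw();
    EnableCounter("StreamingByteCount", false); // close byte counting per draw
}
```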
@@ -943,22 +944,20 @@ void RasterizerVulkan::UpdateDynamicStates() {
     UpdateDepthBounds(regs);
     UpdateStencilFaces(regs);
     UpdateLineWidth(regs);
-    // TODO: updating line stipple causes the cmdbuf to die
-    // UpdateLineStipple(regs);
 
     const u8 dynamic_state = Settings::values.dyna_state.GetValue();
 
     auto features = DynamicFeatures{
-        .has_extended_dynamic_state = device.IsExtExtendedDynamicStateSupported()
-            && dynamic_state > 0,
-        .has_extended_dynamic_state_2 = device.IsExtExtendedDynamicState2Supported()
-            && dynamic_state > 1,
-        .has_extended_dynamic_state_2_extra = device.IsExtExtendedDynamicState2ExtrasSupported()
-            && dynamic_state > 1,
-        .has_extended_dynamic_state_3_blend = device.IsExtExtendedDynamicState3BlendingSupported()
-            && dynamic_state > 2,
-        .has_extended_dynamic_state_3_enables = device.IsExtExtendedDynamicState3EnablesSupported()
-            && dynamic_state > 2,
+        .has_extended_dynamic_state =
+            device.IsExtExtendedDynamicStateSupported() && dynamic_state > 0,
+        .has_extended_dynamic_state_2 =
+            device.IsExtExtendedDynamicState2Supported() && dynamic_state > 1,
+        .has_extended_dynamic_state_2_extra =
+            device.IsExtExtendedDynamicState2ExtrasSupported() && dynamic_state > 1,
+        .has_extended_dynamic_state_3_blend =
+            device.IsExtExtendedDynamicState3BlendingSupported() && dynamic_state > 2,
+        .has_extended_dynamic_state_3_enables =
+            device.IsExtExtendedDynamicState3EnablesSupported() && dynamic_state > 2,
         .has_dynamic_vertex_input = device.IsExtVertexInputDynamicStateSupported(),
     };
 
@@ -983,16 +982,12 @@ void RasterizerVulkan::UpdateDynamicStates() {
 
     if (features.has_extended_dynamic_state_3_enables) {
         using namespace Tegra::Engines;
-        if (device.GetDriverID() == VkDriverIdKHR::VK_DRIVER_ID_AMD_OPEN_SOURCE
-            || device.GetDriverID() == VkDriverIdKHR::VK_DRIVER_ID_AMD_PROPRIETARY) {
-            struct In
-            {
+        if (device.GetDriverID() == VkDriverIdKHR::VK_DRIVER_ID_AMD_OPEN_SOURCE ||
+            device.GetDriverID() == VkDriverIdKHR::VK_DRIVER_ID_AMD_PROPRIETARY) {
+            struct In {
                 const Maxwell3D::Regs::VertexAttribute::Type d;
-                In(Maxwell3D::Regs::VertexAttribute::Type n)
-                    : d(n)
-                {}
-                bool operator()(Maxwell3D::Regs::VertexAttribute n) const
-                {
+                In(Maxwell3D::Regs::VertexAttribute::Type n) : d(n) {}
+                bool operator()(Maxwell3D::Regs::VertexAttribute n) const {
                     return n.type == d;
                 }
             };
@@ -1143,36 +1138,36 @@ void RasterizerVulkan::UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs) {
     if (is_d24 && !device.SupportsD24DepthBuffer()) {
         static constexpr const size_t length = sizeof(NEEDS_D24) / sizeof(NEEDS_D24[0]);
 
-        static constexpr const u64 *start = NEEDS_D24;
-        static constexpr const u64 *end = NEEDS_D24 + length;
+        static constexpr const u64* start = NEEDS_D24;
+        static constexpr const u64* end = NEEDS_D24 + length;
 
-        const u64 *it = std::find(start, end, program_id);
+        const u64* it = std::find(start, end, program_id);
 
         if (it != end) {
             // the base formulas can be obtained from here:
             // https://docs.microsoft.com/en-us/windows/win32/direct3d11/d3d10-graphics-programming-guide-output-merger-stage-depth-bias
-            const double rescale_factor = static_cast<double>(1ULL << (32 - 24))
-                / (static_cast<double>(0x1.ep+127));
+            const double rescale_factor =
+                static_cast<double>(1ULL << (32 - 24)) / (static_cast<double>(0x1.ep+127));
             units = static_cast<float>(static_cast<double>(units) * rescale_factor);
         }
     }
 
-    scheduler.Record(
-        [constant = units, clamp = regs.depth_bias_clamp, factor = regs.slope_scale_depth_bias, this](
-            vk::CommandBuffer cmdbuf) {
-            if (device.IsExtDepthBiasControlSupported()) {
-                static VkDepthBiasRepresentationInfoEXT bias_info{
-                    .sType = VK_STRUCTURE_TYPE_DEPTH_BIAS_REPRESENTATION_INFO_EXT,
-                    .pNext = nullptr,
-                    .depthBiasRepresentation = VK_DEPTH_BIAS_REPRESENTATION_LEAST_REPRESENTABLE_VALUE_FORCE_UNORM_EXT,
-                    .depthBiasExact = VK_FALSE,
-                };
+    scheduler.Record([constant = units, clamp = regs.depth_bias_clamp,
+                      factor = regs.slope_scale_depth_bias, this](vk::CommandBuffer cmdbuf) {
+        if (device.IsExtDepthBiasControlSupported()) {
+            static VkDepthBiasRepresentationInfoEXT bias_info{
+                .sType = VK_STRUCTURE_TYPE_DEPTH_BIAS_REPRESENTATION_INFO_EXT,
+                .pNext = nullptr,
+                .depthBiasRepresentation =
+                    VK_DEPTH_BIAS_REPRESENTATION_LEAST_REPRESENTABLE_VALUE_FORCE_UNORM_EXT,
+                .depthBiasExact = VK_FALSE,
+            };
 
-                cmdbuf.SetDepthBias(constant, clamp, factor, &bias_info);
-            } else {
-                cmdbuf.SetDepthBias(constant, clamp, factor);
-            }
-        });
+            cmdbuf.SetDepthBias(constant, clamp, factor, &bias_info);
+        } else {
+            cmdbuf.SetDepthBias(constant, clamp, factor);
+        }
+    });
 }
 
 void RasterizerVulkan::UpdateBlendConstants(Tegra::Engines::Maxwell3D::Regs& regs) {
@@ -1354,8 +1349,7 @@ void RasterizerVulkan::UpdateRasterizerDiscardEnable(Tegra::Engines::Maxwell3D::
     });
 }
 
-void RasterizerVulkan::UpdateConservativeRasterizationMode(Tegra::Engines::Maxwell3D::Regs& regs)
-{
+void RasterizerVulkan::UpdateConservativeRasterizationMode(Tegra::Engines::Maxwell3D::Regs& regs) {
     if (!state_tracker.TouchConservativeRasterizationMode()) {
         return;
     }
@@ -1367,8 +1361,7 @@ void RasterizerVulkan::UpdateConservativeRasterizationMode(Tegra::Engines::Maxwe
     });
 }
 
-void RasterizerVulkan::UpdateLineStippleEnable(Tegra::Engines::Maxwell3D::Regs& regs)
-{
+void RasterizerVulkan::UpdateLineStippleEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
     if (!state_tracker.TouchLineStippleEnable()) {
         return;
     }
@@ -1378,19 +1371,7 @@ void RasterizerVulkan::UpdateLineStippleEnable(Tegra::Engines::Maxwell3D::Regs&
     });
 }
 
-void RasterizerVulkan::UpdateLineStipple(Tegra::Engines::Maxwell3D::Regs& regs)
-{
-    if (!state_tracker.TouchLineStipple()) {
-        return;
-    }
-
-    scheduler.Record([params = regs.line_stipple_params](vk::CommandBuffer cmdbuf) {
-        cmdbuf.SetLineStippleEXT(params.factor, static_cast<u16>(params.pattern));
-    });
-}
-
-void RasterizerVulkan::UpdateLineRasterizationMode(Tegra::Engines::Maxwell3D::Regs& regs)
-{
+void RasterizerVulkan::UpdateLineRasterizationMode(Tegra::Engines::Maxwell3D::Regs& regs) {
     // if (!state_tracker.TouchLi()) {
     //     return;
     // }
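The UpdateDepthBias() hunk only reflows the D24 rescale expression; the math is unchanged. For reference, the factor it computes can be evaluated in isolation: 2^(32-24) = 256, and the hex float 0x1.ep+127 is 1.875 * 2^127, approximately 3.19e38.

```cpp
// Worked evaluation of the rescale factor from UpdateDepthBias above.
// Plain arithmetic only; no emulator code involved.
#include <cstdio>

int main() {
    const double numerator = static_cast<double>(1ULL << (32 - 24)); // 256
    const double denominator = 0x1.ep+127;                           // ~3.1901e38
    const double rescale_factor = numerator / denominator;
    std::printf("rescale_factor = %.6e\n", rescale_factor);          // ~8.0247e-37
}
```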
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp
index 146923db4d..7c556588b6 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -1,3 +1,6 @@
+// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
 // SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
 
@@ -257,16 +260,6 @@ u64 Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_se
 
 void Scheduler::AllocateNewContext() {
     // Enable counters once again. These are disabled when a command buffer is finished.
-    if (query_cache) {
-#if ANDROID
-        if (Settings::IsGPULevelHigh()) {
-            // This is problematic on Android, disable on GPU Normal.
-            query_cache->NotifySegment(true);
-        }
-#else
-        query_cache->NotifySegment(true);
-#endif
-    }
 }
 
 void Scheduler::InvalidateState() {
@@ -276,15 +269,7 @@ void Scheduler::InvalidateState() {
 }
 
 void Scheduler::EndPendingOperations() {
-#if ANDROID
-    if (Settings::IsGPULevelHigh()) {
-        // This is problematic on Android, disable on GPU Normal.
-        // query_cache->DisableStreams();
-    }
-#else
-    // query_cache->DisableStreams();
-#endif
-    query_cache->NotifySegment(false);
+    query_cache->CounterReset(VideoCommon::QueryType::ZPassPixelCount64);
     EndRenderPass();
 }
 
@@ -292,6 +277,10 @@ void Scheduler::EndRenderPass() {
     if (!state.renderpass) {
         return;
     }
+
+    query_cache->CounterEnable(VideoCommon::QueryType::ZPassPixelCount64, false);
+    query_cache->NotifySegment(false);
+
     Record([num_images = num_renderpass_images, images = renderpass_images,
             ranges = renderpass_image_ranges](vk::CommandBuffer cmdbuf) {
         std::array<VkImageMemoryBarrier, 9> barriers;
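In vk_scheduler.cpp, EndRenderPass() now disables the ZPass counter and closes the query segment before the end-of-pass barriers are recorded, and EndPendingOperations() resets the counter once per submission. As I understand the Vulkan rules, a query begun inside a render pass instance must also end inside that instance, which is why the counter has to be closed at this point. A condensed sketch of the enforced ordering, using stand-in functions rather than the emulator's API:

```cpp
// Sketch of the ordering Scheduler::EndRenderPass() now enforces.
#include <cstdio>

void CounterEnable(bool on) { std::printf("zpass counter %s\n", on ? "on" : "off"); }
void NotifySegment(bool begin) { std::printf("segment %s\n", begin ? "begin" : "end"); }
void RecordEndRenderPassBarriers() { std::puts("barriers + vkCmdEndRenderPass"); }

void EndRenderPass(bool renderpass_active) {
    if (!renderpass_active) {
        return;
    }
    CounterEnable(false);  // end any vkCmdBeginQuery issued inside this pass
    NotifySegment(false);  // flush/close the query segment
    RecordEndRenderPassBarriers();
}

int main() {
    EndRenderPass(true);
}
```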
diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
index 6272d6231a..d7feb69f74 100644
--- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp
@@ -1470,7 +1470,7 @@ void TextureCacheRuntime::CopyImageMSAA(Image& dst, Image& src,
     if (msaa_copy_pass) {
         return msaa_copy_pass->CopyImage(dst, src, copies, msaa_to_non_msaa);
     }
-    UNIMPLEMENTED_MSG("Copying images with different samples is not supported.");
+    LOG_WARNING(Render_Vulkan, "Copying images with different samples is not supported.");
 }
 
 u64 TextureCacheRuntime::GetDeviceLocalMemory() const {
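The UploadMemory()/DownloadMemory() hunks below repeat the same whole-subresource VkImageMemoryBarrier shape several times. For readers following along, a condensed helper equivalent to those inline structs; this helper is hypothetical, the patch itself writes the barriers out inline:

```cpp
// Hypothetical helper matching the inline barriers in the hunks below: one
// VkImageMemoryBarrier covering every mip level and array layer of `image`.
#include <vulkan/vulkan.h>

VkImageMemoryBarrier FullImageBarrier(VkImage image, VkImageAspectFlags aspect_mask,
                                      VkAccessFlags src_access, VkAccessFlags dst_access,
                                      VkImageLayout old_layout, VkImageLayout new_layout) {
    return VkImageMemoryBarrier{
        .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
        .pNext = nullptr,
        .srcAccessMask = src_access,
        .dstAccessMask = dst_access,
        .oldLayout = old_layout,
        .newLayout = new_layout,
        .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
        .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
        .image = image,
        .subresourceRange = {aspect_mask, 0, VK_REMAINING_MIP_LEVELS, 0,
                             VK_REMAINING_ARRAY_LAYERS},
    };
}
```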
@@ -1546,54 +1546,94 @@ void Image::UploadMemory(VkBuffer buffer, VkDeviceSize offset,
     // Handle MSAA upload if necessary
     /* WARNING, TODO: This code uses some hacks, besides being fundamentally ugly since tropic
        didn't want to touch it for a long time, so it needs a rewrite from someone better than me
        at vulkan.*/
-    if (info.num_samples > 1 && runtime->CanUploadMSAA()) {
-        // Only use MSAA copy pass for color formats
-        // TODO: Depth/stencil formats need special handling
-        if (aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT) {
-            // Create a temporary non-MSAA image to upload the data first
-            ImageInfo temp_info = info;
-            temp_info.num_samples = 1;
+    if (info.num_samples > 1) {
+        // Create a temporary non-MSAA image to upload the data first
+        ImageInfo temp_info = info;
+        temp_info.num_samples = 1;
 
-            // Create image with same usage flags as the target image to avoid validation errors
-            VkImageCreateInfo image_ci = MakeImageCreateInfo(runtime->device, temp_info);
-            image_ci.usage = original_image.UsageFlags();
-            vk::Image temp_image = runtime->memory_allocator.CreateImage(image_ci);
+        // Create image with same usage flags as the target image to avoid validation errors
+        VkImageCreateInfo temp_ci = MakeImageCreateInfo(runtime->device, temp_info);
+        temp_ci.usage |= VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT;
+        vk::Image temp_image = runtime->memory_allocator.CreateImage(temp_ci);
 
-            // Upload to the temporary non-MSAA image
-            scheduler->RequestOutsideRenderPassOperationContext();
-            auto vk_copies = TransformBufferImageCopies(copies, offset, aspect_mask);
-            const VkBuffer src_buffer = buffer;
-            const VkImage temp_vk_image = *temp_image;
-            const VkImageAspectFlags vk_aspect_mask = aspect_mask;
-            scheduler->Record([src_buffer, temp_vk_image, vk_aspect_mask, vk_copies](vk::CommandBuffer cmdbuf) {
-                CopyBufferToImage(cmdbuf, src_buffer, temp_vk_image, vk_aspect_mask, false, vk_copies);
+        auto vk_buffer_image_copies = TransformBufferImageCopies(copies, offset, aspect_mask);
+
+        boost::container::small_vector<VkImageBlit, 8> blit_regions;
+        blit_regions.reserve(copies.size());
+        for (const auto& copy : copies) {
+            blit_regions.emplace_back(VkImageBlit{
+                .srcSubresource = MakeImageSubresourceLayers(copy.image_subresource, aspect_mask),
+                .srcOffsets = {{copy.image_offset.x, copy.image_offset.y, copy.image_offset.z},
+                               {static_cast<s32>(copy.image_offset.x + copy.image_extent.width),
+                                static_cast<s32>(copy.image_offset.y + copy.image_extent.height),
+                                static_cast<s32>(copy.image_offset.z + copy.image_extent.depth)}},
+                .dstSubresource = MakeImageSubresourceLayers(copy.image_subresource, aspect_mask),
+                .dstOffsets = {{copy.image_offset.x, copy.image_offset.y, copy.image_offset.z},
+                               {static_cast<s32>(copy.image_offset.x + copy.image_extent.width),
+                                static_cast<s32>(copy.image_offset.y + copy.image_extent.height),
+                                static_cast<s32>(copy.image_offset.z + copy.image_extent.depth)}},
             });
-
-            // Use MSAACopyPass to convert from non-MSAA to MSAA
-            std::vector<VideoCommon::ImageCopy> image_copies;
-            for (const auto& copy : copies) {
-                VideoCommon::ImageCopy image_copy;
-                image_copy.src_offset = {0, 0, 0}; // Use zero offset for source
-                image_copy.dst_offset = copy.image_offset;
-                image_copy.src_subresource = copy.image_subresource;
-                image_copy.dst_subresource = copy.image_subresource;
-                image_copy.extent = copy.image_extent;
-                image_copies.push_back(image_copy);
-            }
-
-            // wrapper image for the temporary image
-            Image temp_wrapper(*runtime, temp_info, 0, 0);
-            temp_wrapper.original_image = std::move(temp_image);
-            temp_wrapper.current_image = &Image::original_image;
-            temp_wrapper.aspect_mask = aspect_mask;
-            temp_wrapper.initialized = true;
-
-            // Use MSAACopyPass to convert from non-MSAA to MSAA
-            runtime->msaa_copy_pass->CopyImage(*this, temp_wrapper, image_copies, false);
-            std::exchange(initialized, true);
-            return;
         }
-        // For depth/stencil formats, fall back to regular upload
+
+        const VkImage dst_vk_image = Handle();
+        const bool is_initialized = std::exchange(initialized, true);
+
+        scheduler->RequestOutsideRenderPassOperationContext();
+        scheduler->Record([=, temp_image = std::move(temp_image)](vk::CommandBuffer cmdbuf) {
+            // Upload to the temporary non-MSAA image
+            CopyBufferToImage(cmdbuf, buffer, *temp_image, aspect_mask, false,
+                              vk_buffer_image_copies);
+
+            // Transition layouts for blit
+            const VkAccessFlags src_access_mask =
+                is_initialized
+                    ? (VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT)
+                    : VK_ACCESS_NONE;
+            const std::array pre_blit_barriers{
+                VkImageMemoryBarrier{
+                    .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+                    .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
+                    .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
+                    .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
+                    .newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
+                    .image = *temp_image,
+                    .subresourceRange = {aspect_mask, 0, VK_REMAINING_MIP_LEVELS, 0,
+                                         VK_REMAINING_ARRAY_LAYERS},
+                },
+                VkImageMemoryBarrier{
+                    .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+                    .srcAccessMask = src_access_mask,
+                    .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
+                    .oldLayout =
+                        is_initialized ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_UNDEFINED,
+                    .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+                    .image = dst_vk_image,
+                    .subresourceRange = {aspect_mask, 0, VK_REMAINING_MIP_LEVELS, 0,
+                                         VK_REMAINING_ARRAY_LAYERS},
+                }};
+            cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
+                                   0, {}, {}, pre_blit_barriers);
+
+            // Blit from temporary to MSAA image
+            cmdbuf.BlitImage(*temp_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst_vk_image,
+                             VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, blit_regions,
+                             VK_FILTER_NEAREST);
+
+            // Transition destination image to general layout
+            const VkImageMemoryBarrier post_blit_barrier{
+                .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+                .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
+                .dstAccessMask = VK_ACCESS_SHADER_READ_BIT |
+                                 VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
+                                 VK_ACCESS_TRANSFER_READ_BIT,
+                .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+                .newLayout = VK_IMAGE_LAYOUT_GENERAL,
+                .image = dst_vk_image,
+                .subresourceRange = {aspect_mask, 0, VK_REMAINING_MIP_LEVELS, 0,
+                                     VK_REMAINING_ARRAY_LAYERS},
+            };
+            cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT,
+                                   VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, post_blit_barrier);
+        });
     } else {
         // Regular non-MSAA upload
         scheduler->RequestOutsideRenderPassOperationContext();
@@ -1604,7 +1644,8 @@ void Image::UploadMemory(VkBuffer buffer, VkDeviceSize offset,
         const bool is_initialized = std::exchange(initialized, true);
         scheduler->Record([src_buffer, vk_image, vk_aspect_mask, is_initialized,
                            vk_copies](vk::CommandBuffer cmdbuf) {
-            CopyBufferToImage(cmdbuf, src_buffer, vk_image, vk_aspect_mask, is_initialized, vk_copies);
+            CopyBufferToImage(cmdbuf, src_buffer, vk_image, vk_aspect_mask, is_initialized,
+                              vk_copies);
         });
     }
 }
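The rewritten MSAA upload stages the data in a single-sample temporary image, then blits into the multisampled target between explicit transfer barriers. One caveat worth flagging: as far as I can tell from the spec, vkCmdBlitImage requires both source and destination to be single-sample images, so validation layers may reject the blit into the MSAA target; the removed code used the compute-based MSAACopyPass for exactly this step. A hedged sketch of how the path could be guarded, with illustrative names only:

```cpp
// Sketch: pick an upload strategy by sample count. All names are invented; the
// comment about vkCmdBlitImage reflects my reading of the Vulkan valid-usage rules
// (single-sample source and destination required).
#include <vulkan/vulkan.h>

enum class UploadPath { DirectCopy, ComputeCopyPass, BlitFromTemp };

UploadPath ChooseUploadPath(VkSampleCountFlagBits dst_samples, bool have_compute_pass) {
    if (dst_samples == VK_SAMPLE_COUNT_1_BIT) {
        return UploadPath::DirectCopy;      // plain vkCmdCopyBufferToImage
    }
    if (have_compute_pass) {
        return UploadPath::ComputeCopyPass; // per-sample compute write, as the old code did
    }
    return UploadPath::BlitFromTemp;        // the patch's fallback; validation may flag it
}
```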
@@ -1636,102 +1677,100 @@ void Image::DownloadMemory(std::span<VkBuffer> buffers_span, std::span<VkDeviceSize> offsets_span,
     }
 
     // RE-USE MSAA UPLOAD CODE BUT NOW FOR DOWNLOAD
-    if (info.num_samples > 1 && runtime->msaa_copy_pass) {
-        // TODO: Depth/stencil formats need special handling
-        if (aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT) {
-            ImageInfo temp_info = info;
-            temp_info.num_samples = 1;
+    if (info.num_samples > 1) {
+        ImageInfo temp_info = info;
+        temp_info.num_samples = 1;
 
-            VkImageCreateInfo image_ci = MakeImageCreateInfo(runtime->device, temp_info);
-            image_ci.usage = original_image.UsageFlags();
-            vk::Image temp_image = runtime->memory_allocator.CreateImage(image_ci);
+        VkImageCreateInfo temp_ci = MakeImageCreateInfo(runtime->device, temp_info);
+        temp_ci.usage |= VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT;
+        vk::Image temp_image = runtime->memory_allocator.CreateImage(temp_ci);
 
-            Image temp_wrapper(*runtime, temp_info, 0, 0);
-            temp_wrapper.original_image = std::move(temp_image);
-            temp_wrapper.current_image = &Image::original_image;
-            temp_wrapper.aspect_mask = aspect_mask;
-            temp_wrapper.initialized = true;
+        boost::container::small_vector<VkImageBlit, 8> blit_regions;
+        blit_regions.reserve(copies.size());
+        for (const auto& copy : copies) {
+            blit_regions.emplace_back(VkImageBlit{
+                .srcSubresource = MakeImageSubresourceLayers(copy.image_subresource, aspect_mask),
+                .srcOffsets = {{copy.image_offset.x, copy.image_offset.y, copy.image_offset.z},
+                               {static_cast<s32>(copy.image_offset.x + copy.image_extent.width),
+                                static_cast<s32>(copy.image_offset.y + copy.image_extent.height),
+                                static_cast<s32>(copy.image_offset.z + copy.image_extent.depth)}},
+                .dstSubresource = MakeImageSubresourceLayers(copy.image_subresource, aspect_mask),
+                .dstOffsets = {{copy.image_offset.x, copy.image_offset.y, copy.image_offset.z},
+                               {static_cast<s32>(copy.image_offset.x + copy.image_extent.width),
+                                static_cast<s32>(copy.image_offset.y + copy.image_extent.height),
+                                static_cast<s32>(copy.image_offset.z + copy.image_extent.depth)}},
+            });
+        }
 
-            std::vector<VideoCommon::ImageCopy> image_copies;
-            for (const auto& copy : copies) {
-                VideoCommon::ImageCopy image_copy;
-                image_copy.src_offset = copy.image_offset;
-                image_copy.dst_offset = copy.image_offset;
-                image_copy.src_subresource = copy.image_subresource;
-                image_copy.dst_subresource = copy.image_subresource;
-                image_copy.extent = copy.image_extent;
-                image_copies.push_back(image_copy);
-            }
+        boost::container::small_vector<VkBuffer, 8> buffers_vector{};
+        boost::container::small_vector<boost::container::small_vector<VkBufferImageCopy>, 8>
+            vk_copies;
+        for (size_t index = 0; index < buffers_span.size(); index++) {
+            buffers_vector.emplace_back(buffers_span[index]);
+            vk_copies.emplace_back(
+                TransformBufferImageCopies(copies, offsets_span[index], aspect_mask));
+        }
 
-            runtime->msaa_copy_pass->CopyImage(temp_wrapper, *this, image_copies, true);
+        const VkImage src_vk_image = Handle();
 
-            boost::container::small_vector<VkBuffer, 8> buffers_vector{};
-            boost::container::small_vector<boost::container::small_vector<VkBufferImageCopy>, 8>
-                vk_copies;
-            for (size_t index = 0; index < buffers_span.size(); index++) {
-                buffers_vector.emplace_back(buffers_span[index]);
-                vk_copies.emplace_back(
-                    TransformBufferImageCopies(copies, offsets_span[index], aspect_mask));
-            }
-
-            scheduler->RequestOutsideRenderPassOperationContext();
-            scheduler->Record([buffers = std::move(buffers_vector), image = *temp_wrapper.original_image,
-                               aspect_mask_ = aspect_mask, vk_copies](vk::CommandBuffer cmdbuf) {
-                const VkImageMemoryBarrier read_barrier{
+        scheduler->RequestOutsideRenderPassOperationContext();
+        scheduler->Record([=, temp_image = std::move(temp_image),
+                           buffers = std::move(buffers_vector)](vk::CommandBuffer cmdbuf) {
+            const std::array pre_blit_barriers{
+                VkImageMemoryBarrier{
                     .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
-                    .pNext = nullptr,
-                    .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
+                    .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
                     .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
                     .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
                     .newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
-                    .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
-                    .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
-                    .image = image,
-                    .subresourceRange{
-                        .aspectMask = aspect_mask_,
-                        .baseMipLevel = 0,
-                        .levelCount = VK_REMAINING_MIP_LEVELS,
-                        .baseArrayLayer = 0,
-                        .layerCount = VK_REMAINING_ARRAY_LAYERS,
-                    },
-                };
-                cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
-                                       0, read_barrier);
-
-                for (size_t index = 0; index < buffers.size(); index++) {
-                    cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, buffers[index],
-                                             vk_copies[index]);
-                }
-
-                const VkMemoryBarrier memory_write_barrier{
-                    .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
-                    .pNext = nullptr,
-                    .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
-                    .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
-                };
-                const VkImageMemoryBarrier image_write_barrier{
+                    .image = src_vk_image,
+                    .subresourceRange = {aspect_mask, 0, VK_REMAINING_MIP_LEVELS, 0,
+                                         VK_REMAINING_ARRAY_LAYERS},
+                },
+                VkImageMemoryBarrier{
                     .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
-                    .pNext = nullptr,
                     .srcAccessMask = 0,
-                    .dstAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
-                    .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
-                    .newLayout = VK_IMAGE_LAYOUT_GENERAL,
-                    .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
-                    .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
-                    .image = image,
-                    .subresourceRange{
-                        .aspectMask = aspect_mask_,
-                        .baseMipLevel = 0,
-                        .levelCount = VK_REMAINING_MIP_LEVELS,
-                        .baseArrayLayer = 0,
-                        .layerCount = VK_REMAINING_ARRAY_LAYERS,
-                    },
-                };
-                cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
-                                       0, memory_write_barrier, nullptr, image_write_barrier);
-            });
-            return;
-        }
+                    .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
+                    .oldLayout = VK_IMAGE_LAYOUT_UNDEFINED,
+                    .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+                    .image = *temp_image,
+                    .subresourceRange = {aspect_mask, 0, VK_REMAINING_MIP_LEVELS, 0,
+                                         VK_REMAINING_ARRAY_LAYERS},
+                }};
+            cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+                                   VK_PIPELINE_STAGE_TRANSFER_BIT, 0, {}, {}, pre_blit_barriers);
+
+            cmdbuf.BlitImage(src_vk_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, *temp_image,
+                             VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, blit_regions,
+                             VK_FILTER_NEAREST);
+
+            const VkImageMemoryBarrier post_blit_barrier{
+                .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+                .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
+                .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
+                .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+                .newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
+                .image = *temp_image,
+                .subresourceRange = {aspect_mask, 0, VK_REMAINING_MIP_LEVELS, 0,
+                                     VK_REMAINING_ARRAY_LAYERS},
+            };
+            cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
+                                   0, post_blit_barrier);
+
+            for (size_t index = 0; index < buffers.size(); index++) {
+                cmdbuf.CopyImageToBuffer(*temp_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
+                                         buffers[index], vk_copies[index]);
+            }
+
+            const VkMemoryBarrier memory_write_barrier{
+                .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
+                .pNext = nullptr,
+                .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
+                .dstAccessMask = VK_ACCESS_HOST_READ_BIT,
+            };
+            cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0,
+                                   memory_write_barrier, {}, {});
+        });
     } else {
         boost::container::small_vector<VkBuffer, 8> buffers_vector{};
         boost::container::small_vector<boost::container::small_vector<VkBufferImageCopy>, 8>
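The download path is the mirror image: blit the multisampled source into a single-sample temporary, transition it to TRANSFER_SRC, then vkCmdCopyImageToBuffer into each staging buffer. The same sample-count caveat applies to the MSAA-to-1x blit. A spec-conformant alternative for that step is vkCmdResolveImage, sketched below; note that resolve averages samples (and is defined for color only), while the old compute pass copied individual samples, so this is an approximation rather than a drop-in replacement:

```cpp
// Hedged sketch of a resolve-based MSAA-to-single-sample step. The wrapper
// function and its parameters are invented for illustration.
#include <vulkan/vulkan.h>

void ResolveToTemp(VkCommandBuffer cmdbuf, VkImage msaa_src, VkImage temp_dst,
                   VkImageAspectFlags aspect, uint32_t width, uint32_t height) {
    const VkImageResolve region{
        .srcSubresource = {aspect, 0, 0, 1}, // mip 0, first layer
        .srcOffset = {0, 0, 0},
        .dstSubresource = {aspect, 0, 0, 1},
        .dstOffset = {0, 0, 0},
        .extent = {width, height, 1},
    };
    vkCmdResolveImage(cmdbuf, msaa_src, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, temp_dst,
                      VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &region);
}
```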
@@ -1762,12 +1801,12 @@ void Image::DownloadMemory(std::span<VkBuffer> buffers_span, std::span<VkDeviceSize> offsets_span,
                 .layerCount = VK_REMAINING_ARRAY_LAYERS,
             },
         };
-        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
-                               0, read_barrier);
+        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+                               VK_PIPELINE_STAGE_TRANSFER_BIT, 0, read_barrier);
 
         for (size_t index = 0; index < buffers.size(); index++) {
-            cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, buffers[index],
-                                     vk_copies[index]);
+            cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
+                                     buffers[index], vk_copies[index]);
         }
 
         const VkMemoryBarrier memory_write_barrier{
@@ -1794,8 +1833,9 @@ void Image::DownloadMemory(std::span<VkBuffer> buffers_span, std::span<VkDeviceSize> offsets_span,
                 .layerCount = VK_REMAINING_ARRAY_LAYERS,
             },
         };
-        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
-                               0, memory_write_barrier, nullptr, image_write_barrier);
+        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT,
+                               VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, memory_write_barrier,
+                               nullptr, image_write_barrier);
     });
 }
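Also in the new download path, the final barrier was narrowed from a blanket MEMORY_WRITE to MEMORY_READ|MEMORY_WRITE at ALL_COMMANDS down to TRANSFER_WRITE to HOST_READ at the HOST stage, which is what a CPU readback of the staging buffer actually requires. The minimal form of that transfer-to-host barrier as a raw Vulkan call; the wrapper function is hypothetical:

```cpp
#include <vulkan/vulkan.h>

// Make transfer writes (e.g. vkCmdCopyImageToBuffer into a staging buffer) visible
// to host reads performed after the fence/semaphore wait. Hypothetical wrapper.
void TransferToHostBarrier(VkCommandBuffer cmdbuf) {
    const VkMemoryBarrier barrier{
        .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
        .pNext = nullptr,
        .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
        .dstAccessMask = VK_ACCESS_HOST_READ_BIT,
    };
    vkCmdPipelineBarrier(cmdbuf, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0,
                         1, &barrier, 0, nullptr, 0, nullptr);
}
```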
@@ -1991,6 +2031,11 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
             std::ranges::transform(swizzle, swizzle.begin(), ConvertGreenRed);
         }
     }
+
+    if ((image.UsageFlags() & VK_IMAGE_USAGE_STORAGE_BIT) != 0) {
+        swizzle = {SwizzleSource::R, SwizzleSource::G, SwizzleSource::B, SwizzleSource::A};
+    }
+
     const auto format_info = MaxwellToVK::SurfaceFormat(*device, FormatType::Optimal, true, format);
     const VkImageViewUsageCreateInfo image_view_usage{
         .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO,
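The final hunk forces an identity component mapping whenever the underlying image can be bound as a storage image ({R, G, B, A} is the identity swizzle here). If I read the Vulkan rules correctly, image views bound as STORAGE_IMAGE descriptors must be created with the identity swizzle, so inheriting a guest-requested swizzle would trip validation. Roughly the equivalent decision at view-creation time, as a hypothetical snippet:

```cpp
#include <vulkan/vulkan.h>

// If the image can be used as a storage image, drop any guest-requested swizzle
// and fall back to the identity mapping before creating the view. Hypothetical
// helper, not part of this patch.
VkComponentMapping PickComponentMapping(VkImageUsageFlags usage,
                                        VkComponentMapping requested) {
    if ((usage & VK_IMAGE_USAGE_STORAGE_BIT) != 0) {
        return VkComponentMapping{
            VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY,
            VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY};
    }
    return requested;
}
```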