Compare commits

...

3 commits

7672704731 vk_texture_cache: Rewrite MSAA handling with blits and proper barriers
Refactor MSAA texture upload/download to use an intermediate blit with correct Vulkan barriers and layout handling.
2025-07-26 23:21:10 +02:00
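The rewritten upload path stages the data in a single-sample image and brackets the transfer into the target with explicit layout transitions. Below is a minimal, hypothetical sketch of that barrier pattern (the helper name and parameters are illustrative, not the emulator's API; the actual implementation is in the vk_texture_cache.cpp hunks further down):

#include <vulkan/vulkan.h>

// Sketch only: transition the destination into TRANSFER_DST_OPTIMAL, record the
// transfer, then return it to GENERAL so later shader reads and transfers see
// the written data.
void RecordTransferWithBarriers(VkCommandBuffer cmdbuf, VkImage dst,
                                VkImageAspectFlags aspect, bool initialized) {
    const VkImageSubresourceRange range{aspect, 0, VK_REMAINING_MIP_LEVELS, 0,
                                        VK_REMAINING_ARRAY_LAYERS};
    // UNDEFINED discards previous contents, which is only safe on first use.
    const VkAccessFlags dst_prev_access =
        initialized ? VK_ACCESS_SHADER_READ_BIT : VkAccessFlags{0};
    const VkImageMemoryBarrier pre{
        .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
        .srcAccessMask = dst_prev_access,
        .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
        .oldLayout = initialized ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_UNDEFINED,
        .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
        .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
        .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
        .image = dst,
        .subresourceRange = range,
    };
    vkCmdPipelineBarrier(cmdbuf, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
                         VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0, nullptr, 1, &pre);

    // ... vkCmdCopyBufferToImage / vkCmdBlitImage into dst is recorded here ...

    const VkImageMemoryBarrier post{
        .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
        .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
        .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_TRANSFER_READ_BIT,
        .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
        .newLayout = VK_IMAGE_LAYOUT_GENERAL,
        .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
        .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
        .image = dst,
        .subresourceRange = range,
    };
    vkCmdPipelineBarrier(cmdbuf, VK_PIPELINE_STAGE_TRANSFER_BIT,
                         VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, 0, nullptr, 0, nullptr, 1, &post);
}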
fac153509a
[externals] Revert xbyak to v7.22 (#137)
Signed-off-by: crueter <crueter@eden-emu.dev>
Reviewed-on: eden-emu/eden#137
2025-07-26 23:06:14 +02:00
b1f3b568fb
[vk] Fix query cache leak on missed sync (#131)
Provided by community member elementary-particle; submitted as a PR by MaranBR.

Fixed issues:

1. The query cache did not put host queries into the unregister queue when they were not synced. This blocks all the banks from being freed, causing a major leak (a minimal sketch of the corrected flow follows this list).

2. SamplesQueryCounter was not aligned with render pass begin/end, which creates invalid queries.

3. Conditional rendering was not turned on/off at the correct locations, making the conditional ranges invalid.
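For reference, a minimal sketch of the two control-flow fixes (simplified from the hunks below; names such as query_base, pending_unregister, and is_hcr_running come from the diff, the surrounding structure is illustrative only):

// Issue 1: an unsynced query is logged and queued for unregistration instead of
// hitting an assert, so its bank can still be freed later.
if (False(query_base->flags & QueryFlagBits::IsFinalValueSynced)) [[unlikely]] {
    LOG_ERROR(HW_GPU, "Query report value not synchronized. Consider increasing GPU accuracy.");
    if (!is_synced) [[likely]] {
        impl->pending_unregister.push_back(query_location);
    }
    return;
}

// Issue 3: conditional rendering is paused around the setup update and only
// resumed if it was actually running beforehand.
const bool was_running = impl->is_hcr_running;
if (was_running) {
    PauseHostConditionalRendering();
}
// ... update impl->hcr_setup here ...
impl->hcr_is_set = true;
impl->is_hcr_running = false;
if (was_running) {
    ResumeHostConditionalRendering();
}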

Co-authored-by: Maufeat <sahyno1996@gmail.com>
Reviewed-on: eden-emu/eden#131
Co-authored-by: Shinmegumi <shinmegumi@eden-emu.dev>
Co-committed-by: Shinmegumi <shinmegumi@eden-emu.dev>
2025-07-26 18:12:54 +02:00
6 changed files with 265 additions and 236 deletions

externals/xbyak vendored

@@ -1 +1 @@
Subproject commit 12557954c68a780563f9ab9fc24a3a156c96cba1
Subproject commit 4e44f4614ddbf038f2a6296f5b906d5c72691e0f

View file

@@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
@@ -115,8 +118,8 @@ struct QueryCacheBase<Traits>::QueryCacheBaseImpl {
QueryCacheBaseImpl(QueryCacheBase<Traits>* owner_, VideoCore::RasterizerInterface& rasterizer_,
Tegra::MaxwellDeviceMemoryManager& device_memory_, RuntimeType& runtime_,
Tegra::GPU& gpu_)
: owner{owner_}, rasterizer{rasterizer_},
device_memory{device_memory_}, runtime{runtime_}, gpu{gpu_} {
: owner{owner_}, rasterizer{rasterizer_}, device_memory{device_memory_}, runtime{runtime_},
gpu{gpu_} {
streamer_mask = 0;
for (size_t i = 0; i < static_cast<size_t>(QueryType::MaxQueryTypes); i++) {
streamers[i] = runtime.GetStreamerInterface(static_cast<QueryType>(i));
@@ -267,7 +270,11 @@ void QueryCacheBase<Traits>::CounterReport(GPUVAddr addr, QueryType counter_type
return;
}
if (False(query_base->flags & QueryFlagBits::IsFinalValueSynced)) [[unlikely]] {
ASSERT(false);
LOG_ERROR(HW_GPU,
"Query report value not synchronized. Consider increasing GPU accuracy.");
if (!is_synced) [[likely]] {
impl->pending_unregister.push_back(query_location);
}
return;
}
query_base->value += streamer->GetAmendValue();
@@ -370,8 +377,6 @@ void QueryCacheBase<Traits>::NotifySegment(bool resume) {
if (resume) {
impl->runtime.ResumeHostConditionalRendering();
} else {
CounterClose(VideoCommon::QueryType::ZPassPixelCount64);
CounterClose(VideoCommon::QueryType::StreamingByteCount);
impl->runtime.PauseHostConditionalRendering();
}
}

View file

@@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
@@ -1161,10 +1164,9 @@ struct QueryCacheRuntimeImpl {
StagingBufferPool& staging_pool_,
ComputePassDescriptorQueue& compute_pass_descriptor_queue,
DescriptorPool& descriptor_pool)
: rasterizer{rasterizer_}, device_memory{device_memory_},
buffer_cache{buffer_cache_}, device{device_},
memory_allocator{memory_allocator_}, scheduler{scheduler_}, staging_pool{staging_pool_},
guest_streamer(0, runtime),
: rasterizer{rasterizer_}, device_memory{device_memory_}, buffer_cache{buffer_cache_},
device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_},
staging_pool{staging_pool_}, guest_streamer(0, runtime),
sample_streamer(static_cast<size_t>(QueryType::ZPassPixelCount64), runtime, rasterizer,
device, scheduler, memory_allocator, compute_pass_descriptor_queue,
descriptor_pool),
@@ -1300,9 +1302,11 @@ void QueryCacheRuntime::HostConditionalRenderingCompareValueImpl(VideoCommon::Lo
if (impl->hcr_is_set) {
if (impl->hcr_setup.buffer == impl->hcr_buffer &&
impl->hcr_setup.offset == impl->hcr_offset) {
ResumeHostConditionalRendering();
return;
}
}
bool was_running = impl->is_hcr_running;
if (was_running) {
PauseHostConditionalRendering();
}
impl->hcr_setup.buffer = impl->hcr_buffer;
@ -1310,7 +1314,9 @@ void QueryCacheRuntime::HostConditionalRenderingCompareValueImpl(VideoCommon::Lo
impl->hcr_setup.flags = is_equal ? VK_CONDITIONAL_RENDERING_INVERTED_BIT_EXT : 0;
impl->hcr_is_set = true;
impl->is_hcr_running = false;
ResumeHostConditionalRendering();
if (was_running) {
ResumeHostConditionalRendering();
}
}
void QueryCacheRuntime::HostConditionalRenderingCompareBCImpl(DAddr address, bool is_equal) {
@@ -1325,7 +1331,8 @@ void QueryCacheRuntime::HostConditionalRenderingCompareBCImpl(DAddr address, boo
to_resolve = buffer->Handle();
to_resolve_offset = static_cast<u32>(offset);
}
if (impl->is_hcr_running) {
bool was_running = impl->is_hcr_running;
if (was_running) {
PauseHostConditionalRendering();
}
impl->conditional_resolve_pass->Resolve(*impl->hcr_resolve_buffer, to_resolve,
@@ -1335,7 +1342,9 @@ void QueryCacheRuntime::HostConditionalRenderingCompareBCImpl(DAddr address, boo
impl->hcr_setup.flags = is_equal ? 0 : VK_CONDITIONAL_RENDERING_INVERTED_BIT_EXT;
impl->hcr_is_set = true;
impl->is_hcr_running = false;
ResumeHostConditionalRendering();
if (was_running) {
ResumeHostConditionalRendering();
}
}
bool QueryCacheRuntime::HostConditionalRenderingCompareValue(VideoCommon::LookupData object_1,

View file

@@ -217,8 +217,6 @@ void RasterizerVulkan::PrepareDraw(bool is_indexed, Func&& draw_func) {
FlushWork();
gpu_memory->FlushCaching();
query_cache.NotifySegment(true);
GraphicsPipeline* const pipeline{pipeline_cache.CurrentGraphicsPipeline()};
if (!pipeline) {
return;
@@ -232,9 +230,13 @@ void RasterizerVulkan::PrepareDraw(bool is_indexed, Func&& draw_func) {
UpdateDynamicStates();
HandleTransformFeedback();
query_cache.NotifySegment(true);
query_cache.CounterEnable(VideoCommon::QueryType::ZPassPixelCount64,
maxwell3d->regs.zpass_pixel_count_enable);
draw_func();
query_cache.CounterEnable(VideoCommon::QueryType::StreamingByteCount, false);
}
void RasterizerVulkan::Draw(bool is_indexed, u32 instance_count) {
@@ -311,8 +313,6 @@ void RasterizerVulkan::DrawTexture() {
};
FlushWork();
query_cache.NotifySegment(true);
std::scoped_lock l{texture_cache.mutex};
texture_cache.SynchronizeGraphicsDescriptors();
texture_cache.UpdateRenderTargets(false);
@@ -359,10 +359,6 @@ void RasterizerVulkan::Clear(u32 layer_count) {
FlushWork();
gpu_memory->FlushCaching();
query_cache.NotifySegment(true);
query_cache.CounterEnable(VideoCommon::QueryType::ZPassPixelCount64,
maxwell3d->regs.zpass_pixel_count_enable);
auto& regs = maxwell3d->regs;
const bool use_color = regs.clear_surface.R || regs.clear_surface.G || regs.clear_surface.B ||
regs.clear_surface.A;
@ -378,6 +374,10 @@ void RasterizerVulkan::Clear(u32 layer_count) {
const VkExtent2D render_area = framebuffer->RenderArea();
scheduler.RequestRenderpass(framebuffer);
query_cache.NotifySegment(true);
query_cache.CounterEnable(VideoCommon::QueryType::ZPassPixelCount64,
maxwell3d->regs.zpass_pixel_count_enable);
u32 up_scale = 1;
u32 down_shift = 0;
if (texture_cache.IsRescaling()) {
@@ -832,6 +832,7 @@ std::optional<FramebufferTextureInfo> RasterizerVulkan::AccelerateDisplay(
if (!image_view) {
return {};
}
query_cache.NotifySegment(false);
const auto& resolution = Settings::values.resolution_info;
@@ -943,22 +944,20 @@ void RasterizerVulkan::UpdateDynamicStates() {
UpdateDepthBounds(regs);
UpdateStencilFaces(regs);
UpdateLineWidth(regs);
// TODO: updating line stipple causes the cmdbuf to die
// UpdateLineStipple(regs);
const u8 dynamic_state = Settings::values.dyna_state.GetValue();
auto features = DynamicFeatures{
.has_extended_dynamic_state = device.IsExtExtendedDynamicStateSupported()
&& dynamic_state > 0,
.has_extended_dynamic_state_2 = device.IsExtExtendedDynamicState2Supported()
&& dynamic_state > 1,
.has_extended_dynamic_state_2_extra = device.IsExtExtendedDynamicState2ExtrasSupported()
&& dynamic_state > 1,
.has_extended_dynamic_state_3_blend = device.IsExtExtendedDynamicState3BlendingSupported()
&& dynamic_state > 2,
.has_extended_dynamic_state_3_enables = device.IsExtExtendedDynamicState3EnablesSupported()
&& dynamic_state > 2,
.has_extended_dynamic_state =
device.IsExtExtendedDynamicStateSupported() && dynamic_state > 0,
.has_extended_dynamic_state_2 =
device.IsExtExtendedDynamicState2Supported() && dynamic_state > 1,
.has_extended_dynamic_state_2_extra =
device.IsExtExtendedDynamicState2ExtrasSupported() && dynamic_state > 1,
.has_extended_dynamic_state_3_blend =
device.IsExtExtendedDynamicState3BlendingSupported() && dynamic_state > 2,
.has_extended_dynamic_state_3_enables =
device.IsExtExtendedDynamicState3EnablesSupported() && dynamic_state > 2,
.has_dynamic_vertex_input = device.IsExtVertexInputDynamicStateSupported(),
};
@@ -983,16 +982,12 @@ void RasterizerVulkan::UpdateDynamicStates() {
if (features.has_extended_dynamic_state_3_enables) {
using namespace Tegra::Engines;
if (device.GetDriverID() == VkDriverIdKHR::VK_DRIVER_ID_AMD_OPEN_SOURCE
|| device.GetDriverID() == VkDriverIdKHR::VK_DRIVER_ID_AMD_PROPRIETARY) {
struct In
{
if (device.GetDriverID() == VkDriverIdKHR::VK_DRIVER_ID_AMD_OPEN_SOURCE ||
device.GetDriverID() == VkDriverIdKHR::VK_DRIVER_ID_AMD_PROPRIETARY) {
struct In {
const Maxwell3D::Regs::VertexAttribute::Type d;
In(Maxwell3D::Regs::VertexAttribute::Type n)
: d(n)
{}
bool operator()(Maxwell3D::Regs::VertexAttribute n) const
{
In(Maxwell3D::Regs::VertexAttribute::Type n) : d(n) {}
bool operator()(Maxwell3D::Regs::VertexAttribute n) const {
return n.type == d;
}
};
@@ -1143,36 +1138,36 @@ void RasterizerVulkan::UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs) {
if (is_d24 && !device.SupportsD24DepthBuffer()) {
static constexpr const size_t length = sizeof(NEEDS_D24) / sizeof(NEEDS_D24[0]);
static constexpr const u64 *start = NEEDS_D24;
static constexpr const u64 *end = NEEDS_D24 + length;
static constexpr const u64* start = NEEDS_D24;
static constexpr const u64* end = NEEDS_D24 + length;
const u64 *it = std::find(start, end, program_id);
const u64* it = std::find(start, end, program_id);
if (it != end) {
// the base formulas can be obtained from here:
// https://docs.microsoft.com/en-us/windows/win32/direct3d11/d3d10-graphics-programming-guide-output-merger-stage-depth-bias
const double rescale_factor = static_cast<double>(1ULL << (32 - 24))
/ (static_cast<double>(0x1.ep+127));
const double rescale_factor =
static_cast<double>(1ULL << (32 - 24)) / (static_cast<double>(0x1.ep+127));
units = static_cast<float>(static_cast<double>(units) * rescale_factor);
}
}
scheduler.Record(
[constant = units, clamp = regs.depth_bias_clamp, factor = regs.slope_scale_depth_bias, this](
vk::CommandBuffer cmdbuf) {
if (device.IsExtDepthBiasControlSupported()) {
static VkDepthBiasRepresentationInfoEXT bias_info{
.sType = VK_STRUCTURE_TYPE_DEPTH_BIAS_REPRESENTATION_INFO_EXT,
.pNext = nullptr,
.depthBiasRepresentation = VK_DEPTH_BIAS_REPRESENTATION_LEAST_REPRESENTABLE_VALUE_FORCE_UNORM_EXT,
.depthBiasExact = VK_FALSE,
};
scheduler.Record([constant = units, clamp = regs.depth_bias_clamp,
factor = regs.slope_scale_depth_bias, this](vk::CommandBuffer cmdbuf) {
if (device.IsExtDepthBiasControlSupported()) {
static VkDepthBiasRepresentationInfoEXT bias_info{
.sType = VK_STRUCTURE_TYPE_DEPTH_BIAS_REPRESENTATION_INFO_EXT,
.pNext = nullptr,
.depthBiasRepresentation =
VK_DEPTH_BIAS_REPRESENTATION_LEAST_REPRESENTABLE_VALUE_FORCE_UNORM_EXT,
.depthBiasExact = VK_FALSE,
};
cmdbuf.SetDepthBias(constant, clamp, factor, &bias_info);
} else {
cmdbuf.SetDepthBias(constant, clamp, factor);
}
});
cmdbuf.SetDepthBias(constant, clamp, factor, &bias_info);
} else {
cmdbuf.SetDepthBias(constant, clamp, factor);
}
});
}
void RasterizerVulkan::UpdateBlendConstants(Tegra::Engines::Maxwell3D::Regs& regs) {
@@ -1354,8 +1349,7 @@ void RasterizerVulkan::UpdateRasterizerDiscardEnable(Tegra::Engines::Maxwell3D::
});
}
void RasterizerVulkan::UpdateConservativeRasterizationMode(Tegra::Engines::Maxwell3D::Regs& regs)
{
void RasterizerVulkan::UpdateConservativeRasterizationMode(Tegra::Engines::Maxwell3D::Regs& regs) {
if (!state_tracker.TouchConservativeRasterizationMode()) {
return;
}
@@ -1367,8 +1361,7 @@ void RasterizerVulkan::UpdateConservativeRasterizationMode(Tegra::Engines::Maxwe
});
}
void RasterizerVulkan::UpdateLineStippleEnable(Tegra::Engines::Maxwell3D::Regs& regs)
{
void RasterizerVulkan::UpdateLineStippleEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
if (!state_tracker.TouchLineStippleEnable()) {
return;
}
@@ -1378,19 +1371,7 @@ void RasterizerVulkan::UpdateLineStippleEnable(Tegra::Engines::Maxwell3D::Regs&
});
}
void RasterizerVulkan::UpdateLineStipple(Tegra::Engines::Maxwell3D::Regs& regs)
{
if (!state_tracker.TouchLineStipple()) {
return;
}
scheduler.Record([params = regs.line_stipple_params](vk::CommandBuffer cmdbuf) {
cmdbuf.SetLineStippleEXT(params.factor, static_cast<uint16_t>(params.pattern));
});
}
void RasterizerVulkan::UpdateLineRasterizationMode(Tegra::Engines::Maxwell3D::Regs& regs)
{
void RasterizerVulkan::UpdateLineRasterizationMode(Tegra::Engines::Maxwell3D::Regs& regs) {
// if (!state_tracker.TouchLi()) {
// return;
// }

View file

@@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@@ -257,16 +260,6 @@ u64 Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_se
void Scheduler::AllocateNewContext() {
// Enable counters once again. These are disabled when a command buffer is finished.
if (query_cache) {
#if ANDROID
if (Settings::IsGPULevelHigh()) {
// This is problematic on Android, disable on GPU Normal.
query_cache->NotifySegment(true);
}
#else
query_cache->NotifySegment(true);
#endif
}
}
void Scheduler::InvalidateState() {
@ -276,15 +269,7 @@ void Scheduler::InvalidateState() {
}
void Scheduler::EndPendingOperations() {
#if ANDROID
if (Settings::IsGPULevelHigh()) {
// This is problematic on Android, disable on GPU Normal.
// query_cache->DisableStreams();
}
#else
// query_cache->DisableStreams();
#endif
query_cache->NotifySegment(false);
query_cache->CounterReset(VideoCommon::QueryType::ZPassPixelCount64);
EndRenderPass();
}
@ -292,6 +277,10 @@ void Scheduler::EndRenderPass() {
if (!state.renderpass) {
return;
}
query_cache->CounterEnable(VideoCommon::QueryType::ZPassPixelCount64, false);
query_cache->NotifySegment(false);
Record([num_images = num_renderpass_images, images = renderpass_images,
ranges = renderpass_image_ranges](vk::CommandBuffer cmdbuf) {
std::array<VkImageMemoryBarrier, 9> barriers;

View file

@@ -1470,7 +1470,7 @@ void TextureCacheRuntime::CopyImageMSAA(Image& dst, Image& src,
if (msaa_copy_pass) {
return msaa_copy_pass->CopyImage(dst, src, copies, msaa_to_non_msaa);
}
UNIMPLEMENTED_MSG("Copying images with different samples is not supported.");
LOG_WARNING(Render_Vulkan, "Copying images with different samples is not supported.");
}
u64 TextureCacheRuntime::GetDeviceLocalMemory() const {
@@ -1546,54 +1546,94 @@ void Image::UploadMemory(VkBuffer buffer, VkDeviceSize offset,
// Handle MSAA upload if necessary
/* WARNING, TODO: This code uses some hacks, besides being fundamentally ugly
since tropic didn't want to touch it for a long time, so it needs a rewrite from someone better than me at vulkan.*/
if (info.num_samples > 1 && runtime->CanUploadMSAA()) {
// Only use MSAA copy pass for color formats
// TODO: Depth/stencil formats need special handling
if (aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT) {
// Create a temporary non-MSAA image to upload the data first
ImageInfo temp_info = info;
temp_info.num_samples = 1;
if (info.num_samples > 1) {
// Create a temporary non-MSAA image to upload the data first
ImageInfo temp_info = info;
temp_info.num_samples = 1;
// Create image with same usage flags as the target image to avoid validation errors
VkImageCreateInfo temp_ci = MakeImageCreateInfo(runtime->device, temp_info);
temp_ci.usage |= VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT;
vk::Image temp_image = runtime->memory_allocator.CreateImage(temp_ci);
// Create image with same usage flags as the target image to avoid validation errors
VkImageCreateInfo image_ci = MakeImageCreateInfo(runtime->device, temp_info);
image_ci.usage = original_image.UsageFlags();
vk::Image temp_image = runtime->memory_allocator.CreateImage(image_ci);
auto vk_buffer_image_copies = TransformBufferImageCopies(copies, offset, aspect_mask);
// Upload to the temporary non-MSAA image
scheduler->RequestOutsideRenderPassOperationContext();
auto vk_copies = TransformBufferImageCopies(copies, offset, aspect_mask);
const VkBuffer src_buffer = buffer;
const VkImage temp_vk_image = *temp_image;
const VkImageAspectFlags vk_aspect_mask = aspect_mask;
scheduler->Record([src_buffer, temp_vk_image, vk_aspect_mask, vk_copies](vk::CommandBuffer cmdbuf) {
CopyBufferToImage(cmdbuf, src_buffer, temp_vk_image, vk_aspect_mask, false, vk_copies);
boost::container::small_vector<VkImageBlit, 16> blit_regions;
blit_regions.reserve(copies.size());
for (const auto& copy : copies) {
blit_regions.emplace_back(VkImageBlit{
.srcSubresource = MakeImageSubresourceLayers(copy.image_subresource, aspect_mask),
.srcOffsets = {{copy.image_offset.x, copy.image_offset.y, copy.image_offset.z},
{static_cast<s32>(copy.image_offset.x + copy.image_extent.width),
static_cast<s32>(copy.image_offset.y + copy.image_extent.height),
static_cast<s32>(copy.image_offset.z + copy.image_extent.depth)}},
.dstSubresource = MakeImageSubresourceLayers(copy.image_subresource, aspect_mask),
.dstOffsets = {{copy.image_offset.x, copy.image_offset.y, copy.image_offset.z},
{static_cast<s32>(copy.image_offset.x + copy.image_extent.width),
static_cast<s32>(copy.image_offset.y + copy.image_extent.height),
static_cast<s32>(copy.image_offset.z + copy.image_extent.depth)}},
});
// Use MSAACopyPass to convert from non-MSAA to MSAA
std::vector<VideoCommon::ImageCopy> image_copies;
for (const auto& copy : copies) {
VideoCommon::ImageCopy image_copy;
image_copy.src_offset = {0, 0, 0}; // Use zero offset for source
image_copy.dst_offset = copy.image_offset;
image_copy.src_subresource = copy.image_subresource;
image_copy.dst_subresource = copy.image_subresource;
image_copy.extent = copy.image_extent;
image_copies.push_back(image_copy);
}
// wrapper image for the temporary image
Image temp_wrapper(*runtime, temp_info, 0, 0);
temp_wrapper.original_image = std::move(temp_image);
temp_wrapper.current_image = &Image::original_image;
temp_wrapper.aspect_mask = aspect_mask;
temp_wrapper.initialized = true;
// Use MSAACopyPass to convert from non-MSAA to MSAA
runtime->msaa_copy_pass->CopyImage(*this, temp_wrapper, image_copies, false);
std::exchange(initialized, true);
return;
}
// For depth/stencil formats, fall back to regular upload
const VkImage dst_vk_image = Handle();
const bool is_initialized = std::exchange(initialized, true);
scheduler->RequestOutsideRenderPassOperationContext();
scheduler->Record([=, temp_image = std::move(temp_image)](vk::CommandBuffer cmdbuf) {
// Upload to the temporary non-MSAA image
CopyBufferToImage(cmdbuf, buffer, *temp_image, aspect_mask, false,
vk_buffer_image_copies);
// Transition layouts for blit
const VkAccessFlags src_access_mask =
is_initialized
? (VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT)
: VK_ACCESS_NONE;
const std::array<VkImageMemoryBarrier, 2> pre_blit_barriers{
VkImageMemoryBarrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
.oldLayout = VK_IMAGE_LAYOUT_GENERAL,
.newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
.image = *temp_image,
.subresourceRange = {aspect_mask, 0, VK_REMAINING_MIP_LEVELS, 0,
VK_REMAINING_ARRAY_LAYERS},
},
VkImageMemoryBarrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.srcAccessMask = src_access_mask,
.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
.oldLayout =
is_initialized ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_UNDEFINED,
.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
.image = dst_vk_image,
.subresourceRange = {aspect_mask, 0, VK_REMAINING_MIP_LEVELS, 0,
VK_REMAINING_ARRAY_LAYERS},
}};
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
0, {}, {}, pre_blit_barriers);
// Blit from temporary to MSAA image
cmdbuf.BlitImage(*temp_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst_vk_image,
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, blit_regions,
VK_FILTER_NEAREST);
// Transition destination image to general layout
const VkImageMemoryBarrier post_blit_barrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_SHADER_READ_BIT |
VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
VK_ACCESS_TRANSFER_READ_BIT,
.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
.newLayout = VK_IMAGE_LAYOUT_GENERAL,
.image = dst_vk_image,
.subresourceRange = {aspect_mask, 0, VK_REMAINING_MIP_LEVELS, 0,
VK_REMAINING_ARRAY_LAYERS},
};
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, post_blit_barrier);
});
} else {
// Regular non-MSAA upload
scheduler->RequestOutsideRenderPassOperationContext();
@@ -1604,7 +1644,8 @@ void Image::UploadMemory(VkBuffer buffer, VkDeviceSize offset,
const bool is_initialized = std::exchange(initialized, true);
scheduler->Record([src_buffer, vk_image, vk_aspect_mask, is_initialized,
vk_copies](vk::CommandBuffer cmdbuf) {
CopyBufferToImage(cmdbuf, src_buffer, vk_image, vk_aspect_mask, is_initialized, vk_copies);
CopyBufferToImage(cmdbuf, src_buffer, vk_image, vk_aspect_mask, is_initialized,
vk_copies);
});
}
@@ -1636,102 +1677,100 @@ void Image::DownloadMemory(std::span<VkBuffer> buffers_span, std::span<size_t> o
}
// RE-USE MSAA UPLOAD CODE BUT NOW FOR DOWNLOAD
if (info.num_samples > 1 && runtime->msaa_copy_pass) {
// TODO: Depth/stencil formats need special handling
if (aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT) {
ImageInfo temp_info = info;
temp_info.num_samples = 1;
if (info.num_samples > 1) {
ImageInfo temp_info = info;
temp_info.num_samples = 1;
VkImageCreateInfo image_ci = MakeImageCreateInfo(runtime->device, temp_info);
image_ci.usage = original_image.UsageFlags();
vk::Image temp_image = runtime->memory_allocator.CreateImage(image_ci);
VkImageCreateInfo temp_ci = MakeImageCreateInfo(runtime->device, temp_info);
temp_ci.usage |= VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT;
vk::Image temp_image = runtime->memory_allocator.CreateImage(temp_ci);
Image temp_wrapper(*runtime, temp_info, 0, 0);
temp_wrapper.original_image = std::move(temp_image);
temp_wrapper.current_image = &Image::original_image;
temp_wrapper.aspect_mask = aspect_mask;
temp_wrapper.initialized = true;
boost::container::small_vector<VkImageBlit, 16> blit_regions;
blit_regions.reserve(copies.size());
for (const auto& copy : copies) {
blit_regions.emplace_back(VkImageBlit{
.srcSubresource = MakeImageSubresourceLayers(copy.image_subresource, aspect_mask),
.srcOffsets = {{copy.image_offset.x, copy.image_offset.y, copy.image_offset.z},
{static_cast<s32>(copy.image_offset.x + copy.image_extent.width),
static_cast<s32>(copy.image_offset.y + copy.image_extent.height),
static_cast<s32>(copy.image_offset.z + copy.image_extent.depth)}},
.dstSubresource = MakeImageSubresourceLayers(copy.image_subresource, aspect_mask),
.dstOffsets = {{copy.image_offset.x, copy.image_offset.y, copy.image_offset.z},
{static_cast<s32>(copy.image_offset.x + copy.image_extent.width),
static_cast<s32>(copy.image_offset.y + copy.image_extent.height),
static_cast<s32>(copy.image_offset.z + copy.image_extent.depth)}},
});
}
std::vector<VideoCommon::ImageCopy> image_copies;
for (const auto& copy : copies) {
VideoCommon::ImageCopy image_copy;
image_copy.src_offset = copy.image_offset;
image_copy.dst_offset = copy.image_offset;
image_copy.src_subresource = copy.image_subresource;
image_copy.dst_subresource = copy.image_subresource;
image_copy.extent = copy.image_extent;
image_copies.push_back(image_copy);
}
boost::container::small_vector<VkBuffer, 8> buffers_vector{};
boost::container::small_vector<boost::container::small_vector<VkBufferImageCopy, 16>, 8>
vk_copies;
for (size_t index = 0; index < buffers_span.size(); index++) {
buffers_vector.emplace_back(buffers_span[index]);
vk_copies.emplace_back(
TransformBufferImageCopies(copies, offsets_span[index], aspect_mask));
}
runtime->msaa_copy_pass->CopyImage(temp_wrapper, *this, image_copies, true);
const VkImage src_vk_image = Handle();
boost::container::small_vector<VkBuffer, 8> buffers_vector{};
boost::container::small_vector<boost::container::small_vector<VkBufferImageCopy, 16>, 8>
vk_copies;
for (size_t index = 0; index < buffers_span.size(); index++) {
buffers_vector.emplace_back(buffers_span[index]);
vk_copies.emplace_back(
TransformBufferImageCopies(copies, offsets_span[index], aspect_mask));
}
scheduler->RequestOutsideRenderPassOperationContext();
scheduler->Record([buffers = std::move(buffers_vector), image = *temp_wrapper.original_image,
aspect_mask_ = aspect_mask, vk_copies](vk::CommandBuffer cmdbuf) {
const VkImageMemoryBarrier read_barrier{
scheduler->RequestOutsideRenderPassOperationContext();
scheduler->Record([=, temp_image = std::move(temp_image),
buffers = std::move(buffers_vector)](vk::CommandBuffer cmdbuf) {
const std::array<VkImageMemoryBarrier, 2> pre_blit_barriers{
VkImageMemoryBarrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
.oldLayout = VK_IMAGE_LAYOUT_GENERAL,
.newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = image,
.subresourceRange{
.aspectMask = aspect_mask_,
.baseMipLevel = 0,
.levelCount = VK_REMAINING_MIP_LEVELS,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
};
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
0, read_barrier);
for (size_t index = 0; index < buffers.size(); index++) {
cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, buffers[index],
vk_copies[index]);
}
const VkMemoryBarrier memory_write_barrier{
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
};
const VkImageMemoryBarrier image_write_barrier{
.image = src_vk_image,
.subresourceRange = {aspect_mask, 0, VK_REMAINING_MIP_LEVELS, 0,
VK_REMAINING_ARRAY_LAYERS},
},
VkImageMemoryBarrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = 0,
.dstAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
.newLayout = VK_IMAGE_LAYOUT_GENERAL,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.image = image,
.subresourceRange{
.aspectMask = aspect_mask_,
.baseMipLevel = 0,
.levelCount = VK_REMAINING_MIP_LEVELS,
.baseArrayLayer = 0,
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
};
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
0, memory_write_barrier, nullptr, image_write_barrier);
});
return;
}
.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED,
.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
.image = *temp_image,
.subresourceRange = {aspect_mask, 0, VK_REMAINING_MIP_LEVELS, 0,
VK_REMAINING_ARRAY_LAYERS},
}};
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
VK_PIPELINE_STAGE_TRANSFER_BIT, 0, {}, {}, pre_blit_barriers);
cmdbuf.BlitImage(src_vk_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, *temp_image,
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, blit_regions,
VK_FILTER_NEAREST);
const VkImageMemoryBarrier post_blit_barrier{
.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
.newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
.image = *temp_image,
.subresourceRange = {aspect_mask, 0, VK_REMAINING_MIP_LEVELS, 0,
VK_REMAINING_ARRAY_LAYERS},
};
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
0, post_blit_barrier);
for (size_t index = 0; index < buffers.size(); index++) {
cmdbuf.CopyImageToBuffer(*temp_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
buffers[index], vk_copies[index]);
}
const VkMemoryBarrier memory_write_barrier{
.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_HOST_READ_BIT,
};
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0,
memory_write_barrier, {}, {});
});
} else {
boost::container::small_vector<VkBuffer, 8> buffers_vector{};
boost::container::small_vector<boost::container::small_vector<VkBufferImageCopy, 16>, 8>
@@ -1762,12 +1801,12 @@ void Image::DownloadMemory(std::span<VkBuffer> buffers_span, std::span<size_t> o
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
};
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
0, read_barrier);
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
VK_PIPELINE_STAGE_TRANSFER_BIT, 0, read_barrier);
for (size_t index = 0; index < buffers.size(); index++) {
cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, buffers[index],
vk_copies[index]);
cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
buffers[index], vk_copies[index]);
}
const VkMemoryBarrier memory_write_barrier{
@@ -1794,8 +1833,9 @@ void Image::DownloadMemory(std::span<VkBuffer> buffers_span, std::span<size_t> o
.layerCount = VK_REMAINING_ARRAY_LAYERS,
},
};
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
0, memory_write_barrier, nullptr, image_write_barrier);
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, memory_write_barrier,
nullptr, image_write_barrier);
});
}
@@ -1991,6 +2031,11 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
std::ranges::transform(swizzle, swizzle.begin(), ConvertGreenRed);
}
}
if ((image.UsageFlags() & VK_IMAGE_USAGE_STORAGE_BIT) != 0) {
swizzle = {SwizzleSource::R, SwizzleSource::G, SwizzleSource::B, SwizzleSource::A};
}
const auto format_info = MaxwellToVK::SurfaceFormat(*device, FormatType::Optimal, true, format);
const VkImageViewUsageCreateInfo image_view_usage{
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO,