Compare commits

...

3 commits

Author SHA1 Message Date
7672704731 vk_texture_cache: Rewrite MSAA handling with blits and proper barriers
Refactor the MSAA texture upload/download path to use an intermediate single-sample image and blit, with explicit Vulkan barriers and image-layout transitions.
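The pattern underneath: texels are first written into a single-sample staging image, and every transfer is bracketed by explicit image-memory barriers rather than relying on implicit ordering. A minimal sketch of that barrier discipline around a transfer write, written against the raw Vulkan C API; the function name and parameters here are illustrative, not the emulator's wrappers:

#include <vulkan/vulkan.h>

// Sketch only: move an image into TRANSFER_DST, let a transfer write it, then
// hand it back to shaders. 'cmdbuf' must already be in the recording state.
void RecordGuardedTransferWrite(VkCommandBuffer cmdbuf, VkImage image,
                                VkImageAspectFlags aspect, bool initialized) {
    const VkImageSubresourceRange range{aspect, 0, VK_REMAINING_MIP_LEVELS, 0,
                                        VK_REMAINING_ARRAY_LAYERS};
    const VkImageMemoryBarrier to_dst{
        .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
        .pNext = nullptr,
        // VK_ACCESS_NONE needs Vulkan 1.3 / synchronization2; 0 works otherwise.
        .srcAccessMask = initialized ? VK_ACCESS_MEMORY_WRITE_BIT : VK_ACCESS_NONE,
        .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
        // UNDEFINED discards previous contents; GENERAL keeps them if needed.
        .oldLayout = initialized ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_UNDEFINED,
        .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
        .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
        .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
        .image = image,
        .subresourceRange = range,
    };
    vkCmdPipelineBarrier(cmdbuf, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
                         VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, nullptr, 0, nullptr, 1, &to_dst);

    // ... the vkCmdCopyBufferToImage / vkCmdBlitImage targeting 'image' goes here ...

    const VkImageMemoryBarrier back_to_general{
        .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
        .pNext = nullptr,
        .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
        .dstAccessMask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_TRANSFER_READ_BIT,
        .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
        .newLayout = VK_IMAGE_LAYOUT_GENERAL,
        .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
        .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
        .image = image,
        .subresourceRange = range,
    };
    vkCmdPipelineBarrier(cmdbuf, VK_PIPELINE_STAGE_TRANSFER_BIT,
                         VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, 0, nullptr, 0, nullptr, 1,
                         &back_to_general);
}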
2025-07-26 23:21:10 +02:00
fac153509a
[externals] Revert xbyak to v7.22 (#137)
Signed-off-by: crueter <crueter@eden-emu.dev>
Reviewed-on: eden-emu/eden#137
2025-07-26 23:06:14 +02:00
b1f3b568fb
[vk] Fix query cache leak on missed sync (#131)
Provided by community member elementary-particle; submitted as a PR by MaranBR.

Fixed issues:

1. The query cache forgot to put host queries into the unregister queue when they were not synced. This blocked all of the banks from being freed, causing a major leak (see the sketch after this list).

2. SamplesQueryCounter was not aligned with render pass begin/end, which produced invalid queries.

3. Conditional rendering was not turned on/off at the correct locations, making it invalid.
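To make the first fix concrete, here is a minimal, self-contained sketch of the leak mechanism; QueryLocation, pending_unregister, and the flags are hypothetical stand-ins for the cache's real types, not the emulator's actual API:

#include <cstdint>
#include <vector>

// Hypothetical stand-in for the cache's slot identifier.
struct QueryLocation {
    uint32_t bank;
    uint32_t index;
};

struct QueryCacheSketch {
    std::vector<QueryLocation> pending_unregister;

    void CounterReport(QueryLocation location, bool final_value_synced, bool is_synced) {
        if (!final_value_synced) {
            // Pre-fix behavior: an early return here left the query registered
            // forever. A bank can only be recycled once every query in it is
            // unregistered, so one stuck slot pinned the whole bank chain.
            if (!is_synced) {
                pending_unregister.push_back(location); // the fix
            }
            return;
        }
        // Normal path: publish the final value, then queue the slot for reuse.
        pending_unregister.push_back(location);
    }
};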

Co-authored-by: Maufeat <sahyno1996@gmail.com>
Reviewed-on: eden-emu/eden#131
Co-authored-by: Shinmegumi <shinmegumi@eden-emu.dev>
Co-committed-by: Shinmegumi <shinmegumi@eden-emu.dev>
2025-07-26 18:12:54 +02:00
6 changed files with 265 additions and 236 deletions

externals/xbyak vendored

@@ -1 +1 @@
-Subproject commit 12557954c68a780563f9ab9fc24a3a156c96cba1
+Subproject commit 4e44f4614ddbf038f2a6296f5b906d5c72691e0f

View file

@@ -1,3 +1,6 @@
+// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
 // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
 // SPDX-License-Identifier: GPL-3.0-or-later
@@ -115,8 +118,8 @@ struct QueryCacheBase<Traits>::QueryCacheBaseImpl {
     QueryCacheBaseImpl(QueryCacheBase<Traits>* owner_, VideoCore::RasterizerInterface& rasterizer_,
                        Tegra::MaxwellDeviceMemoryManager& device_memory_, RuntimeType& runtime_,
                        Tegra::GPU& gpu_)
-        : owner{owner_}, rasterizer{rasterizer_},
-          device_memory{device_memory_}, runtime{runtime_}, gpu{gpu_} {
+        : owner{owner_}, rasterizer{rasterizer_}, device_memory{device_memory_}, runtime{runtime_},
+          gpu{gpu_} {
         streamer_mask = 0;
         for (size_t i = 0; i < static_cast<size_t>(QueryType::MaxQueryTypes); i++) {
             streamers[i] = runtime.GetStreamerInterface(static_cast<QueryType>(i));
@@ -267,7 +270,11 @@ void QueryCacheBase<Traits>::CounterReport(GPUVAddr addr, QueryType counter_type
         return;
     }
     if (False(query_base->flags & QueryFlagBits::IsFinalValueSynced)) [[unlikely]] {
-        ASSERT(false);
+        LOG_ERROR(HW_GPU,
+                  "Query report value not synchronized. Consider increasing GPU accuracy.");
+        if (!is_synced) [[likely]] {
+            impl->pending_unregister.push_back(query_location);
+        }
         return;
     }
     query_base->value += streamer->GetAmendValue();
@@ -370,8 +377,6 @@ void QueryCacheBase<Traits>::NotifySegment(bool resume) {
     if (resume) {
         impl->runtime.ResumeHostConditionalRendering();
     } else {
-        CounterClose(VideoCommon::QueryType::ZPassPixelCount64);
-        CounterClose(VideoCommon::QueryType::StreamingByteCount);
         impl->runtime.PauseHostConditionalRendering();
     }
 }

View file

@@ -1,3 +1,6 @@
+// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
 // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
 // SPDX-License-Identifier: GPL-3.0-or-later
@@ -1161,10 +1164,9 @@ struct QueryCacheRuntimeImpl {
                          StagingBufferPool& staging_pool_,
                          ComputePassDescriptorQueue& compute_pass_descriptor_queue,
                          DescriptorPool& descriptor_pool)
-        : rasterizer{rasterizer_}, device_memory{device_memory_},
-          buffer_cache{buffer_cache_}, device{device_},
-          memory_allocator{memory_allocator_}, scheduler{scheduler_}, staging_pool{staging_pool_},
-          guest_streamer(0, runtime),
+        : rasterizer{rasterizer_}, device_memory{device_memory_}, buffer_cache{buffer_cache_},
+          device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_},
+          staging_pool{staging_pool_}, guest_streamer(0, runtime),
           sample_streamer(static_cast<size_t>(QueryType::ZPassPixelCount64), runtime, rasterizer,
                           device, scheduler, memory_allocator, compute_pass_descriptor_queue,
                           descriptor_pool),
@@ -1300,9 +1302,11 @@ void QueryCacheRuntime::HostConditionalRenderingCompareValueImpl(VideoCommon::Lo
     if (impl->hcr_is_set) {
         if (impl->hcr_setup.buffer == impl->hcr_buffer &&
            impl->hcr_setup.offset == impl->hcr_offset) {
-            ResumeHostConditionalRendering();
             return;
         }
+    }
+    bool was_running = impl->is_hcr_running;
+    if (was_running) {
         PauseHostConditionalRendering();
     }
     impl->hcr_setup.buffer = impl->hcr_buffer;
@@ -1310,7 +1314,9 @@ void QueryCacheRuntime::HostConditionalRenderingCompareValueImpl(VideoCommon::Lo
     impl->hcr_setup.flags = is_equal ? VK_CONDITIONAL_RENDERING_INVERTED_BIT_EXT : 0;
     impl->hcr_is_set = true;
     impl->is_hcr_running = false;
-    ResumeHostConditionalRendering();
+    if (was_running) {
+        ResumeHostConditionalRendering();
+    }
 }
 
 void QueryCacheRuntime::HostConditionalRenderingCompareBCImpl(DAddr address, bool is_equal) {
@@ -1325,7 +1331,8 @@ void QueryCacheRuntime::HostConditionalRenderingCompareBCImpl(DAddr address, boo
         to_resolve = buffer->Handle();
         to_resolve_offset = static_cast<u32>(offset);
     }
-    if (impl->is_hcr_running) {
+    bool was_running = impl->is_hcr_running;
+    if (was_running) {
         PauseHostConditionalRendering();
     }
     impl->conditional_resolve_pass->Resolve(*impl->hcr_resolve_buffer, to_resolve,
@@ -1335,7 +1342,9 @@ void QueryCacheRuntime::HostConditionalRenderingCompareBCImpl(DAddr address, boo
     impl->hcr_setup.flags = is_equal ? 0 : VK_CONDITIONAL_RENDERING_INVERTED_BIT_EXT;
     impl->hcr_is_set = true;
     impl->is_hcr_running = false;
-    ResumeHostConditionalRendering();
+    if (was_running) {
+        ResumeHostConditionalRendering();
+    }
 }
 
 bool QueryCacheRuntime::HostConditionalRenderingCompareValue(VideoCommon::LookupData object_1,

View file

@@ -217,8 +217,6 @@ void RasterizerVulkan::PrepareDraw(bool is_indexed, Func&& draw_func) {
     FlushWork();
     gpu_memory->FlushCaching();
 
-    query_cache.NotifySegment(true);
-
     GraphicsPipeline* const pipeline{pipeline_cache.CurrentGraphicsPipeline()};
     if (!pipeline) {
         return;
@@ -232,9 +230,13 @@ void RasterizerVulkan::PrepareDraw(bool is_indexed, Func&& draw_func) {
     UpdateDynamicStates();
     HandleTransformFeedback();
 
+    query_cache.NotifySegment(true);
     query_cache.CounterEnable(VideoCommon::QueryType::ZPassPixelCount64,
                               maxwell3d->regs.zpass_pixel_count_enable);
+
     draw_func();
+
+    query_cache.CounterEnable(VideoCommon::QueryType::StreamingByteCount, false);
 }
 
 void RasterizerVulkan::Draw(bool is_indexed, u32 instance_count) {
@@ -311,8 +313,6 @@ void RasterizerVulkan::DrawTexture() {
     };
     FlushWork();
 
-    query_cache.NotifySegment(true);
-
     std::scoped_lock l{texture_cache.mutex};
     texture_cache.SynchronizeGraphicsDescriptors();
     texture_cache.UpdateRenderTargets(false);
@@ -359,10 +359,6 @@ void RasterizerVulkan::Clear(u32 layer_count) {
     FlushWork();
     gpu_memory->FlushCaching();
 
-    query_cache.NotifySegment(true);
-    query_cache.CounterEnable(VideoCommon::QueryType::ZPassPixelCount64,
-                              maxwell3d->regs.zpass_pixel_count_enable);
-
     auto& regs = maxwell3d->regs;
     const bool use_color = regs.clear_surface.R || regs.clear_surface.G || regs.clear_surface.B ||
                            regs.clear_surface.A;
@@ -378,6 +374,10 @@ void RasterizerVulkan::Clear(u32 layer_count) {
     const VkExtent2D render_area = framebuffer->RenderArea();
     scheduler.RequestRenderpass(framebuffer);
 
+    query_cache.NotifySegment(true);
+    query_cache.CounterEnable(VideoCommon::QueryType::ZPassPixelCount64,
+                              maxwell3d->regs.zpass_pixel_count_enable);
+
     u32 up_scale = 1;
     u32 down_shift = 0;
     if (texture_cache.IsRescaling()) {
@@ -832,6 +832,7 @@ std::optional<FramebufferTextureInfo> RasterizerVulkan::AccelerateDisplay(
     if (!image_view) {
         return {};
     }
+
     query_cache.NotifySegment(false);
 
     const auto& resolution = Settings::values.resolution_info;
@@ -943,22 +944,20 @@ void RasterizerVulkan::UpdateDynamicStates() {
     UpdateDepthBounds(regs);
     UpdateStencilFaces(regs);
     UpdateLineWidth(regs);
-    // TODO: updating line stipple causes the cmdbuf to die
-    // UpdateLineStipple(regs);
 
     const u8 dynamic_state = Settings::values.dyna_state.GetValue();
     auto features = DynamicFeatures{
-        .has_extended_dynamic_state = device.IsExtExtendedDynamicStateSupported()
-            && dynamic_state > 0,
-        .has_extended_dynamic_state_2 = device.IsExtExtendedDynamicState2Supported()
-            && dynamic_state > 1,
-        .has_extended_dynamic_state_2_extra = device.IsExtExtendedDynamicState2ExtrasSupported()
-            && dynamic_state > 1,
-        .has_extended_dynamic_state_3_blend = device.IsExtExtendedDynamicState3BlendingSupported()
-            && dynamic_state > 2,
-        .has_extended_dynamic_state_3_enables = device.IsExtExtendedDynamicState3EnablesSupported()
-            && dynamic_state > 2,
+        .has_extended_dynamic_state =
+            device.IsExtExtendedDynamicStateSupported() && dynamic_state > 0,
+        .has_extended_dynamic_state_2 =
+            device.IsExtExtendedDynamicState2Supported() && dynamic_state > 1,
+        .has_extended_dynamic_state_2_extra =
+            device.IsExtExtendedDynamicState2ExtrasSupported() && dynamic_state > 1,
+        .has_extended_dynamic_state_3_blend =
+            device.IsExtExtendedDynamicState3BlendingSupported() && dynamic_state > 2,
+        .has_extended_dynamic_state_3_enables =
+            device.IsExtExtendedDynamicState3EnablesSupported() && dynamic_state > 2,
         .has_dynamic_vertex_input = device.IsExtVertexInputDynamicStateSupported(),
     };
@@ -983,16 +982,12 @@ void RasterizerVulkan::UpdateDynamicStates() {
     if (features.has_extended_dynamic_state_3_enables) {
         using namespace Tegra::Engines;
-        if (device.GetDriverID() == VkDriverIdKHR::VK_DRIVER_ID_AMD_OPEN_SOURCE
-            || device.GetDriverID() == VkDriverIdKHR::VK_DRIVER_ID_AMD_PROPRIETARY) {
-            struct In
-            {
+        if (device.GetDriverID() == VkDriverIdKHR::VK_DRIVER_ID_AMD_OPEN_SOURCE ||
+            device.GetDriverID() == VkDriverIdKHR::VK_DRIVER_ID_AMD_PROPRIETARY) {
+            struct In {
                 const Maxwell3D::Regs::VertexAttribute::Type d;
-                In(Maxwell3D::Regs::VertexAttribute::Type n)
-                    : d(n)
-                {}
-                bool operator()(Maxwell3D::Regs::VertexAttribute n) const
-                {
+                In(Maxwell3D::Regs::VertexAttribute::Type n) : d(n) {}
+                bool operator()(Maxwell3D::Regs::VertexAttribute n) const {
                     return n.type == d;
                 }
             };
@@ -1143,36 +1138,36 @@ void RasterizerVulkan::UpdateDepthBias(Tegra::Engines::Maxwell3D::Regs& regs) {
     if (is_d24 && !device.SupportsD24DepthBuffer()) {
         static constexpr const size_t length = sizeof(NEEDS_D24) / sizeof(NEEDS_D24[0]);
-        static constexpr const u64 *start = NEEDS_D24;
-        static constexpr const u64 *end = NEEDS_D24 + length;
-        const u64 *it = std::find(start, end, program_id);
+        static constexpr const u64* start = NEEDS_D24;
+        static constexpr const u64* end = NEEDS_D24 + length;
+        const u64* it = std::find(start, end, program_id);
         if (it != end) {
             // the base formulas can be obtained from here:
             // https://docs.microsoft.com/en-us/windows/win32/direct3d11/d3d10-graphics-programming-guide-output-merger-stage-depth-bias
-            const double rescale_factor = static_cast<double>(1ULL << (32 - 24))
-                / (static_cast<double>(0x1.ep+127));
+            const double rescale_factor =
+                static_cast<double>(1ULL << (32 - 24)) / (static_cast<double>(0x1.ep+127));
             units = static_cast<float>(static_cast<double>(units) * rescale_factor);
         }
     }
-    scheduler.Record(
-        [constant = units, clamp = regs.depth_bias_clamp, factor = regs.slope_scale_depth_bias, this](
-            vk::CommandBuffer cmdbuf) {
-            if (device.IsExtDepthBiasControlSupported()) {
-                static VkDepthBiasRepresentationInfoEXT bias_info{
-                    .sType = VK_STRUCTURE_TYPE_DEPTH_BIAS_REPRESENTATION_INFO_EXT,
-                    .pNext = nullptr,
-                    .depthBiasRepresentation = VK_DEPTH_BIAS_REPRESENTATION_LEAST_REPRESENTABLE_VALUE_FORCE_UNORM_EXT,
-                    .depthBiasExact = VK_FALSE,
-                };
-                cmdbuf.SetDepthBias(constant, clamp, factor, &bias_info);
-            } else {
-                cmdbuf.SetDepthBias(constant, clamp, factor);
-            }
-        });
+    scheduler.Record([constant = units, clamp = regs.depth_bias_clamp,
+                      factor = regs.slope_scale_depth_bias, this](vk::CommandBuffer cmdbuf) {
+        if (device.IsExtDepthBiasControlSupported()) {
+            static VkDepthBiasRepresentationInfoEXT bias_info{
+                .sType = VK_STRUCTURE_TYPE_DEPTH_BIAS_REPRESENTATION_INFO_EXT,
+                .pNext = nullptr,
+                .depthBiasRepresentation =
+                    VK_DEPTH_BIAS_REPRESENTATION_LEAST_REPRESENTABLE_VALUE_FORCE_UNORM_EXT,
+                .depthBiasExact = VK_FALSE,
+            };
+            cmdbuf.SetDepthBias(constant, clamp, factor, &bias_info);
+        } else {
+            cmdbuf.SetDepthBias(constant, clamp, factor);
+        }
+    });
 }
 
 void RasterizerVulkan::UpdateBlendConstants(Tegra::Engines::Maxwell3D::Regs& regs) {
@@ -1354,8 +1349,7 @@ void RasterizerVulkan::UpdateRasterizerDiscardEnable(Tegra::Engines::Maxwell3D::
     });
 }
 
-void RasterizerVulkan::UpdateConservativeRasterizationMode(Tegra::Engines::Maxwell3D::Regs& regs)
-{
+void RasterizerVulkan::UpdateConservativeRasterizationMode(Tegra::Engines::Maxwell3D::Regs& regs) {
     if (!state_tracker.TouchConservativeRasterizationMode()) {
         return;
     }
@@ -1367,8 +1361,7 @@ void RasterizerVulkan::UpdateConservativeRasterizationMode(Tegra::Engines::Maxwe
     });
 }
 
-void RasterizerVulkan::UpdateLineStippleEnable(Tegra::Engines::Maxwell3D::Regs& regs)
-{
+void RasterizerVulkan::UpdateLineStippleEnable(Tegra::Engines::Maxwell3D::Regs& regs) {
     if (!state_tracker.TouchLineStippleEnable()) {
         return;
     }
@@ -1378,19 +1371,7 @@ void RasterizerVulkan::UpdateLineStippleEnable(Tegra::Engines::Maxwell3D::Regs&
     });
 }
 
-void RasterizerVulkan::UpdateLineStipple(Tegra::Engines::Maxwell3D::Regs& regs)
-{
-    if (!state_tracker.TouchLineStipple()) {
-        return;
-    }
-    scheduler.Record([params = regs.line_stipple_params](vk::CommandBuffer cmdbuf) {
-        cmdbuf.SetLineStippleEXT(params.factor, static_cast<uint16_t>(params.pattern));
-    });
-}
-
-void RasterizerVulkan::UpdateLineRasterizationMode(Tegra::Engines::Maxwell3D::Regs& regs)
-{
+void RasterizerVulkan::UpdateLineRasterizationMode(Tegra::Engines::Maxwell3D::Regs& regs) {
     // if (!state_tracker.TouchLi()) {
     //     return;
     // }

View file

@@ -1,3 +1,6 @@
+// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
 // SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
@@ -257,16 +260,6 @@ u64 Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_se
 
 void Scheduler::AllocateNewContext() {
     // Enable counters once again. These are disabled when a command buffer is finished.
-    if (query_cache) {
-#if ANDROID
-        if (Settings::IsGPULevelHigh()) {
-            // This is problematic on Android, disable on GPU Normal.
-            query_cache->NotifySegment(true);
-        }
-#else
-        query_cache->NotifySegment(true);
-#endif
-    }
 }
 
 void Scheduler::InvalidateState() {
@@ -276,15 +269,7 @@ void Scheduler::InvalidateState() {
 }
 
 void Scheduler::EndPendingOperations() {
-#if ANDROID
-    if (Settings::IsGPULevelHigh()) {
-        // This is problematic on Android, disable on GPU Normal.
-        // query_cache->DisableStreams();
-    }
-#else
-    // query_cache->DisableStreams();
-#endif
-    query_cache->NotifySegment(false);
+    query_cache->CounterReset(VideoCommon::QueryType::ZPassPixelCount64);
     EndRenderPass();
 }
 
@@ -292,6 +277,10 @@ void Scheduler::EndRenderPass() {
     if (!state.renderpass) {
         return;
     }
+
+    query_cache->CounterEnable(VideoCommon::QueryType::ZPassPixelCount64, false);
+    query_cache->NotifySegment(false);
+
     Record([num_images = num_renderpass_images, images = renderpass_images,
             ranges = renderpass_image_ranges](vk::CommandBuffer cmdbuf) {
         std::array<VkImageMemoryBarrier, 9> barriers;

View file

@@ -1470,7 +1470,7 @@ void TextureCacheRuntime::CopyImageMSAA(Image& dst, Image& src,
     if (msaa_copy_pass) {
         return msaa_copy_pass->CopyImage(dst, src, copies, msaa_to_non_msaa);
     }
-    UNIMPLEMENTED_MSG("Copying images with different samples is not supported.");
+    LOG_WARNING(Render_Vulkan, "Copying images with different samples is not supported.");
 }
 
 u64 TextureCacheRuntime::GetDeviceLocalMemory() const {
@@ -1546,54 +1546,94 @@ void Image::UploadMemory(VkBuffer buffer, VkDeviceSize offset,
     // Handle MSAA upload if necessary
     /* WARNING, TODO: This code uses some hacks, besides being fundamentally ugly
        since tropic didn't want to touch it for a long time, so it needs a rewrite from someone better than me at vulkan.*/
-    if (info.num_samples > 1 && runtime->CanUploadMSAA()) {
-        // Only use MSAA copy pass for color formats
-        // TODO: Depth/stencil formats need special handling
-        if (aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT) {
-            // Create a temporary non-MSAA image to upload the data first
-            ImageInfo temp_info = info;
-            temp_info.num_samples = 1;
-
-            // Create image with same usage flags as the target image to avoid validation errors
-            VkImageCreateInfo image_ci = MakeImageCreateInfo(runtime->device, temp_info);
-            image_ci.usage = original_image.UsageFlags();
-            vk::Image temp_image = runtime->memory_allocator.CreateImage(image_ci);
-
-            // Upload to the temporary non-MSAA image
-            scheduler->RequestOutsideRenderPassOperationContext();
-            auto vk_copies = TransformBufferImageCopies(copies, offset, aspect_mask);
-            const VkBuffer src_buffer = buffer;
-            const VkImage temp_vk_image = *temp_image;
-            const VkImageAspectFlags vk_aspect_mask = aspect_mask;
-            scheduler->Record([src_buffer, temp_vk_image, vk_aspect_mask, vk_copies](vk::CommandBuffer cmdbuf) {
-                CopyBufferToImage(cmdbuf, src_buffer, temp_vk_image, vk_aspect_mask, false, vk_copies);
-            });
-
-            // Use MSAACopyPass to convert from non-MSAA to MSAA
-            std::vector<VideoCommon::ImageCopy> image_copies;
-            for (const auto& copy : copies) {
-                VideoCommon::ImageCopy image_copy;
-                image_copy.src_offset = {0, 0, 0}; // Use zero offset for source
-                image_copy.dst_offset = copy.image_offset;
-                image_copy.src_subresource = copy.image_subresource;
-                image_copy.dst_subresource = copy.image_subresource;
-                image_copy.extent = copy.image_extent;
-                image_copies.push_back(image_copy);
-            }
-
-            // wrapper image for the temporary image
-            Image temp_wrapper(*runtime, temp_info, 0, 0);
-            temp_wrapper.original_image = std::move(temp_image);
-            temp_wrapper.current_image = &Image::original_image;
-            temp_wrapper.aspect_mask = aspect_mask;
-            temp_wrapper.initialized = true;
-
-            // Use MSAACopyPass to convert from non-MSAA to MSAA
-            runtime->msaa_copy_pass->CopyImage(*this, temp_wrapper, image_copies, false);
-            std::exchange(initialized, true);
-            return;
-        }
-        // For depth/stencil formats, fall back to regular upload
+    if (info.num_samples > 1) {
+        // Create a temporary non-MSAA image to upload the data first
+        ImageInfo temp_info = info;
+        temp_info.num_samples = 1;
+
+        // Create image with same usage flags as the target image to avoid validation errors
+        VkImageCreateInfo temp_ci = MakeImageCreateInfo(runtime->device, temp_info);
+        temp_ci.usage |= VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT;
+        vk::Image temp_image = runtime->memory_allocator.CreateImage(temp_ci);
+
+        auto vk_buffer_image_copies = TransformBufferImageCopies(copies, offset, aspect_mask);
+
+        boost::container::small_vector<VkImageBlit, 16> blit_regions;
+        blit_regions.reserve(copies.size());
+        for (const auto& copy : copies) {
+            blit_regions.emplace_back(VkImageBlit{
+                .srcSubresource = MakeImageSubresourceLayers(copy.image_subresource, aspect_mask),
+                .srcOffsets = {{copy.image_offset.x, copy.image_offset.y, copy.image_offset.z},
+                               {static_cast<s32>(copy.image_offset.x + copy.image_extent.width),
+                                static_cast<s32>(copy.image_offset.y + copy.image_extent.height),
+                                static_cast<s32>(copy.image_offset.z + copy.image_extent.depth)}},
+                .dstSubresource = MakeImageSubresourceLayers(copy.image_subresource, aspect_mask),
+                .dstOffsets = {{copy.image_offset.x, copy.image_offset.y, copy.image_offset.z},
+                               {static_cast<s32>(copy.image_offset.x + copy.image_extent.width),
+                                static_cast<s32>(copy.image_offset.y + copy.image_extent.height),
+                                static_cast<s32>(copy.image_offset.z + copy.image_extent.depth)}},
+            });
+        }
+
+        const VkImage dst_vk_image = Handle();
+        const bool is_initialized = std::exchange(initialized, true);
+
+        scheduler->RequestOutsideRenderPassOperationContext();
+        scheduler->Record([=, temp_image = std::move(temp_image)](vk::CommandBuffer cmdbuf) {
+            // Upload to the temporary non-MSAA image
+            CopyBufferToImage(cmdbuf, buffer, *temp_image, aspect_mask, false,
+                              vk_buffer_image_copies);
+
+            // Transition layouts for blit
+            const VkAccessFlags src_access_mask =
+                is_initialized
+                    ? (VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT)
+                    : VK_ACCESS_NONE;
+            const std::array<VkImageMemoryBarrier, 2> pre_blit_barriers{
+                VkImageMemoryBarrier{
+                    .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+                    .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
+                    .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
+                    .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
+                    .newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
+                    .image = *temp_image,
+                    .subresourceRange = {aspect_mask, 0, VK_REMAINING_MIP_LEVELS, 0,
+                                         VK_REMAINING_ARRAY_LAYERS},
+                },
+                VkImageMemoryBarrier{
+                    .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+                    .srcAccessMask = src_access_mask,
+                    .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
+                    .oldLayout =
+                        is_initialized ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_UNDEFINED,
+                    .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+                    .image = dst_vk_image,
+                    .subresourceRange = {aspect_mask, 0, VK_REMAINING_MIP_LEVELS, 0,
+                                         VK_REMAINING_ARRAY_LAYERS},
+                }};
+            cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
+                                   0, {}, {}, pre_blit_barriers);
+
+            // Blit from temporary to MSAA image
+            cmdbuf.BlitImage(*temp_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst_vk_image,
+                             VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, blit_regions,
+                             VK_FILTER_NEAREST);
+
+            // Transition destination image to general layout
+            const VkImageMemoryBarrier post_blit_barrier{
+                .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+                .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
+                .dstAccessMask = VK_ACCESS_SHADER_READ_BIT |
+                                 VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
+                                 VK_ACCESS_TRANSFER_READ_BIT,
+                .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+                .newLayout = VK_IMAGE_LAYOUT_GENERAL,
+                .image = dst_vk_image,
+                .subresourceRange = {aspect_mask, 0, VK_REMAINING_MIP_LEVELS, 0,
+                                     VK_REMAINING_ARRAY_LAYERS},
+            };
+            cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT,
+                                   VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, post_blit_barrier);
+        });
     } else {
         // Regular non-MSAA upload
         scheduler->RequestOutsideRenderPassOperationContext();
@@ -1604,7 +1644,8 @@ void Image::UploadMemory(VkBuffer buffer, VkDeviceSize offset,
     const bool is_initialized = std::exchange(initialized, true);
     scheduler->Record([src_buffer, vk_image, vk_aspect_mask, is_initialized,
                        vk_copies](vk::CommandBuffer cmdbuf) {
-        CopyBufferToImage(cmdbuf, src_buffer, vk_image, vk_aspect_mask, is_initialized, vk_copies);
+        CopyBufferToImage(cmdbuf, src_buffer, vk_image, vk_aspect_mask, is_initialized,
+                          vk_copies);
     });
} }
@@ -1636,102 +1677,100 @@ void Image::DownloadMemory(std::span<VkBuffer> buffers_span, std::span<size_t> o
     }
 
     // RE-USE MSAA UPLOAD CODE BUT NOW FOR DOWNLOAD
-    if (info.num_samples > 1 && runtime->msaa_copy_pass) {
-        // TODO: Depth/stencil formats need special handling
-        if (aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT) {
-            ImageInfo temp_info = info;
-            temp_info.num_samples = 1;
-
-            VkImageCreateInfo image_ci = MakeImageCreateInfo(runtime->device, temp_info);
-            image_ci.usage = original_image.UsageFlags();
-            vk::Image temp_image = runtime->memory_allocator.CreateImage(image_ci);
-
-            Image temp_wrapper(*runtime, temp_info, 0, 0);
-            temp_wrapper.original_image = std::move(temp_image);
-            temp_wrapper.current_image = &Image::original_image;
-            temp_wrapper.aspect_mask = aspect_mask;
-            temp_wrapper.initialized = true;
-
-            std::vector<VideoCommon::ImageCopy> image_copies;
-            for (const auto& copy : copies) {
-                VideoCommon::ImageCopy image_copy;
-                image_copy.src_offset = copy.image_offset;
-                image_copy.dst_offset = copy.image_offset;
-                image_copy.src_subresource = copy.image_subresource;
-                image_copy.dst_subresource = copy.image_subresource;
-                image_copy.extent = copy.image_extent;
-                image_copies.push_back(image_copy);
-            }
-
-            runtime->msaa_copy_pass->CopyImage(temp_wrapper, *this, image_copies, true);
-
-            boost::container::small_vector<VkBuffer, 8> buffers_vector{};
-            boost::container::small_vector<boost::container::small_vector<VkBufferImageCopy, 16>, 8>
-                vk_copies;
-            for (size_t index = 0; index < buffers_span.size(); index++) {
-                buffers_vector.emplace_back(buffers_span[index]);
-                vk_copies.emplace_back(
-                    TransformBufferImageCopies(copies, offsets_span[index], aspect_mask));
-            }
-
-            scheduler->RequestOutsideRenderPassOperationContext();
-            scheduler->Record([buffers = std::move(buffers_vector), image = *temp_wrapper.original_image,
-                               aspect_mask_ = aspect_mask, vk_copies](vk::CommandBuffer cmdbuf) {
-                const VkImageMemoryBarrier read_barrier{
-                    .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
-                    .pNext = nullptr,
-                    .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
-                    .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
-                    .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
-                    .newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
-                    .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
-                    .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
-                    .image = image,
-                    .subresourceRange{
-                        .aspectMask = aspect_mask_,
-                        .baseMipLevel = 0,
-                        .levelCount = VK_REMAINING_MIP_LEVELS,
-                        .baseArrayLayer = 0,
-                        .layerCount = VK_REMAINING_ARRAY_LAYERS,
-                    },
-                };
-                cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
-                                       0, read_barrier);
-                for (size_t index = 0; index < buffers.size(); index++) {
-                    cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, buffers[index],
-                                             vk_copies[index]);
-                }
-                const VkMemoryBarrier memory_write_barrier{
-                    .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
-                    .pNext = nullptr,
-                    .srcAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
-                    .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
-                };
-                const VkImageMemoryBarrier image_write_barrier{
-                    .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
-                    .pNext = nullptr,
-                    .srcAccessMask = 0,
-                    .dstAccessMask = VK_ACCESS_MEMORY_WRITE_BIT,
-                    .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
-                    .newLayout = VK_IMAGE_LAYOUT_GENERAL,
-                    .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
-                    .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
-                    .image = image,
-                    .subresourceRange{
-                        .aspectMask = aspect_mask_,
-                        .baseMipLevel = 0,
-                        .levelCount = VK_REMAINING_MIP_LEVELS,
-                        .baseArrayLayer = 0,
-                        .layerCount = VK_REMAINING_ARRAY_LAYERS,
-                    },
-                };
-                cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
-                                       0, memory_write_barrier, nullptr, image_write_barrier);
-            });
-            return;
-        }
+    if (info.num_samples > 1) {
+        ImageInfo temp_info = info;
+        temp_info.num_samples = 1;
+
+        VkImageCreateInfo temp_ci = MakeImageCreateInfo(runtime->device, temp_info);
+        temp_ci.usage |= VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT;
+        vk::Image temp_image = runtime->memory_allocator.CreateImage(temp_ci);
+
+        boost::container::small_vector<VkImageBlit, 16> blit_regions;
+        blit_regions.reserve(copies.size());
+        for (const auto& copy : copies) {
+            blit_regions.emplace_back(VkImageBlit{
+                .srcSubresource = MakeImageSubresourceLayers(copy.image_subresource, aspect_mask),
+                .srcOffsets = {{copy.image_offset.x, copy.image_offset.y, copy.image_offset.z},
+                               {static_cast<s32>(copy.image_offset.x + copy.image_extent.width),
+                                static_cast<s32>(copy.image_offset.y + copy.image_extent.height),
+                                static_cast<s32>(copy.image_offset.z + copy.image_extent.depth)}},
+                .dstSubresource = MakeImageSubresourceLayers(copy.image_subresource, aspect_mask),
+                .dstOffsets = {{copy.image_offset.x, copy.image_offset.y, copy.image_offset.z},
+                               {static_cast<s32>(copy.image_offset.x + copy.image_extent.width),
+                                static_cast<s32>(copy.image_offset.y + copy.image_extent.height),
+                                static_cast<s32>(copy.image_offset.z + copy.image_extent.depth)}},
+            });
+        }
+
+        boost::container::small_vector<VkBuffer, 8> buffers_vector{};
+        boost::container::small_vector<boost::container::small_vector<VkBufferImageCopy, 16>, 8>
+            vk_copies;
+        for (size_t index = 0; index < buffers_span.size(); index++) {
+            buffers_vector.emplace_back(buffers_span[index]);
+            vk_copies.emplace_back(
+                TransformBufferImageCopies(copies, offsets_span[index], aspect_mask));
+        }
+
+        const VkImage src_vk_image = Handle();
+
+        scheduler->RequestOutsideRenderPassOperationContext();
+        scheduler->Record([=, temp_image = std::move(temp_image),
+                           buffers = std::move(buffers_vector)](vk::CommandBuffer cmdbuf) {
+            const std::array<VkImageMemoryBarrier, 2> pre_blit_barriers{
+                VkImageMemoryBarrier{
+                    .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+                    .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
+                    .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
+                    .oldLayout = VK_IMAGE_LAYOUT_GENERAL,
+                    .newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
+                    .image = src_vk_image,
+                    .subresourceRange = {aspect_mask, 0, VK_REMAINING_MIP_LEVELS, 0,
+                                         VK_REMAINING_ARRAY_LAYERS},
+                },
+                VkImageMemoryBarrier{
+                    .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+                    .srcAccessMask = 0,
+                    .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
+                    .oldLayout = VK_IMAGE_LAYOUT_UNDEFINED,
+                    .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+                    .image = *temp_image,
+                    .subresourceRange = {aspect_mask, 0, VK_REMAINING_MIP_LEVELS, 0,
+                                         VK_REMAINING_ARRAY_LAYERS},
+                }};
+            cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+                                   VK_PIPELINE_STAGE_TRANSFER_BIT, 0, {}, {}, pre_blit_barriers);
+
+            cmdbuf.BlitImage(src_vk_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, *temp_image,
+                             VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, blit_regions,
+                             VK_FILTER_NEAREST);
+
+            const VkImageMemoryBarrier post_blit_barrier{
+                .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
+                .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
+                .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT,
+                .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
+                .newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
+                .image = *temp_image,
+                .subresourceRange = {aspect_mask, 0, VK_REMAINING_MIP_LEVELS, 0,
+                                     VK_REMAINING_ARRAY_LAYERS},
+            };
+            cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
+                                   0, post_blit_barrier);
+
+            for (size_t index = 0; index < buffers.size(); index++) {
+                cmdbuf.CopyImageToBuffer(*temp_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
+                                         buffers[index], vk_copies[index]);
+            }
+
+            const VkMemoryBarrier memory_write_barrier{
+                .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
+                .pNext = nullptr,
+                .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
+                .dstAccessMask = VK_ACCESS_HOST_READ_BIT,
+            };
+            cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0,
+                                   memory_write_barrier, {}, {});
+        });
     } else {
         boost::container::small_vector<VkBuffer, 8> buffers_vector{};
         boost::container::small_vector<boost::container::small_vector<VkBufferImageCopy, 16>, 8>
@@ -1762,12 +1801,12 @@ void Image::DownloadMemory(std::span<VkBuffer> buffers_span, std::span<size_t> o
                     .layerCount = VK_REMAINING_ARRAY_LAYERS,
                 },
             };
-            cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
-                                   0, read_barrier);
+            cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+                                   VK_PIPELINE_STAGE_TRANSFER_BIT, 0, read_barrier);
             for (size_t index = 0; index < buffers.size(); index++) {
-                cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, buffers[index],
-                                         vk_copies[index]);
+                cmdbuf.CopyImageToBuffer(image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
+                                         buffers[index], vk_copies[index]);
             }
 
             const VkMemoryBarrier memory_write_barrier{
@@ -1794,8 +1833,9 @@ void Image::DownloadMemory(std::span<VkBuffer> buffers_span, std::span<size_t> o
                     .layerCount = VK_REMAINING_ARRAY_LAYERS,
                 },
             };
-            cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
-                                   0, memory_write_barrier, nullptr, image_write_barrier);
+            cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT,
+                                   VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, memory_write_barrier,
+                                   nullptr, image_write_barrier);
         });
     }
@@ -1991,6 +2031,11 @@ ImageView::ImageView(TextureCacheRuntime& runtime, const VideoCommon::ImageViewI
             std::ranges::transform(swizzle, swizzle.begin(), ConvertGreenRed);
         }
     }
+
+    if ((image.UsageFlags() & VK_IMAGE_USAGE_STORAGE_BIT) != 0) {
+        swizzle = {SwizzleSource::R, SwizzleSource::G, SwizzleSource::B, SwizzleSource::A};
+    }
+
     const auto format_info = MaxwellToVK::SurfaceFormat(*device, FormatType::Optimal, true, format);
     const VkImageViewUsageCreateInfo image_view_usage{
         .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO,