Compare commits

2 commits

Author  SHA1        Message                                               Date
Ribbit  daa5f94915  emergency fix                                         2025-10-08 20:09:32 -07:00
Ribbit  17556bf64b  [vk] Fix Vulkan streaming ring alignment and flushes  2025-10-08 18:38:43 -07:00

11 changed files with 179 additions and 18 deletions

View file

@@ -253,16 +253,16 @@
     <item>@string/scaling_filter_nearest_neighbor</item>
     <item>@string/scaling_filter_bilinear</item>
     <item>@string/scaling_filter_bicubic</item>
+    <item>@string/scaling_filter_zero_tangent</item>
+    <item>@string/scaling_filter_bspline</item>
+    <item>@string/scaling_filter_mitchell</item>
+    <item>@string/scaling_filter_spline1</item>
     <item>@string/scaling_filter_gaussian</item>
     <item>@string/scaling_filter_lanczos</item>
     <item>@string/scaling_filter_scale_force</item>
     <item>@string/scaling_filter_fsr</item>
     <item>@string/scaling_filter_area</item>
     <item>@string/scaling_filter_mmpx</item>
-    <item>@string/scaling_filter_zero_tangent</item>
-    <item>@string/scaling_filter_bspline</item>
-    <item>@string/scaling_filter_mitchell</item>
-    <item>@string/scaling_filter_spline1</item>
 </string-array>
 <integer-array name="rendererScalingFilterValues">

View file

@@ -143,7 +143,7 @@ ENUM(ConfirmStop, Ask_Always, Ask_Based_On_Game, Ask_Never);
 ENUM(FullscreenMode, Borderless, Exclusive);
 ENUM(NvdecEmulation, Off, Cpu, Gpu);
 ENUM(ResolutionSetup, Res1_4X, Res1_2X, Res3_4X, Res1X, Res5_4X, Res3_2X, Res2X, Res3X, Res4X, Res5X, Res6X, Res7X, Res8X);
-ENUM(ScalingFilter, NearestNeighbor, Bilinear, Bicubic, Gaussian, Lanczos, ScaleForce, Fsr, Area, ZeroTangent, BSpline, Mitchell, Spline1, Mmpx, MaxEnum);
+ENUM(ScalingFilter, NearestNeighbor, Bilinear, Bicubic, ZeroTangent, BSpline, Mitchell, Spline1, Gaussian, Lanczos, ScaleForce, Fsr, Area, Mmpx, MaxEnum);
 ENUM(AntiAliasing, None, Fxaa, Smaa, MaxEnum);
 ENUM(AspectRatio, R16_9, R4_3, R21_9, R16_10, Stretch);
 ENUM(ConsoleMode, Handheld, Docked);

View file

@@ -549,16 +549,16 @@ std::unique_ptr<ComboboxTranslationMap> ComboboxEnumeration(QObject* parent)
         PAIR(ScalingFilter, NearestNeighbor, tr("Nearest Neighbor")),
         PAIR(ScalingFilter, Bilinear, tr("Bilinear")),
         PAIR(ScalingFilter, Bicubic, tr("Bicubic")),
+        PAIR(ScalingFilter, ZeroTangent, tr("Zero-Tangent")),
+        PAIR(ScalingFilter, BSpline, tr("B-Spline")),
+        PAIR(ScalingFilter, Mitchell, tr("Mitchell")),
+        PAIR(ScalingFilter, Spline1, tr("Spline-1")),
         PAIR(ScalingFilter, Gaussian, tr("Gaussian")),
         PAIR(ScalingFilter, Lanczos, tr("Lanczos")),
         PAIR(ScalingFilter, ScaleForce, tr("ScaleForce")),
         PAIR(ScalingFilter, Fsr, tr("AMD FidelityFX™ Super Resolution")),
         PAIR(ScalingFilter, Area, tr("Area")),
         PAIR(ScalingFilter, Mmpx, tr("MMPX")),
-        PAIR(ScalingFilter, ZeroTangent, tr("Zero-Tangent")),
-        PAIR(ScalingFilter, BSpline, tr("B-Spline")),
-        PAIR(ScalingFilter, Mitchell, tr("Mitchell")),
-        PAIR(ScalingFilter, Spline1, tr("Spline-1")),
     }});
     translations->insert({Settings::EnumMetadata<Settings::AntiAliasing>::Index(),
                           {
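Note on the three reorderings above: the Android string-array, the core ScalingFilter enum, and the Qt combobox map are parallel lists, and the selected entry is stored as the enum's underlying integer, so the cubic-family filters (Zero-Tangent, B-Spline, Mitchell, Spline-1) have to move in lockstep in all three places. A minimal sketch of that coupling, with hypothetical helper names:

// Hypothetical illustration: a combobox index is cast straight to the enum,
// so UI list order and enum declaration order must match everywhere.
int ToComboIndex(Settings::ScalingFilter filter) {
    return static_cast<int>(filter);
}
Settings::ScalingFilter FromComboIndex(int index) {
    return static_cast<Settings::ScalingFilter>(index);
}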

View file

@@ -9,6 +9,8 @@
 #include <algorithm>
 #include <memory>
 #include <numeric>
+#include <type_traits>
+#include <utility>
 #include "common/range_sets.inc"
 #include "video_core/buffer_cache/buffer_cache_base.h"
@@ -19,6 +21,43 @@ namespace VideoCommon {
 using Core::DEVICE_PAGESIZE;
+
+namespace staging_detail {
+template <typename T, typename = void>
+struct has_flush_range : std::false_type {};
+template <typename T>
+struct has_flush_range<
+    T, std::void_t<decltype(std::declval<T&>().FlushRange(size_t{}, size_t{}))>> : std::true_type {};
+template <typename T, typename = void>
+struct has_invalidate_range : std::false_type {};
+template <typename T>
+struct has_invalidate_range<
+    T, std::void_t<decltype(std::declval<T&>().InvalidateRange(size_t{}, size_t{}))>>
+    : std::true_type {};
+} // namespace staging_detail
+
+template <typename Ref>
+inline void StagingFlushRange(Ref& ref, size_t offset, size_t size) {
+    if constexpr (staging_detail::has_flush_range<Ref>::value) {
+        ref.FlushRange(offset, size);
+    } else {
+        (void)ref;
+        (void)offset;
+        (void)size;
+    }
+}
+
+template <typename Ref>
+inline void StagingInvalidateRange(Ref& ref, size_t offset, size_t size) {
+    if constexpr (staging_detail::has_invalidate_range<Ref>::value) {
+        ref.InvalidateRange(offset, size);
+    } else {
+        (void)ref;
+        (void)offset;
+        (void)size;
+    }
+}
+
 template <class P>
 BufferCache<P>::BufferCache(Tegra::MaxwellDeviceMemoryManager& device_memory_, Runtime& runtime_)
     : runtime{runtime_}, device_memory{device_memory_}, memory_tracker{device_memory} {
@@ -633,6 +672,7 @@ void BufferCache<P>::PopAsyncBuffers() {
         u8* base = async_buffer->mapped_span.data();
         const size_t base_offset = async_buffer->offset;
         for (const auto& copy : downloads) {
+            StagingInvalidateRange(*async_buffer, copy.dst_offset, copy.size);
             const DAddr device_addr = static_cast<DAddr>(copy.src_offset);
             const u64 dst_offset = copy.dst_offset - base_offset;
             const u8* read_mapped_memory = base + dst_offset;
@@ -696,6 +736,7 @@ void BufferCache<P>::BindHostIndexBuffer() {
            {BufferCopy{.src_offset = upload_staging.offset, .dst_offset = 0, .size = size}}};
        std::memcpy(upload_staging.mapped_span.data(),
                    draw_state.inline_index_draw_indexes.data(), size);
+       StagingFlushRange(upload_staging, upload_staging.offset, size);
        runtime.CopyBuffer(buffer, upload_staging.buffer, copies, true);
    } else {
        buffer.ImmediateUpload(0, draw_state.inline_index_draw_indexes);
@@ -1519,7 +1560,7 @@ template <class P>
 void BufferCache<P>::MappedUploadMemory([[maybe_unused]] Buffer& buffer,
                                         [[maybe_unused]] u64 total_size_bytes,
                                         [[maybe_unused]] std::span<BufferCopy> copies) {
     if constexpr (USE_MEMORY_MAPS) {
         auto upload_staging = runtime.UploadStagingBuffer(total_size_bytes);
         const std::span<u8> staging_pointer = upload_staging.mapped_span;
         for (BufferCopy& copy : copies) {
@@ -1530,6 +1571,7 @@ void BufferCache<P>::MappedUploadMemory([[maybe_unused]] Buffer& buffer,
             // Apply the staging offset
             copy.src_offset += upload_staging.offset;
         }
+        StagingFlushRange(upload_staging, upload_staging.offset, total_size_bytes);
         const bool can_reorder = runtime.CanReorderUpload(buffer, copies);
         runtime.CopyBuffer(buffer, upload_staging.buffer, copies, true, can_reorder);
     }
@@ -1572,6 +1614,7 @@ void BufferCache<P>::InlineMemoryImplementation(DAddr dest_address, size_t copy_
        }};
        u8* const src_pointer = upload_staging.mapped_span.data();
        std::memcpy(src_pointer, inlined_buffer.data(), copy_size);
+       StagingFlushRange(upload_staging, upload_staging.offset, copy_size);
        const bool can_reorder = runtime.CanReorderUpload(buffer, copies);
        runtime.CopyBuffer(buffer, upload_staging.buffer, copies, true, can_reorder);
    } else {
@@ -1626,6 +1669,7 @@ void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, DAddr device_addr, u64
    }
    runtime.CopyBuffer(download_staging.buffer, buffer, copies_span, true);
    runtime.Finish();
+   StagingInvalidateRange(download_staging, download_staging.offset, total_size_bytes);
    for (const BufferCopy& copy : copies) {
        const DAddr copy_device_addr = buffer.CpuAddr() + copy.src_offset;
        // Undo the modified offset
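The staging_detail templates added above are the C++17 detection idiom: the partial specialization is only viable when ref.FlushRange(size_t, size_t) is well-formed, and StagingFlushRange/StagingInvalidateRange dispatch on the result with if constexpr, so a staging type that lacks these members compiles to a no-op. A self-contained sketch of the same idiom:

#include <cstddef>
#include <type_traits>

// Primary template: assume no FlushRange member.
template <typename T, typename = void>
struct has_flush_range : std::false_type {};

// Specialization chosen only when T{}.FlushRange(size_t, size_t) compiles.
template <typename T>
struct has_flush_range<
    T, std::void_t<decltype(std::declval<T&>().FlushRange(std::size_t{}, std::size_t{}))>>
    : std::true_type {};

struct CoherentRef {};                                              // no FlushRange
struct NonCoherentRef { void FlushRange(std::size_t, std::size_t) {} };

static_assert(!has_flush_range<CoherentRef>::value);
static_assert(has_flush_range<NonCoherentRef>::value);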

View file

@@ -194,6 +194,7 @@ public:
         }
         if (!host_visible) {
+            staging.FlushRange(staging.offset, static_cast<VkDeviceSize>(size_bytes));
             scheduler.RequestOutsideRenderPassOperationContext();
             scheduler.Record([src_buffer = staging.buffer, src_offset = staging.offset,
                               dst_buffer = *buffer, size_bytes](vk::CommandBuffer cmdbuf) {
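This flush matters because the staging memory may lack VK_MEMORY_PROPERTY_HOST_COHERENT_BIT: without it, the CPU's writes into the mapped span are not guaranteed visible to the transfer that follows. A hedged sketch of the raw Vulkan call this ultimately corresponds to (the codebase routes it through VMA; variable names here are assumptions):

// Flush a mapped, non-coherent range before the GPU reads it. offset/size
// must be multiples of VkPhysicalDeviceLimits::nonCoherentAtomSize
// (or size == VK_WHOLE_SIZE).
VkMappedMemoryRange range{};
range.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
range.memory = staging_memory; // assumption: the staging allocation's VkDeviceMemory
range.offset = aligned_offset;
range.size = aligned_size;
vkFlushMappedMemoryRanges(device, 1, &range);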

View file

@@ -850,6 +850,10 @@ public:
             pending_flush_sets.pop_front();
         }
+        const VkDeviceSize read_size =
+            static_cast<VkDeviceSize>(flushed_queries.size() * TFBQueryBank::QUERY_SIZE);
+        staging_ref.InvalidateRange(staging_ref.offset, read_size);
+
         size_t offset_base = staging_ref.offset;
         for (auto q : flushed_queries) {
             auto* query = GetQuery(q);
@@ -1577,6 +1581,7 @@ void QueryCacheRuntime::SyncValues(std::span<SyncValuesType> values, VkBuffer ba
                         values[i].size);
             accumulated_size += values[i].size;
         }
+        ref.FlushRange(ref.offset, static_cast<VkDeviceSize>(accumulated_size));
         src_buffer = ref.buffer;
     } else {
         for (size_t i = 0; i < values.size(); i++) {

View file

@@ -5,6 +5,7 @@
 // SPDX-License-Identifier: GPL-3.0-or-later

 #include <algorithm>
+#include <memory>
 #include <utility>
 #include <vector>
@@ -187,9 +188,12 @@ StagingBufferRef StagingBufferPool::GetStreamBuffer(size_t size) {
         .buffer = *stream_buffer,
         .offset = static_cast<VkDeviceSize>(offset),
         .mapped_span = stream_pointer.subspan(offset, size),
-        .usage{},
-        .log2_level{},
-        .index{},
+        .usage = MemoryUsage::Upload,
+        .log2_level = 0,
+        .index = 0,
+        .owner = &stream_buffer,
+        .atom_size = non_coherent_atom_size,
+        .is_coherent = stream_is_coherent,
     };
 }
@@ -301,15 +305,19 @@ StagingBufferRef StagingBufferPool::CreateStagingBuffer(size_t size, MemoryUsage
         ++buffer_index;
         buffer.SetObjectNameEXT(fmt::format("Staging Buffer {}", buffer_index).c_str());
     }
+    const bool is_coherent = buffer.IsHostCoherent();
     const std::span<u8> mapped_span = buffer.Mapped();
+    auto buffer_ptr = std::make_unique<vk::Buffer>(std::move(buffer));
     StagingBuffer& entry = GetCache(usage)[log2].entries.emplace_back(StagingBuffer{
-        .buffer = std::move(buffer),
+        .buffer = std::move(buffer_ptr),
         .mapped_span = mapped_span,
         .usage = usage,
         .log2_level = log2,
         .index = unique_ids++,
         .tick = deferred ? (std::numeric_limits<u64>::max)() : scheduler.CurrentTick(),
         .deferred = deferred,
+        .is_coherent = is_coherent,
+        .atom_size = is_coherent ? 1 : non_coherent_atom_size,
     });
     return entry.Ref();
 }
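GetStreamBuffer now records the stream buffer's coherency and atom size in the ref it hands out, which is what the commit title's "streaming ring alignment" refers to: regions carved from the ring must be flushable without the atom-widened flush range spilling into a neighbouring region. A hypothetical sketch of the idea (not this pool's actual allocator):

#include "common/alignment.h"

// Hypothetical ring-allocator step: rounding each region up to the
// non-coherent atom keeps widened flushes from touching the next
// region's bytes.
size_t CarveRegion(size_t& ring_offset, size_t size, size_t atom, size_t capacity) {
    const size_t aligned_size = Common::AlignUp(size, atom);
    if (ring_offset + aligned_size > capacity) {
        ring_offset = 0; // wrap; real code must also wait for the GPU to drain
    }
    const size_t region_begin = ring_offset;
    ring_offset += aligned_size;
    return region_begin;
}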

View file

@@ -8,9 +8,11 @@

 #include <climits>
 #include <mutex>
+#include <memory>
 #include <vector>

 #include "common/common_types.h"
+#include "common/alignment.h"
 #include "video_core/vulkan_common/vulkan_memory_allocator.h"
 #include "video_core/vulkan_common/vulkan_wrapper.h"
@@ -27,6 +29,47 @@ struct StagingBufferRef {
     MemoryUsage usage;
     u32 log2_level;
     u64 index;
+    const vk::Buffer* owner = nullptr;
+    VkDeviceSize atom_size = 1;
+    bool is_coherent = true;
+
+    void FlushRange(VkDeviceSize range_offset, VkDeviceSize size) const {
+        if (!owner || is_coherent || size == 0) {
+            return;
+        }
+        if (size == VK_WHOLE_SIZE) {
+            owner->FlushRange(range_offset, size);
+            return;
+        }
+        const VkDeviceSize atom = atom_size ? atom_size : 1;
+        const VkDeviceSize range_end = range_offset + size;
+        if (range_end < range_offset) {
+            owner->FlushRange(range_offset, size);
+            return;
+        }
+        const VkDeviceSize aligned_begin = Common::AlignDown(range_offset, atom);
+        const VkDeviceSize aligned_end = Common::AlignUp(range_end, atom);
+        owner->FlushRange(aligned_begin, aligned_end - aligned_begin);
+    }
+
+    void InvalidateRange(VkDeviceSize range_offset, VkDeviceSize size) const {
+        if (!owner || is_coherent || size == 0) {
+            return;
+        }
+        if (size == VK_WHOLE_SIZE) {
+            owner->InvalidateRange(range_offset, size);
+            return;
+        }
+        const VkDeviceSize atom = atom_size ? atom_size : 1;
+        const VkDeviceSize range_end = range_offset + size;
+        if (range_end < range_offset) {
+            owner->InvalidateRange(range_offset, size);
+            return;
+        }
+        const VkDeviceSize aligned_begin = Common::AlignDown(range_offset, atom);
+        const VkDeviceSize aligned_end = Common::AlignUp(range_end, atom);
+        owner->InvalidateRange(aligned_begin, aligned_end - aligned_begin);
+    }
 };

 class StagingBufferPool {
@@ -55,22 +98,27 @@ private:
     };

     struct StagingBuffer {
-        vk::Buffer buffer;
+        std::unique_ptr<vk::Buffer> buffer;
         std::span<u8> mapped_span;
         MemoryUsage usage;
         u32 log2_level;
         u64 index;
         u64 tick = 0;
         bool deferred{};
+        bool is_coherent = true;
+        VkDeviceSize atom_size = 1;

         StagingBufferRef Ref() const noexcept {
             return {
-                .buffer = *buffer,
+                .buffer = buffer ? **buffer : VkBuffer{},
                 .offset = 0,
                 .mapped_span = mapped_span,
                 .usage = usage,
                 .log2_level = log2_level,
                 .index = index,
+                .owner = buffer.get(),
+                .atom_size = atom_size,
+                .is_coherent = is_coherent,
             };
         }
     };
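The widening in FlushRange/InvalidateRange exists because Vulkan requires flush offsets and sizes on non-coherent memory to be multiples of nonCoherentAtomSize. A worked example of the arithmetic above:

// With atom_size = 64, a request of range_offset = 100, size = 8:
//   range_end     = 100 + 8                    = 108
//   aligned_begin = Common::AlignDown(100, 64) = 64
//   aligned_end   = Common::AlignUp(108, 64)   = 128
// so the owner buffer is flushed over [64, 128): one full 64-byte atom.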

View file

@@ -7,6 +7,8 @@
 #pragma once

 #include <unordered_set>
+#include <type_traits>
+#include <utility>
 #include <boost/container/small_vector.hpp>

 #include "common/alignment.h"
@@ -30,6 +32,42 @@ using VideoCore::Surface::PixelFormat;
 using VideoCore::Surface::SurfaceType;
 using namespace Common::Literals;

+namespace staging_detail {
+template <typename T, typename = void>
+struct has_flush_range : std::false_type {};
+template <typename T>
+struct has_flush_range<
+    T, std::void_t<decltype(std::declval<T&>().FlushRange(size_t{}, size_t{}))>> : std::true_type {};
+template <typename T, typename = void>
+struct has_invalidate_range : std::false_type {};
+template <typename T>
+struct has_invalidate_range<
+    T, std::void_t<decltype(std::declval<T&>().InvalidateRange(size_t{}, size_t{}))>>
+    : std::true_type {};
+} // namespace staging_detail
+
+template <typename Ref>
+inline void StagingFlushRange(Ref& ref, size_t offset, size_t size) {
+    if constexpr (staging_detail::has_flush_range<Ref>::value) {
+        ref.FlushRange(offset, size);
+    } else {
+        (void)ref;
+        (void)offset;
+        (void)size;
+    }
+}
+
+template <typename Ref>
+inline void StagingInvalidateRange(Ref& ref, size_t offset, size_t size) {
+    if constexpr (staging_detail::has_invalidate_range<Ref>::value) {
+        ref.InvalidateRange(offset, size);
+    } else {
+        (void)ref;
+        (void)offset;
+        (void)size;
+    }
+}
+
 template <class P>
 TextureCache<P>::TextureCache(Runtime& runtime_, Tegra::MaxwellDeviceMemoryManager& device_memory_)
     : runtime{runtime_}, device_memory{device_memory_} {
@@ -111,6 +149,7 @@ void TextureCache<P>::RunGarbageCollector() {
         const auto copies = FullDownloadCopies(image.info);
         image.DownloadMemory(map, copies);
         runtime.Finish();
+        StagingInvalidateRange(map, map.offset, image.unswizzled_size_bytes);
         SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span,
                      swizzle_data_buffer);
     }
@@ -567,6 +606,7 @@ void TextureCache<P>::DownloadMemory(DAddr cpu_addr, size_t size) {
         const auto copies = FullDownloadCopies(image.info);
         image.DownloadMemory(map, copies);
         runtime.Finish();
+        StagingInvalidateRange(map, map.offset, image.unswizzled_size_bytes);
         SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span,
                      swizzle_data_buffer);
     }
@@ -863,13 +903,17 @@ void TextureCache<P>::PopAsyncFlushes() {
         if (download_info.is_swizzle) {
             const ImageBase& image = slot_images[download_info.object_id];
             const auto copies = FullDownloadCopies(image.info);
-            download_buffer.offset -= Common::AlignUp(image.unswizzled_size_bytes, 64);
+            const size_t aligned_size =
+                Common::AlignUp(image.unswizzled_size_bytes, static_cast<size_t>(64));
+            download_buffer.offset -= aligned_size;
+            StagingInvalidateRange(download_buffer, download_buffer.offset, aligned_size);
             std::span<u8> download_span =
                 download_buffer.mapped_span.subspan(download_buffer.offset);
             SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span,
                          swizzle_data_buffer);
         } else {
             const BufferDownload& buffer_info = slot_buffer_downloads[download_info.object_id];
+            StagingInvalidateRange(download_buffer, download_buffer.offset, buffer_info.size);
             std::span<u8> download_span =
                 download_buffer.mapped_span.subspan(download_buffer.offset);
             gpu_memory->WriteBlockUnsafe(buffer_info.address, download_span.data(),
@@ -907,6 +951,7 @@ void TextureCache<P>::PopAsyncFlushes() {
     }
     // Wait for downloads to finish
     runtime.Finish();
+    StagingInvalidateRange(download_map, original_offset, total_size_bytes);
     download_map.offset = original_offset;
     std::span<u8> download_span = download_map.mapped_span;
     for (const PendingDownload& download_info : download_ids) {
@@ -1081,6 +1126,7 @@ void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging)
     if (True(image.flags & ImageFlagBits::AcceleratedUpload)) {
         gpu_memory->ReadBlock(gpu_addr, mapped_span.data(), mapped_span.size_bytes(),
                               VideoCommon::CacheType::NoTextureCache);
+        StagingFlushRange(staging, staging.offset, mapped_span.size_bytes());
         const auto uploads = FullUploadSwizzles(image.info);
         runtime.AccelerateImageUpload(image, staging, uploads);
         return;
@@ -1094,10 +1140,12 @@ void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging)
         auto copies =
             UnswizzleImage(*gpu_memory, gpu_addr, image.info, swizzle_data, unswizzle_data_buffer);
         ConvertImage(unswizzle_data_buffer, image.info, mapped_span, copies);
+        StagingFlushRange(staging, staging.offset, mapped_span.size_bytes());
         image.UploadMemory(staging, copies);
     } else {
         const auto copies =
             UnswizzleImage(*gpu_memory, gpu_addr, image.info, swizzle_data, mapped_span);
+        StagingFlushRange(staging, staging.offset, mapped_span.size_bytes());
         image.UploadMemory(staging, copies);
     }
 }
@@ -1329,6 +1377,7 @@ void TextureCache<P>::TickAsyncDecode() {
         auto staging = runtime.UploadStagingBuffer(MapSizeBytes(image));
         std::memcpy(staging.mapped_span.data(), async_decode->decoded_data.data(),
                     async_decode->decoded_data.size());
+        StagingFlushRange(staging, staging.offset, async_decode->decoded_data.size());
         image.UploadMemory(staging, async_decode->copies);
         image.flags &= ~ImageFlagBits::IsDecoding;
         has_uploads = true;

View file

@@ -509,8 +509,12 @@ void Buffer::FlushRange(VkDeviceSize offset, VkDeviceSize size) const {
 }

 void Buffer::Invalidate() const {
+    InvalidateRange(0, VK_WHOLE_SIZE);
+}
+
+void Buffer::InvalidateRange(VkDeviceSize offset, VkDeviceSize size) const {
     if (!is_coherent) {
-        vmaInvalidateAllocation(allocator, allocation, 0, VK_WHOLE_SIZE);
+        vmaInvalidateAllocation(allocator, allocation, offset, size);
     }
 }
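InvalidateRange is the read-side counterpart of FlushRange: after the GPU writes a non-coherent, host-visible buffer and the host has waited for that work, the CPU must invalidate the mapped range before reading or it can observe stale cache lines. A hedged usage sketch (variable names are assumptions):

// After runtime.Finish() guarantees the GPU->host copy is complete:
buffer.InvalidateRange(download_offset, download_size);
std::memcpy(cpu_dst.data(),
            buffer.Mapped().data() + download_offset, download_size);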

View file

@@ -783,6 +783,8 @@ public:
     void Invalidate() const;
+    void InvalidateRange(VkDeviceSize offset, VkDeviceSize size) const;
+
     void SetObjectNameEXT(const char* name) const;

 private: