diff --git a/src/android/app/src/main/res/values/arrays.xml b/src/android/app/src/main/res/values/arrays.xml
index 2150d401db..7e44750909 100644
--- a/src/android/app/src/main/res/values/arrays.xml
+++ b/src/android/app/src/main/res/values/arrays.xml
@@ -253,16 +253,16 @@
         <item>@string/scaling_filter_nearest_neighbor</item>
         <item>@string/scaling_filter_bilinear</item>
         <item>@string/scaling_filter_bicubic</item>
+        <item>@string/scaling_filter_zero_tangent</item>
+        <item>@string/scaling_filter_bspline</item>
+        <item>@string/scaling_filter_mitchell</item>
+        <item>@string/scaling_filter_spline1</item>
         <item>@string/scaling_filter_gaussian</item>
         <item>@string/scaling_filter_lanczos</item>
         <item>@string/scaling_filter_scale_force</item>
         <item>@string/scaling_filter_fsr</item>
         <item>@string/scaling_filter_area</item>
         <item>@string/scaling_filter_mmpx</item>
-        <item>@string/scaling_filter_zero_tangent</item>
-        <item>@string/scaling_filter_bspline</item>
-        <item>@string/scaling_filter_mitchell</item>
-        <item>@string/scaling_filter_spline1</item>
    </string-array>
diff --git a/src/common/settings_enums.h b/src/common/settings_enums.h
index ccf6f1cfb2..3fcdf08256 100644
--- a/src/common/settings_enums.h
+++ b/src/common/settings_enums.h
@@ -143,7 +143,7 @@ ENUM(ConfirmStop, Ask_Always, Ask_Based_On_Game, Ask_Never);
 ENUM(FullscreenMode, Borderless, Exclusive);
 ENUM(NvdecEmulation, Off, Cpu, Gpu);
 ENUM(ResolutionSetup, Res1_4X, Res1_2X, Res3_4X, Res1X, Res5_4X, Res3_2X, Res2X, Res3X, Res4X, Res5X, Res6X, Res7X, Res8X);
-ENUM(ScalingFilter, NearestNeighbor, Bilinear, Bicubic, Gaussian, Lanczos, ScaleForce, Fsr, Area, ZeroTangent, BSpline, Mitchell, Spline1, Mmpx, MaxEnum);
+ENUM(ScalingFilter, NearestNeighbor, Bilinear, Bicubic, ZeroTangent, BSpline, Mitchell, Spline1, Gaussian, Lanczos, ScaleForce, Fsr, Area, Mmpx, MaxEnum);
 ENUM(AntiAliasing, None, Fxaa, Smaa, MaxEnum);
 ENUM(AspectRatio, R16_9, R4_3, R21_9, R16_10, Stretch);
 ENUM(ConsoleMode, Handheld, Docked);
diff --git a/src/qt_common/shared_translation.cpp b/src/qt_common/shared_translation.cpp
index 054d28e8e2..0b40ca9904 100644
--- a/src/qt_common/shared_translation.cpp
+++ b/src/qt_common/shared_translation.cpp
@@ -549,16 +549,16 @@ std::unique_ptr<ComboboxTranslationMap> ComboboxEnumeration(QObject* parent)
             PAIR(ScalingFilter, NearestNeighbor, tr("Nearest Neighbor")),
             PAIR(ScalingFilter, Bilinear, tr("Bilinear")),
             PAIR(ScalingFilter, Bicubic, tr("Bicubic")),
+            PAIR(ScalingFilter, ZeroTangent, tr("Zero-Tangent")),
+            PAIR(ScalingFilter, BSpline, tr("B-Spline")),
+            PAIR(ScalingFilter, Mitchell, tr("Mitchell")),
+            PAIR(ScalingFilter, Spline1, tr("Spline-1")),
             PAIR(ScalingFilter, Gaussian, tr("Gaussian")),
             PAIR(ScalingFilter, Lanczos, tr("Lanczos")),
             PAIR(ScalingFilter, ScaleForce, tr("ScaleForce")),
             PAIR(ScalingFilter, Fsr, tr("AMD FidelityFX™️ Super Resolution")),
             PAIR(ScalingFilter, Area, tr("Area")),
             PAIR(ScalingFilter, Mmpx, tr("MMPX")),
-            PAIR(ScalingFilter, ZeroTangent, tr("Zero-Tangent")),
-            PAIR(ScalingFilter, BSpline, tr("B-Spline")),
-            PAIR(ScalingFilter, Mitchell, tr("Mitchell")),
-            PAIR(ScalingFilter, Spline1, tr("Spline-1")),
         }});
    translations->insert({Settings::EnumMetadata::Index(),
                          {
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 388c8034c5..b5c8bd1996 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -9,6 +9,8 @@
 #include <algorithm>
 #include <memory>
 #include <numeric>
+#include <type_traits>
+#include <utility>
 
 #include "common/range_sets.inc"
 #include "video_core/buffer_cache/buffer_cache_base.h"
@@ -19,6 +21,43 @@ namespace VideoCommon {
 
 using Core::DEVICE_PAGESIZE;
 
+namespace staging_detail {
+template <typename T, typename = void>
+struct has_flush_range : std::false_type {};
+template <typename T>
+struct has_flush_range<
+    T, std::void_t<decltype(std::declval<T>().FlushRange(size_t{}, size_t{}))>> : std::true_type {};
+
+template <typename T, typename = void>
+struct has_invalidate_range : std::false_type {};
+template <typename T>
+struct has_invalidate_range<
+    T, std::void_t<decltype(std::declval<T>().InvalidateRange(size_t{}, size_t{}))>>
+    : std::true_type {};
+} // namespace staging_detail
+
+template <typename Ref>
+inline void StagingFlushRange(Ref& ref, size_t offset, size_t size) {
+    if constexpr (staging_detail::has_flush_range<Ref>::value) {
+        ref.FlushRange(offset, size);
+    } else {
+        (void)ref;
+        (void)offset;
+        (void)size;
+    }
+}
+
+template <typename Ref>
+inline void StagingInvalidateRange(Ref& ref, size_t offset, size_t size) {
+    if constexpr (staging_detail::has_invalidate_range<Ref>::value) {
+        ref.InvalidateRange(offset, size);
+    } else {
+        (void)ref;
+        (void)offset;
+        (void)size;
+    }
+}
+
 template <class P>
 BufferCache<P>::BufferCache(Tegra::MaxwellDeviceMemoryManager& device_memory_, Runtime& runtime_)
     : runtime{runtime_}, device_memory{device_memory_}, memory_tracker{device_memory} {
@@ -633,6 +672,7 @@ void BufferCache<P>::PopAsyncBuffers() {
         u8* base = async_buffer->mapped_span.data();
         const size_t base_offset = async_buffer->offset;
         for (const auto& copy : downloads) {
+            StagingInvalidateRange(*async_buffer, copy.dst_offset, copy.size);
             const DAddr device_addr = static_cast<DAddr>(copy.src_offset);
             const u64 dst_offset = copy.dst_offset - base_offset;
             const u8* read_mapped_memory = base + dst_offset;
@@ -696,6 +736,7 @@ void BufferCache<P>::BindHostIndexBuffer() {
                 {BufferCopy{.src_offset = upload_staging.offset, .dst_offset = 0, .size = size}}};
             std::memcpy(upload_staging.mapped_span.data(),
                         draw_state.inline_index_draw_indexes.data(), size);
+            StagingFlushRange(upload_staging, upload_staging.offset, size);
             runtime.CopyBuffer(buffer, upload_staging.buffer, copies, true);
         } else {
             buffer.ImmediateUpload(0, draw_state.inline_index_draw_indexes);
@@ -1519,7 +1560,7 @@ template <class P>
 void BufferCache<P>::MappedUploadMemory([[maybe_unused]] Buffer& buffer,
                                         [[maybe_unused]] u64 total_size_bytes,
                                         [[maybe_unused]] std::span<BufferCopy> copies) {
-    if constexpr (USE_MEMORY_MAPS) {
+    if constexpr (USE_MEMORY_MAPS) {
         auto upload_staging = runtime.UploadStagingBuffer(total_size_bytes);
         const std::span<u8> staging_pointer = upload_staging.mapped_span;
         for (BufferCopy& copy : copies) {
@@ -1530,6 +1571,7 @@ void BufferCache<P>::MappedUploadMemory([[maybe_unused]] Buffer& buffer,
             // Apply the staging offset
             copy.src_offset += upload_staging.offset;
         }
+        StagingFlushRange(upload_staging, upload_staging.offset, total_size_bytes);
         const bool can_reorder = runtime.CanReorderUpload(buffer, copies);
         runtime.CopyBuffer(buffer, upload_staging.buffer, copies, true, can_reorder);
     }
@@ -1572,6 +1614,7 @@ void BufferCache<P>::InlineMemoryImplementation(DAddr dest_address, size_t copy_
         }};
         u8* const src_pointer = upload_staging.mapped_span.data();
         std::memcpy(src_pointer, inlined_buffer.data(), copy_size);
+        StagingFlushRange(upload_staging, upload_staging.offset, copy_size);
         const bool can_reorder = runtime.CanReorderUpload(buffer, copies);
         runtime.CopyBuffer(buffer, upload_staging.buffer, copies, true, can_reorder);
     } else {
@@ -1626,6 +1669,7 @@ void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, DAddr device_addr, u64
         }
         runtime.CopyBuffer(download_staging.buffer, buffer, copies_span, true);
         runtime.Finish();
+        StagingInvalidateRange(download_staging, download_staging.offset, total_size_bytes);
         for (const BufferCopy& copy : copies) {
             const DAddr copy_device_addr = buffer.CpuAddr() + copy.src_offset;
             // Undo the modified offset
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 2c807b9c69..45033ba6d4 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -194,6 +194,7 @@ public:
         }
 
         if (!host_visible) {
+            staging.FlushRange(staging.offset, static_cast<VkDeviceSize>(size_bytes));
             scheduler.RequestOutsideRenderPassOperationContext();
             scheduler.Record([src_buffer = staging.buffer, src_offset = staging.offset,
                               dst_buffer = *buffer, size_bytes](vk::CommandBuffer cmdbuf) {
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp
index 31cc05c2e1..32efe87b28 100644
--- a/src/video_core/renderer_vulkan/vk_query_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp
@@ -850,6 +850,10 @@ public:
             pending_flush_sets.pop_front();
         }
 
+        const VkDeviceSize read_size =
+            static_cast<VkDeviceSize>(flushed_queries.size() * TFBQueryBank::QUERY_SIZE);
+        staging_ref.InvalidateRange(staging_ref.offset, read_size);
+
         size_t offset_base = staging_ref.offset;
         for (auto q : flushed_queries) {
             auto* query = GetQuery(q);
@@ -1577,6 +1581,7 @@ void QueryCacheRuntime::SyncValues(std::span<SyncValuesType> values, VkBuffer ba
                                values[i].size);
             accumulated_size += values[i].size;
         }
+        ref.FlushRange(ref.offset, static_cast<VkDeviceSize>(accumulated_size));
         src_buffer = ref.buffer;
     } else {
         for (size_t i = 0; i < values.size(); i++) {
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
index 49a0f33805..88da60b190 100644
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
@@ -5,6 +5,7 @@
 // SPDX-License-Identifier: GPL-3.0-or-later
 
 #include <algorithm>
+#include <memory>
 #include <utility>
 #include <vector>
 
@@ -187,9 +188,12 @@ StagingBufferRef StagingBufferPool::GetStreamBuffer(size_t size) {
         .buffer = *stream_buffer,
         .offset = static_cast<VkDeviceSize>(offset),
         .mapped_span = stream_pointer.subspan(offset, size),
-        .usage{},
-        .log2_level{},
-        .index{},
+        .usage = MemoryUsage::Upload,
+        .log2_level = 0,
+        .index = 0,
+        .owner = &stream_buffer,
+        .atom_size = non_coherent_atom_size,
+        .is_coherent = stream_is_coherent,
     };
 }
 
@@ -301,15 +305,19 @@ StagingBufferRef StagingBufferPool::CreateStagingBuffer(size_t size, MemoryUsage
         ++buffer_index;
         buffer.SetObjectNameEXT(fmt::format("Staging Buffer {}", buffer_index).c_str());
     }
+    const bool is_coherent = buffer.IsHostCoherent();
     const std::span<u8> mapped_span = buffer.Mapped();
+    auto buffer_ptr = std::make_unique<vk::Buffer>(std::move(buffer));
     StagingBuffer& entry = GetCache(usage)[log2].entries.emplace_back(StagingBuffer{
-        .buffer = std::move(buffer),
+        .buffer = std::move(buffer_ptr),
         .mapped_span = mapped_span,
         .usage = usage,
         .log2_level = log2,
         .index = unique_ids++,
         .tick = deferred ? (std::numeric_limits<u64>::max)() : scheduler.CurrentTick(),
         .deferred = deferred,
+        .is_coherent = is_coherent,
+        .atom_size = is_coherent ? 1 : non_coherent_atom_size,
     });
     return entry.Ref();
 }
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
index 9284578975..8bd325c51f 100644
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
@@ -8,9 +8,11 @@
 #include <climits>
 #include
+#include <memory>
 #include <vector>
 
 #include "common/common_types.h"
+#include "common/alignment.h"
 
 #include "video_core/vulkan_common/vulkan_memory_allocator.h"
 #include "video_core/vulkan_common/vulkan_wrapper.h"
@@ -27,6 +29,47 @@ struct StagingBufferRef {
     MemoryUsage usage;
     u32 log2_level;
     u64 index;
+    const vk::Buffer* owner = nullptr;
+    VkDeviceSize atom_size = 1;
+    bool is_coherent = true;
+
+    void FlushRange(VkDeviceSize range_offset, VkDeviceSize size) const {
+        if (!owner || is_coherent || size == 0) {
+            return;
+        }
+        if (size == VK_WHOLE_SIZE) {
+            owner->FlushRange(range_offset, size);
+            return;
+        }
+        const VkDeviceSize atom = atom_size ? atom_size : 1;
+        const VkDeviceSize range_end = range_offset + size;
+        if (range_end < range_offset) {
+            owner->FlushRange(range_offset, size);
+            return;
+        }
+        const VkDeviceSize aligned_begin = Common::AlignDown(range_offset, atom);
+        const VkDeviceSize aligned_end = Common::AlignUp(range_end, atom);
+        owner->FlushRange(aligned_begin, aligned_end - aligned_begin);
+    }
+
+    void InvalidateRange(VkDeviceSize range_offset, VkDeviceSize size) const {
+        if (!owner || is_coherent || size == 0) {
+            return;
+        }
+        if (size == VK_WHOLE_SIZE) {
+            owner->InvalidateRange(range_offset, size);
+            return;
+        }
+        const VkDeviceSize atom = atom_size ? atom_size : 1;
+        const VkDeviceSize range_end = range_offset + size;
+        if (range_end < range_offset) {
+            owner->InvalidateRange(range_offset, size);
+            return;
+        }
+        const VkDeviceSize aligned_begin = Common::AlignDown(range_offset, atom);
+        const VkDeviceSize aligned_end = Common::AlignUp(range_end, atom);
+        owner->InvalidateRange(aligned_begin, aligned_end - aligned_begin);
+    }
 };
 
 class StagingBufferPool {
@@ -55,22 +98,27 @@ private:
     };
 
     struct StagingBuffer {
-        vk::Buffer buffer;
+        std::unique_ptr<vk::Buffer> buffer;
         std::span<u8> mapped_span;
         MemoryUsage usage;
         u32 log2_level;
         u64 index;
         u64 tick = 0;
         bool deferred{};
+        bool is_coherent = true;
+        VkDeviceSize atom_size = 1;
 
         StagingBufferRef Ref() const noexcept {
             return {
-                .buffer = *buffer,
+                .buffer = buffer ? **buffer : VkBuffer{},
                 .offset = 0,
                 .mapped_span = mapped_span,
                 .usage = usage,
                 .log2_level = log2_level,
                 .index = index,
+                .owner = buffer.get(),
+                .atom_size = atom_size,
+                .is_coherent = is_coherent,
             };
         }
     };
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index e5d559b591..d855c5f16b 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -7,6 +7,8 @@
 #pragma once
 
 #include
+#include <type_traits>
+#include <utility>
 #include
 
 #include "common/alignment.h"
@@ -30,6 +32,42 @@
 using VideoCore::Surface::PixelFormat;
 using VideoCore::Surface::SurfaceType;
 using namespace Common::Literals;
+namespace staging_detail {
+template <typename T, typename = void>
+struct has_flush_range : std::false_type {};
+template <typename T>
+struct has_flush_range<
+    T, std::void_t<decltype(std::declval<T>().FlushRange(size_t{}, size_t{}))>> : std::true_type {};
+template <typename T, typename = void>
+struct has_invalidate_range : std::false_type {};
+template <typename T>
+struct has_invalidate_range<
+    T, std::void_t<decltype(std::declval<T>().InvalidateRange(size_t{}, size_t{}))>>
+    : std::true_type {};
+} // namespace staging_detail
+
+template <typename Ref>
+inline void StagingFlushRange(Ref& ref, size_t offset, size_t size) {
+    if constexpr (staging_detail::has_flush_range<Ref>::value) {
+        ref.FlushRange(offset, size);
+    } else {
+        (void)ref;
+        (void)offset;
+        (void)size;
+    }
+}
+
+template <typename Ref>
+inline void StagingInvalidateRange(Ref& ref, size_t offset, size_t size) {
+    if constexpr (staging_detail::has_invalidate_range<Ref>::value) {
+        ref.InvalidateRange(offset, size);
+    } else {
+        (void)ref;
+        (void)offset;
+        (void)size;
+    }
+}
+
 template <class P>
 TextureCache<P>::TextureCache(Runtime& runtime_, Tegra::MaxwellDeviceMemoryManager& device_memory_)
     : runtime{runtime_}, device_memory{device_memory_} {
@@ -111,6 +149,7 @@ void TextureCache<P>::RunGarbageCollector() {
             const auto copies = FullDownloadCopies(image.info);
             image.DownloadMemory(map, copies);
             runtime.Finish();
+            StagingInvalidateRange(map, map.offset, image.unswizzled_size_bytes);
             SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span,
                          swizzle_data_buffer);
         }
@@ -567,6 +606,7 @@ void TextureCache<P>::DownloadMemory(DAddr cpu_addr, size_t size) {
         const auto copies = FullDownloadCopies(image.info);
         image.DownloadMemory(map, copies);
         runtime.Finish();
+        StagingInvalidateRange(map, map.offset, image.unswizzled_size_bytes);
         SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span,
                      swizzle_data_buffer);
     }
@@ -863,13 +903,17 @@ void TextureCache<P>::PopAsyncFlushes() {
         if (download_info.is_swizzle) {
             const ImageBase& image = slot_images[download_info.object_id];
             const auto copies = FullDownloadCopies(image.info);
-            download_buffer.offset -= Common::AlignUp(image.unswizzled_size_bytes, 64);
+            const size_t aligned_size =
+                Common::AlignUp(image.unswizzled_size_bytes, static_cast<size_t>(64));
+            download_buffer.offset -= aligned_size;
+            StagingInvalidateRange(download_buffer, download_buffer.offset, aligned_size);
             std::span<u8> download_span =
                 download_buffer.mapped_span.subspan(download_buffer.offset);
             SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span,
                          swizzle_data_buffer);
         } else {
             const BufferDownload& buffer_info = slot_buffer_downloads[download_info.object_id];
+            StagingInvalidateRange(download_buffer, download_buffer.offset, buffer_info.size);
             std::span<u8> download_span =
                 download_buffer.mapped_span.subspan(download_buffer.offset);
             gpu_memory->WriteBlockUnsafe(buffer_info.address, download_span.data(),
@@ -907,6 +951,7 @@ void TextureCache<P>::PopAsyncFlushes() {
         }
         // Wait for downloads to finish
         runtime.Finish();
+        StagingInvalidateRange(download_map, original_offset, total_size_bytes);
         download_map.offset = original_offset;
         std::span<u8> download_span = download_map.mapped_span;
         for (const PendingDownload& download_info : download_ids) {
@@ -1081,6 +1126,7 @@ void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging)
     if (True(image.flags & ImageFlagBits::AcceleratedUpload)) {
         gpu_memory->ReadBlock(gpu_addr, mapped_span.data(), mapped_span.size_bytes(),
                               VideoCommon::CacheType::NoTextureCache);
+        StagingFlushRange(staging, staging.offset, mapped_span.size_bytes());
         const auto uploads = FullUploadSwizzles(image.info);
         runtime.AccelerateImageUpload(image, staging, uploads);
         return;
@@ -1094,10 +1140,12 @@ void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging)
         auto copies = UnswizzleImage(*gpu_memory, gpu_addr, image.info, swizzle_data,
                                      unswizzle_data_buffer);
         ConvertImage(unswizzle_data_buffer, image.info, mapped_span, copies);
+        StagingFlushRange(staging, staging.offset, mapped_span.size_bytes());
         image.UploadMemory(staging, copies);
     } else {
         const auto copies = UnswizzleImage(*gpu_memory, gpu_addr, image.info, swizzle_data,
                                            mapped_span);
+        StagingFlushRange(staging, staging.offset, mapped_span.size_bytes());
         image.UploadMemory(staging, copies);
     }
 }
@@ -1329,6 +1377,7 @@ void TextureCache<P>::TickAsyncDecode() {
         auto staging = runtime.UploadStagingBuffer(MapSizeBytes(image));
         std::memcpy(staging.mapped_span.data(), async_decode->decoded_data.data(),
                     async_decode->decoded_data.size());
+        StagingFlushRange(staging, staging.offset, async_decode->decoded_data.size());
         image.UploadMemory(staging, async_decode->copies);
         image.flags &= ~ImageFlagBits::IsDecoding;
         has_uploads = true;
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp
index f8fbc0c206..77534776cf 100644
--- a/src/video_core/vulkan_common/vulkan_wrapper.cpp
+++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp
@@ -509,8 +509,12 @@ void Buffer::FlushRange(VkDeviceSize offset, VkDeviceSize size) const {
 }
 
 void Buffer::Invalidate() const {
+    InvalidateRange(0, VK_WHOLE_SIZE);
+}
+
+void Buffer::InvalidateRange(VkDeviceSize offset, VkDeviceSize size) const {
     if (!is_coherent) {
-        vmaInvalidateAllocation(allocator, allocation, 0, VK_WHOLE_SIZE);
+        vmaInvalidateAllocation(allocator, allocation, offset, size);
     }
 }
 
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h
index 625dc32fa8..7541a08e7f 100644
--- a/src/video_core/vulkan_common/vulkan_wrapper.h
+++ b/src/video_core/vulkan_common/vulkan_wrapper.h
@@ -783,6 +783,8 @@ public:
 
     void Invalidate() const;
 
+    void InvalidateRange(VkDeviceSize offset, VkDeviceSize size) const;
+
     void SetObjectNameEXT(const char* name) const;
 
 private:
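
Reviewer note: the sketch below is not part of the patch; it only illustrates the ordering the new helpers are meant to enforce around non-coherent staging memory. `Staging` stands in for StagingBufferRef from vk_staging_buffer_pool.h, and DoGpuCopy()/WaitForGpu() are hypothetical placeholders for the real scheduler/runtime calls (e.g. runtime.CopyBuffer followed by runtime.Finish).

    // Illustrative only: assumes a Staging type exposing mapped_span, offset,
    // FlushRange() and InvalidateRange(), as StagingBufferRef does in this patch.
    #include <cstddef>
    #include <cstring>
    #include <span>

    void DoGpuCopy();   // placeholder: record the copy that reads/writes the staging buffer
    void WaitForGpu();  // placeholder: fence wait, e.g. runtime.Finish()

    template <typename Staging>
    void UploadThroughStaging(Staging& staging, std::span<const std::byte> src) {
        // CPU -> staging: write through the persistent mapping first...
        std::memcpy(staging.mapped_span.data(), src.data(), src.size_bytes());
        // ...then make the write visible to the device. This is a no-op on
        // host-coherent memory; otherwise the range is rounded out to the
        // non-coherent atom size before vmaFlushAllocation is reached.
        staging.FlushRange(staging.offset, src.size_bytes());
        DoGpuCopy();
    }

    template <typename Staging>
    void DownloadThroughStaging(Staging& staging, std::span<std::byte> dst) {
        DoGpuCopy();
        WaitForGpu();
        // Make the device write visible to the host before reading the mapping.
        staging.InvalidateRange(staging.offset, dst.size_bytes());
        std::memcpy(dst.data(), staging.mapped_span.data(), dst.size_bytes());
    }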