diff --git a/src/android/app/src/main/res/values/arrays.xml b/src/android/app/src/main/res/values/arrays.xml
index 7e44750909..2150d401db 100644
--- a/src/android/app/src/main/res/values/arrays.xml
+++ b/src/android/app/src/main/res/values/arrays.xml
@@ -253,16 +253,16 @@
- @string/scaling_filter_nearest_neighbor
- @string/scaling_filter_bilinear
- @string/scaling_filter_bicubic
- - @string/scaling_filter_zero_tangent
- - @string/scaling_filter_bspline
- - @string/scaling_filter_mitchell
- - @string/scaling_filter_spline1
- @string/scaling_filter_gaussian
- @string/scaling_filter_lanczos
- @string/scaling_filter_scale_force
- @string/scaling_filter_fsr
- @string/scaling_filter_area
- @string/scaling_filter_mmpx
+ - @string/scaling_filter_zero_tangent
+ - @string/scaling_filter_bspline
+ - @string/scaling_filter_mitchell
+ - @string/scaling_filter_spline1
diff --git a/src/common/settings_enums.h b/src/common/settings_enums.h
index 3fcdf08256..ccf6f1cfb2 100644
--- a/src/common/settings_enums.h
+++ b/src/common/settings_enums.h
@@ -143,7 +143,7 @@ ENUM(ConfirmStop, Ask_Always, Ask_Based_On_Game, Ask_Never);
ENUM(FullscreenMode, Borderless, Exclusive);
ENUM(NvdecEmulation, Off, Cpu, Gpu);
ENUM(ResolutionSetup, Res1_4X, Res1_2X, Res3_4X, Res1X, Res5_4X, Res3_2X, Res2X, Res3X, Res4X, Res5X, Res6X, Res7X, Res8X);
-ENUM(ScalingFilter, NearestNeighbor, Bilinear, Bicubic, ZeroTangent, BSpline, Mitchell, Spline1, Gaussian, Lanczos, ScaleForce, Fsr, Area, Mmpx, MaxEnum);
+ENUM(ScalingFilter, NearestNeighbor, Bilinear, Bicubic, Gaussian, Lanczos, ScaleForce, Fsr, Area, ZeroTangent, BSpline, Mitchell, Spline1, Mmpx, MaxEnum);
ENUM(AntiAliasing, None, Fxaa, Smaa, MaxEnum);
ENUM(AspectRatio, R16_9, R4_3, R21_9, R16_10, Stretch);
ENUM(ConsoleMode, Handheld, Docked);
diff --git a/src/qt_common/shared_translation.cpp b/src/qt_common/shared_translation.cpp
index 0b40ca9904..054d28e8e2 100644
--- a/src/qt_common/shared_translation.cpp
+++ b/src/qt_common/shared_translation.cpp
@@ -549,16 +549,16 @@ std::unique_ptr<ComboboxTranslationMap> ComboboxEnumeration(QObject* parent)
PAIR(ScalingFilter, NearestNeighbor, tr("Nearest Neighbor")),
PAIR(ScalingFilter, Bilinear, tr("Bilinear")),
PAIR(ScalingFilter, Bicubic, tr("Bicubic")),
- PAIR(ScalingFilter, ZeroTangent, tr("Zero-Tangent")),
- PAIR(ScalingFilter, BSpline, tr("B-Spline")),
- PAIR(ScalingFilter, Mitchell, tr("Mitchell")),
- PAIR(ScalingFilter, Spline1, tr("Spline-1")),
PAIR(ScalingFilter, Gaussian, tr("Gaussian")),
PAIR(ScalingFilter, Lanczos, tr("Lanczos")),
PAIR(ScalingFilter, ScaleForce, tr("ScaleForce")),
PAIR(ScalingFilter, Fsr, tr("AMD FidelityFX™️ Super Resolution")),
PAIR(ScalingFilter, Area, tr("Area")),
PAIR(ScalingFilter, Mmpx, tr("MMPX")),
+ PAIR(ScalingFilter, ZeroTangent, tr("Zero-Tangent")),
+ PAIR(ScalingFilter, BSpline, tr("B-Spline")),
+ PAIR(ScalingFilter, Mitchell, tr("Mitchell")),
+ PAIR(ScalingFilter, Spline1, tr("Spline-1")),
}});
translations->insert({Settings::EnumMetadata<Settings::AntiAliasing>::Index(),
{
diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.cpp b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
index e6e72cdca7..3ef606c4d2 100644
--- a/src/video_core/renderer_vulkan/renderer_vulkan.cpp
+++ b/src/video_core/renderer_vulkan/renderer_vulkan.cpp
@@ -166,7 +166,7 @@ try
if (Settings::values.renderer_force_max_clock.GetValue() && device.ShouldBoostClocks()) {
turbo_mode.emplace(instance, dld);
- scheduler.RegisterOnSubmit([this] { turbo_mode->QueueSubmitted(); });
+ scheduler.AddOnSubmit([this] { turbo_mode->QueueSubmitted(); });
}
Report();
@@ -176,7 +176,7 @@ try
}
RendererVulkan::~RendererVulkan() {
- scheduler.RegisterOnSubmit([] {});
+ scheduler.RegisterOnSubmit(std::function<void()>{});
void(device.GetLogical().WaitIdle());
}
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
index 55565e3d79..2c807b9c69 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp
@@ -179,6 +179,11 @@ public:
}();
u8* staging_data = host_visible ? buffer.Mapped().data() : staging.mapped_span.data();
+#ifdef YUZU_DEBUG
+ if (!host_visible) {
+ ASSERT(staging.mapped_span.size() >= size_bytes);
+ }
+#endif
const size_t quad_size = bytes_per_index * 6;
for (u32 first = 0; first < num_first_offset_copies; ++first) {
@@ -514,6 +519,10 @@ void BufferCacheRuntime::BindIndexBuffer(PrimitiveTopology topology, IndexFormat
ReserveNullBuffer();
vk_buffer = *null_buffer;
}
+#ifdef YUZU_DEBUG
+ const size_t bytes_per_index = BytesPerIndex(vk_index_type);
+ ASSERT(bytes_per_index == 0 || (vk_offset % bytes_per_index) == 0);
+#endif
scheduler.Record([vk_buffer, vk_offset, vk_index_type](vk::CommandBuffer cmdbuf) {
cmdbuf.BindIndexBuffer(vk_buffer, vk_offset, vk_index_type);
});
diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h
index efe960258c..784e1f7c5c 100644
--- a/src/video_core/renderer_vulkan/vk_buffer_cache.h
+++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h
@@ -128,20 +128,37 @@ public:
[[maybe_unused]] u32 binding_index, u32 size) {
const StagingBufferRef ref = staging_pool.Request(size, MemoryUsage::Upload);
BindBuffer(ref.buffer, static_cast<u32>(ref.offset), size);
+#ifdef YUZU_DEBUG
+ ASSERT(ref.mapped_span.size() >= size);
+ const VkDeviceSize ubo_align = device.GetUniformBufferAlignment();
+ ASSERT(ubo_align == 0 || (ref.offset % ubo_align) == 0);
+#endif
return ref.mapped_span;
}
void BindUniformBuffer(VkBuffer buffer, u32 offset, u32 size) {
+#ifdef YUZU_DEBUG
+ const VkDeviceSize ubo_align = device.GetUniformBufferAlignment();
+ ASSERT(ubo_align == 0 || (offset % ubo_align) == 0);
+#endif
BindBuffer(buffer, offset, size);
}
void BindStorageBuffer(VkBuffer buffer, u32 offset, u32 size,
[[maybe_unused]] bool is_written) {
+#ifdef YUZU_DEBUG
+ const VkDeviceSize ssbo_align = device.GetStorageBufferAlignment();
+ ASSERT(ssbo_align == 0 || (offset % ssbo_align) == 0);
+#endif
BindBuffer(buffer, offset, size);
}
void BindTextureBuffer(Buffer& buffer, u32 offset, u32 size,
VideoCore::Surface::PixelFormat format) {
+#ifdef YUZU_DEBUG
+ const VkDeviceSize texel_align = device.GetTexelBufferAlignment();
+ ASSERT(texel_align == 0 || (offset % texel_align) == 0);
+#endif
guest_descriptor_queue.AddTexelBuffer(buffer.View(offset, size, format));
}
diff --git a/src/video_core/renderer_vulkan/vk_query_cache.cpp b/src/video_core/renderer_vulkan/vk_query_cache.cpp
index 89e0b1114e..31cc05c2e1 100644
--- a/src/video_core/renderer_vulkan/vk_query_cache.cpp
+++ b/src/video_core/renderer_vulkan/vk_query_cache.cpp
@@ -854,6 +854,9 @@ public:
for (auto q : flushed_queries) {
auto* query = GetQuery(q);
u32 result = 0;
+#ifdef YUZU_DEBUG
+ ASSERT(staging_ref.mapped_span.size() >= offset_base + sizeof(u32));
+#endif
std::memcpy(&result, staging_ref.mapped_span.data() + offset_base, sizeof(u32));
query->value = static_cast<u64>(result);
query->flags |= VideoCommon::QueryFlagBits::IsFinalValueSynced;
@@ -1567,6 +1570,9 @@ void QueryCacheRuntime::SyncValues(std::span<SyncValuesType> values, VkBuffer ba
impl->little_cache[which_copy].first,
.size = values[i].size,
});
+#ifdef YUZU_DEBUG
+ ASSERT(ref.mapped_span.size() >= accumulated_size + values[i].size);
+#endif
std::memcpy(ref.mapped_span.data() + accumulated_size, &values[i].value,
values[i].size);
accumulated_size += values[i].size;
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.cpp b/src/video_core/renderer_vulkan/vk_scheduler.cpp
index d109d22cab..06b4d34078 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.cpp
+++ b/src/video_core/renderer_vulkan/vk_scheduler.cpp
@@ -15,6 +15,7 @@
#include "video_core/renderer_vulkan/vk_command_pool.h"
#include "video_core/renderer_vulkan/vk_master_semaphore.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
+#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
#include "video_core/renderer_vulkan/vk_state_tracker.h"
#include "video_core/renderer_vulkan/vk_texture_cache.h"
#include "video_core/vulkan_common/vulkan_device.h"
@@ -233,8 +234,14 @@ u64 Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_se
upload_cmdbuf.End();
cmdbuf.End();
- if (on_submit) {
- on_submit();
+ if (staging_buffer_pool) {
+ staging_buffer_pool->FlushStream();
+ }
+
+ for (const auto& callback : on_submit_callbacks) {
+ if (callback) {
+ callback();
+ }
}
std::scoped_lock lock{submit_mutex};
diff --git a/src/video_core/renderer_vulkan/vk_scheduler.h b/src/video_core/renderer_vulkan/vk_scheduler.h
index 54ab8ba52b..50d05e57f1 100644
--- a/src/video_core/renderer_vulkan/vk_scheduler.h
+++ b/src/video_core/renderer_vulkan/vk_scheduler.h
@@ -10,6 +10,7 @@
#include <memory>
#include <thread>
#include <utility>
+#include <vector>
#include "common/alignment.h"
#include "common/common_types.h"
@@ -29,6 +30,7 @@ class Device;
class Framebuffer;
class GraphicsPipeline;
class StateTracker;
+class StagingBufferPool;
struct QueryCacheParams;
@@ -73,9 +75,23 @@ public:
query_cache = &query_cache_;
}
- // Registers a callback to perform on queue submission.
+ void SetStagingBufferPool(StagingBufferPool* pool) {
+ staging_buffer_pool = pool;
+ }
+
+ // Registers a callback to perform on queue submission, replacing existing callbacks.
void RegisterOnSubmit(std::function<void()>&& func) {
- on_submit = std::move(func);
+ on_submit_callbacks.clear();
+ if (func) {
+ on_submit_callbacks.emplace_back(std::move(func));
+ }
+ }
+
+ // Adds an additional callback to perform on queue submission.
+ void AddOnSubmit(std::function<void()>&& func) {
+ if (func) {
+ on_submit_callbacks.emplace_back(std::move(func));
+ }
}
/// Send work to a separate thread.
@@ -237,12 +253,13 @@ private:
std::unique_ptr<CommandPool> command_pool;
VideoCommon::QueryCacheBase<QueryCacheParams>* query_cache = nullptr;
+ StagingBufferPool* staging_buffer_pool = nullptr;
vk::CommandBuffer current_cmdbuf;
vk::CommandBuffer current_upload_cmdbuf;
std::unique_ptr<CommandChunk> chunk;
- std::function<void()> on_submit;
+ std::vector<std::function<void()>> on_submit_callbacks;
State state;
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
index 0fbe707b04..49a0f33805 100644
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
@@ -30,6 +30,11 @@ constexpr VkDeviceSize MIN_STREAM_ALIGNMENT = 256;
// Stream buffer size in bytes
constexpr VkDeviceSize MAX_STREAM_BUFFER_SIZE = 128_MiB;
+VkDeviceSize GetStreamAlignment(const Device& device) {
+ return (std::max)({device.GetUniformBufferAlignment(), device.GetStorageBufferAlignment(),
+ device.GetTexelBufferAlignment(), MIN_STREAM_ALIGNMENT});
+}
+
size_t GetStreamBufferSize(const Device& device, VkDeviceSize alignment) {
VkDeviceSize size{0};
if (device.HasDebuggingToolAttached()) {
@@ -63,8 +68,7 @@ size_t GetStreamBufferSize(const Device& device, VkDeviceSize alignment) {
StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& memory_allocator_,
Scheduler& scheduler_)
: device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_},
- stream_alignment{std::max(device_.GetUniformBufferAlignment(),
- MIN_STREAM_ALIGNMENT)},
+ stream_alignment{GetStreamAlignment(device_)},
stream_buffer_size{GetStreamBufferSize(device_, stream_alignment)},
region_size{stream_buffer_size / StagingBufferPool::NUM_SYNCS} {
VkBufferCreateInfo stream_ci = {
@@ -87,9 +91,18 @@ StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& mem
}
stream_pointer = stream_buffer.Mapped();
ASSERT_MSG(!stream_pointer.empty(), "Stream buffer must be host visible!");
+ stream_is_coherent = stream_buffer.IsHostCoherent();
+ non_coherent_atom_size = std::max(device.GetNonCoherentAtomSize(),
+ static_cast<VkDeviceSize>(1));
+ dirty_begin = stream_buffer_size;
+ dirty_end = 0;
+ stream_dirty = false;
+ scheduler.SetStagingBufferPool(this);
}
-StagingBufferPool::~StagingBufferPool() = default;
+StagingBufferPool::~StagingBufferPool() {
+ scheduler.SetStagingBufferPool(nullptr);
+}
StagingBufferRef StagingBufferPool::Request(size_t size, MemoryUsage usage, bool deferred) {
if (!deferred && usage == MemoryUsage::Upload && size <= region_size) {
@@ -121,9 +134,10 @@ void StagingBufferPool::TickFrame() {
StagingBufferRef StagingBufferPool::GetStreamBuffer(size_t size) {
const size_t alignment = static_cast(stream_alignment);
const size_t aligned_size = Common::AlignUp(size, alignment);
- const bool wraps = iterator + size >= stream_buffer_size;
+ const size_t capacity = static_cast<size_t>(stream_buffer_size);
+ const bool wraps = iterator + aligned_size > capacity;
const size_t new_iterator =
- wraps ? aligned_size : Common::AlignUp(iterator + size, alignment);
+ wraps ? aligned_size : Common::AlignUp(iterator + aligned_size, alignment);
const size_t begin_region = wraps ? 0 : Region(iterator);
const size_t last_byte = new_iterator == 0 ? 0 : new_iterator - 1;
const size_t end_region = (std::min)(Region(last_byte) + 1, NUM_SYNCS);
@@ -167,6 +181,8 @@ StagingBufferRef StagingBufferPool::GetStreamBuffer(size_t size) {
free_iterator = (std::max)(free_iterator, offset + aligned_size);
}
+ TrackStreamWrite(static_cast<VkDeviceSize>(offset), static_cast<VkDeviceSize>(aligned_size));
+
return StagingBufferRef{
.buffer = *stream_buffer,
.offset = static_cast<VkDeviceSize>(offset),
@@ -177,6 +193,53 @@ StagingBufferRef StagingBufferPool::GetStreamBuffer(size_t size) {
};
}
+void StagingBufferPool::TrackStreamWrite(VkDeviceSize offset, VkDeviceSize size) {
+ if (stream_is_coherent || size == 0) {
+ return;
+ }
+ const VkDeviceSize clamped_offset = (std::min)(offset, stream_buffer_size);
+ const VkDeviceSize clamped_end = (std::min)(clamped_offset + size, stream_buffer_size);
+ std::scoped_lock lock{stream_mutex};
+ if (!stream_dirty) {
+ dirty_begin = clamped_offset;
+ dirty_end = clamped_end;
+ stream_dirty = true;
+ return;
+ }
+ dirty_begin = (std::min)(dirty_begin, clamped_offset);
+ dirty_end = (std::max)(dirty_end, clamped_end);
+}
+
+void StagingBufferPool::FlushStream() {
+ if (stream_is_coherent) {
+ return;
+ }
+
+ VkDeviceSize flush_begin = 0;
+ VkDeviceSize flush_end = 0;
+ {
+ std::scoped_lock lock{stream_mutex};
+ if (!stream_dirty) {
+ return;
+ }
+ flush_begin = dirty_begin;
+ flush_end = dirty_end;
+ stream_dirty = false;
+ dirty_begin = stream_buffer_size;
+ dirty_end = 0;
+ }
+
+ if (flush_begin >= flush_end) {
+ return;
+ }
+
+ const VkDeviceSize atom = non_coherent_atom_size;
+ const VkDeviceSize aligned_begin = Common::AlignDown(flush_begin, atom);
+ const VkDeviceSize aligned_end = Common::AlignUp(flush_end, atom);
+ const VkDeviceSize flush_size = aligned_end - aligned_begin;
+ stream_buffer.FlushRange(aligned_begin, flush_size);
+}
+
bool StagingBufferPool::AreRegionsActive(size_t region_begin, size_t region_end) const {
const u64 gpu_tick = scheduler.GetMasterSemaphore().KnownGpuTick();
return std::any_of(sync_ticks.begin() + region_begin, sync_ticks.begin() + region_end,
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
index 5c40ca069f..9284578975 100644
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
@@ -7,6 +7,7 @@
#pragma once
#include <climits>
+#include <mutex>
#include <vector>
#include "common/common_types.h"
@@ -30,6 +31,8 @@ struct StagingBufferRef {
class StagingBufferPool {
public:
+ friend class Scheduler;
+
static constexpr size_t NUM_SYNCS = 16;
explicit StagingBufferPool(const Device& device, MemoryAllocator& memory_allocator,
@@ -83,6 +86,9 @@ private:
StagingBufferRef GetStreamBuffer(size_t size);
+ void TrackStreamWrite(VkDeviceSize offset, VkDeviceSize size);
+ void FlushStream();
+
bool AreRegionsActive(size_t region_begin, size_t region_end) const;
StagingBufferRef GetStagingBuffer(size_t size, MemoryUsage usage, bool deferred = false);
@@ -110,6 +116,12 @@ private:
std::span<u8> stream_pointer;
VkDeviceSize stream_buffer_size;
VkDeviceSize region_size;
+ bool stream_is_coherent = true;
+ VkDeviceSize non_coherent_atom_size = 1;
+ VkDeviceSize dirty_begin = 0;
+ VkDeviceSize dirty_end = 0;
+ bool stream_dirty = false;
+ std::mutex stream_mutex;
size_t iterator = 0;
size_t used_iterator = 0;
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h
index cb13f28523..312d25a449 100644
--- a/src/video_core/vulkan_common/vulkan_device.h
+++ b/src/video_core/vulkan_common/vulkan_device.h
@@ -295,6 +295,16 @@ public:
return properties.properties.limits.minStorageBufferOffsetAlignment;
}
+ /// Returns texel buffer alignment requirement.
+ VkDeviceSize GetTexelBufferAlignment() const {
+ return properties.properties.limits.minTexelBufferOffsetAlignment;
+ }
+
+ /// Returns the non-coherent atom size for memory flushes.
+ VkDeviceSize GetNonCoherentAtomSize() const {
+ return properties.properties.limits.nonCoherentAtomSize;
+ }
+
/// Returns the maximum range for storage buffers.
VkDeviceSize GetMaxStorageBufferRange() const {
return properties.properties.limits.maxStorageBufferRange;
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.cpp b/src/video_core/vulkan_common/vulkan_wrapper.cpp
index b77d01711a..f8fbc0c206 100644
--- a/src/video_core/vulkan_common/vulkan_wrapper.cpp
+++ b/src/video_core/vulkan_common/vulkan_wrapper.cpp
@@ -499,8 +499,12 @@ void Image::Release() const noexcept {
}
void Buffer::Flush() const {
+ FlushRange(0, VK_WHOLE_SIZE);
+}
+
+void Buffer::FlushRange(VkDeviceSize offset, VkDeviceSize size) const {
if (!is_coherent) {
- vmaFlushAllocation(allocator, allocation, 0, VK_WHOLE_SIZE);
+ vmaFlushAllocation(allocator, allocation, offset, size);
}
}
diff --git a/src/video_core/vulkan_common/vulkan_wrapper.h b/src/video_core/vulkan_common/vulkan_wrapper.h
index 39396b3279..625dc32fa8 100644
--- a/src/video_core/vulkan_common/vulkan_wrapper.h
+++ b/src/video_core/vulkan_common/vulkan_wrapper.h
@@ -772,8 +772,15 @@ public:
return !mapped.empty();
}
+ /// Returns true if the buffer memory is host coherent.
+ bool IsHostCoherent() const noexcept {
+ return is_coherent;
+ }
+
void Flush() const;
+ void FlushRange(VkDeviceSize offset, VkDeviceSize size) const;
+
void Invalidate() const;
void SetObjectNameEXT(const char* name) const;