Compare commits

..

3 commits

Author | SHA1 | Message | Checks | Date
Ribbit | 790f78e0ec | last hope | eden-license / license-header (pull_request) failing after 24s | 2025-10-08 21:23:22 -07:00
Ribbit | daa5f94915 | emergency fix | eden-license / license-header (pull_request) failing after 24s | 2025-10-08 20:09:32 -07:00
Ribbit | 17556bf64b | [vk] Fix Vulkan streaming ring alignment and flushes | eden-license / license-header (pull_request) failing after 23s | 2025-10-08 18:38:43 -07:00
24 changed files with 504 additions and 184 deletions

View file

@ -253,16 +253,16 @@
<item>@string/scaling_filter_nearest_neighbor</item>
<item>@string/scaling_filter_bilinear</item>
<item>@string/scaling_filter_bicubic</item>
<item>@string/scaling_filter_zero_tangent</item>
<item>@string/scaling_filter_bspline</item>
<item>@string/scaling_filter_mitchell</item>
<item>@string/scaling_filter_spline1</item>
<item>@string/scaling_filter_gaussian</item>
<item>@string/scaling_filter_lanczos</item>
<item>@string/scaling_filter_scale_force</item>
<item>@string/scaling_filter_fsr</item>
<item>@string/scaling_filter_area</item>
<item>@string/scaling_filter_mmpx</item>
<item>@string/scaling_filter_zero_tangent</item>
<item>@string/scaling_filter_bspline</item>
<item>@string/scaling_filter_mitchell</item>
<item>@string/scaling_filter_spline1</item>
</string-array>
<integer-array name="rendererScalingFilterValues">

View file

@ -143,7 +143,7 @@ ENUM(ConfirmStop, Ask_Always, Ask_Based_On_Game, Ask_Never);
ENUM(FullscreenMode, Borderless, Exclusive);
ENUM(NvdecEmulation, Off, Cpu, Gpu);
ENUM(ResolutionSetup, Res1_4X, Res1_2X, Res3_4X, Res1X, Res5_4X, Res3_2X, Res2X, Res3X, Res4X, Res5X, Res6X, Res7X, Res8X);
ENUM(ScalingFilter, NearestNeighbor, Bilinear, Bicubic, Gaussian, Lanczos, ScaleForce, Fsr, Area, ZeroTangent, BSpline, Mitchell, Spline1, Mmpx, MaxEnum);
ENUM(ScalingFilter, NearestNeighbor, Bilinear, Bicubic, ZeroTangent, BSpline, Mitchell, Spline1, Gaussian, Lanczos, ScaleForce, Fsr, Area, Mmpx, MaxEnum);
ENUM(AntiAliasing, None, Fxaa, Smaa, MaxEnum);
ENUM(AspectRatio, R16_9, R4_3, R21_9, R16_10, Stretch);
ENUM(ConsoleMode, Handheld, Docked);
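
The ScalingFilter lists touched by this patch (the Android string-array above, the ENUM here, and the Qt combobox entries further down) are kept in the same relative order so that UI position and enum value stay consistent. A minimal sketch of the index-to-enum mapping this implies, assuming the setting is persisted as the enum's integer index (how eden actually serializes it is not shown in this diff); FilterFromConfig and the Bilinear fallback are illustrative only:

// Minimal sketch, assuming the scaling filter is stored as the enum's integer index.
#include <cstdint>

enum class ScalingFilter : std::uint32_t {
    NearestNeighbor, Bilinear, Bicubic, ZeroTangent, BSpline, Mitchell, Spline1,
    Gaussian, Lanczos, ScaleForce, Fsr, Area, Mmpx, MaxEnum,
};

inline ScalingFilter FilterFromConfig(std::uint32_t stored_index) {
    // Out-of-range values from an older config fall back instead of reading past MaxEnum.
    if (stored_index >= static_cast<std::uint32_t>(ScalingFilter::MaxEnum)) {
        return ScalingFilter::Bilinear; // assumed fallback for illustration
    }
    return static_cast<ScalingFilter>(stored_index);
}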

View file

@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@ -109,6 +112,9 @@ public:
void ReadBlock(DAddr address, void* dest_pointer, size_t size);
void ReadBlockUnsafe(DAddr address, void* dest_pointer, size_t size);
#ifdef YUZU_DEBUG
bool ReadBlockFastChecked(DAddr address, void* dest_pointer, size_t size);
#endif
void WriteBlock(DAddr address, const void* src_pointer, size_t size);
void WriteBlockUnsafe(DAddr address, const void* src_pointer, size_t size);

View file

@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@ -467,6 +470,29 @@ void DeviceMemoryManager<Traits>::ReadBlockUnsafe(DAddr address, void* dest_poin
});
}
#ifdef YUZU_DEBUG
template <typename Traits>
bool DeviceMemoryManager<Traits>::ReadBlockFastChecked(DAddr address, void* dest_pointer,
size_t size) {
bool success = true;
WalkBlock(
address, size,
[&](size_t copy_amount, DAddr current_vaddr) {
LOG_CRITICAL(Render, "DeviceMemory OOB/unmapped: addr=0x{:x} size={}", current_vaddr,
size);
std::memset(dest_pointer, 0, copy_amount);
success = false;
},
[&](size_t copy_amount, const u8* const src_ptr) {
std::memcpy(dest_pointer, src_ptr, copy_amount);
},
[&](const std::size_t copy_amount) {
dest_pointer = static_cast<u8*>(dest_pointer) + copy_amount;
});
return success;
}
#endif
template <typename Traits>
void DeviceMemoryManager<Traits>::WriteBlockUnsafe(DAddr address, const void* src_pointer,
size_t size) {

View file

@ -509,9 +509,6 @@ std::vector<std::string> ProfileManager::FindOrphanedProfiles()
good_uuids.emplace_back(uuid_string);
}
// used for acnh, etc
good_uuids.emplace_back("00000000000000000000000000000000");
// TODO: fetch save_id programmatically
const auto path = Common::FS::GetEdenPath(Common::FS::EdenPath::NANDDir)
/ "user/save/0000000000000000";

View file

@ -1,6 +1,3 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2020 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@ -71,6 +68,42 @@ std::string ResolveURL(const std::string& url) {
return url.substr(0, index) + "lp1" + url.substr(index + 1);
}
WebArgInputTLVMap ReadWebArgs(const std::vector<u8>& web_arg, WebArgHeader& web_arg_header) {
std::memcpy(&web_arg_header, web_arg.data(), sizeof(WebArgHeader));
if (web_arg.size() == sizeof(WebArgHeader)) {
return {};
}
WebArgInputTLVMap input_tlv_map;
u64 current_offset = sizeof(WebArgHeader);
for (std::size_t i = 0; i < web_arg_header.total_tlv_entries; ++i) {
if (web_arg.size() < current_offset + sizeof(WebArgInputTLV)) {
return input_tlv_map;
}
WebArgInputTLV input_tlv;
std::memcpy(&input_tlv, web_arg.data() + current_offset, sizeof(WebArgInputTLV));
current_offset += sizeof(WebArgInputTLV);
if (web_arg.size() < current_offset + input_tlv.arg_data_size) {
return input_tlv_map;
}
std::vector<u8> data(input_tlv.arg_data_size);
std::memcpy(data.data(), web_arg.data() + current_offset, input_tlv.arg_data_size);
current_offset += input_tlv.arg_data_size;
input_tlv_map.insert_or_assign(input_tlv.input_tlv_type, std::move(data));
}
return input_tlv_map;
}
FileSys::VirtualFile GetOfflineRomFS(Core::System& system, u64 title_id,
FileSys::ContentRecordType nca_type) {
if (nca_type == FileSys::ContentRecordType::Data) {
@ -111,43 +144,6 @@ FileSys::VirtualFile GetOfflineRomFS(Core::System& system, u64 title_id,
}
}
#ifdef YUZU_USE_QT_WEB_ENGINE
WebArgInputTLVMap ReadWebArgs(const std::vector<u8>& web_arg, WebArgHeader& web_arg_header) {
std::memcpy(&web_arg_header, web_arg.data(), sizeof(WebArgHeader));
if (web_arg.size() == sizeof(WebArgHeader)) {
return {};
}
WebArgInputTLVMap input_tlv_map;
u64 current_offset = sizeof(WebArgHeader);
for (std::size_t i = 0; i < web_arg_header.total_tlv_entries; ++i) {
if (web_arg.size() < current_offset + sizeof(WebArgInputTLV)) {
return input_tlv_map;
}
WebArgInputTLV input_tlv;
std::memcpy(&input_tlv, web_arg.data() + current_offset, sizeof(WebArgInputTLV));
current_offset += sizeof(WebArgInputTLV);
if (web_arg.size() < current_offset + input_tlv.arg_data_size) {
return input_tlv_map;
}
std::vector<u8> data(input_tlv.arg_data_size);
std::memcpy(data.data(), web_arg.data() + current_offset, input_tlv.arg_data_size);
current_offset += input_tlv.arg_data_size;
input_tlv_map.insert_or_assign(input_tlv.input_tlv_type, std::move(data));
}
return input_tlv_map;
}
void ExtractSharedFonts(Core::System& system) {
static constexpr std::array<const char*, 7> DECRYPTED_SHARED_FONTS{
"FontStandard.ttf",
@ -225,7 +221,6 @@ void ExtractSharedFonts(Core::System& system) {
FileSys::VfsRawCopy(decrypted_font, out_file);
}
}
#endif
} // namespace
@ -237,7 +232,6 @@ WebBrowser::WebBrowser(Core::System& system_, std::shared_ptr<Applet> applet_,
WebBrowser::~WebBrowser() = default;
void WebBrowser::Initialize() {
#ifdef YUZU_USE_QT_WEB_ENGINE
FrontendApplet::Initialize();
LOG_INFO(Service_AM, "Initializing Web Browser Applet.");
@ -290,7 +284,6 @@ void WebBrowser::Initialize() {
ASSERT_MSG(false, "Invalid ShimKind={}", web_arg_header.shim_kind);
break;
}
#endif
}
Result WebBrowser::GetStatus() const {
@ -302,7 +295,6 @@ void WebBrowser::ExecuteInteractive() {
}
void WebBrowser::Execute() {
#ifdef YUZU_USE_QT_WEB_ENGINE
switch (web_arg_header.shim_kind) {
case ShimKind::Shop:
ExecuteShop();
@ -330,10 +322,6 @@ void WebBrowser::Execute() {
WebBrowserExit(WebExitReason::EndButtonPressed);
break;
}
#else
LOG_INFO(Service_AM, "Web Browser Applet disabled, skipping.");
WebBrowserExit(WebExitReason::EndButtonPressed);
#endif
}
void WebBrowser::ExtractOfflineRomFS() {

View file

@ -45,10 +45,6 @@ if (NOT APPLE AND ENABLE_OPENGL)
target_compile_definitions(qt_common PUBLIC HAS_OPENGL)
endif()
if (UNIX AND NOT APPLE)
if (TARGET Qt6::GuiPrivate)
target_link_libraries(qt_common PRIVATE Qt6::GuiPrivate)
else()
target_include_directories(qt_common PRIVATE ${Qt6Gui_PRIVATE_INCLUDE_DIRS})
endif()
if (NOT WIN32)
target_include_directories(qt_common PRIVATE ${Qt6Gui_PRIVATE_INCLUDE_DIRS})
endif()

View file

@ -549,16 +549,16 @@ std::unique_ptr<ComboboxTranslationMap> ComboboxEnumeration(QObject* parent)
PAIR(ScalingFilter, NearestNeighbor, tr("Nearest Neighbor")),
PAIR(ScalingFilter, Bilinear, tr("Bilinear")),
PAIR(ScalingFilter, Bicubic, tr("Bicubic")),
PAIR(ScalingFilter, ZeroTangent, tr("Zero-Tangent")),
PAIR(ScalingFilter, BSpline, tr("B-Spline")),
PAIR(ScalingFilter, Mitchell, tr("Mitchell")),
PAIR(ScalingFilter, Spline1, tr("Spline-1")),
PAIR(ScalingFilter, Gaussian, tr("Gaussian")),
PAIR(ScalingFilter, Lanczos, tr("Lanczos")),
PAIR(ScalingFilter, ScaleForce, tr("ScaleForce")),
PAIR(ScalingFilter, Fsr, tr("AMD FidelityFX™ Super Resolution")),
PAIR(ScalingFilter, Area, tr("Area")),
PAIR(ScalingFilter, Mmpx, tr("MMPX")),
PAIR(ScalingFilter, ZeroTangent, tr("Zero-Tangent")),
PAIR(ScalingFilter, BSpline, tr("B-Spline")),
PAIR(ScalingFilter, Mitchell, tr("Mitchell")),
PAIR(ScalingFilter, Spline1, tr("Spline-1")),
}});
translations->insert({Settings::EnumMetadata<Settings::AntiAliasing>::Index(),
{

View file

@ -9,6 +9,8 @@
#include <algorithm>
#include <memory>
#include <numeric>
#include <type_traits>
#include <utility>
#include "common/range_sets.inc"
#include "video_core/buffer_cache/buffer_cache_base.h"
@ -19,6 +21,43 @@ namespace VideoCommon {
using Core::DEVICE_PAGESIZE;
namespace staging_detail {
template <typename T, typename = void>
struct has_flush_range : std::false_type {};
template <typename T>
struct has_flush_range<
T, std::void_t<decltype(std::declval<T&>().FlushRange(size_t{}, size_t{}))>> : std::true_type {};
template <typename T, typename = void>
struct has_invalidate_range : std::false_type {};
template <typename T>
struct has_invalidate_range<
T, std::void_t<decltype(std::declval<T&>().InvalidateRange(size_t{}, size_t{}))>>
: std::true_type {};
} // namespace staging_detail
template <typename Ref>
inline void StagingFlushRange(Ref& ref, size_t offset, size_t size) {
if constexpr (staging_detail::has_flush_range<Ref>::value) {
ref.FlushRange(offset, size);
} else {
(void)ref;
(void)offset;
(void)size;
}
}
template <typename Ref>
inline void StagingInvalidateRange(Ref& ref, size_t offset, size_t size) {
if constexpr (staging_detail::has_invalidate_range<Ref>::value) {
ref.InvalidateRange(offset, size);
} else {
(void)ref;
(void)offset;
(void)size;
}
}
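
staging_detail uses a classic member-detection idiom: has_flush_range<T> is true only when T has a FlushRange(size_t, size_t) member, so StagingFlushRange compiles to a real call for the Vulkan staging reference and to a no-op for backends whose staging refs have no such member. A standalone illustration of the same idiom, with invented ref types:

// Standalone sketch of the detection idiom above; the two Ref types are invented stand-ins.
#include <cstddef>
#include <type_traits>
#include <utility>

struct RefWithFlush {
    void FlushRange(std::size_t offset, std::size_t size) { /* would flush [offset, offset + size) */ }
};
struct RefWithoutFlush {}; // e.g. a backend whose staging memory is always coherent

template <typename T, typename = void>
struct has_flush_range : std::false_type {};
template <typename T>
struct has_flush_range<T, std::void_t<decltype(std::declval<T&>().FlushRange(std::size_t{}, std::size_t{}))>>
    : std::true_type {};

template <typename Ref>
void FlushIfSupported(Ref& ref, std::size_t offset, std::size_t size) {
    if constexpr (has_flush_range<Ref>::value) {
        ref.FlushRange(offset, size); // resolved at compile time
    }
}

static_assert(has_flush_range<RefWithFlush>::value);
static_assert(!has_flush_range<RefWithoutFlush>::value);
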
template <class P>
BufferCache<P>::BufferCache(Tegra::MaxwellDeviceMemoryManager& device_memory_, Runtime& runtime_)
: runtime{runtime_}, device_memory{device_memory_}, memory_tracker{device_memory} {
@ -386,7 +425,8 @@ void BufferCache<P>::BindHostComputeBuffers() {
template <class P>
void BufferCache<P>::SetUniformBuffersState(const std::array<u32, NUM_STAGES>& mask,
const UniformBufferSizes* sizes) {
if (channel_state->enabled_uniform_buffer_masks != mask) {
const bool mask_changed = channel_state->enabled_uniform_buffer_masks != mask;
if (mask_changed) {
channel_state->fast_bound_uniform_buffers.fill(0);
if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
channel_state->dirty_uniform_buffers.fill(~u32{0});
@ -632,6 +672,7 @@ void BufferCache<P>::PopAsyncBuffers() {
u8* base = async_buffer->mapped_span.data();
const size_t base_offset = async_buffer->offset;
for (const auto& copy : downloads) {
StagingInvalidateRange(*async_buffer, copy.dst_offset, copy.size);
const DAddr device_addr = static_cast<DAddr>(copy.src_offset);
const u64 dst_offset = copy.dst_offset - base_offset;
const u8* read_mapped_memory = base + dst_offset;
@ -695,6 +736,7 @@ void BufferCache<P>::BindHostIndexBuffer() {
{BufferCopy{.src_offset = upload_staging.offset, .dst_offset = 0, .size = size}}};
std::memcpy(upload_staging.mapped_span.data(),
draw_state.inline_index_draw_indexes.data(), size);
StagingFlushRange(upload_staging, upload_staging.offset, size);
runtime.CopyBuffer(buffer, upload_staging.buffer, copies, true);
} else {
buffer.ImmediateUpload(0, draw_state.inline_index_draw_indexes);
@ -817,7 +859,18 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
channel_state->uniform_buffer_binding_sizes[stage][binding_index] = size;
// Stream buffer path to avoid stalling on non-Nvidia drivers or Vulkan
const std::span<u8> span = runtime.BindMappedUniformBuffer(stage, binding_index, size);
#ifdef YUZU_DEBUG
ASSERT(binding_index < NUM_GRAPHICS_UNIFORM_BUFFERS);
ASSERT(span.size() >= size && "UBO stream span too small");
if (!device_memory.ReadBlockFastChecked(device_addr, span.data(), size)) {
LOG_CRITICAL(Render, "DeviceMemory OOB/unmapped: addr=0x{:x} size={}", device_addr, size);
channel_state->fast_bound_uniform_buffers[stage] &= ~(1u << binding_index);
ASSERT(false);
return;
}
#else
device_memory.ReadBlockUnsafe(device_addr, span.data(), size);
#endif
return;
}
// Classic cached path
@ -826,7 +879,8 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
}
// Skip binding if it's not needed and if the bound buffer is not the fast version
// This exists to avoid instances where the fast buffer is bound and a GPU write happens
needs_bind |= HasFastUniformBufferBound(stage, binding_index);
const bool was_fast_bound = HasFastUniformBufferBound(stage, binding_index);
needs_bind |= was_fast_bound;
if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
needs_bind |= channel_state->uniform_buffer_binding_sizes[stage][binding_index] != size;
}
@ -1506,7 +1560,7 @@ template <class P>
void BufferCache<P>::MappedUploadMemory([[maybe_unused]] Buffer& buffer,
[[maybe_unused]] u64 total_size_bytes,
[[maybe_unused]] std::span<BufferCopy> copies) {
if constexpr (USE_MEMORY_MAPS) {
if constexpr (USE_MEMORY_MAPS) {
auto upload_staging = runtime.UploadStagingBuffer(total_size_bytes);
const std::span<u8> staging_pointer = upload_staging.mapped_span;
for (BufferCopy& copy : copies) {
@ -1517,6 +1571,7 @@ void BufferCache<P>::MappedUploadMemory([[maybe_unused]] Buffer& buffer,
// Apply the staging offset
copy.src_offset += upload_staging.offset;
}
StagingFlushRange(upload_staging, upload_staging.offset, total_size_bytes);
const bool can_reorder = runtime.CanReorderUpload(buffer, copies);
runtime.CopyBuffer(buffer, upload_staging.buffer, copies, true, can_reorder);
}
@ -1559,6 +1614,7 @@ void BufferCache<P>::InlineMemoryImplementation(DAddr dest_address, size_t copy_
}};
u8* const src_pointer = upload_staging.mapped_span.data();
std::memcpy(src_pointer, inlined_buffer.data(), copy_size);
StagingFlushRange(upload_staging, upload_staging.offset, copy_size);
const bool can_reorder = runtime.CanReorderUpload(buffer, copies);
runtime.CopyBuffer(buffer, upload_staging.buffer, copies, true, can_reorder);
} else {
@ -1613,6 +1669,7 @@ void BufferCache<P>::DownloadBufferMemory(Buffer& buffer, DAddr device_addr, u64
}
runtime.CopyBuffer(download_staging.buffer, buffer, copies_span, true);
runtime.Finish();
StagingInvalidateRange(download_staging, download_staging.offset, total_size_bytes);
for (const BufferCopy& copy : copies) {
const DAddr copy_device_addr = buffer.CpuAddr() + copy.src_offset;
// Undo the modified offset
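
Every call added in this file follows the same discipline for non-coherent host-visible memory: flush the staging span after the CPU finishes writing it and before the GPU copy is recorded, and invalidate a download span once the GPU work is known complete and before the CPU reads it. A self-contained restatement of that ordering; DummyStaging is a stand-in for StagingBufferRef:

// Self-contained sketch of the upload/download ordering used above.
#include <cstddef>
#include <cstring>
#include <vector>

struct DummyStaging { // stand-in for StagingBufferRef
    std::vector<unsigned char> mapped;
    std::size_t offset = 0;
    void FlushRange(std::size_t, std::size_t) {}      // real ref: vmaFlushAllocation
    void InvalidateRange(std::size_t, std::size_t) {} // real ref: vmaInvalidateAllocation
};

// Upload: CPU write -> flush -> record GPU copy.
inline void UploadPattern(DummyStaging& staging, const std::vector<unsigned char>& src) {
    staging.mapped.resize(src.size());
    std::memcpy(staging.mapped.data(), src.data(), src.size());
    staging.FlushRange(staging.offset, src.size()); // no-op on coherent memory
    // runtime.CopyBuffer(...) would be recorded here.
}

// Download: record GPU copy -> wait -> invalidate -> CPU read.
inline void DownloadPattern(DummyStaging& staging, std::vector<unsigned char>& dst) {
    // runtime.CopyBuffer(...) and runtime.Finish() would have completed here.
    dst.resize(staging.mapped.size());
    staging.InvalidateRange(staging.offset, dst.size());
    std::memcpy(dst.data(), staging.mapped.data(), dst.size());
}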

View file

@ -53,7 +53,6 @@ constexpr u32 NUM_COMPUTE_UNIFORM_BUFFERS = 8;
constexpr u32 NUM_STORAGE_BUFFERS = 16;
constexpr u32 NUM_TEXTURE_BUFFERS = 32;
constexpr u32 NUM_STAGES = 5;
static_assert(NUM_GRAPHICS_UNIFORM_BUFFERS <= 32, "fast bitmask must fit u32");
using UniformBufferSizes = std::array<std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES>;

View file

@ -166,7 +166,7 @@ try
if (Settings::values.renderer_force_max_clock.GetValue() && device.ShouldBoostClocks()) {
turbo_mode.emplace(instance, dld);
scheduler.RegisterOnSubmit([this] { turbo_mode->QueueSubmitted(); });
scheduler.AddOnSubmit([this] { turbo_mode->QueueSubmitted(); });
}
Report();
@ -176,7 +176,7 @@ try
}
RendererVulkan::~RendererVulkan() {
scheduler.RegisterOnSubmit([] {});
scheduler.RegisterOnSubmit(std::function<void()>{});
void(device.GetLogical().WaitIdle());
}

View file

@ -179,6 +179,11 @@ public:
}();
u8* staging_data = host_visible ? buffer.Mapped().data() : staging.mapped_span.data();
#ifdef YUZU_DEBUG
if (!host_visible) {
ASSERT(staging.mapped_span.size() >= size_bytes);
}
#endif
const size_t quad_size = bytes_per_index * 6;
for (u32 first = 0; first < num_first_offset_copies; ++first) {
@ -189,6 +194,7 @@ public:
}
if (!host_visible) {
staging.FlushRange(staging.offset, static_cast<VkDeviceSize>(size_bytes));
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([src_buffer = staging.buffer, src_offset = staging.offset,
dst_buffer = *buffer, size_bytes](vk::CommandBuffer cmdbuf) {
@ -337,11 +343,6 @@ BufferCacheRuntime::BufferCacheRuntime(const Device& device_, MemoryAllocator& m
uint8_pass = std::make_unique<Uint8Pass>(device, scheduler, descriptor_pool, staging_pool,
compute_pass_descriptor_queue);
}
const u32 ubo_align = static_cast<u32>(
device.GetUniformBufferAlignment() //check if the device has it
);
// add the ability to change the size in settings in future
uniform_ring.Init(device, memory_allocator, 8 * 1024 * 1024 /* 8 MiB */, ubo_align ? ubo_align : 256);
quad_array_index_buffer = std::make_shared<QuadArrayIndexBuffer>(device_, memory_allocator_,
scheduler_, staging_pool_);
quad_strip_index_buffer = std::make_shared<QuadStripIndexBuffer>(device_, memory_allocator_,
@ -360,42 +361,6 @@ void BufferCacheRuntime::FreeDeferredStagingBuffer(StagingBufferRef& ref) {
staging_pool.FreeDeferred(ref);
}
void BufferCacheRuntime::UniformRing::Init(const Device& device,
MemoryAllocator& alloc,
u64 bytes, u32 alignment) {
for (size_t i = 0; i < NUM_FRAMES; ++i) {
VkBufferCreateInfo ci{
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.size = bytes,
.usage = VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
};
buffers[i] = alloc.CreateBuffer(ci, MemoryUsage::Upload);
mapped[i] = buffers[i].Mapped().data();
}
size = bytes;
align = alignment ? alignment : 256;
head = 0;
current_frame = 0;
}
std::span<u8> BufferCacheRuntime::UniformRing::Alloc(u32 bytes, u32& out_offset) {
const u64 aligned = Common::AlignUp(head, static_cast<u64>(align));
u64 end = aligned + bytes;
if (end > size) {
return {}; // Fallback to staging pool
}
out_offset = static_cast<u32>(aligned);
head = end;
return {mapped[current_frame] + out_offset, bytes};
}
u64 BufferCacheRuntime::GetDeviceLocalMemory() const {
return device.GetDeviceLocalMemory();
}
@ -416,7 +381,6 @@ void BufferCacheRuntime::TickFrame(Common::SlotVector<Buffer>& slot_buffers) noe
for (auto it = slot_buffers.begin(); it != slot_buffers.end(); it++) {
it->ResetUsageTracking();
}
uniform_ring.BeginFrame();
}
void BufferCacheRuntime::Finish() {
@ -556,6 +520,10 @@ void BufferCacheRuntime::BindIndexBuffer(PrimitiveTopology topology, IndexFormat
ReserveNullBuffer();
vk_buffer = *null_buffer;
}
#ifdef YUZU_DEBUG
const size_t bytes_per_index = BytesPerIndex(vk_index_type);
ASSERT(bytes_per_index == 0 || (vk_offset % bytes_per_index) == 0);
#endif
scheduler.Record([vk_buffer, vk_offset, vk_index_type](vk::CommandBuffer cmdbuf) {
cmdbuf.BindIndexBuffer(vk_buffer, vk_offset, vk_index_type);
});

View file

@ -1,6 +1,3 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@ -127,60 +124,52 @@ public:
void BindTransformFeedbackBuffers(VideoCommon::HostBindings<Buffer>& bindings);
std::span<u8> BindMappedUniformBuffer([[maybe_unused]] size_t /*stage*/,
[[maybe_unused]] u32 /*binding_index*/,
u32 size) {
u32 offset = 0;
if (auto span = uniform_ring.Alloc(size, offset); !span.empty()) {
BindBuffer(*uniform_ring.buffers[uniform_ring.current_frame], offset, size);
return span;
}
// Fallback for giant requests
std::span<u8> BindMappedUniformBuffer([[maybe_unused]] size_t stage,
[[maybe_unused]] u32 binding_index, u32 size) {
const StagingBufferRef ref = staging_pool.Request(size, MemoryUsage::Upload);
BindBuffer(ref.buffer, static_cast<u32>(ref.offset), size);
BindBuffer(ref.buffer, ref.offset, static_cast<VkDeviceSize>(size));
#ifdef YUZU_DEBUG
ASSERT(ref.mapped_span.size() >= size);
const VkDeviceSize ubo_align = device.GetUniformBufferAlignment();
ASSERT(ubo_align == 0 || (ref.offset % ubo_align) == 0);
#endif
return ref.mapped_span;
}
void BindUniformBuffer(VkBuffer buffer, u32 offset, u32 size) {
void BindUniformBuffer(VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size) {
#ifdef YUZU_DEBUG
const VkDeviceSize ubo_align = device.GetUniformBufferAlignment();
ASSERT(ubo_align == 0 || (offset % ubo_align) == 0);
#endif
BindBuffer(buffer, offset, size);
}
void BindStorageBuffer(VkBuffer buffer, u32 offset, u32 size,
void BindStorageBuffer(VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size,
[[maybe_unused]] bool is_written) {
#ifdef YUZU_DEBUG
const VkDeviceSize ssbo_align = device.GetStorageBufferAlignment();
ASSERT(ssbo_align == 0 || (offset % ssbo_align) == 0);
#endif
BindBuffer(buffer, offset, size);
}
void BindTextureBuffer(Buffer& buffer, u32 offset, u32 size,
VideoCore::Surface::PixelFormat format) {
#ifdef YUZU_DEBUG
const VkDeviceSize texel_align = device.GetTexelBufferAlignment();
ASSERT(texel_align == 0 || (offset % texel_align) == 0);
#endif
guest_descriptor_queue.AddTexelBuffer(buffer.View(offset, size, format));
}
private:
void BindBuffer(VkBuffer buffer, u32 offset, u32 size) {
void BindBuffer(VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size) {
guest_descriptor_queue.AddBuffer(buffer, offset, size);
}
void ReserveNullBuffer();
vk::Buffer CreateNullBuffer();
struct UniformRing {
static constexpr size_t NUM_FRAMES = 3;
std::array<vk::Buffer, NUM_FRAMES> buffers{};
std::array<u8*, NUM_FRAMES> mapped{};
u64 size = 0;
u64 head = 0;
u32 align = 256;
size_t current_frame = 0;
void Init(const Device& device, MemoryAllocator& alloc, u64 bytes, u32 alignment);
void BeginFrame() {
current_frame = (current_frame + 1) % NUM_FRAMES;
head = 0;
}
std::span<u8> Alloc(u32 bytes, u32& out_offset);
};
UniformRing uniform_ring;
const Device& device;
MemoryAllocator& memory_allocator;
Scheduler& scheduler;
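
The YUZU_DEBUG asserts added in this header all check one invariant: a descriptor offset handed to BindBuffer must be a multiple of the corresponding min*OffsetAlignment limit reported by the device, with 0 treated as "no requirement". A tiny self-contained version of that check; the sample values are illustrative, not taken from any particular device:

#include <cstdint>

constexpr bool OffsetIsAligned(std::uint64_t offset, std::uint64_t min_alignment) {
    return min_alignment == 0 || offset % min_alignment == 0;
}

static_assert(OffsetIsAligned(512, 256));  // would pass the YUZU_DEBUG assert
static_assert(OffsetIsAligned(300, 0));    // alignment 0 means no device requirement
static_assert(!OffsetIsAligned(300, 256)); // would trip ASSERT in a debug build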

View file

@ -850,10 +850,17 @@ public:
pending_flush_sets.pop_front();
}
const VkDeviceSize read_size =
static_cast<VkDeviceSize>(flushed_queries.size() * TFBQueryBank::QUERY_SIZE);
staging_ref.InvalidateRange(staging_ref.offset, read_size);
size_t offset_base = staging_ref.offset;
for (auto q : flushed_queries) {
auto* query = GetQuery(q);
u32 result = 0;
#ifdef YUZU_DEBUG
ASSERT(staging_ref.mapped_span.size() >= offset_base + sizeof(u32));
#endif
std::memcpy(&result, staging_ref.mapped_span.data() + offset_base, sizeof(u32));
query->value = static_cast<u64>(result);
query->flags |= VideoCommon::QueryFlagBits::IsFinalValueSynced;
@ -1567,10 +1574,14 @@ void QueryCacheRuntime::SyncValues(std::span<SyncValuesType> values, VkBuffer ba
impl->little_cache[which_copy].first,
.size = values[i].size,
});
#ifdef YUZU_DEBUG
ASSERT(ref.mapped_span.size() >= accumulated_size + values[i].size);
#endif
std::memcpy(ref.mapped_span.data() + accumulated_size, &values[i].value,
values[i].size);
accumulated_size += values[i].size;
}
ref.FlushRange(ref.offset, static_cast<VkDeviceSize>(accumulated_size));
src_buffer = ref.buffer;
} else {
for (size_t i = 0; i < values.size(); i++) {

View file

@ -15,6 +15,7 @@
#include "video_core/renderer_vulkan/vk_command_pool.h"
#include "video_core/renderer_vulkan/vk_master_semaphore.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
#include "video_core/renderer_vulkan/vk_state_tracker.h"
#include "video_core/renderer_vulkan/vk_texture_cache.h"
#include "video_core/vulkan_common/vulkan_device.h"
@ -233,8 +234,14 @@ u64 Scheduler::SubmitExecution(VkSemaphore signal_semaphore, VkSemaphore wait_se
upload_cmdbuf.End();
cmdbuf.End();
if (on_submit) {
on_submit();
if (staging_buffer_pool) {
staging_buffer_pool->FlushStream();
}
for (const auto& callback : on_submit_callbacks) {
if (callback) {
callback();
}
}
std::scoped_lock lock{submit_mutex};

View file

@ -10,6 +10,7 @@
#include <thread>
#include <utility>
#include <queue>
#include <vector>
#include "common/alignment.h"
#include "common/common_types.h"
@ -29,6 +30,7 @@ class Device;
class Framebuffer;
class GraphicsPipeline;
class StateTracker;
class StagingBufferPool;
struct QueryCacheParams;
@ -73,9 +75,23 @@ public:
query_cache = &query_cache_;
}
// Registers a callback to perform on queue submission.
void SetStagingBufferPool(StagingBufferPool* pool) {
staging_buffer_pool = pool;
}
// Registers a callback to perform on queue submission, replacing existing callbacks.
void RegisterOnSubmit(std::function<void()>&& func) {
on_submit = std::move(func);
on_submit_callbacks.clear();
if (func) {
on_submit_callbacks.emplace_back(std::move(func));
}
}
// Adds an additional callback to perform on queue submission.
void AddOnSubmit(std::function<void()>&& func) {
if (func) {
on_submit_callbacks.emplace_back(std::move(func));
}
}
/// Send work to a separate thread.
@ -237,12 +253,13 @@ private:
std::unique_ptr<CommandPool> command_pool;
VideoCommon::QueryCacheBase<QueryCacheParams>* query_cache = nullptr;
StagingBufferPool* staging_buffer_pool = nullptr;
vk::CommandBuffer current_cmdbuf;
vk::CommandBuffer current_upload_cmdbuf;
std::unique_ptr<CommandChunk> chunk;
std::function<void()> on_submit;
std::vector<std::function<void()>> on_submit_callbacks;
State state;
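
Submission callbacks move from a single std::function to a vector: RegisterOnSubmit still means "replace everything" (so passing an empty std::function, as ~RendererVulkan now does, simply clears the list), AddOnSubmit appends, and SubmitExecution flushes the staging stream ring before running the callbacks. A condensed, self-contained model of that bookkeeping; SubmitHooks and NotifySubmit are invented names, not the actual Scheduler class:

#include <functional>
#include <utility>
#include <vector>

class SubmitHooks {
public:
    // Replaces every registered callback; an empty function just clears the list.
    void RegisterOnSubmit(std::function<void()>&& func) {
        callbacks.clear();
        if (func) {
            callbacks.emplace_back(std::move(func));
        }
    }
    // Appends an additional callback without disturbing existing ones.
    void AddOnSubmit(std::function<void()>&& func) {
        if (func) {
            callbacks.emplace_back(std::move(func));
        }
    }
    // Mirrors SubmitExecution: flush the stream ring first, then notify everyone.
    void NotifySubmit(const std::function<void()>& flush_stream) {
        if (flush_stream) {
            flush_stream(); // stands in for staging_buffer_pool->FlushStream()
        }
        for (const auto& callback : callbacks) {
            callback();
        }
    }

private:
    std::vector<std::function<void()>> callbacks;
};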

View file

@ -5,6 +5,7 @@
// SPDX-License-Identifier: GPL-3.0-or-later
#include <algorithm>
#include <memory>
#include <utility>
#include <vector>
@ -25,35 +26,52 @@ namespace {
using namespace Common::Literals;
// Maximum potential alignment of a Vulkan buffer
constexpr VkDeviceSize MAX_ALIGNMENT = 256;
// Minimum alignment we want to enforce for the streaming ring
constexpr VkDeviceSize MIN_STREAM_ALIGNMENT = 256;
// Stream buffer size in bytes
constexpr VkDeviceSize MAX_STREAM_BUFFER_SIZE = 128_MiB;
size_t GetStreamBufferSize(const Device& device) {
VkDeviceSize GetStreamAlignment(const Device& device) {
return (std::max)({device.GetUniformBufferAlignment(), device.GetStorageBufferAlignment(),
device.GetTexelBufferAlignment(), MIN_STREAM_ALIGNMENT});
}
size_t GetStreamBufferSize(const Device& device, VkDeviceSize alignment) {
VkDeviceSize size{0};
if (device.HasDebuggingToolAttached()) {
ForEachDeviceLocalHostVisibleHeap(device, [&size](size_t index, VkMemoryHeap& heap) {
bool found_heap = false;
ForEachDeviceLocalHostVisibleHeap(device, [&size, &found_heap](size_t /*index*/, VkMemoryHeap& heap) {
size = (std::max)(size, heap.size);
found_heap = true;
});
// If rebar is not supported, cut the max heap size to 40%. This will allow 2 captures to be
// loaded at the same time in RenderDoc. If rebar is supported, this shouldn't be an issue
// as the heap will be much larger.
if (size <= 256_MiB) {
// If no suitable heap was found, fall back to the default cap to avoid creating a zero-sized stream buffer.
if (!found_heap) {
size = MAX_STREAM_BUFFER_SIZE;
} else if (size <= 256_MiB) {
// If rebar is not supported, cut the max heap size to 40%. This will allow 2 captures to be
// loaded at the same time in RenderDoc. If rebar is supported, this shouldn't be an issue
// as the heap will be much larger.
size = size * 40 / 100;
}
} else {
size = MAX_STREAM_BUFFER_SIZE;
}
return (std::min)(Common::AlignUp(size, MAX_ALIGNMENT), MAX_STREAM_BUFFER_SIZE);
// Clamp to the configured maximum, align up for safety, and ensure a sane minimum so
// region_size (stream_buffer_size / NUM_SYNCS) never becomes zero.
const VkDeviceSize aligned =
(std::min)(Common::AlignUp(size, alignment), MAX_STREAM_BUFFER_SIZE);
const VkDeviceSize min_size = alignment * StagingBufferPool::NUM_SYNCS;
return static_cast<size_t>((std::max)(aligned, min_size));
}
} // Anonymous namespace
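
A worked example of the new sizing rule, under assumed limits (all buffer alignments ≤ 256, NUM_SYNCS = 16): the alignment * NUM_SYNCS floor keeps region_size from ever reaching zero, while the debugger path still keeps 40% of a small host-visible heap. The helper below re-derives just the arithmetic; it is a simplified model, not the function above (in particular, the real code falls back to the full 128 MiB default when no device-local host-visible heap is reported):

// Simplified, self-contained model of the sizing arithmetic above.
#include <algorithm>
#include <cstdint>

constexpr std::uint64_t MiB = 1024ull * 1024ull;
constexpr std::uint64_t kMaxStream = 128 * MiB; // MAX_STREAM_BUFFER_SIZE
constexpr std::uint64_t kNumSyncs = 16;         // StagingBufferPool::NUM_SYNCS

constexpr std::uint64_t AlignUp(std::uint64_t value, std::uint64_t align) {
    return (value + align - 1) / align * align;
}

constexpr std::uint64_t StreamSize(std::uint64_t heap_size, std::uint64_t alignment, bool debugger) {
    std::uint64_t size = debugger ? (heap_size <= 256 * MiB ? heap_size * 40 / 100 : heap_size)
                                  : kMaxStream;
    const std::uint64_t aligned = std::min(AlignUp(size, alignment), kMaxStream);
    return std::max(aligned, alignment * kNumSyncs); // the new minimum-size floor
}

static_assert(StreamSize(0, 256, false) == kMaxStream);            // no debugger: full 128 MiB
static_assert(StreamSize(192 * MiB, 256, true) < kMaxStream);      // RenderDoc case keeps ~40%
static_assert(StreamSize(4 * 1024, 256, true) == 256 * kNumSyncs); // floor: region_size stays > 0
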
StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& memory_allocator_,
Scheduler& scheduler_)
: device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_},
stream_buffer_size{GetStreamBufferSize(device)}, region_size{stream_buffer_size /
StagingBufferPool::NUM_SYNCS} {
stream_alignment{GetStreamAlignment(device_)},
stream_buffer_size{GetStreamBufferSize(device_, stream_alignment)},
region_size{stream_buffer_size / StagingBufferPool::NUM_SYNCS} {
VkBufferCreateInfo stream_ci = {
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
@ -74,9 +92,18 @@ StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& mem
}
stream_pointer = stream_buffer.Mapped();
ASSERT_MSG(!stream_pointer.empty(), "Stream buffer must be host visible!");
stream_is_coherent = stream_buffer.IsHostCoherent();
non_coherent_atom_size = std::max<VkDeviceSize>(device.GetNonCoherentAtomSize(),
static_cast<VkDeviceSize>(1));
dirty_begin = stream_buffer_size;
dirty_end = 0;
stream_dirty = false;
scheduler.SetStagingBufferPool(this);
}
StagingBufferPool::~StagingBufferPool() = default;
StagingBufferPool::~StagingBufferPool() {
scheduler.SetStagingBufferPool(nullptr);
}
StagingBufferRef StagingBufferPool::Request(size_t size, MemoryUsage usage, bool deferred) {
if (!deferred && usage == MemoryUsage::Upload && size <= region_size) {
@ -106,41 +133,118 @@ void StagingBufferPool::TickFrame() {
}
StagingBufferRef StagingBufferPool::GetStreamBuffer(size_t size) {
if (AreRegionsActive(Region(free_iterator) + 1,
(std::min)(Region(iterator + size) + 1, NUM_SYNCS))) {
const size_t alignment = static_cast<size_t>(stream_alignment);
const size_t aligned_size = Common::AlignUp(size, alignment);
const size_t capacity = static_cast<size_t>(stream_buffer_size);
const bool wraps = iterator + aligned_size > capacity;
const size_t new_iterator =
wraps ? aligned_size : Common::AlignUp(iterator + aligned_size, alignment);
const size_t begin_region = wraps ? 0 : Region(iterator);
const size_t last_byte = new_iterator == 0 ? 0 : new_iterator - 1;
const size_t end_region = (std::min)(Region(last_byte) + 1, NUM_SYNCS);
const size_t guard_begin = (std::min)(Region(free_iterator) + 1, NUM_SYNCS);
if (!wraps) {
if (guard_begin < end_region && AreRegionsActive(guard_begin, end_region)) {
// Avoid waiting for the previous usages to be free
return GetStagingBuffer(size, MemoryUsage::Upload);
}
} else if (guard_begin < NUM_SYNCS && AreRegionsActive(guard_begin, NUM_SYNCS)) {
// Avoid waiting for the previous usages to be free
return GetStagingBuffer(size, MemoryUsage::Upload);
}
const u64 current_tick = scheduler.CurrentTick();
std::fill(sync_ticks.begin() + Region(used_iterator), sync_ticks.begin() + Region(iterator),
current_tick);
used_iterator = iterator;
free_iterator = (std::max)(free_iterator, iterator + size);
if (iterator + size >= stream_buffer_size) {
if (wraps) {
std::fill(sync_ticks.begin() + Region(used_iterator), sync_ticks.begin() + NUM_SYNCS,
current_tick);
used_iterator = 0;
iterator = 0;
free_iterator = size;
if (AreRegionsActive(0, Region(size) + 1)) {
free_iterator = aligned_size;
const size_t head_last_byte = aligned_size == 0 ? 0 : aligned_size - 1;
const size_t head_end_region = (std::min)(Region(head_last_byte) + 1, NUM_SYNCS);
if (AreRegionsActive(0, head_end_region)) {
// Avoid waiting for the previous usages to be free
return GetStagingBuffer(size, MemoryUsage::Upload);
}
}
const size_t offset = iterator;
iterator = Common::AlignUp(iterator + size, MAX_ALIGNMENT);
std::fill(sync_ticks.begin() + begin_region, sync_ticks.begin() + end_region, current_tick);
const size_t offset = wraps ? 0 : iterator;
iterator = new_iterator;
if (!wraps) {
free_iterator = (std::max)(free_iterator, offset + aligned_size);
}
TrackStreamWrite(static_cast<VkDeviceSize>(offset), static_cast<VkDeviceSize>(aligned_size));
return StagingBufferRef{
.buffer = *stream_buffer,
.offset = static_cast<VkDeviceSize>(offset),
.mapped_span = stream_pointer.subspan(offset, size),
.usage{},
.log2_level{},
.index{},
.usage = MemoryUsage::Upload,
.log2_level = 0,
.index = 0,
.owner = &stream_buffer,
.atom_size = non_coherent_atom_size,
.is_coherent = stream_is_coherent,
.is_stream_ring = true,
};
}
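
A worked example of the wrap handling, under assumed pool parameters (64 KiB stream buffer, 16 sync regions of 4 KiB each, 256-byte alignment): a 5000-byte request arriving while iterator sits at 62 KiB no longer fits, so the allocation wraps to offset 0, its size is padded to 5120 bytes, and it occupies sync regions 0 and 1, while the pre-wrap tail regions are stamped with the current tick so they are not reused before the GPU is done with them. The constants below just re-derive those numbers:

// Worked example of the wrap/alignment math above, with assumed pool parameters.
#include <cstddef>

constexpr std::size_t kCapacity = 64 * 1024;        // assumed stream_buffer_size
constexpr std::size_t kAlignment = 256;             // assumed stream_alignment
constexpr std::size_t kRegionSize = kCapacity / 16; // 16 sync regions of 4 KiB

constexpr std::size_t AlignUp(std::size_t value, std::size_t align) {
    return (value + align - 1) / align * align;
}
constexpr std::size_t Region(std::size_t offset) { return offset / kRegionSize; }

constexpr std::size_t iterator = 62 * 1024;                         // where the previous allocation left off
constexpr std::size_t aligned_size = AlignUp(5000, kAlignment);     // 5120
constexpr bool wraps = iterator + aligned_size > kCapacity;         // true: would overrun the mapping
constexpr std::size_t offset = wraps ? 0 : iterator;                // allocation restarts at 0
constexpr std::size_t new_iterator = wraps ? aligned_size
                                           : AlignUp(iterator + aligned_size, kAlignment);

static_assert(wraps && offset == 0 && new_iterator == 5120);
static_assert(Region(new_iterator - 1) + 1 == 2); // the request occupies sync regions 0 and 1
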
void StagingBufferPool::TrackStreamWrite(VkDeviceSize offset, VkDeviceSize size) {
if (stream_is_coherent || size == 0) {
return;
}
const VkDeviceSize clamped_offset = (std::min)(offset, stream_buffer_size);
const VkDeviceSize clamped_end = (std::min)(clamped_offset + size, stream_buffer_size);
std::scoped_lock lock{stream_mutex};
if (!stream_dirty) {
dirty_begin = clamped_offset;
dirty_end = clamped_end;
stream_dirty = true;
return;
}
dirty_begin = (std::min)(dirty_begin, clamped_offset);
dirty_end = (std::max)(dirty_end, clamped_end);
}
void StagingBufferPool::FlushStream() {
if (stream_is_coherent) {
return;
}
VkDeviceSize flush_begin = 0;
VkDeviceSize flush_end = 0;
{
std::scoped_lock lock{stream_mutex};
if (!stream_dirty) {
return;
}
flush_begin = dirty_begin;
flush_end = dirty_end;
stream_dirty = false;
dirty_begin = stream_buffer_size;
dirty_end = 0;
}
if (flush_begin >= flush_end) {
return;
}
const VkDeviceSize atom = non_coherent_atom_size;
const VkDeviceSize aligned_begin = Common::AlignDown(flush_begin, atom);
const VkDeviceSize aligned_end = Common::AlignUp(flush_end, atom);
const VkDeviceSize flush_size = aligned_end - aligned_begin;
stream_buffer.FlushRange(aligned_begin, flush_size);
}
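
FlushStream collapses all stream writes since the last submit into one dirty window and rounds it out to the device's nonCoherentAtomSize before calling Buffer::FlushRange (which forwards to vmaFlushAllocation for non-coherent memory; see the vulkan_wrapper hunk below). A worked example with an assumed 64-byte atom:

// Worked example of the atom rounding above, assuming nonCoherentAtomSize == 64.
#include <cstdint>

constexpr std::uint64_t AlignDown(std::uint64_t value, std::uint64_t align) { return value / align * align; }
constexpr std::uint64_t AlignUp(std::uint64_t value, std::uint64_t align) { return (value + align - 1) / align * align; }

constexpr std::uint64_t atom = 64;
constexpr std::uint64_t dirty_begin = 100; // first byte written since the last submit
constexpr std::uint64_t dirty_end = 300;   // one past the last byte written

static_assert(AlignDown(dirty_begin, atom) == 64); // flush start, rounded down to the atom
static_assert(AlignUp(dirty_end, atom) == 320);    // flush end, rounded up to the atom
// => one FlushRange(64, 256) call covers every byte the CPU touched in [100, 300).
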
bool StagingBufferPool::AreRegionsActive(size_t region_begin, size_t region_end) const {
const u64 gpu_tick = scheduler.GetMasterSemaphore().KnownGpuTick();
return std::any_of(sync_ticks.begin() + region_begin, sync_ticks.begin() + region_end,
@ -202,15 +306,19 @@ StagingBufferRef StagingBufferPool::CreateStagingBuffer(size_t size, MemoryUsage
++buffer_index;
buffer.SetObjectNameEXT(fmt::format("Staging Buffer {}", buffer_index).c_str());
}
const bool is_coherent = buffer.IsHostCoherent();
const std::span<u8> mapped_span = buffer.Mapped();
auto buffer_ptr = std::make_unique<vk::Buffer>(std::move(buffer));
StagingBuffer& entry = GetCache(usage)[log2].entries.emplace_back(StagingBuffer{
.buffer = std::move(buffer),
.buffer = std::move(buffer_ptr),
.mapped_span = mapped_span,
.usage = usage,
.log2_level = log2,
.index = unique_ids++,
.tick = deferred ? (std::numeric_limits<u64>::max)() : scheduler.CurrentTick(),
.deferred = deferred,
.is_coherent = is_coherent,
.atom_size = is_coherent ? 1 : non_coherent_atom_size,
});
return entry.Ref();
}

View file

@ -1,12 +1,18 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
#pragma once
#include <climits>
#include <mutex>
#include <memory>
#include <vector>
#include "common/common_types.h"
#include "common/alignment.h"
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
@ -23,10 +29,54 @@ struct StagingBufferRef {
MemoryUsage usage;
u32 log2_level;
u64 index;
const vk::Buffer* owner = nullptr;
VkDeviceSize atom_size = 1;
bool is_coherent = true;
bool is_stream_ring = false;
void FlushRange(VkDeviceSize range_offset, VkDeviceSize size) const {
if (!owner || is_coherent || size == 0 || is_stream_ring) {
return;
}
if (size == VK_WHOLE_SIZE) {
owner->FlushRange(range_offset, size);
return;
}
const VkDeviceSize atom = atom_size ? atom_size : 1;
const VkDeviceSize range_end = range_offset + size;
if (range_end < range_offset) {
owner->FlushRange(range_offset, size);
return;
}
const VkDeviceSize aligned_begin = Common::AlignDown(range_offset, atom);
const VkDeviceSize aligned_end = Common::AlignUp(range_end, atom);
owner->FlushRange(aligned_begin, aligned_end - aligned_begin);
}
void InvalidateRange(VkDeviceSize range_offset, VkDeviceSize size) const {
if (!owner || is_coherent || size == 0 || is_stream_ring) {
return;
}
if (size == VK_WHOLE_SIZE) {
owner->InvalidateRange(range_offset, size);
return;
}
const VkDeviceSize atom = atom_size ? atom_size : 1;
const VkDeviceSize range_end = range_offset + size;
if (range_end < range_offset) {
owner->InvalidateRange(range_offset, size);
return;
}
const VkDeviceSize aligned_begin = Common::AlignDown(range_offset, atom);
const VkDeviceSize aligned_end = Common::AlignUp(range_end, atom);
owner->InvalidateRange(aligned_begin, aligned_end - aligned_begin);
}
};
class StagingBufferPool {
public:
friend class Scheduler;
static constexpr size_t NUM_SYNCS = 16;
explicit StagingBufferPool(const Device& device, MemoryAllocator& memory_allocator,
@ -49,22 +99,28 @@ private:
};
struct StagingBuffer {
vk::Buffer buffer;
std::unique_ptr<vk::Buffer> buffer;
std::span<u8> mapped_span;
MemoryUsage usage;
u32 log2_level;
u64 index;
u64 tick = 0;
bool deferred{};
bool is_coherent = true;
VkDeviceSize atom_size = 1;
StagingBufferRef Ref() const noexcept {
return {
.buffer = *buffer,
.buffer = buffer ? **buffer : VkBuffer{},
.offset = 0,
.mapped_span = mapped_span,
.usage = usage,
.log2_level = log2_level,
.index = index,
.owner = buffer.get(),
.atom_size = atom_size,
.is_coherent = is_coherent,
.is_stream_ring = false,
};
}
};
@ -80,6 +136,9 @@ private:
StagingBufferRef GetStreamBuffer(size_t size);
void TrackStreamWrite(VkDeviceSize offset, VkDeviceSize size);
void FlushStream();
bool AreRegionsActive(size_t region_begin, size_t region_end) const;
StagingBufferRef GetStagingBuffer(size_t size, MemoryUsage usage, bool deferred = false);
@ -102,10 +161,17 @@ private:
MemoryAllocator& memory_allocator;
Scheduler& scheduler;
VkDeviceSize stream_alignment;
vk::Buffer stream_buffer;
std::span<u8> stream_pointer;
VkDeviceSize stream_buffer_size;
VkDeviceSize region_size;
bool stream_is_coherent = true;
VkDeviceSize non_coherent_atom_size = 1;
VkDeviceSize dirty_begin = 0;
VkDeviceSize dirty_end = 0;
bool stream_dirty = false;
std::mutex stream_mutex;
size_t iterator = 0;
size_t used_iterator = 0;

View file

@ -7,6 +7,8 @@
#pragma once
#include <unordered_set>
#include <type_traits>
#include <utility>
#include <boost/container/small_vector.hpp>
#include "common/alignment.h"
@ -30,6 +32,42 @@ using VideoCore::Surface::PixelFormat;
using VideoCore::Surface::SurfaceType;
using namespace Common::Literals;
namespace staging_detail {
template <typename T, typename = void>
struct has_flush_range : std::false_type {};
template <typename T>
struct has_flush_range<
T, std::void_t<decltype(std::declval<T&>().FlushRange(size_t{}, size_t{}))>> : std::true_type {};
template <typename T, typename = void>
struct has_invalidate_range : std::false_type {};
template <typename T>
struct has_invalidate_range<
T, std::void_t<decltype(std::declval<T&>().InvalidateRange(size_t{}, size_t{}))>>
: std::true_type {};
} // namespace staging_detail
template <typename Ref>
inline void StagingFlushRange(Ref& ref, size_t offset, size_t size) {
if constexpr (staging_detail::has_flush_range<Ref>::value) {
ref.FlushRange(offset, size);
} else {
(void)ref;
(void)offset;
(void)size;
}
}
template <typename Ref>
inline void StagingInvalidateRange(Ref& ref, size_t offset, size_t size) {
if constexpr (staging_detail::has_invalidate_range<Ref>::value) {
ref.InvalidateRange(offset, size);
} else {
(void)ref;
(void)offset;
(void)size;
}
}
template <class P>
TextureCache<P>::TextureCache(Runtime& runtime_, Tegra::MaxwellDeviceMemoryManager& device_memory_)
: runtime{runtime_}, device_memory{device_memory_} {
@ -111,6 +149,7 @@ void TextureCache<P>::RunGarbageCollector() {
const auto copies = FullDownloadCopies(image.info);
image.DownloadMemory(map, copies);
runtime.Finish();
StagingInvalidateRange(map, map.offset, image.unswizzled_size_bytes);
SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span,
swizzle_data_buffer);
}
@ -567,6 +606,7 @@ void TextureCache<P>::DownloadMemory(DAddr cpu_addr, size_t size) {
const auto copies = FullDownloadCopies(image.info);
image.DownloadMemory(map, copies);
runtime.Finish();
StagingInvalidateRange(map, map.offset, image.unswizzled_size_bytes);
SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, map.mapped_span,
swizzle_data_buffer);
}
@ -863,13 +903,17 @@ void TextureCache<P>::PopAsyncFlushes() {
if (download_info.is_swizzle) {
const ImageBase& image = slot_images[download_info.object_id];
const auto copies = FullDownloadCopies(image.info);
download_buffer.offset -= Common::AlignUp(image.unswizzled_size_bytes, 64);
const size_t aligned_size =
Common::AlignUp(image.unswizzled_size_bytes, static_cast<size_t>(64));
download_buffer.offset -= aligned_size;
StagingInvalidateRange(download_buffer, download_buffer.offset, aligned_size);
std::span<u8> download_span =
download_buffer.mapped_span.subspan(download_buffer.offset);
SwizzleImage(*gpu_memory, image.gpu_addr, image.info, copies, download_span,
swizzle_data_buffer);
} else {
const BufferDownload& buffer_info = slot_buffer_downloads[download_info.object_id];
StagingInvalidateRange(download_buffer, download_buffer.offset, buffer_info.size);
std::span<u8> download_span =
download_buffer.mapped_span.subspan(download_buffer.offset);
gpu_memory->WriteBlockUnsafe(buffer_info.address, download_span.data(),
@ -907,6 +951,7 @@ void TextureCache<P>::PopAsyncFlushes() {
}
// Wait for downloads to finish
runtime.Finish();
StagingInvalidateRange(download_map, original_offset, total_size_bytes);
download_map.offset = original_offset;
std::span<u8> download_span = download_map.mapped_span;
for (const PendingDownload& download_info : download_ids) {
@ -1081,6 +1126,7 @@ void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging)
if (True(image.flags & ImageFlagBits::AcceleratedUpload)) {
gpu_memory->ReadBlock(gpu_addr, mapped_span.data(), mapped_span.size_bytes(),
VideoCommon::CacheType::NoTextureCache);
StagingFlushRange(staging, staging.offset, mapped_span.size_bytes());
const auto uploads = FullUploadSwizzles(image.info);
runtime.AccelerateImageUpload(image, staging, uploads);
return;
@ -1094,10 +1140,12 @@ void TextureCache<P>::UploadImageContents(Image& image, StagingBuffer& staging)
auto copies =
UnswizzleImage(*gpu_memory, gpu_addr, image.info, swizzle_data, unswizzle_data_buffer);
ConvertImage(unswizzle_data_buffer, image.info, mapped_span, copies);
StagingFlushRange(staging, staging.offset, mapped_span.size_bytes());
image.UploadMemory(staging, copies);
} else {
const auto copies =
UnswizzleImage(*gpu_memory, gpu_addr, image.info, swizzle_data, mapped_span);
StagingFlushRange(staging, staging.offset, mapped_span.size_bytes());
image.UploadMemory(staging, copies);
}
}
@ -1329,6 +1377,7 @@ void TextureCache<P>::TickAsyncDecode() {
auto staging = runtime.UploadStagingBuffer(MapSizeBytes(image));
std::memcpy(staging.mapped_span.data(), async_decode->decoded_data.data(),
async_decode->decoded_data.size());
StagingFlushRange(staging, staging.offset, async_decode->decoded_data.size());
image.UploadMemory(staging, async_decode->copies);
image.flags &= ~ImageFlagBits::IsDecoding;
has_uploads = true;

View file

@ -295,6 +295,16 @@ public:
return properties.properties.limits.minStorageBufferOffsetAlignment;
}
/// Returns texel buffer alignment requirement.
VkDeviceSize GetTexelBufferAlignment() const {
return properties.properties.limits.minTexelBufferOffsetAlignment;
}
/// Returns the non-coherent atom size for memory flushes.
VkDeviceSize GetNonCoherentAtomSize() const {
return properties.properties.limits.nonCoherentAtomSize;
}
/// Returns the maximum range for storage buffers.
VkDeviceSize GetMaxStorageBufferRange() const {
return properties.properties.limits.maxStorageBufferRange;

View file

@ -499,14 +499,22 @@ void Image::Release() const noexcept {
}
void Buffer::Flush() const {
FlushRange(0, VK_WHOLE_SIZE);
}
void Buffer::FlushRange(VkDeviceSize offset, VkDeviceSize size) const {
if (!is_coherent) {
vmaFlushAllocation(allocator, allocation, 0, VK_WHOLE_SIZE);
vmaFlushAllocation(allocator, allocation, offset, size);
}
}
void Buffer::Invalidate() const {
InvalidateRange(0, VK_WHOLE_SIZE);
}
void Buffer::InvalidateRange(VkDeviceSize offset, VkDeviceSize size) const {
if (!is_coherent) {
vmaInvalidateAllocation(allocator, allocation, 0, VK_WHOLE_SIZE);
vmaInvalidateAllocation(allocator, allocation, offset, size);
}
}

View file

@ -772,10 +772,19 @@ public:
return !mapped.empty();
}
/// Returns true if the buffer memory is host coherent.
bool IsHostCoherent() const noexcept {
return is_coherent;
}
void Flush() const;
void FlushRange(VkDeviceSize offset, VkDeviceSize size) const;
void Invalidate() const;
void InvalidateRange(VkDeviceSize offset, VkDeviceSize size) const;
void SetObjectNameEXT(const char* name) const;
private:

View file

@ -393,8 +393,16 @@ target_link_libraries(yuzu PRIVATE common core input_common frontend_common netw
target_link_libraries(yuzu PRIVATE Boost::headers glad Qt6::Widgets)
target_link_libraries(yuzu PRIVATE ${PLATFORM_LIBRARIES} Threads::Threads)
if (NOT WIN32)
target_include_directories(yuzu PRIVATE ${Qt6Gui_PRIVATE_INCLUDE_DIRS})
endif()
if (UNIX AND NOT APPLE)
target_link_libraries(yuzu PRIVATE Qt6::DBus)
if (TARGET Qt6::GuiPrivate)
target_link_libraries(yuzu PRIVATE Qt6::GuiPrivate)
endif()
endif()
target_compile_definitions(yuzu PRIVATE

View file

@ -83,7 +83,8 @@ void ConfigureDebug::SetConfiguration() {
#ifdef YUZU_USE_QT_WEB_ENGINE
ui->disable_web_applet->setChecked(UISettings::values.disable_web_applet.GetValue());
#else
ui->disable_web_applet->setVisible(false);
ui->disable_web_applet->setEnabled(false);
ui->disable_web_applet->setText(tr("Web applet not compiled"));
#endif
}