revert 8078990b9b

revert "[vk] Fast UBO: fix tracking, resize heuristics, add debug guard (#2695)"

Issues showed up after the testing phase: once this change was merged directly into master, it broke SMO and some mods. We will keep studying why this happens and add better handling later.

Co-authored-by: Ribbit <ribbit@placeholder.com>
Reviewed-on: #2695
Reviewed-by: CamilleLaVey <camillelavey99@gmail.com>
Co-authored-by: Ribbit <ribbit@eden-emu.dev>
Co-committed-by: Ribbit <ribbit@eden-emu.dev>
This commit is contained in:
parent 954c17c18a
commit 78586272a5
6 changed files with 33 additions and 76 deletions
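
At a glance, the revert does three things: it removes the YUZU_DEBUG-only ReadBlockFastChecked helper from DeviceMemoryManager, it restores the OpenGL-only tracking of fast-bound uniform buffers in BufferCache (HasFastUniformBufferBound returns false again outside OpenGL), and it returns the Vulkan staging buffer ring to its fixed 256-byte MAX_ALIGNMENT sizing in place of the device-reported uniform-buffer alignment.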
device_memory_manager.h:

@@ -1,6 +1,3 @@
-// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
-// SPDX-License-Identifier: GPL-3.0-or-later
-
 // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
 
@@ -112,9 +109,6 @@ public:
     void ReadBlock(DAddr address, void* dest_pointer, size_t size);
     void ReadBlockUnsafe(DAddr address, void* dest_pointer, size_t size);
-#ifdef YUZU_DEBUG
-    bool ReadBlockFastChecked(DAddr address, void* dest_pointer, size_t size);
-#endif
     void WriteBlock(DAddr address, const void* src_pointer, size_t size);
     void WriteBlockUnsafe(DAddr address, const void* src_pointer, size_t size);
 
device_memory_manager.inc:

@@ -1,6 +1,3 @@
-// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
-// SPDX-License-Identifier: GPL-3.0-or-later
-
 // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
 
@@ -470,29 +467,6 @@ void DeviceMemoryManager<Traits>::ReadBlockUnsafe(DAddr address, void* dest_pointer,
         });
 }
 
-#ifdef YUZU_DEBUG
-template <typename Traits>
-bool DeviceMemoryManager<Traits>::ReadBlockFastChecked(DAddr address, void* dest_pointer,
-                                                       size_t size) {
-    bool success = true;
-    WalkBlock(
-        address, size,
-        [&](size_t copy_amount, DAddr current_vaddr) {
-            LOG_CRITICAL(Render, "DeviceMemory OOB/unmapped: addr=0x{:x} size={}", current_vaddr,
-                         size);
-            std::memset(dest_pointer, 0, copy_amount);
-            success = false;
-        },
-        [&](size_t copy_amount, const u8* const src_ptr) {
-            std::memcpy(dest_pointer, src_ptr, copy_amount);
-        },
-        [&](const std::size_t copy_amount) {
-            dest_pointer = static_cast<u8*>(dest_pointer) + copy_amount;
-        });
-    return success;
-}
-#endif
-
 template <typename Traits>
 void DeviceMemoryManager<Traits>::WriteBlockUnsafe(DAddr address, const void* src_pointer,
                                                    size_t size) {
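The helper deleted above follows a WalkBlock callback pattern: one lambda for unmapped ranges, one for mapped memory, and one to advance the destination pointer. For readers without the codebase at hand, here is a self-contained sketch of the same checked-read idea over a toy page table; PAGE_SIZE, the page map, and all names here are illustrative assumptions, not the project's actual types:

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <unordered_map>
#include <vector>

using DAddr = std::uint64_t;
constexpr std::size_t PAGE_SIZE = 0x1000; // sketch-only page size

// Toy page table: page index -> PAGE_SIZE bytes of host backing memory.
std::unordered_map<DAddr, std::vector<std::uint8_t>> g_pages;

std::uint8_t* LookupPage(DAddr addr) {
    const auto it = g_pages.find(addr / PAGE_SIZE);
    return it == g_pages.end() ? nullptr : it->second.data();
}

// Same shape as the reverted ReadBlockFastChecked: copy mapped ranges,
// zero-fill and flag failure on unmapped ranges, advance the destination.
bool ReadBlockChecked(DAddr address, void* dest, std::size_t size) {
    bool success = true;
    auto* out = static_cast<std::uint8_t*>(dest);
    std::size_t remaining = size;
    DAddr current = address;
    while (remaining > 0) {
        const std::size_t offset = static_cast<std::size_t>(current % PAGE_SIZE);
        const std::size_t copy_amount = std::min(PAGE_SIZE - offset, remaining);
        if (const std::uint8_t* page = LookupPage(current)) {
            std::memcpy(out, page + offset, copy_amount);
        } else {
            std::fprintf(stderr, "OOB/unmapped: addr=0x%llx size=%zu\n",
                         static_cast<unsigned long long>(current), size);
            std::memset(out, 0, copy_amount);
            success = false;
        }
        out += copy_amount;
        current += copy_amount;
        remaining -= copy_amount;
    }
    return success;
}

A release build takes the ReadBlockUnsafe shape instead: the identical walk, but with no success flag and no logging, which is exactly the difference the YUZU_DEBUG guard toggled.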
buffer_cache.h:

@@ -386,10 +386,11 @@ void BufferCache<P>::BindHostComputeBuffers() {
 template <class P>
 void BufferCache<P>::SetUniformBuffersState(const std::array<u32, NUM_STAGES>& mask,
                                             const UniformBufferSizes* sizes) {
-    const bool mask_changed = channel_state->enabled_uniform_buffer_masks != mask;
-    if (mask_changed) {
-        channel_state->fast_bound_uniform_buffers.fill(0);
-        if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
+    if (channel_state->enabled_uniform_buffer_masks != mask) {
+        if constexpr (IS_OPENGL) {
+            channel_state->fast_bound_uniform_buffers.fill(0);
+        }
         channel_state->dirty_uniform_buffers.fill(~u32{0});
         channel_state->uniform_buffer_binding_sizes.fill({});
     }
@@ -805,7 +806,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
             channel_state->uniform_buffer_binding_sizes[stage][binding_index] != size;
         if (should_fast_bind) {
             // We only have to bind when the currently bound buffer is not the fast version
-            channel_state->fast_bound_uniform_buffers[stage] |= 1u << binding_index;
+            channel_state->fast_bound_uniform_buffers[stage] |= 1U << binding_index;
             channel_state->uniform_buffer_binding_sizes[stage][binding_index] = size;
             runtime.BindFastUniformBuffer(stage, binding_index, size);
         }
@@ -814,22 +815,13 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
             return;
         }
     }
-    channel_state->fast_bound_uniform_buffers[stage] |= 1u << binding_index;
     if constexpr (IS_OPENGL) {
+        channel_state->fast_bound_uniform_buffers[stage] |= 1U << binding_index;
         channel_state->uniform_buffer_binding_sizes[stage][binding_index] = size;
     }
     // Stream buffer path to avoid stalling on non-Nvidia drivers or Vulkan
     const std::span<u8> span = runtime.BindMappedUniformBuffer(stage, binding_index, size);
-#ifdef YUZU_DEBUG
-    ASSERT(binding_index < NUM_GRAPHICS_UNIFORM_BUFFERS);
-    ASSERT(span.size() >= size && "UBO stream span too small");
-    if (!device_memory.ReadBlockFastChecked(device_addr, span.data(), size)) {
-        LOG_CRITICAL(Render, "DeviceMemory OOB/unmapped: addr=0x{:x} size={}", device_addr, size);
-        channel_state->fast_bound_uniform_buffers[stage] &= ~(1u << binding_index);
-        ASSERT(false);
-        return;
-    }
-#else
     device_memory.ReadBlockUnsafe(device_addr, span.data(), size);
-#endif
     return;
 }
 // Classic cached path
@@ -838,8 +830,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
     }
     // Skip binding if it's not needed and if the bound buffer is not the fast version
     // This exists to avoid instances where the fast buffer is bound and a GPU write happens
-    const bool was_fast_bound = HasFastUniformBufferBound(stage, binding_index);
-    needs_bind |= was_fast_bound;
+    needs_bind |= HasFastUniformBufferBound(stage, binding_index);
     if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
         needs_bind |= channel_state->uniform_buffer_binding_sizes[stage][binding_index] != size;
     }
@@ -848,6 +839,9 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
     }
     const u32 offset = buffer.Offset(device_addr);
+    if constexpr (IS_OPENGL) {
         // Fast buffer will be unbound
         channel_state->fast_bound_uniform_buffers[stage] &= ~(1U << binding_index);
+
         // Mark the index as dirty if offset doesn't match
         const bool is_copy_bind = offset != 0 && !runtime.SupportsNonZeroUniformOffset();
         channel_state->dirty_uniform_buffers[stage] |= (is_copy_bind ? 1U : 0U) << index;
@@ -861,7 +855,6 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
     } else {
         runtime.BindUniformBuffer(buffer, offset, size);
     }
-    channel_state->fast_bound_uniform_buffers[stage] &= ~(1u << binding_index);
 }
 
 template <class P>
@@ -1796,7 +1789,12 @@ std::span<u8> BufferCache<P>::ImmediateBuffer(size_t wanted_capacity) {
 
 template <class P>
 bool BufferCache<P>::HasFastUniformBufferBound(size_t stage, u32 binding_index) const noexcept {
-    return ((channel_state->fast_bound_uniform_buffers[stage] >> binding_index) & 1u) != 0;
+    if constexpr (IS_OPENGL) {
+        return ((channel_state->fast_bound_uniform_buffers[stage] >> binding_index) & 1) != 0;
+    } else {
+        // Only OpenGL has fast uniform buffers
+        return false;
+    }
 }
 
 template <class P>
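Net effect on Vulkan in this file: HasFastUniformBufferBound is reverted to return false outside OpenGL, so the classic cached path no longer consults fast-bind state that Vulkan never maintains, and the per-stage bitmask is again mutated only under IS_OPENGL. The stream path still copies the guest range into the mapped span on every bind, now through plain ReadBlockUnsafe with no debug-checked variant.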
buffer_cache_base.h:

@@ -53,7 +53,6 @@ constexpr u32 NUM_COMPUTE_UNIFORM_BUFFERS = 8;
 constexpr u32 NUM_STORAGE_BUFFERS = 16;
 constexpr u32 NUM_TEXTURE_BUFFERS = 32;
 constexpr u32 NUM_STAGES = 5;
-static_assert(NUM_GRAPHICS_UNIFORM_BUFFERS <= 32, "fast bitmask must fit u32");
 
 using UniformBufferSizes = std::array<std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES>;
 using ComputeUniformBufferSizes = std::array<u32, NUM_COMPUTE_UNIFORM_BUFFERS>;
@@ -138,8 +137,8 @@ public:
     u32 written_compute_texture_buffers = 0;
     u32 image_compute_texture_buffers = 0;
 
-    std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFERS> uniform_cache_hits{};
-    std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFERS> uniform_cache_shots{};
+    std::array<u32, 16> uniform_cache_hits{};
+    std::array<u32, 16> uniform_cache_shots{};
 
     u32 uniform_buffer_skip_cache_size = DEFAULT_SKIP_CACHE_SIZE;
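The static_assert dropped above guarded an invariant that is easy to restate: fast_bound_uniform_buffers keeps one u32 bitmask per shader stage, so it can describe at most 32 uniform-buffer bindings. A minimal sketch of the three mask operations used throughout this diff (standalone names, not the project's API):

#include <array>
#include <cstdint>

constexpr std::size_t NUM_STAGES = 5;

// One mask per stage; bit i set means binding i currently holds the
// fast (streamed) buffer. A u32 caps the scheme at 32 bindings, which
// is what the removed static_assert enforced at compile time.
std::array<std::uint32_t, NUM_STAGES> fast_bound_uniform_buffers{};

void MarkFastBound(std::size_t stage, std::uint32_t binding) {
    fast_bound_uniform_buffers[stage] |= 1U << binding;    // fast-bind path
}

void MarkClassicBound(std::size_t stage, std::uint32_t binding) {
    fast_bound_uniform_buffers[stage] &= ~(1U << binding); // classic bind path
}

bool HasFastBound(std::size_t stage, std::uint32_t binding) {
    return ((fast_bound_uniform_buffers[stage] >> binding) & 1U) != 0;
}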
vk_staging_buffer_pool.cpp:

@@ -25,12 +25,12 @@ namespace {
 
 using namespace Common::Literals;
 
-// Minimum alignment we want to enforce for the streaming ring
-constexpr VkDeviceSize MIN_STREAM_ALIGNMENT = 256;
+// Maximum potential alignment of a Vulkan buffer
+constexpr VkDeviceSize MAX_ALIGNMENT = 256;
 // Stream buffer size in bytes
 constexpr VkDeviceSize MAX_STREAM_BUFFER_SIZE = 128_MiB;
 
-size_t GetStreamBufferSize(const Device& device, VkDeviceSize alignment) {
+size_t GetStreamBufferSize(const Device& device) {
     VkDeviceSize size{0};
     if (device.HasDebuggingToolAttached()) {
         bool found_heap = false;
@@ -53,9 +53,8 @@ size_t GetStreamBufferSize(const Device& device, VkDeviceSize alignment) {
 
     // Clamp to the configured maximum, align up for safety, and ensure a sane minimum so
    // region_size (stream_buffer_size / NUM_SYNCS) never becomes zero.
-    const VkDeviceSize aligned =
-        (std::min)(Common::AlignUp(size, alignment), MAX_STREAM_BUFFER_SIZE);
-    const VkDeviceSize min_size = alignment * StagingBufferPool::NUM_SYNCS;
+    const VkDeviceSize aligned = (std::min)(Common::AlignUp(size, MAX_ALIGNMENT), MAX_STREAM_BUFFER_SIZE);
+    const VkDeviceSize min_size = MAX_ALIGNMENT * StagingBufferPool::NUM_SYNCS;
     return static_cast<size_t>((std::max)(aligned, min_size));
 }
 } // Anonymous namespace
@@ -63,10 +62,8 @@ size_t GetStreamBufferSize(const Device& device, VkDeviceSize alignment) {
 StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& memory_allocator_,
                                      Scheduler& scheduler_)
     : device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_},
-      stream_alignment{std::max<VkDeviceSize>(device_.GetUniformBufferAlignment(),
-                                              MIN_STREAM_ALIGNMENT)},
-      stream_buffer_size{GetStreamBufferSize(device_, stream_alignment)},
-      region_size{stream_buffer_size / StagingBufferPool::NUM_SYNCS} {
+      stream_buffer_size{GetStreamBufferSize(device)}, region_size{stream_buffer_size /
+                                                                   StagingBufferPool::NUM_SYNCS} {
     VkBufferCreateInfo stream_ci = {
         .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
         .pNext = nullptr,
@@ -119,11 +116,10 @@ void StagingBufferPool::TickFrame() {
 }
 
 StagingBufferRef StagingBufferPool::GetStreamBuffer(size_t size) {
-    const size_t alignment = static_cast<size_t>(stream_alignment);
-    const size_t aligned_size = Common::AlignUp(size, alignment);
+    const size_t aligned_size = Common::AlignUp(size, MAX_ALIGNMENT);
     const bool wraps = iterator + size >= stream_buffer_size;
     const size_t new_iterator =
-        wraps ? aligned_size : Common::AlignUp(iterator + size, alignment);
+        wraps ? aligned_size : Common::AlignUp(iterator + size, MAX_ALIGNMENT);
     const size_t begin_region = wraps ? 0 : Region(iterator);
     const size_t last_byte = new_iterator == 0 ? 0 : new_iterator - 1;
     const size_t end_region = (std::min)(Region(last_byte) + 1, NUM_SYNCS);
@@ -149,7 +145,7 @@ StagingBufferRef StagingBufferPool::GetStreamBuffer(size_t size) {
             current_tick);
         used_iterator = 0;
         iterator = 0;
-        free_iterator = aligned_size;
+        free_iterator = size;
         const size_t head_last_byte = aligned_size == 0 ? 0 : aligned_size - 1;
         const size_t head_end_region = (std::min)(Region(head_last_byte) + 1, NUM_SYNCS);
         if (AreRegionsActive(0, head_end_region)) {
@@ -164,7 +160,7 @@ StagingBufferRef StagingBufferPool::GetStreamBuffer(size_t size) {
     iterator = new_iterator;
 
     if (!wraps) {
-        free_iterator = (std::max)(free_iterator, offset + aligned_size);
+        free_iterator = (std::max)(free_iterator, offset + size);
    }
 
     return StagingBufferRef{
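The alignment rework being reverted is easiest to see in the ring arithmetic above. Below is a standalone sketch of the restored bookkeeping; NUM_SYNCS = 16 is an assumed value (the real constant lives in StagingBufferPool and is not shown in this diff), and the waiting on in-flight regions is omitted:

#include <algorithm>
#include <cstddef>

constexpr std::size_t MAX_ALIGNMENT = 256; // restored fixed alignment
constexpr std::size_t NUM_SYNCS = 16;      // assumed; not shown in the diff

constexpr std::size_t AlignUp(std::size_t value, std::size_t align) {
    return (value + align - 1) / align * align;
}

struct StreamRing {
    std::size_t stream_buffer_size;
    std::size_t region_size;
    std::size_t iterator = 0;      // next allocation offset
    std::size_t free_iterator = 0; // high-water mark of data still in flight

    explicit StreamRing(std::size_t size)
        : stream_buffer_size{size}, region_size{size / NUM_SYNCS} {}

    std::size_t Region(std::size_t offset) const { return offset / region_size; }

    // Mirrors the restored GetStreamBuffer bookkeeping: align the request,
    // wrap to offset 0 when it would overrun the ring, advance the iterator.
    std::size_t Allocate(std::size_t size) {
        const std::size_t aligned_size = AlignUp(size, MAX_ALIGNMENT);
        const bool wraps = iterator + size >= stream_buffer_size;
        const std::size_t offset = wraps ? 0 : iterator;
        iterator = wraps ? aligned_size : AlignUp(iterator + size, MAX_ALIGNMENT);
        if (wraps) {
            free_iterator = size; // the reverted version stored aligned_size here
        } else {
            free_iterator = std::max(free_iterator, offset + size);
        }
        return offset;
    }
};

The min_size clamp in GetStreamBufferSize (MAX_ALIGNMENT * NUM_SYNCS) exists so region_size = stream_buffer_size / NUM_SYNCS can never be zero, as the restored comment spells out; the reverted code derived the same clamp from the device-reported uniform-buffer alignment instead.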
vk_staging_buffer_pool.h:

@@ -1,6 +1,3 @@
-// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
-// SPDX-License-Identifier: GPL-3.0-or-later
-
 // SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
 // SPDX-License-Identifier: GPL-3.0-or-later
 
@@ -105,7 +102,6 @@ private:
     MemoryAllocator& memory_allocator;
     Scheduler& scheduler;
 
-    VkDeviceSize stream_alignment;
     vk::Buffer stream_buffer;
     std::span<u8> stream_pointer;
     VkDeviceSize stream_buffer_size;