[vk] Fast UBO: fix tracking, resize heuristics, add debug guard #2695
4 changed files with 47 additions and 17 deletions
|
@ -109,6 +109,9 @@ public:
|
||||||
|
|
||||||
void ReadBlock(DAddr address, void* dest_pointer, size_t size);
|
void ReadBlock(DAddr address, void* dest_pointer, size_t size);
|
||||||
void ReadBlockUnsafe(DAddr address, void* dest_pointer, size_t size);
|
void ReadBlockUnsafe(DAddr address, void* dest_pointer, size_t size);
|
||||||
|
#ifdef YUZU_DEBUG
|
||||||
|
bool ReadBlockFastChecked(DAddr address, void* dest_pointer, size_t size);
|
||||||
|
#endif
|
||||||
void WriteBlock(DAddr address, const void* src_pointer, size_t size);
|
void WriteBlock(DAddr address, const void* src_pointer, size_t size);
|
||||||
void WriteBlockUnsafe(DAddr address, const void* src_pointer, size_t size);
|
void WriteBlockUnsafe(DAddr address, const void* src_pointer, size_t size);
|
||||||
|
|
||||||
|
|
|
@ -467,6 +467,29 @@ void DeviceMemoryManager<Traits>::ReadBlockUnsafe(DAddr address, void* dest_poin
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef YUZU_DEBUG
|
||||||
|
template <typename Traits>
|
||||||
|
bool DeviceMemoryManager<Traits>::ReadBlockFastChecked(DAddr address, void* dest_pointer,
|
||||||
|
size_t size) {
|
||||||
|
bool success = true;
|
||||||
|
WalkBlock(
|
||||||
|
address, size,
|
||||||
|
[&](size_t copy_amount, DAddr current_vaddr) {
|
||||||
|
LOG_CRITICAL(Render, "DeviceMemory OOB/unmapped: addr=0x{:x} size={}", current_vaddr,
|
||||||
|
size);
|
||||||
|
std::memset(dest_pointer, 0, copy_amount);
|
||||||
|
success = false;
|
||||||
|
},
|
||||||
|
[&](size_t copy_amount, const u8* const src_ptr) {
|
||||||
|
std::memcpy(dest_pointer, src_ptr, copy_amount);
|
||||||
|
},
|
||||||
|
[&](const std::size_t copy_amount) {
|
||||||
|
dest_pointer = static_cast<u8*>(dest_pointer) + copy_amount;
|
||||||
|
});
|
||||||
|
return success;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
template <typename Traits>
|
template <typename Traits>
|
||||||
void DeviceMemoryManager<Traits>::WriteBlockUnsafe(DAddr address, const void* src_pointer,
|
void DeviceMemoryManager<Traits>::WriteBlockUnsafe(DAddr address, const void* src_pointer,
|
||||||
size_t size) {
|
size_t size) {
|
||||||
|
|
|
@ -806,7 +806,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
|
||||||
channel_state->uniform_buffer_binding_sizes[stage][binding_index] != size;
|
channel_state->uniform_buffer_binding_sizes[stage][binding_index] != size;
|
||||||
if (should_fast_bind) {
|
if (should_fast_bind) {
|
||||||
// We only have to bind when the currently bound buffer is not the fast version
|
// We only have to bind when the currently bound buffer is not the fast version
|
||||||
channel_state->fast_bound_uniform_buffers[stage] |= 1U << binding_index;
|
channel_state->fast_bound_uniform_buffers[stage] |= 1u << binding_index;
|
||||||
channel_state->uniform_buffer_binding_sizes[stage][binding_index] = size;
|
channel_state->uniform_buffer_binding_sizes[stage][binding_index] = size;
|
||||||
runtime.BindFastUniformBuffer(stage, binding_index, size);
|
runtime.BindFastUniformBuffer(stage, binding_index, size);
|
||||||
}
|
}
|
||||||
|
@ -815,13 +815,22 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if constexpr (IS_OPENGL) {
|
channel_state->fast_bound_uniform_buffers[stage] |= 1u << binding_index;
|
||||||
channel_state->fast_bound_uniform_buffers[stage] |= 1U << binding_index;
|
channel_state->uniform_buffer_binding_sizes[stage][binding_index] = size;
|
||||||
channel_state->uniform_buffer_binding_sizes[stage][binding_index] = size;
|
|
||||||
}
|
|
||||||
// Stream buffer path to avoid stalling on non-Nvidia drivers or Vulkan
|
// Stream buffer path to avoid stalling on non-Nvidia drivers or Vulkan
|
||||||
const std::span<u8> span = runtime.BindMappedUniformBuffer(stage, binding_index, size);
|
const std::span<u8> span = runtime.BindMappedUniformBuffer(stage, binding_index, size);
|
||||||
|
#ifdef YUZU_DEBUG
|
||||||
|
ASSERT(binding_index < NUM_GRAPHICS_UNIFORM_BUFFERS);
|
||||||
|
ASSERT(span.size() >= size && "UBO stream span too small");
|
||||||
|
if (!device_memory.ReadBlockFastChecked(device_addr, span.data(), size)) {
|
||||||
|
LOG_CRITICAL(Render, "DeviceMemory OOB/unmapped: addr=0x{:x} size={}", device_addr, size);
|
||||||
|
channel_state->fast_bound_uniform_buffers[stage] &= ~(1u << binding_index);
|
||||||
|
ASSERT(false);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
#else
|
||||||
device_memory.ReadBlockUnsafe(device_addr, span.data(), size);
|
device_memory.ReadBlockUnsafe(device_addr, span.data(), size);
|
||||||
|
#endif
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
// Classic cached path
|
// Classic cached path
|
||||||
|
@ -830,7 +839,8 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
|
||||||
}
|
}
|
||||||
// Skip binding if it's not needed and if the bound buffer is not the fast version
|
// Skip binding if it's not needed and if the bound buffer is not the fast version
|
||||||
// This exists to avoid instances where the fast buffer is bound and a GPU write happens
|
// This exists to avoid instances where the fast buffer is bound and a GPU write happens
|
||||||
needs_bind |= HasFastUniformBufferBound(stage, binding_index);
|
const bool was_fast_bound = HasFastUniformBufferBound(stage, binding_index);
|
||||||
|
needs_bind |= was_fast_bound;
|
||||||
if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
|
if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
|
||||||
needs_bind |= channel_state->uniform_buffer_binding_sizes[stage][binding_index] != size;
|
needs_bind |= channel_state->uniform_buffer_binding_sizes[stage][binding_index] != size;
|
||||||
}
|
}
|
||||||
|
@ -839,9 +849,6 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
|
||||||
}
|
}
|
||||||
const u32 offset = buffer.Offset(device_addr);
|
const u32 offset = buffer.Offset(device_addr);
|
||||||
if constexpr (IS_OPENGL) {
|
if constexpr (IS_OPENGL) {
|
||||||
// Fast buffer will be unbound
|
|
||||||
channel_state->fast_bound_uniform_buffers[stage] &= ~(1U << binding_index);
|
|
||||||
|
|
||||||
// Mark the index as dirty if offset doesn't match
|
// Mark the index as dirty if offset doesn't match
|
||||||
const bool is_copy_bind = offset != 0 && !runtime.SupportsNonZeroUniformOffset();
|
const bool is_copy_bind = offset != 0 && !runtime.SupportsNonZeroUniformOffset();
|
||||||
channel_state->dirty_uniform_buffers[stage] |= (is_copy_bind ? 1U : 0U) << index;
|
channel_state->dirty_uniform_buffers[stage] |= (is_copy_bind ? 1U : 0U) << index;
|
||||||
|
@ -855,6 +862,7 @@ void BufferCache<P>::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
|
||||||
} else {
|
} else {
|
||||||
runtime.BindUniformBuffer(buffer, offset, size);
|
runtime.BindUniformBuffer(buffer, offset, size);
|
||||||
}
|
}
|
||||||
|
channel_state->fast_bound_uniform_buffers[stage] &= ~(1u << binding_index);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
|
@ -1789,12 +1797,7 @@ std::span<u8> BufferCache<P>::ImmediateBuffer(size_t wanted_capacity) {
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
bool BufferCache<P>::HasFastUniformBufferBound(size_t stage, u32 binding_index) const noexcept {
|
bool BufferCache<P>::HasFastUniformBufferBound(size_t stage, u32 binding_index) const noexcept {
|
||||||
if constexpr (IS_OPENGL) {
|
return ((channel_state->fast_bound_uniform_buffers[stage] >> binding_index) & 1u) != 0;
|
||||||
return ((channel_state->fast_bound_uniform_buffers[stage] >> binding_index) & 1) != 0;
|
|
||||||
} else {
|
|
||||||
// Only OpenGL has fast uniform buffers
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class P>
|
template <class P>
|
||||||
|
|
|
@ -53,6 +53,7 @@ constexpr u32 NUM_COMPUTE_UNIFORM_BUFFERS = 8;
|
||||||
constexpr u32 NUM_STORAGE_BUFFERS = 16;
|
constexpr u32 NUM_STORAGE_BUFFERS = 16;
|
||||||
constexpr u32 NUM_TEXTURE_BUFFERS = 32;
|
constexpr u32 NUM_TEXTURE_BUFFERS = 32;
|
||||||
constexpr u32 NUM_STAGES = 5;
|
constexpr u32 NUM_STAGES = 5;
|
||||||
|
static_assert(NUM_GRAPHICS_UNIFORM_BUFFERS <= 32, "fast bitmask must fit u32");
|
||||||
|
|
||||||
using UniformBufferSizes = std::array<std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES>;
|
using UniformBufferSizes = std::array<std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFERS>, NUM_STAGES>;
|
||||||
using ComputeUniformBufferSizes = std::array<u32, NUM_COMPUTE_UNIFORM_BUFFERS>;
|
using ComputeUniformBufferSizes = std::array<u32, NUM_COMPUTE_UNIFORM_BUFFERS>;
|
||||||
|
@ -137,8 +138,8 @@ public:
|
||||||
u32 written_compute_texture_buffers = 0;
|
u32 written_compute_texture_buffers = 0;
|
||||||
u32 image_compute_texture_buffers = 0;
|
u32 image_compute_texture_buffers = 0;
|
||||||
|
|
||||||
std::array<u32, 16> uniform_cache_hits{};
|
std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFERS> uniform_cache_hits{};
|
||||||
std::array<u32, 16> uniform_cache_shots{};
|
std::array<u32, NUM_GRAPHICS_UNIFORM_BUFFERS> uniform_cache_shots{};
|
||||||
|
|
||||||
u32 uniform_buffer_skip_cache_size = DEFAULT_SKIP_CACHE_SIZE;
|
u32 uniform_buffer_skip_cache_size = DEFAULT_SKIP_CACHE_SIZE;
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue