diff --git a/src/core/device_memory_manager.h b/src/core/device_memory_manager.h
index 6dcf7bb228..2b5ad8847d 100644
--- a/src/core/device_memory_manager.h
+++ b/src/core/device_memory_manager.h
@@ -109,6 +109,11 @@ public:
 
     void ReadBlock(DAddr address, void* dest_pointer, size_t size);
     void ReadBlockUnsafe(DAddr address, void* dest_pointer, size_t size);
+#ifdef YUZU_DEBUG
+    /// Debug-only checked read of `size` bytes at `address` into `dest_pointer`.
+    /// @returns true if every chunk was copied, false if any chunk was reported OOB/unmapped.
+    bool ReadBlockFastChecked(DAddr address, void* dest_pointer, size_t size);
+#endif
     void WriteBlock(DAddr address, const void* src_pointer, size_t size);
     void WriteBlockUnsafe(DAddr address, const void* src_pointer, size_t size);
 
diff --git a/src/core/device_memory_manager.inc b/src/core/device_memory_manager.inc
index 52dff5df9a..b00999772e 100644
--- a/src/core/device_memory_manager.inc
+++ b/src/core/device_memory_manager.inc
@@ -467,6 +467,33 @@ void DeviceMemoryManager<Traits>::ReadBlockUnsafe(DAddr address, void* dest_poin
         });
 }
 
+#ifdef YUZU_DEBUG
+// Debug-only checked read: copies `size` bytes starting at `address` into `dest_pointer`.
+// Chunks that WalkBlock routes to the first (OOB/unmapped) callback are logged and zero-filled.
+// Returns true only if no chunk took that path.
+template <typename Traits>
+bool DeviceMemoryManager<Traits>::ReadBlockFastChecked(DAddr address, void* dest_pointer,
+                                                       size_t size) {
+    bool success = true;
+    WalkBlock(
+        address, size,
+        [&](size_t copy_amount, DAddr current_vaddr) {
+            // Pair the chunk address with the chunk length, not the whole request size
+            LOG_CRITICAL(Render, "DeviceMemory OOB/unmapped: addr=0x{:x} size={}", current_vaddr,
+                         copy_amount);
+            std::memset(dest_pointer, 0, copy_amount);
+            success = false;
+        },
+        [&](size_t copy_amount, const u8* const src_ptr) {
+            std::memcpy(dest_pointer, src_ptr, copy_amount);
+        },
+        [&](const std::size_t copy_amount) {
+            dest_pointer = static_cast<u8*>(dest_pointer) + copy_amount;
+        });
+    return success;
+}
+#endif
+
 template <typename Traits>
 void DeviceMemoryManager<Traits>::WriteBlockUnsafe(DAddr address, const void* src_pointer,
                                                    size_t size) {
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 5223afe937..13b7e98491 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -806,7 +806,7 @@ void BufferCache

::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
                     channel_state->uniform_buffer_binding_sizes[stage][binding_index] != size;
                 if (should_fast_bind) {
                     // We only have to bind when the currently bound buffer is not the fast version
-                    channel_state->fast_bound_uniform_buffers[stage] |= 1U << binding_index;
+                    channel_state->fast_bound_uniform_buffers[stage] |= (1u << binding_index);
                     channel_state->uniform_buffer_binding_sizes[stage][binding_index] = size;
                     runtime.BindFastUniformBuffer(stage, binding_index, size);
                 }
@@ -815,13 +815,22 @@ void BufferCache

::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
                 return;
             }
         }
-        if constexpr (IS_OPENGL) {
-            channel_state->fast_bound_uniform_buffers[stage] |= 1U << binding_index;
-            channel_state->uniform_buffer_binding_sizes[stage][binding_index] = size;
-        }
+#ifdef YUZU_DEBUG
+        ASSERT(binding_index < NUM_GRAPHICS_UNIFORM_BUFFERS); // validate before any indexed use
+#endif
+        channel_state->fast_bound_uniform_buffers[stage] |= 1u << binding_index;
+        channel_state->uniform_buffer_binding_sizes[stage][binding_index] = size;
         // Stream buffer path to avoid stalling on non-Nvidia drivers or Vulkan
         const std::span span = runtime.BindMappedUniformBuffer(stage, binding_index, size);
+#ifdef YUZU_DEBUG
+        ASSERT(span.size() >= size && "UBO stream span too small");
+        if (!device_memory.ReadBlockFastChecked(device_addr, span.data(), size)) {
+            channel_state->fast_bound_uniform_buffers[stage] &= ~(1u << binding_index);
+            ASSERT(false); // every unmapped chunk was already logged by ReadBlockFastChecked
+        }
+#else
         device_memory.ReadBlockUnsafe(device_addr, span.data(), size);
+#endif
         return;
     }
     // Classic cached path
@@ -830,7 +839,8 @@ void BufferCache

::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
     }
     // Skip binding if it's not needed and if the bound buffer is not the fast version
     // This exists to avoid instances where the fast buffer is bound and a GPU write happens
-    needs_bind |= HasFastUniformBufferBound(stage, binding_index);
+    const bool fast_buffer_bound = HasFastUniformBufferBound(stage, binding_index);
+    needs_bind |= fast_buffer_bound;
     if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
         needs_bind |= channel_state->uniform_buffer_binding_sizes[stage][binding_index] != size;
     }
@@ -839,9 +849,6 @@ void BufferCache

::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
     }
     const u32 offset = buffer.Offset(device_addr);
     if constexpr (IS_OPENGL) {
-        // Fast buffer will be unbound
-        channel_state->fast_bound_uniform_buffers[stage] &= ~(1U << binding_index);
-
         // Mark the index as dirty if offset doesn't match
         const bool is_copy_bind = offset != 0 && !runtime.SupportsNonZeroUniformOffset();
         channel_state->dirty_uniform_buffers[stage] |= (is_copy_bind ? 1U : 0U) << index;
@@ -855,6 +862,7 @@ void BufferCache

::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
     } else {
         runtime.BindUniformBuffer(buffer, offset, size);
     }
+    channel_state->fast_bound_uniform_buffers[stage] &= ~(1u << binding_index);
 }
 
 template
@@ -1789,12 +1797,7 @@ std::span BufferCache

::ImmediateBuffer(size_t wanted_capacity) { template bool BufferCache

::HasFastUniformBufferBound(size_t stage, u32 binding_index) const noexcept {
-    if constexpr (IS_OPENGL) {
-        return ((channel_state->fast_bound_uniform_buffers[stage] >> binding_index) & 1) != 0;
-    } else {
-        // Only OpenGL has fast uniform buffers
-        return false;
-    }
+    return (channel_state->fast_bound_uniform_buffers[stage] & (1u << binding_index)) != 0;
 }
 
 template
diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h
index 486d19fb79..09631ffd83 100644
--- a/src/video_core/buffer_cache/buffer_cache_base.h
+++ b/src/video_core/buffer_cache/buffer_cache_base.h
@@ -53,6 +53,7 @@ constexpr u32 NUM_COMPUTE_UNIFORM_BUFFERS = 8;
 constexpr u32 NUM_STORAGE_BUFFERS = 16;
 constexpr u32 NUM_TEXTURE_BUFFERS = 32;
 constexpr u32 NUM_STAGES = 5;
+static_assert(NUM_GRAPHICS_UNIFORM_BUFFERS <= 32, "fast bitmask must fit u32");
 
 using UniformBufferSizes = std::array, NUM_STAGES>;
 using ComputeUniformBufferSizes = std::array;
@@ -137,8 +138,8 @@ public:
     u32 written_compute_texture_buffers = 0;
     u32 image_compute_texture_buffers = 0;
 
-    std::array uniform_cache_hits{};
-    std::array uniform_cache_shots{};
+    std::array uniform_cache_hits{};
+    std::array uniform_cache_shots{};
     u32 uniform_buffer_skip_cache_size = DEFAULT_SKIP_CACHE_SIZE;