& mask,
const UniformBufferSizes* sizes) {
- if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
- if (channel_state->enabled_uniform_buffer_masks != mask) {
- if constexpr (IS_OPENGL) {
- channel_state->fast_bound_uniform_buffers.fill(0);
- }
+ const bool mask_changed = channel_state->enabled_uniform_buffer_masks != mask;
+ if (mask_changed) {
+ channel_state->fast_bound_uniform_buffers.fill(0);
+ if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
channel_state->dirty_uniform_buffers.fill(~u32{0});
channel_state->uniform_buffer_binding_sizes.fill({});
}
@@ -806,7 +805,7 @@ void BufferCache::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
channel_state->uniform_buffer_binding_sizes[stage][binding_index] != size;
if (should_fast_bind) {
// We only have to bind when the currently bound buffer is not the fast version
- channel_state->fast_bound_uniform_buffers[stage] |= 1U << binding_index;
+ channel_state->fast_bound_uniform_buffers[stage] |= 1u << binding_index;
channel_state->uniform_buffer_binding_sizes[stage][binding_index] = size;
runtime.BindFastUniformBuffer(stage, binding_index, size);
}
@@ -815,13 +814,22 @@ void BufferCache
::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
return;
}
}
- if constexpr (IS_OPENGL) {
- channel_state->fast_bound_uniform_buffers[stage] |= 1U << binding_index;
- channel_state->uniform_buffer_binding_sizes[stage][binding_index] = size;
- }
+ channel_state->fast_bound_uniform_buffers[stage] |= 1u << binding_index;
+ channel_state->uniform_buffer_binding_sizes[stage][binding_index] = size;
// Stream buffer path to avoid stalling on non-Nvidia drivers or Vulkan
const std::span span = runtime.BindMappedUniformBuffer(stage, binding_index, size);
+#ifdef YUZU_DEBUG
+ ASSERT(binding_index < NUM_GRAPHICS_UNIFORM_BUFFERS);
+ ASSERT(span.size() >= size && "UBO stream span too small");
+ if (!device_memory.ReadBlockFastChecked(device_addr, span.data(), size)) {
+ LOG_CRITICAL(Render, "DeviceMemory OOB/unmapped: addr=0x{:x} size={}", device_addr, size);
+ channel_state->fast_bound_uniform_buffers[stage] &= ~(1u << binding_index);
+ ASSERT(false);
+ return;
+ }
+#else
device_memory.ReadBlockUnsafe(device_addr, span.data(), size);
+#endif
return;
}
// Classic cached path
@@ -830,7 +838,8 @@ void BufferCache::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
}
// Skip binding if it's not needed and if the bound buffer is not the fast version
// This exists to avoid instances where the fast buffer is bound and a GPU write happens
- needs_bind |= HasFastUniformBufferBound(stage, binding_index);
+ const bool was_fast_bound = HasFastUniformBufferBound(stage, binding_index);
+ needs_bind |= was_fast_bound;
if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
needs_bind |= channel_state->uniform_buffer_binding_sizes[stage][binding_index] != size;
}
@@ -839,9 +848,6 @@ void BufferCache
::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
}
const u32 offset = buffer.Offset(device_addr);
if constexpr (IS_OPENGL) {
- // Fast buffer will be unbound
- channel_state->fast_bound_uniform_buffers[stage] &= ~(1U << binding_index);
-
// Mark the index as dirty if offset doesn't match
const bool is_copy_bind = offset != 0 && !runtime.SupportsNonZeroUniformOffset();
channel_state->dirty_uniform_buffers[stage] |= (is_copy_bind ? 1U : 0U) << index;
@@ -855,6 +861,7 @@ void BufferCache
::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32
} else {
runtime.BindUniformBuffer(buffer, offset, size);
}
+ channel_state->fast_bound_uniform_buffers[stage] &= ~(1u << binding_index);
}
template
@@ -1789,12 +1796,7 @@ std::span BufferCache::ImmediateBuffer(size_t wanted_capacity) {
template
bool BufferCache::HasFastUniformBufferBound(size_t stage, u32 binding_index) const noexcept {
- if constexpr (IS_OPENGL) {
- return ((channel_state->fast_bound_uniform_buffers[stage] >> binding_index) & 1) != 0;
- } else {
- // Only OpenGL has fast uniform buffers
- return false;
- }
+ return (channel_state->fast_bound_uniform_buffers[stage] & (1u << binding_index)) != 0;
}
template
diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h
index 486d19fb79..09631ffd83 100644
--- a/src/video_core/buffer_cache/buffer_cache_base.h
+++ b/src/video_core/buffer_cache/buffer_cache_base.h
@@ -53,6 +53,7 @@ constexpr u32 NUM_COMPUTE_UNIFORM_BUFFERS = 8;
constexpr u32 NUM_STORAGE_BUFFERS = 16;
constexpr u32 NUM_TEXTURE_BUFFERS = 32;
constexpr u32 NUM_STAGES = 5;
+static_assert(NUM_GRAPHICS_UNIFORM_BUFFERS <= 32, "fast bitmask must fit u32");
using UniformBufferSizes = std::array, NUM_STAGES>;
using ComputeUniformBufferSizes = std::array;
@@ -137,8 +138,8 @@ public:
u32 written_compute_texture_buffers = 0;
u32 image_compute_texture_buffers = 0;
- std::array uniform_cache_hits{};
- std::array uniform_cache_shots{};
+ std::array uniform_cache_hits{};
+ std::array uniform_cache_shots{};
u32 uniform_buffer_skip_cache_size = DEFAULT_SKIP_CACHE_SIZE;
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
index ecc4f77dc7..0fbe707b04 100644
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
@@ -25,12 +25,12 @@ namespace {
using namespace Common::Literals;
-// Maximum potential alignment of a Vulkan buffer
-constexpr VkDeviceSize MAX_ALIGNMENT = 256;
+// Floor for the stream buffer alignment; the device's uniform buffer alignment is used when larger
+constexpr VkDeviceSize MIN_STREAM_ALIGNMENT = 256;
// Stream buffer size in bytes
constexpr VkDeviceSize MAX_STREAM_BUFFER_SIZE = 128_MiB;
-size_t GetStreamBufferSize(const Device& device) {
+size_t GetStreamBufferSize(const Device& device, VkDeviceSize alignment) {
VkDeviceSize size{0};
if (device.HasDebuggingToolAttached()) {
bool found_heap = false;
@@ -53,8 +53,9 @@ size_t GetStreamBufferSize(const Device& device) {
// Clamp to the configured maximum, align up for safety, and ensure a sane minimum so
// region_size (stream_buffer_size / NUM_SYNCS) never becomes zero.
- const VkDeviceSize aligned = (std::min)(Common::AlignUp(size, MAX_ALIGNMENT), MAX_STREAM_BUFFER_SIZE);
- const VkDeviceSize min_size = MAX_ALIGNMENT * StagingBufferPool::NUM_SYNCS;
+ const VkDeviceSize aligned =
+ (std::min)(Common::AlignUp(size, alignment), MAX_STREAM_BUFFER_SIZE);
+ const VkDeviceSize min_size = alignment * StagingBufferPool::NUM_SYNCS;
return static_cast((std::max)(aligned, min_size));
}
} // Anonymous namespace
@@ -62,8 +63,10 @@ size_t GetStreamBufferSize(const Device& device) {
StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& memory_allocator_,
Scheduler& scheduler_)
: device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_},
- stream_buffer_size{GetStreamBufferSize(device)}, region_size{stream_buffer_size /
- StagingBufferPool::NUM_SYNCS} {
+ stream_alignment{std::max(device_.GetUniformBufferAlignment(),
+ MIN_STREAM_ALIGNMENT)},
+ stream_buffer_size{GetStreamBufferSize(device_, stream_alignment)},
+ region_size{stream_buffer_size / StagingBufferPool::NUM_SYNCS} {
VkBufferCreateInfo stream_ci = {
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
@@ -116,10 +119,11 @@ void StagingBufferPool::TickFrame() {
}
StagingBufferRef StagingBufferPool::GetStreamBuffer(size_t size) {
- const size_t aligned_size = Common::AlignUp(size, MAX_ALIGNMENT);
+ const size_t alignment = static_cast(stream_alignment);
+ const size_t aligned_size = Common::AlignUp(size, alignment);
const bool wraps = iterator + size >= stream_buffer_size;
const size_t new_iterator =
- wraps ? aligned_size : Common::AlignUp(iterator + size, MAX_ALIGNMENT);
+ wraps ? aligned_size : Common::AlignUp(iterator + size, alignment);
const size_t begin_region = wraps ? 0 : Region(iterator);
const size_t last_byte = new_iterator == 0 ? 0 : new_iterator - 1;
const size_t end_region = (std::min)(Region(last_byte) + 1, NUM_SYNCS);
@@ -145,7 +149,7 @@ StagingBufferRef StagingBufferPool::GetStreamBuffer(size_t size) {
current_tick);
used_iterator = 0;
iterator = 0;
- free_iterator = size;
+ free_iterator = aligned_size;
const size_t head_last_byte = aligned_size == 0 ? 0 : aligned_size - 1;
const size_t head_end_region = (std::min)(Region(head_last_byte) + 1, NUM_SYNCS);
if (AreRegionsActive(0, head_end_region)) {
@@ -160,7 +164,7 @@ StagingBufferRef StagingBufferPool::GetStreamBuffer(size_t size) {
iterator = new_iterator;
if (!wraps) {
- free_iterator = (std::max)(free_iterator, offset + size);
+ free_iterator = (std::max)(free_iterator, offset + aligned_size);
}
return StagingBufferRef{
diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
index f63a203272..5c40ca069f 100644
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.h
@@ -1,3 +1,6 @@
+// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
// SPDX-FileCopyrightText: Copyright 2022 yuzu Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
@@ -102,6 +105,7 @@ private:
MemoryAllocator& memory_allocator;
Scheduler& scheduler;
+ VkDeviceSize stream_alignment;
vk::Buffer stream_buffer;
std::span stream_pointer;
VkDeviceSize stream_buffer_size;
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index 0e0bec2ce3..ef118b89e4 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -502,6 +502,12 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
}
if (is_qualcomm) {
+ if (extensions.shader_float_controls) {
+ LOG_WARNING(Render_Vulkan,
+ "Qualcomm drivers have broken VK_KHR_shader_float_controls");
+ RemoveExtension(extensions.shader_float_controls,
+ VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME);
+ }
LOG_WARNING(Render_Vulkan,
"Qualcomm drivers have a slow VK_KHR_push_descriptor implementation");
//RemoveExtension(extensions.push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME);
@@ -985,6 +991,17 @@ bool Device::GetSuitability(bool requires_swapchain) {
// Set instance version.
instance_version = properties.properties.apiVersion;
+ VkPhysicalDeviceDriverProperties driver_probe_props{
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES,
+ };
+ VkPhysicalDeviceProperties2 driver_probe{
+ .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2,
+ .pNext = &driver_probe_props,
+ };
+ physical.GetProperties2(driver_probe);
+ const bool is_qualcomm = driver_probe_props.driverID == VK_DRIVER_ID_QUALCOMM_PROPRIETARY;
+ const bool disable_shader_int64 = is_qualcomm;
+
// Minimum of API version 1.1 is required. (This is well-supported.)
ASSERT(instance_version >= VK_API_VERSION_1_1);
@@ -1095,8 +1112,18 @@ bool Device::GetSuitability(bool requires_swapchain) {
// Perform the feature test.
physical.GetFeatures2(features2);
+ if (disable_shader_int64) {
+ features2.features.shaderInt64 = VK_FALSE;
+ }
+
// Base Vulkan 1.0 features are always valid regardless of instance version.
features.features = features2.features;
+ if (disable_shader_int64) {
+ features.features.shaderInt64 = VK_FALSE;
+ features.shader_atomic_int64.shaderBufferInt64Atomics = VK_FALSE;
+ features.shader_atomic_int64.shaderSharedInt64Atomics = VK_FALSE;
+ LOG_WARNING(Render_Vulkan, "Disabling broken shaderInt64 support on Qualcomm drivers ");
+ }
// Some features are mandatory. Check those.
#define CHECK_FEATURE(feature, name) \
@@ -1137,8 +1164,7 @@ bool Device::GetSuitability(bool requires_swapchain) {
properties.subgroup_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
SetNext(next, properties.subgroup_properties);
- // Retrieve relevant extension properties.
- if (extensions.shader_float_controls) {
+ if (is_qualcomm) {
properties.float_controls.sType =
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT_CONTROLS_PROPERTIES;
SetNext(next, properties.float_controls);
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h
index cb13f28523..27b80b210c 100644
--- a/src/video_core/vulkan_common/vulkan_device.h
+++ b/src/video_core/vulkan_common/vulkan_device.h
@@ -377,6 +377,10 @@ public:
/// Returns true if shader int64 is supported.
bool IsShaderInt64Supported() const {
+ // Qualcomm's proprietary driver is always reported as lacking shader int64.
+ if (GetDriverID() == VK_DRIVER_ID_QUALCOMM_PROPRIETARY) {
+ return false;
+ }
return features.features.shaderInt64;
}
@@ -591,6 +595,10 @@ public:
/// Returns true if the device supports VK_KHR_shader_atomic_int64.
bool IsExtShaderAtomicInt64Supported() const {
+ // Qualcomm's proprietary driver is always reported as lacking int64 atomics.
+ if (GetDriverID() == VK_DRIVER_ID_QUALCOMM_PROPRIETARY) {
+ return false;
+ }
return extensions.shader_atomic_int64;
}