Vulkan: harden the stream buffer pool and add Qualcomm driver workarounds.

What this patch changes
  * vk_staging_buffer_pool.cpp, GetStreamBufferSize:
    when a debugging tool is attached and the heap callback never runs, fall
    back to MAX_STREAM_BUFFER_SIZE instead of keeping a size of 0. The result
    is aligned, capped at MAX_STREAM_BUFFER_SIZE and then raised to at least
    MAX_ALIGNMENT * NUM_SYNCS.
  * vk_staging_buffer_pool.cpp, GetStreamBuffer:
    the wrap-around decision is computed once up front (wraps), the activity
    checks use the aligned end of the request, the regions covered by the new
    allocation are stamped with the current tick, and free_iterator is only
    advanced with max() when the request does not wrap.
  * vulkan_device.cpp, Device constructor:
    VK_KHR_shader_float_controls is removed on Qualcomm, with a warning.
  * vulkan_device.cpp, GetSuitability:
    the driver ID is probed; on VK_DRIVER_ID_QUALCOMM_PROPRIETARY shaderInt64
    and both shader_atomic_int64 feature bits are cleared, with a warning.
  * vulkan_device.h:
    IsShaderInt64Supported and IsExtShaderAtomicInt64Supported return false on
    the Qualcomm proprietary driver.

Review notes (hand edits made to this patch)
  * The return statement added to GetStreamBufferSize had lost its template
    argument; it is restored as static_cast<size_t>, the function's declared
    return type.
  * The last context line of the GetStreamBuffer hunk carried the same damage
    and its type cannot be recovered from the patch, so that context line is
    omitted and the hunk header reads 30/52 lines.
  * A fourth vulkan_device.cpp hunk replaced "if (extensions.shader_float_controls)"
    with "if (is_qualcomm)" around the code that chains properties.float_controls.
    That would stop querying float-controls properties on every non-Qualcomm
    device, so the hunk is not included and the original guard stays in place.
    The post-image blob id on that file's index line predates this edit.
  * The final vulkan_device.h hunk keeps two trailing context lines instead of
    three so the patch does not rely on a trailing blank line.
  * Leading whitespace was reconstructed by hand. If context fails to match,
    apply with: git apply --ignore-whitespace

diff --git a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
index 08513d1534..ecc4f77dc7 100644
--- a/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
+++ b/src/video_core/renderer_vulkan/vk_staging_buffer_pool.cpp
@@ -33,19 +33,29 @@ constexpr VkDeviceSize MAX_STREAM_BUFFER_SIZE = 128_MiB;
 size_t GetStreamBufferSize(const Device& device) {
     VkDeviceSize size{0};
     if (device.HasDebuggingToolAttached()) {
-        ForEachDeviceLocalHostVisibleHeap(device, [&size](size_t index, VkMemoryHeap& heap) {
+        bool found_heap = false;
+        ForEachDeviceLocalHostVisibleHeap(device, [&size, &found_heap](size_t /*index*/, VkMemoryHeap& heap) {
             size = (std::max)(size, heap.size);
+            found_heap = true;
         });
-        // If rebar is not supported, cut the max heap size to 40%. This will allow 2 captures to be
-        // loaded at the same time in RenderDoc. If rebar is supported, this shouldn't be an issue
-        // as the heap will be much larger.
-        if (size <= 256_MiB) {
+        // If no suitable heap was found fall back to the default cap to avoid creating a zero-sized stream buffer.
+        if (!found_heap) {
+            size = MAX_STREAM_BUFFER_SIZE;
+        } else if (size <= 256_MiB) {
+            // If rebar is not supported, cut the max heap size to 40%. This will allow 2 captures to be
+            // loaded at the same time in RenderDoc. If rebar is supported, this shouldn't be an issue
+            // as the heap will be much larger.
             size = size * 40 / 100;
         }
     } else {
         size = MAX_STREAM_BUFFER_SIZE;
     }
-    return (std::min)(Common::AlignUp(size, MAX_ALIGNMENT), MAX_STREAM_BUFFER_SIZE);
+
+    // Clamp to the configured maximum, align up for safety, and ensure a sane minimum so
+    // region_size (stream_buffer_size / NUM_SYNCS) never becomes zero.
+    const VkDeviceSize aligned = (std::min)(Common::AlignUp(size, MAX_ALIGNMENT), MAX_STREAM_BUFFER_SIZE);
+    const VkDeviceSize min_size = MAX_ALIGNMENT * StagingBufferPool::NUM_SYNCS;
+    return static_cast<size_t>((std::max)(aligned, min_size));
 }
 } // Anonymous namespace
 
@@ -106,30 +116,52 @@ void StagingBufferPool::TickFrame() {
 }
 
 StagingBufferRef StagingBufferPool::GetStreamBuffer(size_t size) {
-    if (AreRegionsActive(Region(free_iterator) + 1,
-                         (std::min)(Region(iterator + size) + 1, NUM_SYNCS))) {
+    const size_t aligned_size = Common::AlignUp(size, MAX_ALIGNMENT);
+    const bool wraps = iterator + size >= stream_buffer_size;
+    const size_t new_iterator =
+        wraps ? aligned_size : Common::AlignUp(iterator + size, MAX_ALIGNMENT);
+    const size_t begin_region = wraps ? 0 : Region(iterator);
+    const size_t last_byte = new_iterator == 0 ? 0 : new_iterator - 1;
+    const size_t end_region = (std::min)(Region(last_byte) + 1, NUM_SYNCS);
+    const size_t guard_begin = (std::min)(Region(free_iterator) + 1, NUM_SYNCS);
+
+    if (!wraps) {
+        if (guard_begin < end_region && AreRegionsActive(guard_begin, end_region)) {
+            // Avoid waiting for the previous usages to be free
+            return GetStagingBuffer(size, MemoryUsage::Upload);
+        }
+    } else if (guard_begin < NUM_SYNCS && AreRegionsActive(guard_begin, NUM_SYNCS)) {
         // Avoid waiting for the previous usages to be free
         return GetStagingBuffer(size, MemoryUsage::Upload);
     }
+
     const u64 current_tick = scheduler.CurrentTick();
     std::fill(sync_ticks.begin() + Region(used_iterator), sync_ticks.begin() + Region(iterator),
               current_tick);
     used_iterator = iterator;
-    free_iterator = (std::max)(free_iterator, iterator + size);
 
-    if (iterator + size >= stream_buffer_size) {
+    if (wraps) {
         std::fill(sync_ticks.begin() + Region(used_iterator), sync_ticks.begin() + NUM_SYNCS,
                   current_tick);
         used_iterator = 0;
         iterator = 0;
         free_iterator = size;
-
-        if (AreRegionsActive(0, Region(size) + 1)) {
+        const size_t head_last_byte = aligned_size == 0 ? 0 : aligned_size - 1;
+        const size_t head_end_region = (std::min)(Region(head_last_byte) + 1, NUM_SYNCS);
+        if (AreRegionsActive(0, head_end_region)) {
             // Avoid waiting for the previous usages to be free
             return GetStagingBuffer(size, MemoryUsage::Upload);
         }
     }
-    const size_t offset = iterator;
-    iterator = Common::AlignUp(iterator + size, MAX_ALIGNMENT);
+
+    std::fill(sync_ticks.begin() + begin_region, sync_ticks.begin() + end_region, current_tick);
+
+    const size_t offset = wraps ? 0 : iterator;
+    iterator = new_iterator;
+
+    if (!wraps) {
+        free_iterator = (std::max)(free_iterator, offset + size);
+    }
+
     return StagingBufferRef{
         .buffer = *stream_buffer,
diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp
index 0e0bec2ce3..ef118b89e4 100644
--- a/src/video_core/vulkan_common/vulkan_device.cpp
+++ b/src/video_core/vulkan_common/vulkan_device.cpp
@@ -502,6 +502,12 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
     }
 
     if (is_qualcomm) {
+        if (extensions.shader_float_controls) {
+            LOG_WARNING(Render_Vulkan,
+                        "Qualcomm drivers have broken VK_KHR_shader_float_controls");
+            RemoveExtension(extensions.shader_float_controls,
+                            VK_KHR_SHADER_FLOAT_CONTROLS_EXTENSION_NAME);
+        }
         LOG_WARNING(Render_Vulkan,
                     "Qualcomm drivers have a slow VK_KHR_push_descriptor implementation");
         //RemoveExtension(extensions.push_descriptor, VK_KHR_PUSH_DESCRIPTOR_EXTENSION_NAME);
@@ -985,6 +991,17 @@ bool Device::GetSuitability(bool requires_swapchain) {
     // Set instance version.
     instance_version = properties.properties.apiVersion;
 
+    VkPhysicalDeviceDriverProperties driver_probe_props{
+        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES,
+    };
+    VkPhysicalDeviceProperties2 driver_probe{
+        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2,
+        .pNext = &driver_probe_props,
+    };
+    physical.GetProperties2(driver_probe);
+    const bool is_qualcomm = driver_probe_props.driverID == VK_DRIVER_ID_QUALCOMM_PROPRIETARY;
+    const bool disable_shader_int64 = is_qualcomm;
+
     // Minimum of API version 1.1 is required. (This is well-supported.)
     ASSERT(instance_version >= VK_API_VERSION_1_1);
 
@@ -1095,8 +1112,18 @@ bool Device::GetSuitability(bool requires_swapchain) {
     // Perform the feature test.
     physical.GetFeatures2(features2);
 
+    if (disable_shader_int64) {
+        features2.features.shaderInt64 = VK_FALSE;
+    }
+
     // Base Vulkan 1.0 features are always valid regardless of instance version.
     features.features = features2.features;
+    if (disable_shader_int64) {
+        features.features.shaderInt64 = VK_FALSE;
+        features.shader_atomic_int64.shaderBufferInt64Atomics = VK_FALSE;
+        features.shader_atomic_int64.shaderSharedInt64Atomics = VK_FALSE;
+        LOG_WARNING(Render_Vulkan, "Disabling broken shaderInt64 support on Qualcomm drivers ");
+    }
 
     // Some features are mandatory. Check those.
 #define CHECK_FEATURE(feature, name)                                                               \
diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h
index bd54144480..eb9c340a4d 100644
--- a/src/video_core/vulkan_common/vulkan_device.h
+++ b/src/video_core/vulkan_common/vulkan_device.h
@@ -376,6 +376,10 @@ public:
 
     /// Returns true if shader int64 is supported.
     bool IsShaderInt64Supported() const {
+        const auto driver = GetDriverID();
+        if (driver == VK_DRIVER_ID_QUALCOMM_PROPRIETARY) {
+            return false;
+        }
         return features.features.shaderInt64;
     }
 
@@ -585,5 +589,9 @@ public:
 
     /// Returns true if the device supports VK_KHR_shader_atomic_int64.
     bool IsExtShaderAtomicInt64Supported() const {
+        const auto driver = GetDriverID();
+        if (driver == VK_DRIVER_ID_QUALCOMM_PROPRIETARY) {
+            return false;
+        }
         return extensions.shader_atomic_int64;
     }