diff --git a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp
index 83e4857672..535f34e1e1 100644
--- a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp
+++ b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp
@@ -239,7 +239,11 @@ namespace Vulkan {
     VmaAllocation allocation{};
     VkMemoryPropertyFlags property_flags{};
 
-    vk::Check(vmaCreateBuffer(allocator, &ci, &alloc_ci, &handle, &allocation, &alloc_info));
+    VkResult result = vmaCreateBuffer(allocator, &ci, &alloc_ci, &handle, &allocation, &alloc_info);
+    if (result == VK_ERROR_OUT_OF_DEVICE_MEMORY) {
+        LOG_ERROR(Render_Vulkan, "Out of memory creating buffer (size: {})", ci.size);
+    }
+    vk::Check(result);
     vmaGetAllocationMemoryProperties(allocator, allocation, &property_flags);
 
     u8* data = reinterpret_cast<u8*>(alloc_info.pMappedData);
@@ -253,30 +257,36 @@ namespace Vulkan {
 MemoryCommit MemoryAllocator::Commit(const VkMemoryRequirements& reqs, MemoryUsage usage) {
+    // Adreno stands firm - ensure 4KB alignment for Qualcomm GPUs
+    VkMemoryRequirements adjusted_reqs = reqs;
+    if (device.GetDriverID() == VK_DRIVER_ID_QUALCOMM_PROPRIETARY) {
+        adjusted_reqs.size = Common::AlignUp(reqs.size, 4096);
+    }
+
     const auto vma_usage = MemoryUsageVma(usage);
     VmaAllocationCreateInfo ci{};
     ci.flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT | MemoryUsageVmaFlags(usage);
     ci.usage = vma_usage;
-    ci.memoryTypeBits = reqs.memoryTypeBits & valid_memory_types;
+    ci.memoryTypeBits = adjusted_reqs.memoryTypeBits & valid_memory_types;
     ci.requiredFlags = 0;
     ci.preferredFlags = MemoryUsagePreferredVmaFlags(usage);
 
     VmaAllocation a{};
     VmaAllocationInfo info{};
-    VkResult res = vmaAllocateMemory(allocator, &reqs, &ci, &a, &info);
+    VkResult res = vmaAllocateMemory(allocator, &adjusted_reqs, &ci, &a, &info);
 
     if (res != VK_SUCCESS) {
         // Relax 1: drop budget constraint
         auto ci2 = ci;
         ci2.flags &= ~VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT;
-        res = vmaAllocateMemory(allocator, &reqs, &ci2, &a, &info);
+        res = vmaAllocateMemory(allocator, &adjusted_reqs, &ci2, &a, &info);
 
         // Relax 2: if we preferred DEVICE_LOCAL, drop that preference
         if (res != VK_SUCCESS && (ci.preferredFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)) {
             auto ci3 = ci2;
             ci3.preferredFlags &= ~VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
-            res = vmaAllocateMemory(allocator, &reqs, &ci3, &a, &info);
+            res = vmaAllocateMemory(allocator, &adjusted_reqs, &ci3, &a, &info);
         }
     }
 
@@ -322,234 +332,6 @@ namespace Vulkan {
     return MemoryCommit(allocator, a, info);
 }
 
-    MemoryAllocator* const allocator;           ///< Parent memory allocation.
-    const vk::DeviceMemory memory;              ///< Vulkan memory allocation handler.
-    const u64 allocation_size;                  ///< Size of this allocation.
-    const VkMemoryPropertyFlags property_flags; ///< Vulkan memory property flags.
-    const u32 shifted_memory_type;              ///< Shifted Vulkan memory type.
-    std::vector<Range> commits;                 ///< All commit ranges done from this allocation.
-    std::span<u8> memory_mapped_span;           ///< Memory mapped span. Empty if not queried before.
-};
-
-MemoryCommit::MemoryCommit(MemoryAllocation* allocation_, VkDeviceMemory memory_, u64 begin_,
-                           u64 end_) noexcept
-    : allocation{allocation_}, memory{memory_}, begin{begin_}, end{end_} {}
-
-MemoryCommit::~MemoryCommit() {
-    Release();
-}
-
-MemoryCommit& MemoryCommit::operator=(MemoryCommit&& rhs) noexcept {
-    Release();
-    allocation = std::exchange(rhs.allocation, nullptr);
-    memory = rhs.memory;
-    begin = rhs.begin;
-    end = rhs.end;
-    span = std::exchange(rhs.span, std::span<u8>{});
-    return *this;
-}
-
-MemoryCommit::MemoryCommit(MemoryCommit&& rhs) noexcept
-    : allocation{std::exchange(rhs.allocation, nullptr)}, memory{rhs.memory}, begin{rhs.begin},
-      end{rhs.end}, span{std::exchange(rhs.span, std::span<u8>{})} {}
-
-std::span<u8> MemoryCommit::Map() {
-    if (span.empty()) {
-        span = allocation->Map().subspan(begin, end - begin);
-    }
-    return span;
-}
-
-void MemoryCommit::Release() {
-    if (allocation) {
-        allocation->Free(begin);
-    }
-}
-
-MemoryAllocator::MemoryAllocator(const Device& device_)
-    : device{device_}, allocator{device.GetAllocator()},
-      properties{device_.GetPhysical().GetMemoryProperties().memoryProperties},
-      buffer_image_granularity{
-          device_.GetPhysical().GetProperties().limits.bufferImageGranularity} {
-    // GPUs not supporting rebar may only have a region with less than 256MB host visible/device
-    // local memory. In that case, opening 2 RenderDoc captures side-by-side is not possible due to
-    // the heap running out of memory. With RenderDoc attached and only a small host/device region,
-    // only allow the stream buffer in this memory heap.
-    if (device.HasDebuggingToolAttached()) {
-        using namespace Common::Literals;
-        ForEachDeviceLocalHostVisibleHeap(device, [this](size_t index, VkMemoryHeap& heap) {
-            if (heap.size <= 256_MiB) {
-                valid_memory_types &= ~(1u << index);
-            }
-        });
-    }
-}
-
-MemoryAllocator::~MemoryAllocator() = default;
-
-vk::Image MemoryAllocator::CreateImage(const VkImageCreateInfo& ci) const {
-    const VmaAllocationCreateInfo alloc_ci = {
-        .flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT,
-        .usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE,
-        .requiredFlags = 0,
-        .preferredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
-        .memoryTypeBits = 0,
-        .pool = VK_NULL_HANDLE,
-        .pUserData = nullptr,
-        .priority = 0.f,
-    };
-
-    VkImage handle{};
-    VmaAllocation allocation{};
-
-    vk::Check(vmaCreateImage(allocator, &ci, &alloc_ci, &handle, &allocation, nullptr));
-
-    return vk::Image(handle, ci.usage, *device.GetLogical(), allocator, allocation,
-                     device.GetDispatchLoader());
-}
-
-vk::Buffer MemoryAllocator::CreateBuffer(const VkBufferCreateInfo& ci, MemoryUsage usage) const {
-    const VmaAllocationCreateInfo alloc_ci = {
-        .flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT | MemoryUsageVmaFlags(usage),
-        .usage = MemoryUsageVma(usage),
-        .requiredFlags = 0,
-        .preferredFlags = MemoryUsagePreferredVmaFlags(usage),
-        .memoryTypeBits = usage == MemoryUsage::Stream ? 0u : valid_memory_types,
-        .pool = VK_NULL_HANDLE,
-        .pUserData = nullptr,
-        .priority = 0.f,
-    };
-
-    VkBuffer handle{};
-    VmaAllocationInfo alloc_info{};
-    VmaAllocation allocation{};
-    VkMemoryPropertyFlags property_flags{};
-
-    VkResult result = vmaCreateBuffer(allocator, &ci, &alloc_ci, &handle, &allocation, &alloc_info);
-    if (result == VK_ERROR_OUT_OF_DEVICE_MEMORY) {
-        LOG_ERROR(Render_Vulkan, "Out of memory creating buffer (size: {})", ci.size);
-    }
-    vmaGetAllocationMemoryProperties(allocator, allocation, &property_flags);
-
-    u8* data = reinterpret_cast<u8*>(alloc_info.pMappedData);
-    const std::span<u8> mapped_data = data ? std::span<u8>{data, ci.size} : std::span<u8>{};
-    const bool is_coherent = property_flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
-
-    return vk::Buffer(handle, *device.GetLogical(), allocator, allocation, mapped_data, is_coherent,
-                      device.GetDispatchLoader());
-}
-
-MemoryCommit MemoryAllocator::Commit(const VkMemoryRequirements& requirements, MemoryUsage usage) {
-    // Find the fastest memory flags we can afford with the current requirements
-    const u32 type_mask = requirements.memoryTypeBits;
-    const VkMemoryPropertyFlags usage_flags = MemoryUsagePropertyFlags(usage);
-    const VkMemoryPropertyFlags flags = MemoryPropertyFlags(type_mask, usage_flags);
-    if (std::optional<MemoryCommit> commit = TryCommit(requirements, flags)) {
-        return std::move(*commit);
-    }
-    // Commit has failed, allocate more memory.
-    const u64 chunk_size = AllocationChunkSize(requirements.size);
-    if (!TryAllocMemory(flags, type_mask, chunk_size)) {
-        // TODO(Rodrigo): Handle out of memory situations in some way like flushing to guest memory.
-        throw vk::Exception(VK_ERROR_OUT_OF_DEVICE_MEMORY);
-    }
-    // Commit again, this time it won't fail since there's a fresh allocation above.
-    // If it does, there's a bug.
-    return TryCommit(requirements, flags).value();
-}
-
-bool MemoryAllocator::TryAllocMemory(VkMemoryPropertyFlags flags, u32 type_mask, u64 size) {
-    const auto type_opt = FindType(flags, type_mask);
-    if (!type_opt) {
-        return false;
-    }
-
-    // Adreno stands firm
-    const u64 aligned_size = (device.GetDriverID() == VK_DRIVER_ID_QUALCOMM_PROPRIETARY) ?
-                                 Common::AlignUp(size, 4096) :
-                                 size;
-
-    vk::DeviceMemory memory = device.GetLogical().TryAllocateMemory({
-        .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
-        .pNext = nullptr,
-        .allocationSize = aligned_size,
-        .memoryTypeIndex = *type_opt,
-    });
-
-    if (!memory) {
-        return false;
-    }
-
-    allocations.push_back(
-        std::make_unique<MemoryAllocation>(this, std::move(memory), flags, aligned_size, *type_opt));
-    return true;
-}
-
-void MemoryAllocator::ReleaseMemory(MemoryAllocation* alloc) {
-    const auto it = std::ranges::find(allocations, alloc, &std::unique_ptr<MemoryAllocation>::get);
-    ASSERT(it != allocations.end());
-    allocations.erase(it);
-}
-
-std::optional<MemoryCommit> MemoryAllocator::TryCommit(const VkMemoryRequirements& requirements,
-                                                       VkMemoryPropertyFlags flags) {
-    // Conservative, spec-compliant alignment for suballocation
-    VkDeviceSize eff_align = requirements.alignment;
-    const auto& limits = device.GetPhysical().GetProperties().limits;
-    if ((flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) &&
-        !(flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
-        // Non-coherent memory must be invalidated on atom boundary
-        if (limits.nonCoherentAtomSize > eff_align) eff_align = limits.nonCoherentAtomSize;
-    }
-    // Separate buffers to avoid stalls on tilers
-    if (buffer_image_granularity > eff_align) {
-        eff_align = buffer_image_granularity;
-    }
-    eff_align = std::bit_ceil(eff_align);
-
-    for (auto& allocation : allocations) {
-        if (!allocation->IsCompatible(flags, requirements.memoryTypeBits)) {
-            continue;
-        }
-        if (auto commit = allocation->Commit(requirements.size, eff_align)) {
-            return commit;
-        }
-    }
-    if ((flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) != 0) {
-        // Look for non device local commits on failure
-        return TryCommit(requirements, flags & ~VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
-    }
-    return std::nullopt;
-}
-
-VkMemoryPropertyFlags MemoryAllocator::MemoryPropertyFlags(u32 type_mask,
-                                                           VkMemoryPropertyFlags flags) const {
-    if (FindType(flags, type_mask)) {
-        // Found a memory type with those requirements
-        return flags;
-    }
-    if ((flags & VK_MEMORY_PROPERTY_HOST_CACHED_BIT) != 0) {
-        // Remove host cached bit in case it's not supported
-        return MemoryPropertyFlags(type_mask, flags & ~VK_MEMORY_PROPERTY_HOST_CACHED_BIT);
-    }
-    if ((flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) != 0) {
-        // Remove device local, if it's not supported by the requested resource
-        return MemoryPropertyFlags(type_mask, flags & ~VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
-    }
-    ASSERT_MSG(false, "No compatible memory types found");
-    return 0;
-}
-
-std::optional<u32> MemoryAllocator::FindType(VkMemoryPropertyFlags flags, u32 type_mask) const {
-    for (u32 type_index = 0; type_index < properties.memoryTypeCount; ++type_index) {
-        const VkMemoryPropertyFlags type_flags = properties.memoryTypes[type_index].propertyFlags;
-        if ((type_mask & (1U << type_index)) != 0 && (type_flags & flags) == flags) {
-            // The type matches in type and in the wanted properties.
-            return type_index;
-        }
-    }
-    // Failed to find index
-    return std::nullopt;
-}
 
 } // namespace Vulkan
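
Reviewer note, outside the patch itself: the substance of this change is the progressive-relaxation ladder in the new `Commit` (stay within budget and prefer DEVICE_LOCAL first, then drop the budget cap, then drop the locality preference), plus the 4 KiB size alignment applied on Qualcomm's proprietary driver. The sketch below shows that ladder in isolation so it can be eyeballed without the surrounding yuzu machinery. It is a minimal illustration, not code from this patch: AllocateWithFallback is a hypothetical free function, and it uses an explicit preferredFlags with VMA_MEMORY_USAGE_UNKNOWN rather than the patch's MemoryUsageVma* helpers, since the VMA_MEMORY_USAGE_AUTO* values generally require buffer/image creation info that vmaAllocateMemory does not have.

// Sketch: progressive-relaxation allocation with VMA (assumes a live VmaAllocator).
#include <vk_mem_alloc.h>

VkResult AllocateWithFallback(VmaAllocator allocator, const VkMemoryRequirements& reqs,
                              VmaAllocation* out_allocation, VmaAllocationInfo* out_info) {
    VmaAllocationCreateInfo ci{};
    ci.flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT;       // attempt 1: stay within budget
    ci.usage = VMA_MEMORY_USAGE_UNKNOWN;                      // explicit flags drive the choice
    ci.preferredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;  // ...and prefer device-local memory
    ci.memoryTypeBits = reqs.memoryTypeBits;

    VkResult res = vmaAllocateMemory(allocator, &reqs, &ci, out_allocation, out_info);
    if (res == VK_SUCCESS) {
        return res;
    }
    // Attempt 2: the budget cap is advisory; lift it before failing the caller.
    ci.flags &= ~VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT;
    res = vmaAllocateMemory(allocator, &reqs, &ci, out_allocation, out_info);
    if (res == VK_SUCCESS) {
        return res;
    }
    // Attempt 3: any compatible memory type beats VK_ERROR_OUT_OF_DEVICE_MEMORY.
    ci.preferredFlags &= ~VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
    return vmaAllocateMemory(allocator, &reqs, &ci, out_allocation, out_info);
}

The relaxation order matters: lifting the budget check first keeps allocations device-local whenever any VRAM remains, and locality is only surrendered as the last step before reporting out-of-device-memory, which mirrors what the new Commit above does with ci2 and ci3.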