diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt index b209b48db9..f66423a672 100644 --- a/externals/CMakeLists.txt +++ b/externals/CMakeLists.txt @@ -147,6 +147,10 @@ add_subdirectory(nx_tzdb) # VMA AddJsonPackage(vulkan-memory-allocator) +if (VulkanMemoryAllocator_ADDED AND MSVC) + target_compile_options(VulkanMemoryAllocator INTERFACE /wd4189) +endif() + if (NOT TARGET LLVM::Demangle) add_library(demangle demangle/ItaniumDemangle.cpp) target_include_directories(demangle PUBLIC ./demangle) diff --git a/src/android/app/src/main/jni/CMakeLists.txt b/src/android/app/src/main/jni/CMakeLists.txt index 1e30b16d96..9dbee1fcef 100644 --- a/src/android/app/src/main/jni/CMakeLists.txt +++ b/src/android/app/src/main/jni/CMakeLists.txt @@ -17,7 +17,7 @@ add_library(yuzu-android SHARED set_property(TARGET yuzu-android PROPERTY IMPORTED_LOCATION ${FFmpeg_LIBRARY_DIR}) -target_link_libraries(yuzu-android PRIVATE audio_core common core input_common frontend_common Vulkan::Headers) +target_link_libraries(yuzu-android PRIVATE audio_core common core input_common frontend_common Vulkan::Headers GPUOpen::VulkanMemoryAllocator) target_link_libraries(yuzu-android PRIVATE android camera2ndk EGL glad jnigraphics log) if (ARCHITECTURE_arm64) target_link_libraries(yuzu-android PRIVATE adrenotools) diff --git a/src/video_core/vulkan_common/vma.h b/src/video_core/vulkan_common/vma.h index 6e25aa1bdf..911c1114b2 100644 --- a/src/video_core/vulkan_common/vma.h +++ b/src/video_core/vulkan_common/vma.h @@ -1,3 +1,5 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later @@ -8,4 +10,4 @@ #define VMA_STATIC_VULKAN_FUNCTIONS 0 #define VMA_DYNAMIC_VULKAN_FUNCTIONS 1 -#include +#include "vk_mem_alloc.h" diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 
95c0d974cc..4d74bf00a5 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -753,18 +753,24 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR functions.vkGetInstanceProcAddr = dld.vkGetInstanceProcAddr; functions.vkGetDeviceProcAddr = dld.vkGetDeviceProcAddr; - const VmaAllocatorCreateInfo allocator_info = { - .flags = VMA_ALLOCATOR_CREATE_EXTERNALLY_SYNCHRONIZED_BIT, - .physicalDevice = physical, - .device = *logical, - .preferredLargeHeapBlockSize = 0, - .pAllocationCallbacks = nullptr, - .pDeviceMemoryCallbacks = nullptr, - .pHeapSizeLimit = nullptr, - .pVulkanFunctions = &functions, - .instance = instance, - .vulkanApiVersion = VK_API_VERSION_1_1, - .pTypeExternalMemoryHandleTypes = nullptr, + VmaAllocatorCreateFlags flags = VMA_ALLOCATOR_CREATE_EXTERNALLY_SYNCHRONIZED_BIT; + if (extensions.memory_budget) { + flags |= VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT; + } + const VmaAllocatorCreateInfo allocator_info{ + .flags = flags, + .physicalDevice = physical, + .device = *logical, + .preferredLargeHeapBlockSize = is_integrated + ? 
(64u * 1024u * 1024u) + : (256u * 1024u * 1024u), + .pAllocationCallbacks = nullptr, + .pDeviceMemoryCallbacks = nullptr, + .pHeapSizeLimit = nullptr, + .pVulkanFunctions = &functions, + .instance = instance, + .vulkanApiVersion = ApiVersion(), + .pTypeExternalMemoryHandleTypes = nullptr, }; vk::Check(vmaCreateAllocator(&allocator_info, &allocator)); diff --git a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp index 2e37615f99..4ab420afea 100644 --- a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp +++ b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp @@ -6,7 +6,10 @@ #include #include +#include #include +#include +#include #include #include "common/alignment.h" @@ -21,379 +24,302 @@ #include "video_core/vulkan_common/vulkan_wrapper.h" namespace Vulkan { -namespace { -struct Range { - u64 begin; - u64 end; + namespace { - [[nodiscard]] bool Contains(u64 iterator, u64 size) const noexcept { - return iterator < end && begin < iterator + size; - } -}; +// Helpers translating MemoryUsage to flags/usage -[[nodiscard]] u64 AllocationChunkSize(u64 required_size) { - static constexpr std::array sizes{ - 0x1000ULL << 10, 0x1400ULL << 10, 0x1800ULL << 10, 0x1c00ULL << 10, 0x2000ULL << 10, - 0x3200ULL << 10, 0x4000ULL << 10, 0x6000ULL << 10, 0x8000ULL << 10, 0xA000ULL << 10, - 0x10000ULL << 10, 0x18000ULL << 10, 0x20000ULL << 10, - }; - static_assert(std::is_sorted(sizes.begin(), sizes.end())); - - const auto it = std::ranges::lower_bound(sizes, required_size); - return it != sizes.end() ? 
*it : Common::AlignUp(required_size, 4ULL << 20); -} - -[[nodiscard]] VkMemoryPropertyFlags MemoryUsagePropertyFlags(MemoryUsage usage) { - switch (usage) { - case MemoryUsage::DeviceLocal: - return VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; - case MemoryUsage::Upload: - return VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; - case MemoryUsage::Download: - return VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | - VK_MEMORY_PROPERTY_HOST_CACHED_BIT; - case MemoryUsage::Stream: - return VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | - VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; - } - ASSERT_MSG(false, "Invalid memory usage={}", usage); - return VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; -} - -[[nodiscard]] VkMemoryPropertyFlags MemoryUsagePreferredVmaFlags(MemoryUsage usage) { - return usage != MemoryUsage::DeviceLocal ? VK_MEMORY_PROPERTY_HOST_COHERENT_BIT - : VkMemoryPropertyFlagBits{}; -} - -[[nodiscard]] VmaAllocationCreateFlags MemoryUsageVmaFlags(MemoryUsage usage) { - switch (usage) { - case MemoryUsage::Upload: - case MemoryUsage::Stream: - return VMA_ALLOCATION_CREATE_MAPPED_BIT | - VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT; - case MemoryUsage::Download: - return VMA_ALLOCATION_CREATE_MAPPED_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT; - case MemoryUsage::DeviceLocal: - return {}; - } - return {}; -} - -[[nodiscard]] VmaMemoryUsage MemoryUsageVma(MemoryUsage usage) { - switch (usage) { - case MemoryUsage::DeviceLocal: - case MemoryUsage::Stream: - return VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE; - case MemoryUsage::Upload: - case MemoryUsage::Download: - return VMA_MEMORY_USAGE_AUTO_PREFER_HOST; - } - return VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE; -} - -} // Anonymous namespace - -class MemoryAllocation { -public: - explicit MemoryAllocation(MemoryAllocator* const allocator_, vk::DeviceMemory memory_, - VkMemoryPropertyFlags properties, u64 
allocation_size_, u32 type) - : allocator{allocator_}, memory{std::move(memory_)}, allocation_size{allocation_size_}, - property_flags{properties}, shifted_memory_type{1U << type} {} - - MemoryAllocation& operator=(const MemoryAllocation&) = delete; - MemoryAllocation(const MemoryAllocation&) = delete; - - MemoryAllocation& operator=(MemoryAllocation&&) = delete; - MemoryAllocation(MemoryAllocation&&) = delete; - - [[nodiscard]] std::optional Commit(VkDeviceSize size, VkDeviceSize alignment) { - const std::optional alloc = FindFreeRegion(size, alignment); - if (!alloc) { - // Signal out of memory, it'll try to do more allocations. - return std::nullopt; + [[maybe_unused]] VkMemoryPropertyFlags MemoryUsagePropertyFlags(MemoryUsage usage) { + switch (usage) { + case MemoryUsage::DeviceLocal: + return VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + case MemoryUsage::Upload: + return VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; + case MemoryUsage::Download: + return VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | + VK_MEMORY_PROPERTY_HOST_CACHED_BIT; + case MemoryUsage::Stream: + return VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | + VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | + VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; + } + ASSERT_MSG(false, "Invalid memory usage={}", usage); + return VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; } - const Range range{ - .begin = *alloc, - .end = *alloc + size, + + [[nodiscard]] VkMemoryPropertyFlags MemoryUsagePreferredVmaFlags(MemoryUsage usage) { + return usage != MemoryUsage::DeviceLocal ? 
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT + : VkMemoryPropertyFlagBits{}; + } + + [[nodiscard]] VmaAllocationCreateFlags MemoryUsageVmaFlags(MemoryUsage usage) { + switch (usage) { + case MemoryUsage::Upload: + case MemoryUsage::Stream: + return VMA_ALLOCATION_CREATE_MAPPED_BIT | + VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT; + case MemoryUsage::Download: + return VMA_ALLOCATION_CREATE_MAPPED_BIT | + VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT; + case MemoryUsage::DeviceLocal: + return {}; + } + return {}; + } + + [[nodiscard]] VmaMemoryUsage MemoryUsageVma(MemoryUsage usage) { + switch (usage) { + case MemoryUsage::DeviceLocal: + case MemoryUsage::Stream: + return VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE; + case MemoryUsage::Upload: + case MemoryUsage::Download: + return VMA_MEMORY_USAGE_AUTO_PREFER_HOST; + } + return VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE; + } + + +// This avoids calling vkGetBufferMemoryRequirements* directly. + template + static VkBuffer GetVkHandleFromBuffer(const T &buf) { + if constexpr (requires { static_cast(buf); }) { + return static_cast(buf); + } else if constexpr (requires {{ buf.GetHandle() } -> std::convertible_to; }) { + return buf.GetHandle(); + } else if constexpr (requires {{ buf.Handle() } -> std::convertible_to; }) { + return buf.Handle(); + } else if constexpr (requires {{ buf.vk_handle() } -> std::convertible_to; }) { + return buf.vk_handle(); + } else { + static_assert(sizeof(T) == 0, "Cannot extract VkBuffer handle from vk::Buffer"); + return VK_NULL_HANDLE; + } + } + + } // namespace + +//MemoryCommit is now VMA-backed + MemoryCommit::MemoryCommit(VmaAllocator alloc, VmaAllocation a, + const VmaAllocationInfo &info) noexcept + : allocator{alloc}, allocation{a}, memory{info.deviceMemory}, + offset{info.offset}, size{info.size}, mapped_ptr{info.pMappedData} {} + + MemoryCommit::~MemoryCommit() { Release(); } + + MemoryCommit::MemoryCommit(MemoryCommit &&rhs) noexcept + : allocator{std::exchange(rhs.allocator, nullptr)}, + 
allocation{std::exchange(rhs.allocation, nullptr)}, + memory{std::exchange(rhs.memory, VK_NULL_HANDLE)}, + offset{std::exchange(rhs.offset, 0)}, + size{std::exchange(rhs.size, 0)}, + mapped_ptr{std::exchange(rhs.mapped_ptr, nullptr)} {} + + MemoryCommit &MemoryCommit::operator=(MemoryCommit &&rhs) noexcept { + if (this != &rhs) { + Release(); + allocator = std::exchange(rhs.allocator, nullptr); + allocation = std::exchange(rhs.allocation, nullptr); + memory = std::exchange(rhs.memory, VK_NULL_HANDLE); + offset = std::exchange(rhs.offset, 0); + size = std::exchange(rhs.size, 0); + mapped_ptr = std::exchange(rhs.mapped_ptr, nullptr); + } + return *this; + } + + std::span MemoryCommit::Map() + { + if (!allocation) return {}; + if (!mapped_ptr) { + if (vmaMapMemory(allocator, allocation, &mapped_ptr) != VK_SUCCESS) return {}; + } + const size_t n = static_cast(std::min(size, + std::numeric_limits::max())); + return std::span{static_cast(mapped_ptr), n}; + } + + std::span MemoryCommit::Map() const + { + if (!allocation) return {}; + if (!mapped_ptr) { + void *p = nullptr; + if (vmaMapMemory(allocator, allocation, &p) != VK_SUCCESS) return {}; + const_cast(this)->mapped_ptr = p; + } + const size_t n = static_cast(std::min(size, + std::numeric_limits::max())); + return std::span{static_cast(mapped_ptr), n}; + } + + void MemoryCommit::Unmap() + { + if (allocation && mapped_ptr) { + vmaUnmapMemory(allocator, allocation); + mapped_ptr = nullptr; + } + } + + void MemoryCommit::Release() { + if (allocation && allocator) { + if (mapped_ptr) { + vmaUnmapMemory(allocator, allocation); + mapped_ptr = nullptr; + } + vmaFreeMemory(allocator, allocation); + } + allocation = nullptr; + allocator = nullptr; + memory = VK_NULL_HANDLE; + offset = 0; + size = 0; + } + + MemoryAllocator::MemoryAllocator(const Device &device_) + : device{device_}, allocator{device.GetAllocator()}, + properties{device_.GetPhysical().GetMemoryProperties().memoryProperties}, + buffer_image_granularity{ + 
device_.GetPhysical().GetProperties().limits.bufferImageGranularity} { + + // Preserve the previous "RenderDoc small heap" trimming behavior that we had in original vma minus the heap bug + if (device.HasDebuggingToolAttached()) + { + using namespace Common::Literals; + ForEachDeviceLocalHostVisibleHeap(device, [this](size_t heap_idx, VkMemoryHeap &heap) { + if (heap.size <= 256_MiB) { + for (u32 t = 0; t < properties.memoryTypeCount; ++t) { + if (properties.memoryTypes[t].heapIndex == heap_idx) { + valid_memory_types &= ~(1u << t); + } + } + } + }); + } + } + + MemoryAllocator::~MemoryAllocator() = default; + + vk::Image MemoryAllocator::CreateImage(const VkImageCreateInfo &ci) const + { + const VmaAllocationCreateInfo alloc_ci = { + .flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT, + .usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE, + .requiredFlags = 0, + .preferredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, + .memoryTypeBits = 0, + .pool = VK_NULL_HANDLE, + .pUserData = nullptr, + .priority = 0.f, }; - commits.insert(std::ranges::upper_bound(commits, *alloc, {}, &Range::begin), range); - return std::make_optional(this, *memory, *alloc, *alloc + size); + + VkImage handle{}; + VmaAllocation allocation{}; + vk::Check(vmaCreateImage(allocator, &ci, &alloc_ci, &handle, &allocation, nullptr)); + return vk::Image(handle, ci.usage, *device.GetLogical(), allocator, allocation, + device.GetDispatchLoader()); } - void Free(u64 begin) { - const auto it = std::ranges::find(commits, begin, &Range::begin); - ASSERT_MSG(it != commits.end(), "Invalid commit"); - commits.erase(it); - if (commits.empty()) { - // Do not call any code involving 'this' after this call, the object will be destroyed - allocator->ReleaseMemory(this); - } + vk::Buffer + MemoryAllocator::CreateBuffer(const VkBufferCreateInfo &ci, MemoryUsage usage) const + { + const VmaAllocationCreateInfo alloc_ci = { + .flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT | MemoryUsageVmaFlags(usage), + .usage = 
MemoryUsageVma(usage), + .requiredFlags = 0, + .preferredFlags = MemoryUsagePreferredVmaFlags(usage), + .memoryTypeBits = usage == MemoryUsage::Stream ? 0u : valid_memory_types, + .pool = VK_NULL_HANDLE, + .pUserData = nullptr, + .priority = 0.f, + }; + + VkBuffer handle{}; + VmaAllocationInfo alloc_info{}; + VmaAllocation allocation{}; + VkMemoryPropertyFlags property_flags{}; + + vk::Check(vmaCreateBuffer(allocator, &ci, &alloc_ci, &handle, &allocation, &alloc_info)); + vmaGetAllocationMemoryProperties(allocator, allocation, &property_flags); + + u8 *data = reinterpret_cast(alloc_info.pMappedData); + const std::span mapped_data = data ? std::span{data, ci.size} : std::span{}; + const bool is_coherent = (property_flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) != 0; + + return vk::Buffer(handle, *device.GetLogical(), allocator, allocation, mapped_data, + is_coherent, + device.GetDispatchLoader()); } - [[nodiscard]] std::span Map() { - if (memory_mapped_span.empty()) { - u8* const raw_pointer = memory.Map(0, allocation_size); - memory_mapped_span = std::span(raw_pointer, allocation_size); - } - return memory_mapped_span; - } + MemoryCommit MemoryAllocator::Commit(const VkMemoryRequirements &reqs, MemoryUsage usage) + { + const auto vma_usage = MemoryUsageVma(usage); + VmaAllocationCreateInfo ci{}; + ci.flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT | MemoryUsageVmaFlags(usage); + ci.usage = vma_usage; + ci.memoryTypeBits = reqs.memoryTypeBits & valid_memory_types; + ci.requiredFlags = 0; + ci.preferredFlags = MemoryUsagePreferredVmaFlags(usage); - /// Returns whether this allocation is compatible with the arguments. 
- [[nodiscard]] bool IsCompatible(VkMemoryPropertyFlags flags, u32 type_mask) const { - return (flags & property_flags) == flags && (type_mask & shifted_memory_type) != 0; - } + VmaAllocation a{}; + VmaAllocationInfo info{}; + VkResult res = vmaAllocateMemory(allocator, &reqs, &ci, &a, &info); -private: - [[nodiscard]] static constexpr u32 ShiftType(u32 type) { - return 1U << type; - } + if (res != VK_SUCCESS) { + // Relax 1: drop budget constraint + auto ci2 = ci; + ci2.flags &= ~VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT; + res = vmaAllocateMemory(allocator, &reqs, &ci2, &a, &info); - [[nodiscard]] std::optional FindFreeRegion(u64 size, u64 alignment) noexcept { - ASSERT(std::has_single_bit(alignment)); - const u64 alignment_log2 = std::countr_zero(alignment); - std::optional candidate; - u64 iterator = 0; - auto commit = commits.begin(); - while (iterator + size <= allocation_size) { - candidate = candidate.value_or(iterator); - if (commit == commits.end()) { - break; + // Relax 2: if we preferred DEVICE_LOCAL, drop that preference + if (res != VK_SUCCESS && (ci.preferredFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)) { + auto ci3 = ci2; + ci3.preferredFlags &= ~VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + res = vmaAllocateMemory(allocator, &reqs, &ci3, &a, &info); } - if (commit->Contains(*candidate, size)) { - candidate = std::nullopt; + } + + vk::Check(res); + return MemoryCommit(allocator, a, info); + } + + MemoryCommit MemoryAllocator::Commit(const vk::Buffer &buffer, MemoryUsage usage) { + // Allocate memory appropriate for this buffer automatically + const auto vma_usage = MemoryUsageVma(usage); + + VmaAllocationCreateInfo ci{}; + ci.flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT | MemoryUsageVmaFlags(usage); + ci.usage = vma_usage; + ci.requiredFlags = 0; + ci.preferredFlags = MemoryUsagePreferredVmaFlags(usage); + ci.pool = VK_NULL_HANDLE; + ci.pUserData = nullptr; + ci.priority = 0.0f; + + const VkBuffer raw = *buffer; + + VmaAllocation a{}; + VmaAllocationInfo 
info{}; + + // Let VMA infer memory requirements from the buffer + VkResult res = vmaAllocateMemoryForBuffer(allocator, raw, &ci, &a, &info); + + if (res != VK_SUCCESS) { + auto ci2 = ci; + ci2.flags &= ~VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT; + res = vmaAllocateMemoryForBuffer(allocator, raw, &ci2, &a, &info); + + if (res != VK_SUCCESS && (ci.preferredFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)) { + auto ci3 = ci2; + ci3.preferredFlags &= ~VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT; + res = vmaAllocateMemoryForBuffer(allocator, raw, &ci3, &a, &info); } - iterator = Common::AlignUpLog2(commit->end, alignment_log2); - ++commit; } - return candidate; + + vk::Check(res); + vk::Check(vmaBindBufferMemory2(allocator, a, 0, raw, nullptr)); + return MemoryCommit(allocator, a, info); } - MemoryAllocator* const allocator; ///< Parent memory allocation. - const vk::DeviceMemory memory; ///< Vulkan memory allocation handler. - const u64 allocation_size; ///< Size of this allocation. - const VkMemoryPropertyFlags property_flags; ///< Vulkan memory property flags. - const u32 shifted_memory_type; ///< Shifted Vulkan memory type. - std::vector commits; ///< All commit ranges done from this allocation. - std::span memory_mapped_span; ///< Memory mapped span. Empty if not queried before. 
-}; - -MemoryCommit::MemoryCommit(MemoryAllocation* allocation_, VkDeviceMemory memory_, u64 begin_, - u64 end_) noexcept - : allocation{allocation_}, memory{memory_}, begin{begin_}, end{end_} {} - -MemoryCommit::~MemoryCommit() { - Release(); -} - -MemoryCommit& MemoryCommit::operator=(MemoryCommit&& rhs) noexcept { - Release(); - allocation = std::exchange(rhs.allocation, nullptr); - memory = rhs.memory; - begin = rhs.begin; - end = rhs.end; - span = std::exchange(rhs.span, std::span{}); - return *this; -} - -MemoryCommit::MemoryCommit(MemoryCommit&& rhs) noexcept - : allocation{std::exchange(rhs.allocation, nullptr)}, memory{rhs.memory}, begin{rhs.begin}, - end{rhs.end}, span{std::exchange(rhs.span, std::span{})} {} - -std::span MemoryCommit::Map() { - if (span.empty()) { - span = allocation->Map().subspan(begin, end - begin); - } - return span; -} - -void MemoryCommit::Release() { - if (allocation) { - allocation->Free(begin); - } -} - -MemoryAllocator::MemoryAllocator(const Device& device_) - : device{device_}, allocator{device.GetAllocator()}, - properties{device_.GetPhysical().GetMemoryProperties().memoryProperties}, - buffer_image_granularity{ - device_.GetPhysical().GetProperties().limits.bufferImageGranularity} { - // GPUs not supporting rebar may only have a region with less than 256MB host visible/device - // local memory. In that case, opening 2 RenderDoc captures side-by-side is not possible due to - // the heap running out of memory. With RenderDoc attached and only a small host/device region, - // only allow the stream buffer in this memory heap. 
- if (device.HasDebuggingToolAttached()) { - using namespace Common::Literals; - ForEachDeviceLocalHostVisibleHeap(device, [this](size_t index, VkMemoryHeap& heap) { - if (heap.size <= 256_MiB) { - valid_memory_types &= ~(1u << index); - } - }); - } -} - -MemoryAllocator::~MemoryAllocator() = default; - -vk::Image MemoryAllocator::CreateImage(const VkImageCreateInfo& ci) const { - const VmaAllocationCreateInfo alloc_ci = { - .flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT, - .usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE, - .requiredFlags = 0, - .preferredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, - .memoryTypeBits = 0, - .pool = VK_NULL_HANDLE, - .pUserData = nullptr, - .priority = 0.f, - }; - - VkImage handle{}; - VmaAllocation allocation{}; - - vk::Check(vmaCreateImage(allocator, &ci, &alloc_ci, &handle, &allocation, nullptr)); - - return vk::Image(handle, ci.usage, *device.GetLogical(), allocator, allocation, - device.GetDispatchLoader()); -} - -vk::Buffer MemoryAllocator::CreateBuffer(const VkBufferCreateInfo& ci, MemoryUsage usage) const { - const VmaAllocationCreateInfo alloc_ci = { - .flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT | MemoryUsageVmaFlags(usage), - .usage = MemoryUsageVma(usage), - .requiredFlags = 0, - .preferredFlags = MemoryUsagePreferredVmaFlags(usage), - .memoryTypeBits = usage == MemoryUsage::Stream ? 0u : valid_memory_types, - .pool = VK_NULL_HANDLE, - .pUserData = nullptr, - .priority = 0.f, - }; - - VkBuffer handle{}; - VmaAllocationInfo alloc_info{}; - VmaAllocation allocation{}; - VkMemoryPropertyFlags property_flags{}; - - vk::Check(vmaCreateBuffer(allocator, &ci, &alloc_ci, &handle, &allocation, &alloc_info)); - vmaGetAllocationMemoryProperties(allocator, allocation, &property_flags); - - u8* data = reinterpret_cast(alloc_info.pMappedData); - const std::span mapped_data = data ? 
std::span{data, ci.size} : std::span{}; - const bool is_coherent = property_flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; - - return vk::Buffer(handle, *device.GetLogical(), allocator, allocation, mapped_data, is_coherent, - device.GetDispatchLoader()); -} - -MemoryCommit MemoryAllocator::Commit(const VkMemoryRequirements& requirements, MemoryUsage usage) { - // Find the fastest memory flags we can afford with the current requirements - const u32 type_mask = requirements.memoryTypeBits; - const VkMemoryPropertyFlags usage_flags = MemoryUsagePropertyFlags(usage); - const VkMemoryPropertyFlags flags = MemoryPropertyFlags(type_mask, usage_flags); - if (std::optional commit = TryCommit(requirements, flags)) { - return std::move(*commit); - } - // Commit has failed, allocate more memory. - const u64 chunk_size = AllocationChunkSize(requirements.size); - if (!TryAllocMemory(flags, type_mask, chunk_size)) { - // TODO(Rodrigo): Handle out of memory situations in some way like flushing to guest memory. - throw vk::Exception(VK_ERROR_OUT_OF_DEVICE_MEMORY); - } - // Commit again, this time it won't fail since there's a fresh allocation above. - // If it does, there's a bug. - return TryCommit(requirements, flags).value(); - } - -bool MemoryAllocator::TryAllocMemory(VkMemoryPropertyFlags flags, u32 type_mask, u64 size) { - const auto type_opt = FindType(flags, type_mask); - if (!type_opt) { - return false; - } - - // Adreno stands firm - const u64 aligned_size = (device.GetDriverID() == VK_DRIVER_ID_QUALCOMM_PROPRIETARY) ? 
- Common::AlignUp(size, 4096) : - size; - - vk::DeviceMemory memory = device.GetLogical().TryAllocateMemory({ - .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, - .pNext = nullptr, - .allocationSize = aligned_size, - .memoryTypeIndex = *type_opt, - }); - - if (!memory) { - return false; - } - - allocations.push_back( - std::make_unique(this, std::move(memory), flags, aligned_size, *type_opt)); - return true; -} - -void MemoryAllocator::ReleaseMemory(MemoryAllocation* alloc) { - const auto it = std::ranges::find(allocations, alloc, &std::unique_ptr::get); - ASSERT(it != allocations.end()); - allocations.erase(it); -} - -std::optional MemoryAllocator::TryCommit(const VkMemoryRequirements& requirements, - VkMemoryPropertyFlags flags) { - // Conservative, spec-compliant alignment for suballocation - VkDeviceSize eff_align = requirements.alignment; - const auto& limits = device.GetPhysical().GetProperties().limits; - if ((flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) && - !(flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) { - // Non-coherent memory must be invalidated on atom boundary - if (limits.nonCoherentAtomSize > eff_align) eff_align = limits.nonCoherentAtomSize; - } - // Separate buffers to avoid stalls on tilers - if (buffer_image_granularity > eff_align) { - eff_align = buffer_image_granularity; - } - eff_align = std::bit_ceil(eff_align); - - for (auto& allocation : allocations) { - if (!allocation->IsCompatible(flags, requirements.memoryTypeBits)) { - continue; - } - if (auto commit = allocation->Commit(requirements.size, eff_align)) { - return commit; - } - } - if ((flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) != 0) { - // Look for non device local commits on failure - return TryCommit(requirements, flags & ~VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); - } - return std::nullopt; -} - -VkMemoryPropertyFlags MemoryAllocator::MemoryPropertyFlags(u32 type_mask, - VkMemoryPropertyFlags flags) const { - if (FindType(flags, type_mask)) { - // Found a memory type with those 
requirements - return flags; - } - if ((flags & VK_MEMORY_PROPERTY_HOST_CACHED_BIT) != 0) { - // Remove host cached bit in case it's not supported - return MemoryPropertyFlags(type_mask, flags & ~VK_MEMORY_PROPERTY_HOST_CACHED_BIT); - } - if ((flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) != 0) { - // Remove device local, if it's not supported by the requested resource - return MemoryPropertyFlags(type_mask, flags & ~VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); - } - ASSERT_MSG(false, "No compatible memory types found"); - return 0; -} - -std::optional MemoryAllocator::FindType(VkMemoryPropertyFlags flags, u32 type_mask) const { - for (u32 type_index = 0; type_index < properties.memoryTypeCount; ++type_index) { - const VkMemoryPropertyFlags type_flags = properties.memoryTypes[type_index].propertyFlags; - if ((type_mask & (1U << type_index)) != 0 && (type_flags & flags) == flags) { - // The type matches in type and in the wanted properties. - return type_index; - } - } - // Failed to find index - return std::nullopt; -} - } // namespace Vulkan diff --git a/src/video_core/vulkan_common/vulkan_memory_allocator.h b/src/video_core/vulkan_common/vulkan_memory_allocator.h index 38a182bcba..581f2e66d2 100644 --- a/src/video_core/vulkan_common/vulkan_memory_allocator.h +++ b/src/video_core/vulkan_common/vulkan_memory_allocator.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later @@ -6,138 +9,134 @@ #include #include #include + #include "common/common_types.h" #include "video_core/vulkan_common/vulkan_device.h" #include "video_core/vulkan_common/vulkan_wrapper.h" - -VK_DEFINE_HANDLE(VmaAllocator) +#include "video_core/vulkan_common/vma.h" namespace Vulkan { -class Device; -class MemoryMap; -class MemoryAllocation; + class Device; /// Hints and requirements for the backing memory type of a commit 
-enum class MemoryUsage { - DeviceLocal, ///< Requests device local host visible buffer, falling back to device local - ///< memory. - Upload, ///< Requires a host visible memory type optimized for CPU to GPU uploads - Download, ///< Requires a host visible memory type optimized for GPU to CPU readbacks - Stream, ///< Requests device local host visible buffer, falling back host memory. -}; + enum class MemoryUsage { + DeviceLocal, ///< Requests device local host visible buffer, falling back to device local memory. + Upload, ///< Requires a host visible memory type optimized for CPU to GPU uploads + Download, ///< Requires a host visible memory type optimized for GPU to CPU readbacks + Stream, ///< Requests device local host visible buffer, falling back host memory. + }; -template -void ForEachDeviceLocalHostVisibleHeap(const Device& device, F&& f) { - auto memory_props = device.GetPhysical().GetMemoryProperties().memoryProperties; - for (size_t i = 0; i < memory_props.memoryTypeCount; i++) { - auto& memory_type = memory_props.memoryTypes[i]; - if ((memory_type.propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) && - (memory_type.propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)) { - f(memory_type.heapIndex, memory_props.memoryHeaps[memory_type.heapIndex]); + template + void ForEachDeviceLocalHostVisibleHeap(const Device &device, F &&f) { + auto memory_props = device.GetPhysical().GetMemoryProperties().memoryProperties; + for (size_t i = 0; i < memory_props.memoryTypeCount; i++) { + auto &memory_type = memory_props.memoryTypes[i]; + if ((memory_type.propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) && + (memory_type.propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)) { + f(memory_type.heapIndex, memory_props.memoryHeaps[memory_type.heapIndex]); + } } } -} -/// Ownership handle of a memory commitment. -/// Points to a subregion of a memory allocation. 
-class MemoryCommit {
-public:
-    explicit MemoryCommit() noexcept = default;
-    explicit MemoryCommit(MemoryAllocation* allocation_, VkDeviceMemory memory_, u64 begin_,
-                          u64 end_) noexcept;
-    ~MemoryCommit();
+/// Ownership handle of a memory commitment (real VMA allocation).
+/// Move-only: copying is deleted, moving is supported.
+/// A default-constructed commit holds no allocation and converts to false.
+    class MemoryCommit {
+    public:
+        /// Creates an empty commit that owns no allocation.
+        MemoryCommit() noexcept = default;
 
-    MemoryCommit& operator=(MemoryCommit&&) noexcept;
-    MemoryCommit(MemoryCommit&&) noexcept;
+        /// Wraps an allocation obtained from the given VMA allocator.
+        /// NOTE(review): presumably memory/offset/size/mapped_ptr are taken from `info`
+        /// -- confirm against the definition in the .cpp.
+        MemoryCommit(VmaAllocator allocator, VmaAllocation allocation,
+                     const VmaAllocationInfo &info) noexcept;
 
-    MemoryCommit& operator=(const MemoryCommit&) = delete;
-    MemoryCommit(const MemoryCommit&) = delete;
+        ~MemoryCommit();
 
-    /// Returns a host visible memory map.
-    /// It will map the backing allocation if it hasn't been mapped before.
-    std::span<u8> Map();
+        MemoryCommit(const MemoryCommit &) = delete;
 
-    /// Returns the Vulkan memory handler.
-    VkDeviceMemory Memory() const {
-        return memory;
-    }
+        MemoryCommit &operator=(const MemoryCommit &) = delete;
 
-    /// Returns the start position of the commit relative to the allocation.
-    VkDeviceSize Offset() const {
-        return static_cast<VkDeviceSize>(begin);
-    }
+        MemoryCommit(MemoryCommit &&) noexcept;
 
-private:
-    void Release();
+        MemoryCommit &operator=(MemoryCommit &&) noexcept;
 
-    MemoryAllocation* allocation{}; ///< Pointer to the large memory allocation.
-    VkDeviceMemory memory{};        ///< Vulkan device memory handler.
-    u64 begin{};                    ///< Beginning offset in bytes to where the commit exists.
-    u64 end{};                      ///< Offset in bytes where the commit ends.
-    std::span<u8> span;             ///< Host visible memory span. Empty if not queried before.
-};
+        /// Returns a view of the commit's host memory.
+        /// NOTE(review): whether this maps lazily or reuses `mapped_ptr` is decided in the
+        /// .cpp -- confirm before relying on either behaviour.
+        [[nodiscard]] std::span<u8> Map();
+
+        /// Const overload of Map().
+        /// NOTE(review): presumably requires the memory to be mapped already -- confirm.
+        [[nodiscard]] std::span<const u8> Map() const;
+
+        /// Counterpart of Map().
+        /// NOTE(review): confirm in the .cpp whether persistently mapped memory is left alone.
+        void Unmap();
+
+        /// True when this commit holds an allocation handle.
+        explicit operator bool() const noexcept { return allocation != nullptr; }
+
+        /// Returns the VkDeviceMemory backing this commit.
+        VkDeviceMemory Memory() const noexcept { return memory; }
+
+        /// Returns the offset of this commit inside Memory().
+        VkDeviceSize Offset() const noexcept { return offset; }
+
+        /// Returns the size of this commit.
+        VkDeviceSize Size() const noexcept { return size; }
+
+        /// Returns the VMA allocation handle (null for an empty commit).
+        VmaAllocation Allocation() const noexcept { return allocation; }
+
+    private:
+        /// NOTE(review): presumably frees the allocation and resets the handle -- confirm.
+        void Release();
+
+        VmaAllocator allocator{};       ///< VMA allocator
+        VmaAllocation allocation{};     ///< VMA allocation handle
+        VkDeviceMemory memory{};        ///< Underlying VkDeviceMemory chosen by VMA
+        VkDeviceSize offset{};          ///< Offset of this allocation inside VkDeviceMemory
+        VkDeviceSize size{};            ///< Size of the allocation
+        void *mapped_ptr{};             ///< Optional persistent mapped pointer
+    };
 
 /// Memory allocator container.
 /// Allocates and releases memory allocations on demand.
-class MemoryAllocator {
-    friend MemoryAllocation;
+    class MemoryAllocator {
+    public:
+        /**
+         * Construct memory allocator
+         *
+         * @param device_ Device to allocate from
+         *
+         * @throw vk::Exception on failure
+         */
+        explicit MemoryAllocator(const Device &device_);
 
-public:
-    /**
-     * Construct memory allocator
-     *
-     * @param device_ Device to allocate from
-     *
-     * @throw vk::Exception on failure
-     */
-    explicit MemoryAllocator(const Device& device_);
-    ~MemoryAllocator();
+        ~MemoryAllocator();
 
-    MemoryAllocator& operator=(const MemoryAllocator&) = delete;
-    MemoryAllocator(const MemoryAllocator&) = delete;
+        MemoryAllocator &operator=(const MemoryAllocator &) = delete;
 
-    vk::Image CreateImage(const VkImageCreateInfo& ci) const;
+        MemoryAllocator(const MemoryAllocator &) = delete;
 
-    vk::Buffer CreateBuffer(const VkBufferCreateInfo& ci, MemoryUsage usage) const;
+        /// Creates an image from the given create info.
+        vk::Image CreateImage(const VkImageCreateInfo &ci) const;
 
-    /**
-     * Commits a memory with the specified requirements.
-     *
-     * @param requirements Requirements returned from a Vulkan call.
-     * @param usage        Indicates how the memory will be used.
-     *
-     * @returns A memory commit.
-     */
-    MemoryCommit Commit(const VkMemoryRequirements& requirements, MemoryUsage usage);
+        /// Creates a buffer from the given create info for the requested memory usage.
+        vk::Buffer CreateBuffer(const VkBufferCreateInfo &ci, MemoryUsage usage) const;
 
-    /// Commits memory required by the buffer and binds it.
-    MemoryCommit Commit(const vk::Buffer& buffer, MemoryUsage usage);
+        /**
+         * Commits a memory with the specified requirements.
+         *
+         * @param requirements Requirements returned from a Vulkan call.
+         * @param usage        Indicates how the memory will be used.
+         *
+         * @returns A memory commit.
+         */
+        MemoryCommit Commit(const VkMemoryRequirements &requirements, MemoryUsage usage);
 
-private:
-    /// Tries to allocate a chunk of memory.
-    bool TryAllocMemory(VkMemoryPropertyFlags flags, u32 type_mask, u64 size);
+        /// Commits memory required by the buffer and binds it (for buffers created outside VMA).
+        MemoryCommit Commit(const vk::Buffer &buffer, MemoryUsage usage);
 
-    /// Releases a chunk of memory.
-    void ReleaseMemory(MemoryAllocation* alloc);
+    private:
+        /// Returns true when `u` is one of the VMA_MEMORY_USAGE_AUTO* usages.
+        static bool IsAutoUsage(VmaMemoryUsage u) noexcept {
+            switch (u) {
+                case VMA_MEMORY_USAGE_AUTO:
+                case VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE:
+                case VMA_MEMORY_USAGE_AUTO_PREFER_HOST:
+                    return true;
+                default:
+                    return false;
+            }
+        }
 
-    /// Tries to allocate a memory commit.
-    std::optional<MemoryCommit> TryCommit(const VkMemoryRequirements& requirements,
-                                          VkMemoryPropertyFlags flags);
-
-    /// Returns the fastest compatible memory property flags from the wanted flags.
-    VkMemoryPropertyFlags MemoryPropertyFlags(u32 type_mask, VkMemoryPropertyFlags flags) const;
-
-    /// Returns index to the fastest memory type compatible with the passed requirements.
-    std::optional<u32> FindType(VkMemoryPropertyFlags flags, u32 type_mask) const;
-
-    const Device& device;                              ///< Device handle.
-    VmaAllocator allocator;                            ///< Vma allocator.
-    const VkPhysicalDeviceMemoryProperties properties; ///< Physical device properties.
-    std::vector<std::unique_ptr<MemoryAllocation>> allocations; ///< Current allocations.
-    VkDeviceSize buffer_image_granularity; // The granularity for adjacent offsets between buffers
-                                           // and optimal images
-    u32 valid_memory_types{~0u};
-};
+        const Device &device;                              ///< Device handle.
+        VmaAllocator allocator;                            ///< VMA allocator.
+        const VkPhysicalDeviceMemoryProperties properties; ///< Physical device memory properties.
+        VkDeviceSize buffer_image_granularity;             ///< Adjacent buffer/image granularity
+        u32 valid_memory_types{~0u};                       ///< Bitmask, all bits set by default.
+    };
 
 } // namespace Vulkan