[VMA] Phase 3:- Hand all allocation & binding to VMA (#362)

This patch completely replaces the custom sub-allocator with VMA and delegates all allocation and binding to VMA.
Overall, the patch integrates VMA and simplifies memory management.
Once these changes pass testing, they will be used as a base for further improvements.
Note to testers: please test for stability and performance.

Co-authored-by: crueter <crueter@eden-emu.dev>
Reviewed-on: #362
Reviewed-by: crueter <crueter@eden-emu.dev>
Reviewed-by: MaranBr <maranbr@outlook.com>
Co-authored-by: wildcard <wildcard@eden-emu.dev>
Co-committed-by: wildcard <wildcard@eden-emu.dev>
This commit is contained in:
wildcard 2025-09-01 00:20:03 +02:00 committed by crueter
parent 10c76568b8
commit e60fd4b68b
Signed by: crueter
GPG key ID: 425ACD2D4830EBC6
6 changed files with 411 additions and 474 deletions

View file

@ -147,6 +147,10 @@ add_subdirectory(nx_tzdb)
# VMA
AddJsonPackage(vulkan-memory-allocator)
if (VulkanMemoryAllocator_ADDED AND MSVC)
target_compile_options(VulkanMemoryAllocator INTERFACE /wd4189)
endif()
if (NOT TARGET LLVM::Demangle)
add_library(demangle demangle/ItaniumDemangle.cpp)
target_include_directories(demangle PUBLIC ./demangle)

View file

@ -17,7 +17,7 @@ add_library(yuzu-android SHARED
set_property(TARGET yuzu-android PROPERTY IMPORTED_LOCATION ${FFmpeg_LIBRARY_DIR})
target_link_libraries(yuzu-android PRIVATE audio_core common core input_common frontend_common Vulkan::Headers)
target_link_libraries(yuzu-android PRIVATE audio_core common core input_common frontend_common Vulkan::Headers GPUOpen::VulkanMemoryAllocator)
target_link_libraries(yuzu-android PRIVATE android camera2ndk EGL glad jnigraphics log)
if (ARCHITECTURE_arm64)
target_link_libraries(yuzu-android PRIVATE adrenotools)

View file

@ -1,3 +1,5 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@ -8,4 +10,4 @@
#define VMA_STATIC_VULKAN_FUNCTIONS 0
#define VMA_DYNAMIC_VULKAN_FUNCTIONS 1
#include <vk_mem_alloc.h>
#include "vk_mem_alloc.h"

View file

@ -753,18 +753,24 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
functions.vkGetInstanceProcAddr = dld.vkGetInstanceProcAddr;
functions.vkGetDeviceProcAddr = dld.vkGetDeviceProcAddr;
const VmaAllocatorCreateInfo allocator_info = {
.flags = VMA_ALLOCATOR_CREATE_EXTERNALLY_SYNCHRONIZED_BIT,
.physicalDevice = physical,
.device = *logical,
.preferredLargeHeapBlockSize = 0,
.pAllocationCallbacks = nullptr,
.pDeviceMemoryCallbacks = nullptr,
.pHeapSizeLimit = nullptr,
.pVulkanFunctions = &functions,
.instance = instance,
.vulkanApiVersion = VK_API_VERSION_1_1,
.pTypeExternalMemoryHandleTypes = nullptr,
VmaAllocatorCreateFlags flags = VMA_ALLOCATOR_CREATE_EXTERNALLY_SYNCHRONIZED_BIT;
if (extensions.memory_budget) {
flags |= VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT;
}
const VmaAllocatorCreateInfo allocator_info{
.flags = flags,
.physicalDevice = physical,
.device = *logical,
.preferredLargeHeapBlockSize = is_integrated
? (64u * 1024u * 1024u)
: (256u * 1024u * 1024u),
.pAllocationCallbacks = nullptr,
.pDeviceMemoryCallbacks = nullptr,
.pHeapSizeLimit = nullptr,
.pVulkanFunctions = &functions,
.instance = instance,
.vulkanApiVersion = ApiVersion(),
.pTypeExternalMemoryHandleTypes = nullptr,
};
vk::Check(vmaCreateAllocator(&allocator_info, &allocator));

View file

@ -6,7 +6,10 @@
#include <algorithm>
#include <bit>
#include <limits>
#include <optional>
#include <type_traits>
#include <utility>
#include <vector>
#include "common/alignment.h"
@ -21,379 +24,302 @@
#include "video_core/vulkan_common/vulkan_wrapper.h"
namespace Vulkan {
namespace {
struct Range {
u64 begin;
u64 end;
namespace {
[[nodiscard]] bool Contains(u64 iterator, u64 size) const noexcept {
return iterator < end && begin < iterator + size;
}
};
// Helpers translating MemoryUsage to flags/usage
[[nodiscard]] u64 AllocationChunkSize(u64 required_size) {
static constexpr std::array sizes{
0x1000ULL << 10, 0x1400ULL << 10, 0x1800ULL << 10, 0x1c00ULL << 10, 0x2000ULL << 10,
0x3200ULL << 10, 0x4000ULL << 10, 0x6000ULL << 10, 0x8000ULL << 10, 0xA000ULL << 10,
0x10000ULL << 10, 0x18000ULL << 10, 0x20000ULL << 10,
};
static_assert(std::is_sorted(sizes.begin(), sizes.end()));
const auto it = std::ranges::lower_bound(sizes, required_size);
return it != sizes.end() ? *it : Common::AlignUp(required_size, 4ULL << 20);
}
[[nodiscard]] VkMemoryPropertyFlags MemoryUsagePropertyFlags(MemoryUsage usage) {
switch (usage) {
case MemoryUsage::DeviceLocal:
return VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
case MemoryUsage::Upload:
return VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
case MemoryUsage::Download:
return VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
case MemoryUsage::Stream:
return VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
}
ASSERT_MSG(false, "Invalid memory usage={}", usage);
return VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
}
[[nodiscard]] VkMemoryPropertyFlags MemoryUsagePreferredVmaFlags(MemoryUsage usage) {
return usage != MemoryUsage::DeviceLocal ? VK_MEMORY_PROPERTY_HOST_COHERENT_BIT
: VkMemoryPropertyFlagBits{};
}
[[nodiscard]] VmaAllocationCreateFlags MemoryUsageVmaFlags(MemoryUsage usage) {
switch (usage) {
case MemoryUsage::Upload:
case MemoryUsage::Stream:
return VMA_ALLOCATION_CREATE_MAPPED_BIT |
VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT;
case MemoryUsage::Download:
return VMA_ALLOCATION_CREATE_MAPPED_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT;
case MemoryUsage::DeviceLocal:
return {};
}
return {};
}
[[nodiscard]] VmaMemoryUsage MemoryUsageVma(MemoryUsage usage) {
switch (usage) {
case MemoryUsage::DeviceLocal:
case MemoryUsage::Stream:
return VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE;
case MemoryUsage::Upload:
case MemoryUsage::Download:
return VMA_MEMORY_USAGE_AUTO_PREFER_HOST;
}
return VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE;
}
} // Anonymous namespace
class MemoryAllocation {
public:
explicit MemoryAllocation(MemoryAllocator* const allocator_, vk::DeviceMemory memory_,
VkMemoryPropertyFlags properties, u64 allocation_size_, u32 type)
: allocator{allocator_}, memory{std::move(memory_)}, allocation_size{allocation_size_},
property_flags{properties}, shifted_memory_type{1U << type} {}
MemoryAllocation& operator=(const MemoryAllocation&) = delete;
MemoryAllocation(const MemoryAllocation&) = delete;
MemoryAllocation& operator=(MemoryAllocation&&) = delete;
MemoryAllocation(MemoryAllocation&&) = delete;
[[nodiscard]] std::optional<MemoryCommit> Commit(VkDeviceSize size, VkDeviceSize alignment) {
const std::optional<u64> alloc = FindFreeRegion(size, alignment);
if (!alloc) {
// Signal out of memory, it'll try to do more allocations.
return std::nullopt;
/// Translates a MemoryUsage hint into the raw Vulkan memory property flags tied to it.
/// An unknown usage value trips an assertion and falls back to host visible + host coherent.
[[maybe_unused]] VkMemoryPropertyFlags MemoryUsagePropertyFlags(MemoryUsage usage) {
    constexpr VkMemoryPropertyFlags device_local = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
    constexpr VkMemoryPropertyFlags host_coherent =
        VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
    constexpr VkMemoryPropertyFlags host_cached = VK_MEMORY_PROPERTY_HOST_CACHED_BIT;

    switch (usage) {
    case MemoryUsage::DeviceLocal:
        return device_local;
    case MemoryUsage::Upload:
        return host_coherent;
    case MemoryUsage::Download:
        return host_coherent | host_cached;
    case MemoryUsage::Stream:
        return device_local | host_coherent;
    }
    // Only reachable when `usage` holds a value outside the enumerators handled above.
    ASSERT_MSG(false, "Invalid memory usage={}", usage);
    return host_coherent;
}
const Range range{
.begin = *alloc,
.end = *alloc + size,
/// Returns the property flags handed to VMA as a preference (not a requirement):
/// host coherent for every usage except DeviceLocal, which expresses no preference.
[[nodiscard]] VkMemoryPropertyFlags MemoryUsagePreferredVmaFlags(MemoryUsage usage) {
    if (usage == MemoryUsage::DeviceLocal) {
        return VkMemoryPropertyFlags{};
    }
    return VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
}
/// Returns the VMA allocation-create flags for a usage hint.
/// Upload and Stream request a mapped allocation with sequential-write host access,
/// Download requests a mapped allocation with random host access, and DeviceLocal
/// (or any unrecognised value) requests no extra flags.
[[nodiscard]] VmaAllocationCreateFlags MemoryUsageVmaFlags(MemoryUsage usage) {
    VmaAllocationCreateFlags result{};
    switch (usage) {
    case MemoryUsage::DeviceLocal:
        break;
    case MemoryUsage::Download:
        result = VMA_ALLOCATION_CREATE_MAPPED_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT;
        break;
    case MemoryUsage::Upload:
    case MemoryUsage::Stream:
        result = VMA_ALLOCATION_CREATE_MAPPED_BIT |
                 VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT;
        break;
    }
    return result;
}
/// Picks the VMA "auto" usage for a hint: Upload and Download prefer host memory,
/// everything else (DeviceLocal, Stream, unrecognised values) prefers device memory.
[[nodiscard]] VmaMemoryUsage MemoryUsageVma(MemoryUsage usage) {
    const bool prefers_host = usage == MemoryUsage::Upload || usage == MemoryUsage::Download;
    return prefers_host ? VMA_MEMORY_USAGE_AUTO_PREFER_HOST
                        : VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE;
}
// Extracts the raw VkBuffer handle from a buffer wrapper of type T without committing to one
// specific accessor. The first form that compiles for T wins, tried in this order:
//   1. static_cast<VkBuffer>(buf)
//   2. buf.GetHandle()
//   3. buf.Handle()
//   4. buf.vk_handle()
// If none of them is available the build fails with the static_assert below.
// NOTE(review): the previous comment said this "avoids calling vkGetBufferMemoryRequirements*
// directly"; no caller is visible in this chunk, so confirm that intent (and that the helper
// is still used).
template<typename T>
static VkBuffer GetVkHandleFromBuffer(const T &buf) {
if constexpr (requires { static_cast<VkBuffer>(buf); }) {
return static_cast<VkBuffer>(buf);
} else if constexpr (requires {{ buf.GetHandle() } -> std::convertible_to<VkBuffer>; }) {
return buf.GetHandle();
} else if constexpr (requires {{ buf.Handle() } -> std::convertible_to<VkBuffer>; }) {
return buf.Handle();
} else if constexpr (requires {{ buf.vk_handle() } -> std::convertible_to<VkBuffer>; }) {
return buf.vk_handle();
} else {
// sizeof(T) == 0 is never true; making the condition depend on T delays the failure until
// this branch is actually instantiated for an unsupported type.
static_assert(sizeof(T) == 0, "Cannot extract VkBuffer handle from vk::Buffer");
return VK_NULL_HANDLE;
}
}
} // namespace
// MemoryCommit is backed directly by a VMA allocation.

/// Wraps an existing VMA allocation.
/// Caches the device memory handle, offset, size and mapped pointer reported in `info`;
/// the mapped pointer is whatever VMA put in `info.pMappedData` and may be null.
MemoryCommit::MemoryCommit(VmaAllocator alloc, VmaAllocation a,
                           const VmaAllocationInfo& info) noexcept
    : allocator{alloc},
      allocation{a},
      memory{info.deviceMemory},
      offset{info.offset},
      size{info.size},
      mapped_ptr{info.pMappedData} {}

/// Releases the wrapped allocation, if there is one.
MemoryCommit::~MemoryCommit() {
    Release();
}
/// Move constructor: takes over everything `rhs` owned and leaves `rhs` empty
/// (null handles, zero offset and size, no mapped pointer).
/// Implemented through move assignment; the freshly default-initialised object owns
/// nothing, so the Release() performed there has nothing to free.
MemoryCommit::MemoryCommit(MemoryCommit&& rhs) noexcept {
    *this = std::move(rhs);
}
/// Move assignment: releases whatever this commit currently owns, then takes over the
/// state of `rhs`, leaving `rhs` empty. Self-assignment is a no-op.
MemoryCommit& MemoryCommit::operator=(MemoryCommit&& rhs) noexcept {
    if (this == &rhs) {
        return *this;
    }

    // Drop our own allocation first so it is not leaked when the handles are overwritten.
    Release();

    allocator = std::exchange(rhs.allocator, nullptr);
    allocation = std::exchange(rhs.allocation, nullptr);
    memory = std::exchange(rhs.memory, VK_NULL_HANDLE);
    offset = std::exchange(rhs.offset, 0);
    size = std::exchange(rhs.size, 0);
    mapped_ptr = std::exchange(rhs.mapped_ptr, nullptr);
    return *this;
}
/// Returns a writable view over the allocation's host mapping.
/// Maps the allocation through VMA on first use when no mapped pointer is cached yet.
/// Returns an empty span when the commit owns no allocation or when mapping fails.
std::span<u8> MemoryCommit::Map() {
    if (allocation == nullptr) {
        return {};
    }
    if (mapped_ptr == nullptr &&
        vmaMapMemory(allocator, allocation, &mapped_ptr) != VK_SUCCESS) {
        return {};
    }

    // Clamp the length so it always fits in size_t, whatever its width on this target.
    const VkDeviceSize clamped =
        std::min<VkDeviceSize>(size, std::numeric_limits<size_t>::max());
    const auto length = static_cast<size_t>(clamped);
    return std::span<u8>{static_cast<u8*>(mapped_ptr), length};
}
/// Read-only counterpart of Map().
/// Maps the allocation on first use and caches the pointer even though the method is const.
/// Returns an empty span when the commit owns no allocation or when mapping fails.
std::span<const u8> MemoryCommit::Map() const {
    if (allocation == nullptr) {
        return {};
    }

    const void* data = mapped_ptr;
    if (data == nullptr) {
        void* fresh = nullptr;
        if (vmaMapMemory(allocator, allocation, &fresh) != VK_SUCCESS) {
            return {};
        }
        // The cache is written through const_cast; this is only well-defined when the
        // MemoryCommit object itself was not created const.
        const_cast<MemoryCommit*>(this)->mapped_ptr = fresh;
        data = fresh;
    }

    // Clamp the length so it always fits in size_t, whatever its width on this target.
    const VkDeviceSize clamped =
        std::min<VkDeviceSize>(size, std::numeric_limits<size_t>::max());
    const auto length = static_cast<size_t>(clamped);
    return std::span<const u8>{static_cast<const u8*>(data), length};
}
/// Unmaps the allocation and forgets the cached mapped pointer.
/// Does nothing when the commit owns no allocation or no mapped pointer is cached.
// NOTE(review): mapped_ptr is seeded from VmaAllocationInfo::pMappedData in the constructor, so
// for allocations created with VMA_ALLOCATION_CREATE_MAPPED_BIT this call is not paired with a
// vmaMapMemory() issued by this class. VMA's documentation states that the automatic mapping
// must not be released with vmaUnmapMemory(); confirm, and if so track explicit maps
// separately (this needs an extra class member).
void MemoryCommit::Unmap()
{
if (allocation && mapped_ptr) {
vmaUnmapMemory(allocator, allocation);
mapped_ptr = nullptr;
}
}
/// Gives the allocation back to VMA and resets this commit to the empty state.
/// Nothing is unmapped or freed unless both the allocation and the allocator are non-null,
/// so calling this on an empty or moved-from commit only clears the fields below.
void MemoryCommit::Release() {
if (allocation && allocator) {
if (mapped_ptr) {
// Unmap before freeing so a mapping made by Map() is not left outstanding.
// NOTE(review): mapped_ptr may also come from VmaAllocationInfo::pMappedData (persistently
// mapped allocations created with VMA_ALLOCATION_CREATE_MAPPED_BIT). VMA's documentation
// says that automatic mapping must not be unmapped by the caller; confirm and, if so,
// only unmap when this class called vmaMapMemory() itself.
vmaUnmapMemory(allocator, allocation);
mapped_ptr = nullptr;
}
vmaFreeMemory(allocator, allocation);
}
// Always reset the bookkeeping, even when there was nothing to free.
allocation = nullptr;
allocator = nullptr;
memory = VK_NULL_HANDLE;
offset = 0;
size = 0;
}
/// Builds the allocator front-end on top of the device's VMA allocator and caches the
/// physical device memory properties and the buffer/image granularity limit.
///
/// When a debugging tool is attached, every memory type that lives in a device-local,
/// host-visible heap of at most 256 MiB is removed from `valid_memory_types` (the
/// "small heap" trimming kept for capture tools such as RenderDoc). The mask is cleared
/// per memory type belonging to the heap, not per heap index.
MemoryAllocator::MemoryAllocator(const Device& device_)
    : device{device_}, allocator{device.GetAllocator()},
      properties{device_.GetPhysical().GetMemoryProperties().memoryProperties},
      buffer_image_granularity{
          device_.GetPhysical().GetProperties().limits.bufferImageGranularity} {
    if (!device.HasDebuggingToolAttached()) {
        return;
    }

    using namespace Common::Literals;
    const auto exclude_small_heap = [this](size_t heap_idx, VkMemoryHeap& heap) {
        if (heap.size > 256_MiB) {
            return;
        }
        for (u32 type = 0; type < properties.memoryTypeCount; ++type) {
            if (properties.memoryTypes[type].heapIndex != heap_idx) {
                continue;
            }
            valid_memory_types &= ~(1u << type);
        }
    };
    ForEachDeviceLocalHostVisibleHeap(device, exclude_small_heap);
}

/// Defaulted: no allocator-level cleanup is performed here.
MemoryAllocator::~MemoryAllocator() = default;
vk::Image MemoryAllocator::CreateImage(const VkImageCreateInfo &ci) const
{
const VmaAllocationCreateInfo alloc_ci = {
.flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT,
.usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE,
.requiredFlags = 0,
.preferredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
.memoryTypeBits = 0,
.pool = VK_NULL_HANDLE,
.pUserData = nullptr,
.priority = 0.f,
};
commits.insert(std::ranges::upper_bound(commits, *alloc, {}, &Range::begin), range);
return std::make_optional<MemoryCommit>(this, *memory, *alloc, *alloc + size);
VkImage handle{};
VmaAllocation allocation{};
vk::Check(vmaCreateImage(allocator, &ci, &alloc_ci, &handle, &allocation, nullptr));
return vk::Image(handle, ci.usage, *device.GetLogical(), allocator, allocation,
device.GetDispatchLoader());
}
void Free(u64 begin) {
const auto it = std::ranges::find(commits, begin, &Range::begin);
ASSERT_MSG(it != commits.end(), "Invalid commit");
commits.erase(it);
if (commits.empty()) {
// Do not call any code involving 'this' after this call, the object will be destroyed
allocator->ReleaseMemory(this);
}
/// Creates a buffer together with its backing memory through VMA.
///
/// @param ci    Vulkan buffer description.
/// @param usage Usage hint; selects the VMA usage, allocation flags and preferred memory
///              properties. Stream buffers ignore the `valid_memory_types` restriction.
///
/// @returns The buffer wrapper, carrying the VMA allocation, the persistently mapped range
///          (empty when VMA reports no mapping) and whether the memory is host coherent.
/// @throw vk::Exception (via vk::Check) when vmaCreateBuffer fails.
vk::Buffer MemoryAllocator::CreateBuffer(const VkBufferCreateInfo& ci, MemoryUsage usage) const {
    // Every field not assigned below stays zero / null.
    VmaAllocationCreateInfo alloc_ci{};
    alloc_ci.flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT | MemoryUsageVmaFlags(usage);
    alloc_ci.usage = MemoryUsageVma(usage);
    alloc_ci.preferredFlags = MemoryUsagePreferredVmaFlags(usage);
    if (usage != MemoryUsage::Stream) {
        alloc_ci.memoryTypeBits = valid_memory_types;
    }

    VkBuffer handle{};
    VmaAllocation allocation{};
    VmaAllocationInfo alloc_info{};
    vk::Check(vmaCreateBuffer(allocator, &ci, &alloc_ci, &handle, &allocation, &alloc_info));

    VkMemoryPropertyFlags property_flags{};
    vmaGetAllocationMemoryProperties(allocator, allocation, &property_flags);
    const bool is_coherent = (property_flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) ==
                             VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;

    // Expose the mapping only when VMA actually mapped the allocation.
    u8* const data = static_cast<u8*>(alloc_info.pMappedData);
    const std::span<u8> mapped_data =
        data != nullptr ? std::span<u8>{data, ci.size} : std::span<u8>{};

    return vk::Buffer(handle, *device.GetLogical(), allocator, allocation, mapped_data,
                      is_coherent, device.GetDispatchLoader());
}
[[nodiscard]] std::span<u8> Map() {
if (memory_mapped_span.empty()) {
u8* const raw_pointer = memory.Map(0, allocation_size);
memory_mapped_span = std::span<u8>(raw_pointer, allocation_size);
}
return memory_mapped_span;
}
MemoryCommit MemoryAllocator::Commit(const VkMemoryRequirements &reqs, MemoryUsage usage)
{
const auto vma_usage = MemoryUsageVma(usage);
VmaAllocationCreateInfo ci{};
ci.flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT | MemoryUsageVmaFlags(usage);
ci.usage = vma_usage;
ci.memoryTypeBits = reqs.memoryTypeBits & valid_memory_types;
ci.requiredFlags = 0;
ci.preferredFlags = MemoryUsagePreferredVmaFlags(usage);
/// Returns whether this allocation is compatible with the arguments.
[[nodiscard]] bool IsCompatible(VkMemoryPropertyFlags flags, u32 type_mask) const {
return (flags & property_flags) == flags && (type_mask & shifted_memory_type) != 0;
}
VmaAllocation a{};
VmaAllocationInfo info{};
VkResult res = vmaAllocateMemory(allocator, &reqs, &ci, &a, &info);
private:
[[nodiscard]] static constexpr u32 ShiftType(u32 type) {
return 1U << type;
}
if (res != VK_SUCCESS) {
// Relax 1: drop budget constraint
auto ci2 = ci;
ci2.flags &= ~VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT;
res = vmaAllocateMemory(allocator, &reqs, &ci2, &a, &info);
[[nodiscard]] std::optional<u64> FindFreeRegion(u64 size, u64 alignment) noexcept {
ASSERT(std::has_single_bit(alignment));
const u64 alignment_log2 = std::countr_zero(alignment);
std::optional<u64> candidate;
u64 iterator = 0;
auto commit = commits.begin();
while (iterator + size <= allocation_size) {
candidate = candidate.value_or(iterator);
if (commit == commits.end()) {
break;
// Relax 2: if we preferred DEVICE_LOCAL, drop that preference
if (res != VK_SUCCESS && (ci.preferredFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)) {
auto ci3 = ci2;
ci3.preferredFlags &= ~VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
res = vmaAllocateMemory(allocator, &reqs, &ci3, &a, &info);
}
if (commit->Contains(*candidate, size)) {
candidate = std::nullopt;
}
vk::Check(res);
return MemoryCommit(allocator, a, info);
}
MemoryCommit MemoryAllocator::Commit(const vk::Buffer &buffer, MemoryUsage usage) {
// Allocate memory appropriate for this buffer automatically
const auto vma_usage = MemoryUsageVma(usage);
VmaAllocationCreateInfo ci{};
ci.flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT | MemoryUsageVmaFlags(usage);
ci.usage = vma_usage;
ci.requiredFlags = 0;
ci.preferredFlags = MemoryUsagePreferredVmaFlags(usage);
ci.pool = VK_NULL_HANDLE;
ci.pUserData = nullptr;
ci.priority = 0.0f;
const VkBuffer raw = *buffer;
VmaAllocation a{};
VmaAllocationInfo info{};
// Let VMA infer memory requirements from the buffer
VkResult res = vmaAllocateMemoryForBuffer(allocator, raw, &ci, &a, &info);
if (res != VK_SUCCESS) {
auto ci2 = ci;
ci2.flags &= ~VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT;
res = vmaAllocateMemoryForBuffer(allocator, raw, &ci2, &a, &info);
if (res != VK_SUCCESS && (ci.preferredFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)) {
auto ci3 = ci2;
ci3.preferredFlags &= ~VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
res = vmaAllocateMemoryForBuffer(allocator, raw, &ci3, &a, &info);
}
iterator = Common::AlignUpLog2(commit->end, alignment_log2);
++commit;
}
return candidate;
vk::Check(res);
vk::Check(vmaBindBufferMemory2(allocator, a, 0, raw, nullptr));
return MemoryCommit(allocator, a, info);
}
MemoryAllocator* const allocator; ///< Parent memory allocation.
const vk::DeviceMemory memory; ///< Vulkan memory allocation handler.
const u64 allocation_size; ///< Size of this allocation.
const VkMemoryPropertyFlags property_flags; ///< Vulkan memory property flags.
const u32 shifted_memory_type; ///< Shifted Vulkan memory type.
std::vector<Range> commits; ///< All commit ranges done from this allocation.
std::span<u8> memory_mapped_span; ///< Memory mapped span. Empty if not queried before.
};
MemoryCommit::MemoryCommit(MemoryAllocation* allocation_, VkDeviceMemory memory_, u64 begin_,
u64 end_) noexcept
: allocation{allocation_}, memory{memory_}, begin{begin_}, end{end_} {}
MemoryCommit::~MemoryCommit() {
Release();
}
MemoryCommit& MemoryCommit::operator=(MemoryCommit&& rhs) noexcept {
Release();
allocation = std::exchange(rhs.allocation, nullptr);
memory = rhs.memory;
begin = rhs.begin;
end = rhs.end;
span = std::exchange(rhs.span, std::span<u8>{});
return *this;
}
MemoryCommit::MemoryCommit(MemoryCommit&& rhs) noexcept
: allocation{std::exchange(rhs.allocation, nullptr)}, memory{rhs.memory}, begin{rhs.begin},
end{rhs.end}, span{std::exchange(rhs.span, std::span<u8>{})} {}
std::span<u8> MemoryCommit::Map() {
if (span.empty()) {
span = allocation->Map().subspan(begin, end - begin);
}
return span;
}
void MemoryCommit::Release() {
if (allocation) {
allocation->Free(begin);
}
}
MemoryAllocator::MemoryAllocator(const Device& device_)
: device{device_}, allocator{device.GetAllocator()},
properties{device_.GetPhysical().GetMemoryProperties().memoryProperties},
buffer_image_granularity{
device_.GetPhysical().GetProperties().limits.bufferImageGranularity} {
// GPUs not supporting rebar may only have a region with less than 256MB host visible/device
// local memory. In that case, opening 2 RenderDoc captures side-by-side is not possible due to
// the heap running out of memory. With RenderDoc attached and only a small host/device region,
// only allow the stream buffer in this memory heap.
if (device.HasDebuggingToolAttached()) {
using namespace Common::Literals;
ForEachDeviceLocalHostVisibleHeap(device, [this](size_t index, VkMemoryHeap& heap) {
if (heap.size <= 256_MiB) {
valid_memory_types &= ~(1u << index);
}
});
}
}
MemoryAllocator::~MemoryAllocator() = default;
vk::Image MemoryAllocator::CreateImage(const VkImageCreateInfo& ci) const {
const VmaAllocationCreateInfo alloc_ci = {
.flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT,
.usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE,
.requiredFlags = 0,
.preferredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
.memoryTypeBits = 0,
.pool = VK_NULL_HANDLE,
.pUserData = nullptr,
.priority = 0.f,
};
VkImage handle{};
VmaAllocation allocation{};
vk::Check(vmaCreateImage(allocator, &ci, &alloc_ci, &handle, &allocation, nullptr));
return vk::Image(handle, ci.usage, *device.GetLogical(), allocator, allocation,
device.GetDispatchLoader());
}
vk::Buffer MemoryAllocator::CreateBuffer(const VkBufferCreateInfo& ci, MemoryUsage usage) const {
const VmaAllocationCreateInfo alloc_ci = {
.flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT | MemoryUsageVmaFlags(usage),
.usage = MemoryUsageVma(usage),
.requiredFlags = 0,
.preferredFlags = MemoryUsagePreferredVmaFlags(usage),
.memoryTypeBits = usage == MemoryUsage::Stream ? 0u : valid_memory_types,
.pool = VK_NULL_HANDLE,
.pUserData = nullptr,
.priority = 0.f,
};
VkBuffer handle{};
VmaAllocationInfo alloc_info{};
VmaAllocation allocation{};
VkMemoryPropertyFlags property_flags{};
vk::Check(vmaCreateBuffer(allocator, &ci, &alloc_ci, &handle, &allocation, &alloc_info));
vmaGetAllocationMemoryProperties(allocator, allocation, &property_flags);
u8* data = reinterpret_cast<u8*>(alloc_info.pMappedData);
const std::span<u8> mapped_data = data ? std::span<u8>{data, ci.size} : std::span<u8>{};
const bool is_coherent = property_flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
return vk::Buffer(handle, *device.GetLogical(), allocator, allocation, mapped_data, is_coherent,
device.GetDispatchLoader());
}
MemoryCommit MemoryAllocator::Commit(const VkMemoryRequirements& requirements, MemoryUsage usage) {
// Find the fastest memory flags we can afford with the current requirements
const u32 type_mask = requirements.memoryTypeBits;
const VkMemoryPropertyFlags usage_flags = MemoryUsagePropertyFlags(usage);
const VkMemoryPropertyFlags flags = MemoryPropertyFlags(type_mask, usage_flags);
if (std::optional<MemoryCommit> commit = TryCommit(requirements, flags)) {
return std::move(*commit);
}
// Commit has failed, allocate more memory.
const u64 chunk_size = AllocationChunkSize(requirements.size);
if (!TryAllocMemory(flags, type_mask, chunk_size)) {
// TODO(Rodrigo): Handle out of memory situations in some way like flushing to guest memory.
throw vk::Exception(VK_ERROR_OUT_OF_DEVICE_MEMORY);
}
// Commit again, this time it won't fail since there's a fresh allocation above.
// If it does, there's a bug.
return TryCommit(requirements, flags).value();
}
bool MemoryAllocator::TryAllocMemory(VkMemoryPropertyFlags flags, u32 type_mask, u64 size) {
const auto type_opt = FindType(flags, type_mask);
if (!type_opt) {
return false;
}
// Adreno stands firm
const u64 aligned_size = (device.GetDriverID() == VK_DRIVER_ID_QUALCOMM_PROPRIETARY) ?
Common::AlignUp(size, 4096) :
size;
vk::DeviceMemory memory = device.GetLogical().TryAllocateMemory({
.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
.pNext = nullptr,
.allocationSize = aligned_size,
.memoryTypeIndex = *type_opt,
});
if (!memory) {
return false;
}
allocations.push_back(
std::make_unique<MemoryAllocation>(this, std::move(memory), flags, aligned_size, *type_opt));
return true;
}
void MemoryAllocator::ReleaseMemory(MemoryAllocation* alloc) {
const auto it = std::ranges::find(allocations, alloc, &std::unique_ptr<MemoryAllocation>::get);
ASSERT(it != allocations.end());
allocations.erase(it);
}
std::optional<MemoryCommit> MemoryAllocator::TryCommit(const VkMemoryRequirements& requirements,
VkMemoryPropertyFlags flags) {
// Conservative, spec-compliant alignment for suballocation
VkDeviceSize eff_align = requirements.alignment;
const auto& limits = device.GetPhysical().GetProperties().limits;
if ((flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) &&
!(flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
// Non-coherent memory must be invalidated on atom boundary
if (limits.nonCoherentAtomSize > eff_align) eff_align = limits.nonCoherentAtomSize;
}
// Separate buffers to avoid stalls on tilers
if (buffer_image_granularity > eff_align) {
eff_align = buffer_image_granularity;
}
eff_align = std::bit_ceil(eff_align);
for (auto& allocation : allocations) {
if (!allocation->IsCompatible(flags, requirements.memoryTypeBits)) {
continue;
}
if (auto commit = allocation->Commit(requirements.size, eff_align)) {
return commit;
}
}
if ((flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) != 0) {
// Look for non device local commits on failure
return TryCommit(requirements, flags & ~VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
}
return std::nullopt;
}
VkMemoryPropertyFlags MemoryAllocator::MemoryPropertyFlags(u32 type_mask,
VkMemoryPropertyFlags flags) const {
if (FindType(flags, type_mask)) {
// Found a memory type with those requirements
return flags;
}
if ((flags & VK_MEMORY_PROPERTY_HOST_CACHED_BIT) != 0) {
// Remove host cached bit in case it's not supported
return MemoryPropertyFlags(type_mask, flags & ~VK_MEMORY_PROPERTY_HOST_CACHED_BIT);
}
if ((flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) != 0) {
// Remove device local, if it's not supported by the requested resource
return MemoryPropertyFlags(type_mask, flags & ~VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
}
ASSERT_MSG(false, "No compatible memory types found");
return 0;
}
std::optional<u32> MemoryAllocator::FindType(VkMemoryPropertyFlags flags, u32 type_mask) const {
for (u32 type_index = 0; type_index < properties.memoryTypeCount; ++type_index) {
const VkMemoryPropertyFlags type_flags = properties.memoryTypes[type_index].propertyFlags;
if ((type_mask & (1U << type_index)) != 0 && (type_flags & flags) == flags) {
// The type matches in type and in the wanted properties.
return type_index;
}
}
// Failed to find index
return std::nullopt;
}
} // namespace Vulkan

View file

@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
@ -6,138 +9,134 @@
#include <memory>
#include <span>
#include <vector>
#include "common/common_types.h"
#include "video_core/vulkan_common/vulkan_device.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
VK_DEFINE_HANDLE(VmaAllocator)
#include "video_core/vulkan_common/vma.h"
namespace Vulkan {
class Device;
class MemoryMap;
class MemoryAllocation;
class Device;
/// Hints and requirements for the backing memory type of a commit
enum class MemoryUsage {
DeviceLocal, ///< Requests device local host visible buffer, falling back to device local
///< memory.
Upload, ///< Requires a host visible memory type optimized for CPU to GPU uploads
Download, ///< Requires a host visible memory type optimized for GPU to CPU readbacks
Stream, ///< Requests device local host visible buffer, falling back host memory.
};
/// Hints and requirements for the backing memory type of a commit
enum class MemoryUsage {
DeviceLocal, ///< Requests device local host visible buffer, falling back to device local memory.
Upload, ///< Requires a host visible memory type optimized for CPU to GPU uploads
Download, ///< Requires a host visible memory type optimized for GPU to CPU readbacks
Stream, ///< Requests device local host visible buffer, falling back to host memory.
};
template <typename F>
void ForEachDeviceLocalHostVisibleHeap(const Device& device, F&& f) {
auto memory_props = device.GetPhysical().GetMemoryProperties().memoryProperties;
for (size_t i = 0; i < memory_props.memoryTypeCount; i++) {
auto& memory_type = memory_props.memoryTypes[i];
if ((memory_type.propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) &&
(memory_type.propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)) {
f(memory_type.heapIndex, memory_props.memoryHeaps[memory_type.heapIndex]);
template<typename F>
void ForEachDeviceLocalHostVisibleHeap(const Device &device, F &&f) {
auto memory_props = device.GetPhysical().GetMemoryProperties().memoryProperties;
for (size_t i = 0; i < memory_props.memoryTypeCount; i++) {
auto &memory_type = memory_props.memoryTypes[i];
if ((memory_type.propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) &&
(memory_type.propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)) {
f(memory_type.heapIndex, memory_props.memoryHeaps[memory_type.heapIndex]);
}
}
}
}
/// Ownership handle of a memory commitment.
/// Points to a subregion of a memory allocation.
class MemoryCommit {
public:
explicit MemoryCommit() noexcept = default;
explicit MemoryCommit(MemoryAllocation* allocation_, VkDeviceMemory memory_, u64 begin_,
u64 end_) noexcept;
~MemoryCommit();
/// Ownership handle of a memory commitment (real VMA allocation).
class MemoryCommit {
public:
MemoryCommit() noexcept = default;
MemoryCommit& operator=(MemoryCommit&&) noexcept;
MemoryCommit(MemoryCommit&&) noexcept;
MemoryCommit(VmaAllocator allocator, VmaAllocation allocation,
const VmaAllocationInfo &info) noexcept;
MemoryCommit& operator=(const MemoryCommit&) = delete;
MemoryCommit(const MemoryCommit&) = delete;
~MemoryCommit();
/// Returns a host visible memory map.
/// It will map the backing allocation if it hasn't been mapped before.
std::span<u8> Map();
MemoryCommit(const MemoryCommit &) = delete;
/// Returns the Vulkan memory handler.
VkDeviceMemory Memory() const {
return memory;
}
MemoryCommit &operator=(const MemoryCommit &) = delete;
/// Returns the start position of the commit relative to the allocation.
VkDeviceSize Offset() const {
return static_cast<VkDeviceSize>(begin);
}
MemoryCommit(MemoryCommit &&) noexcept;
private:
void Release();
MemoryCommit &operator=(MemoryCommit &&) noexcept;
MemoryAllocation* allocation{}; ///< Pointer to the large memory allocation.
VkDeviceMemory memory{}; ///< Vulkan device memory handler.
u64 begin{}; ///< Beginning offset in bytes to where the commit exists.
u64 end{}; ///< Offset in bytes where the commit ends.
std::span<u8> span; ///< Host visible memory span. Empty if not queried before.
};
[[nodiscard]] std::span<u8> Map();
[[nodiscard]] std::span<const u8> Map() const;
void Unmap();
explicit operator bool() const noexcept { return allocation != nullptr; }
VkDeviceMemory Memory() const noexcept { return memory; }
VkDeviceSize Offset() const noexcept { return offset; }
VkDeviceSize Size() const noexcept { return size; }
VmaAllocation Allocation() const noexcept { return allocation; }
private:
void Release();
VmaAllocator allocator{}; ///< VMA allocator
VmaAllocation allocation{}; ///< VMA allocation handle
VkDeviceMemory memory{}; ///< Underlying VkDeviceMemory chosen by VMA
VkDeviceSize offset{}; ///< Offset of this allocation inside VkDeviceMemory
VkDeviceSize size{}; ///< Size of the allocation
void *mapped_ptr{}; ///< Optional persistent mapped pointer
};
/// Memory allocator container.
/// Allocates and releases memory allocations on demand.
class MemoryAllocator {
friend MemoryAllocation;
class MemoryAllocator {
public:
/**
* Construct memory allocator
*
* @param device_ Device to allocate from
*
* @throw vk::Exception on failure
*/
explicit MemoryAllocator(const Device &device_);
public:
/**
* Construct memory allocator
*
* @param device_ Device to allocate from
*
* @throw vk::Exception on failure
*/
explicit MemoryAllocator(const Device& device_);
~MemoryAllocator();
~MemoryAllocator();
MemoryAllocator& operator=(const MemoryAllocator&) = delete;
MemoryAllocator(const MemoryAllocator&) = delete;
MemoryAllocator &operator=(const MemoryAllocator &) = delete;
vk::Image CreateImage(const VkImageCreateInfo& ci) const;
MemoryAllocator(const MemoryAllocator &) = delete;
vk::Buffer CreateBuffer(const VkBufferCreateInfo& ci, MemoryUsage usage) const;
vk::Image CreateImage(const VkImageCreateInfo &ci) const;
/**
* Commits a memory with the specified requirements.
*
* @param requirements Requirements returned from a Vulkan call.
* @param usage Indicates how the memory will be used.
*
* @returns A memory commit.
*/
MemoryCommit Commit(const VkMemoryRequirements& requirements, MemoryUsage usage);
vk::Buffer CreateBuffer(const VkBufferCreateInfo &ci, MemoryUsage usage) const;
/// Commits memory required by the buffer and binds it.
MemoryCommit Commit(const vk::Buffer& buffer, MemoryUsage usage);
/**
* Commits a memory with the specified requirements.
*
* @param requirements Requirements returned from a Vulkan call.
* @param usage Indicates how the memory will be used.
*
* @returns A memory commit.
*/
MemoryCommit Commit(const VkMemoryRequirements &requirements, MemoryUsage usage);
private:
/// Tries to allocate a chunk of memory.
bool TryAllocMemory(VkMemoryPropertyFlags flags, u32 type_mask, u64 size);
/// Commits memory required by the buffer and binds it (for buffers created outside VMA).
MemoryCommit Commit(const vk::Buffer &buffer, MemoryUsage usage);
/// Releases a chunk of memory.
void ReleaseMemory(MemoryAllocation* alloc);
private:
/// Returns true when `u` is one of the VMA_MEMORY_USAGE_AUTO* values
/// (AUTO, AUTO_PREFER_DEVICE or AUTO_PREFER_HOST), false for any other usage.
static bool IsAutoUsage(VmaMemoryUsage u) noexcept {
    return u == VMA_MEMORY_USAGE_AUTO || u == VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE ||
           u == VMA_MEMORY_USAGE_AUTO_PREFER_HOST;
}
/// Tries to allocate a memory commit.
std::optional<MemoryCommit> TryCommit(const VkMemoryRequirements& requirements,
VkMemoryPropertyFlags flags);
/// Returns the fastest compatible memory property flags from the wanted flags.
VkMemoryPropertyFlags MemoryPropertyFlags(u32 type_mask, VkMemoryPropertyFlags flags) const;
/// Returns index to the fastest memory type compatible with the passed requirements.
std::optional<u32> FindType(VkMemoryPropertyFlags flags, u32 type_mask) const;
const Device& device; ///< Device handle.
VmaAllocator allocator; ///< Vma allocator.
const VkPhysicalDeviceMemoryProperties properties; ///< Physical device properties.
std::vector<std::unique_ptr<MemoryAllocation>> allocations; ///< Current allocations.
VkDeviceSize buffer_image_granularity; // The granularity for adjacent offsets between buffers
// and optimal images
u32 valid_memory_types{~0u};
};
const Device &device; ///< Device handle.
VmaAllocator allocator; ///< VMA allocator.
const VkPhysicalDeviceMemoryProperties properties; ///< Physical device memory properties.
VkDeviceSize buffer_image_granularity; ///< Adjacent buffer/image granularity
u32 valid_memory_types{~0u};
};
} // namespace Vulkan