[VMA] Phase 3: Hand all allocation & binding to VMA (#362)
This patch completely replaces the custom sub-allocator with VMA and delegates all allocation and binding to VMA. Overall, the patch integrates VMA and simplifies memory management. Once these changes pass testing, they will be used as the base for further improvements. Note to testers: please test for stability and performance. Co-authored-by: crueter <crueter@eden-emu.dev> Reviewed-on: #362 Reviewed-by: crueter <crueter@eden-emu.dev> Reviewed-by: MaranBr <maranbr@outlook.com> Co-authored-by: wildcard <wildcard@eden-emu.dev> Co-committed-by: wildcard <wildcard@eden-emu.dev>
This commit is contained in:
parent
10c76568b8
commit
e60fd4b68b
6 changed files with 411 additions and 474 deletions
4
externals/CMakeLists.txt
vendored
4
externals/CMakeLists.txt
vendored
|
@ -147,6 +147,10 @@ add_subdirectory(nx_tzdb)
|
|||
# VMA
|
||||
AddJsonPackage(vulkan-memory-allocator)
|
||||
|
||||
if (VulkanMemoryAllocator_ADDED AND MSVC)
|
||||
target_compile_options(VulkanMemoryAllocator INTERFACE /wd4189)
|
||||
endif()
|
||||
|
||||
if (NOT TARGET LLVM::Demangle)
|
||||
add_library(demangle demangle/ItaniumDemangle.cpp)
|
||||
target_include_directories(demangle PUBLIC ./demangle)
|
||||
|
|
|
@ -17,7 +17,7 @@ add_library(yuzu-android SHARED
|
|||
|
||||
set_property(TARGET yuzu-android PROPERTY IMPORTED_LOCATION ${FFmpeg_LIBRARY_DIR})
|
||||
|
||||
target_link_libraries(yuzu-android PRIVATE audio_core common core input_common frontend_common Vulkan::Headers)
|
||||
target_link_libraries(yuzu-android PRIVATE audio_core common core input_common frontend_common Vulkan::Headers GPUOpen::VulkanMemoryAllocator)
|
||||
target_link_libraries(yuzu-android PRIVATE android camera2ndk EGL glad jnigraphics log)
|
||||
if (ARCHITECTURE_arm64)
|
||||
target_link_libraries(yuzu-android PRIVATE adrenotools)
|
||||
|
|
|
@ -1,3 +1,5 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
|
@ -8,4 +10,4 @@
|
|||
#define VMA_STATIC_VULKAN_FUNCTIONS 0
|
||||
#define VMA_DYNAMIC_VULKAN_FUNCTIONS 1
|
||||
|
||||
#include <vk_mem_alloc.h>
|
||||
#include "vk_mem_alloc.h"
|
||||
|
|
|
@ -753,17 +753,23 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
|
|||
functions.vkGetInstanceProcAddr = dld.vkGetInstanceProcAddr;
|
||||
functions.vkGetDeviceProcAddr = dld.vkGetDeviceProcAddr;
|
||||
|
||||
const VmaAllocatorCreateInfo allocator_info = {
|
||||
.flags = VMA_ALLOCATOR_CREATE_EXTERNALLY_SYNCHRONIZED_BIT,
|
||||
VmaAllocatorCreateFlags flags = VMA_ALLOCATOR_CREATE_EXTERNALLY_SYNCHRONIZED_BIT;
|
||||
if (extensions.memory_budget) {
|
||||
flags |= VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT;
|
||||
}
|
||||
const VmaAllocatorCreateInfo allocator_info{
|
||||
.flags = flags,
|
||||
.physicalDevice = physical,
|
||||
.device = *logical,
|
||||
.preferredLargeHeapBlockSize = 0,
|
||||
.preferredLargeHeapBlockSize = is_integrated
|
||||
? (64u * 1024u * 1024u)
|
||||
: (256u * 1024u * 1024u),
|
||||
.pAllocationCallbacks = nullptr,
|
||||
.pDeviceMemoryCallbacks = nullptr,
|
||||
.pHeapSizeLimit = nullptr,
|
||||
.pVulkanFunctions = &functions,
|
||||
.instance = instance,
|
||||
.vulkanApiVersion = VK_API_VERSION_1_1,
|
||||
.vulkanApiVersion = ApiVersion(),
|
||||
.pTypeExternalMemoryHandleTypes = nullptr,
|
||||
};
|
||||
|
||||
|
|
|
@ -6,7 +6,10 @@
|
|||
|
||||
#include <algorithm>
|
||||
#include <bit>
|
||||
#include <limits>
|
||||
#include <optional>
|
||||
#include <type_traits>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "common/alignment.h"
|
||||
|
@ -21,65 +24,51 @@
|
|||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
namespace Vulkan {
|
||||
namespace {
|
||||
struct Range {
|
||||
u64 begin;
|
||||
u64 end;
|
||||
namespace {
|
||||
|
||||
[[nodiscard]] bool Contains(u64 iterator, u64 size) const noexcept {
|
||||
return iterator < end && begin < iterator + size;
|
||||
}
|
||||
};
|
||||
// Helpers translating MemoryUsage to flags/usage
|
||||
|
||||
[[nodiscard]] u64 AllocationChunkSize(u64 required_size) {
|
||||
static constexpr std::array sizes{
|
||||
0x1000ULL << 10, 0x1400ULL << 10, 0x1800ULL << 10, 0x1c00ULL << 10, 0x2000ULL << 10,
|
||||
0x3200ULL << 10, 0x4000ULL << 10, 0x6000ULL << 10, 0x8000ULL << 10, 0xA000ULL << 10,
|
||||
0x10000ULL << 10, 0x18000ULL << 10, 0x20000ULL << 10,
|
||||
};
|
||||
static_assert(std::is_sorted(sizes.begin(), sizes.end()));
|
||||
|
||||
const auto it = std::ranges::lower_bound(sizes, required_size);
|
||||
return it != sizes.end() ? *it : Common::AlignUp(required_size, 4ULL << 20);
|
||||
}
|
||||
|
||||
[[nodiscard]] VkMemoryPropertyFlags MemoryUsagePropertyFlags(MemoryUsage usage) {
|
||||
[[maybe_unused]] VkMemoryPropertyFlags MemoryUsagePropertyFlags(MemoryUsage usage) {
|
||||
switch (usage) {
|
||||
case MemoryUsage::DeviceLocal:
|
||||
return VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
|
||||
case MemoryUsage::Upload:
|
||||
return VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
|
||||
return VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
|
||||
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
|
||||
case MemoryUsage::Download:
|
||||
return VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
|
||||
return VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
|
||||
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
|
||||
VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
|
||||
case MemoryUsage::Stream:
|
||||
return VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
|
||||
return VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
|
||||
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
|
||||
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
|
||||
}
|
||||
ASSERT_MSG(false, "Invalid memory usage={}", usage);
|
||||
return VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
|
||||
}
|
||||
}
|
||||
|
||||
[[nodiscard]] VkMemoryPropertyFlags MemoryUsagePreferredVmaFlags(MemoryUsage usage) {
|
||||
/// Returns the memory property flags to request as a preference (not a requirement)
/// for the given usage: host-coherent memory for every usage except DeviceLocal,
/// and no preference at all for DeviceLocal.
[[nodiscard]] VkMemoryPropertyFlags MemoryUsagePreferredVmaFlags(MemoryUsage usage) {
    if (usage == MemoryUsage::DeviceLocal) {
        return VkMemoryPropertyFlagBits{};
    }
    return VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
}
|
||||
}
|
||||
|
||||
[[nodiscard]] VmaAllocationCreateFlags MemoryUsageVmaFlags(MemoryUsage usage) {
|
||||
[[nodiscard]] VmaAllocationCreateFlags MemoryUsageVmaFlags(MemoryUsage usage) {
|
||||
switch (usage) {
|
||||
case MemoryUsage::Upload:
|
||||
case MemoryUsage::Stream:
|
||||
return VMA_ALLOCATION_CREATE_MAPPED_BIT |
|
||||
VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT;
|
||||
case MemoryUsage::Download:
|
||||
return VMA_ALLOCATION_CREATE_MAPPED_BIT | VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT;
|
||||
return VMA_ALLOCATION_CREATE_MAPPED_BIT |
|
||||
VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT;
|
||||
case MemoryUsage::DeviceLocal:
|
||||
return {};
|
||||
}
|
||||
return {};
|
||||
}
|
||||
}
|
||||
|
||||
[[nodiscard]] VmaMemoryUsage MemoryUsageVma(MemoryUsage usage) {
|
||||
[[nodiscard]] VmaMemoryUsage MemoryUsageVma(MemoryUsage usage) {
|
||||
switch (usage) {
|
||||
case MemoryUsage::DeviceLocal:
|
||||
case MemoryUsage::Stream:
|
||||
|
@ -89,152 +78,130 @@ struct Range {
|
|||
return VMA_MEMORY_USAGE_AUTO_PREFER_HOST;
|
||||
}
|
||||
return VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE;
|
||||
}
|
||||
|
||||
} // Anonymous namespace
|
||||
|
||||
class MemoryAllocation {
|
||||
public:
|
||||
explicit MemoryAllocation(MemoryAllocator* const allocator_, vk::DeviceMemory memory_,
|
||||
VkMemoryPropertyFlags properties, u64 allocation_size_, u32 type)
|
||||
: allocator{allocator_}, memory{std::move(memory_)}, allocation_size{allocation_size_},
|
||||
property_flags{properties}, shifted_memory_type{1U << type} {}
|
||||
|
||||
MemoryAllocation& operator=(const MemoryAllocation&) = delete;
|
||||
MemoryAllocation(const MemoryAllocation&) = delete;
|
||||
|
||||
MemoryAllocation& operator=(MemoryAllocation&&) = delete;
|
||||
MemoryAllocation(MemoryAllocation&&) = delete;
|
||||
|
||||
[[nodiscard]] std::optional<MemoryCommit> Commit(VkDeviceSize size, VkDeviceSize alignment) {
|
||||
const std::optional<u64> alloc = FindFreeRegion(size, alignment);
|
||||
if (!alloc) {
|
||||
// Signal out of memory, it'll try to do more allocations.
|
||||
return std::nullopt;
|
||||
}
|
||||
const Range range{
|
||||
.begin = *alloc,
|
||||
.end = *alloc + size,
|
||||
};
|
||||
commits.insert(std::ranges::upper_bound(commits, *alloc, {}, &Range::begin), range);
|
||||
return std::make_optional<MemoryCommit>(this, *memory, *alloc, *alloc + size);
|
||||
}
|
||||
|
||||
void Free(u64 begin) {
|
||||
const auto it = std::ranges::find(commits, begin, &Range::begin);
|
||||
ASSERT_MSG(it != commits.end(), "Invalid commit");
|
||||
commits.erase(it);
|
||||
if (commits.empty()) {
|
||||
// Do not call any code involving 'this' after this call, the object will be destroyed
|
||||
allocator->ReleaseMemory(this);
|
||||
}
|
||||
}
|
||||
|
||||
[[nodiscard]] std::span<u8> Map() {
|
||||
if (memory_mapped_span.empty()) {
|
||||
u8* const raw_pointer = memory.Map(0, allocation_size);
|
||||
memory_mapped_span = std::span<u8>(raw_pointer, allocation_size);
|
||||
}
|
||||
return memory_mapped_span;
|
||||
}
|
||||
|
||||
/// Returns whether this allocation is compatible with the arguments.
|
||||
[[nodiscard]] bool IsCompatible(VkMemoryPropertyFlags flags, u32 type_mask) const {
|
||||
return (flags & property_flags) == flags && (type_mask & shifted_memory_type) != 0;
|
||||
}
|
||||
|
||||
|
||||
private:
|
||||
[[nodiscard]] static constexpr u32 ShiftType(u32 type) {
|
||||
return 1U << type;
|
||||
// This avoids calling vkGetBufferMemoryRequirements* directly.
|
||||
template<typename T>
|
||||
static VkBuffer GetVkHandleFromBuffer(const T &buf) {
|
||||
if constexpr (requires { static_cast<VkBuffer>(buf); }) {
|
||||
return static_cast<VkBuffer>(buf);
|
||||
} else if constexpr (requires {{ buf.GetHandle() } -> std::convertible_to<VkBuffer>; }) {
|
||||
return buf.GetHandle();
|
||||
} else if constexpr (requires {{ buf.Handle() } -> std::convertible_to<VkBuffer>; }) {
|
||||
return buf.Handle();
|
||||
} else if constexpr (requires {{ buf.vk_handle() } -> std::convertible_to<VkBuffer>; }) {
|
||||
return buf.vk_handle();
|
||||
} else {
|
||||
static_assert(sizeof(T) == 0, "Cannot extract VkBuffer handle from vk::Buffer");
|
||||
return VK_NULL_HANDLE;
|
||||
}
|
||||
}
|
||||
|
||||
[[nodiscard]] std::optional<u64> FindFreeRegion(u64 size, u64 alignment) noexcept {
|
||||
ASSERT(std::has_single_bit(alignment));
|
||||
const u64 alignment_log2 = std::countr_zero(alignment);
|
||||
std::optional<u64> candidate;
|
||||
u64 iterator = 0;
|
||||
auto commit = commits.begin();
|
||||
while (iterator + size <= allocation_size) {
|
||||
candidate = candidate.value_or(iterator);
|
||||
if (commit == commits.end()) {
|
||||
break;
|
||||
}
|
||||
if (commit->Contains(*candidate, size)) {
|
||||
candidate = std::nullopt;
|
||||
}
|
||||
iterator = Common::AlignUpLog2(commit->end, alignment_log2);
|
||||
++commit;
|
||||
}
|
||||
return candidate;
|
||||
}
|
||||
} // namespace
|
||||
|
||||
MemoryAllocator* const allocator; ///< Parent memory allocation.
|
||||
const vk::DeviceMemory memory; ///< Vulkan memory allocation handler.
|
||||
const u64 allocation_size; ///< Size of this allocation.
|
||||
const VkMemoryPropertyFlags property_flags; ///< Vulkan memory property flags.
|
||||
const u32 shifted_memory_type; ///< Shifted Vulkan memory type.
|
||||
std::vector<Range> commits; ///< All commit ranges done from this allocation.
|
||||
std::span<u8> memory_mapped_span; ///< Memory mapped span. Empty if not queried before.
|
||||
};
|
||||
//MemoryCommit is now VMA-backed
|
||||
/// Wraps an existing VMA allocation.
///
/// @param alloc VMA allocator the allocation was made from; stored for later map/free calls.
/// @param a     VMA allocation handle this commit takes over.
/// @param info  Allocation info whose deviceMemory, offset, size and pMappedData are copied.
MemoryCommit::MemoryCommit(VmaAllocator alloc, VmaAllocation a,
                           const VmaAllocationInfo& info) noexcept
    : allocator{alloc},
      allocation{a},
      memory{info.deviceMemory},
      offset{info.offset},
      size{info.size},
      mapped_ptr{info.pMappedData} {}
|
||||
|
||||
MemoryCommit::MemoryCommit(MemoryAllocation* allocation_, VkDeviceMemory memory_, u64 begin_,
|
||||
u64 end_) noexcept
|
||||
: allocation{allocation_}, memory{memory_}, begin{begin_}, end{end_} {}
|
||||
MemoryCommit::~MemoryCommit() { Release(); }
|
||||
|
||||
MemoryCommit::~MemoryCommit() {
|
||||
Release();
|
||||
}
|
||||
|
||||
MemoryCommit& MemoryCommit::operator=(MemoryCommit&& rhs) noexcept {
|
||||
MemoryCommit::MemoryCommit(MemoryCommit &&rhs) noexcept
|
||||
: allocator{std::exchange(rhs.allocator, nullptr)},
|
||||
allocation{std::exchange(rhs.allocation, nullptr)},
|
||||
memory{std::exchange(rhs.memory, VK_NULL_HANDLE)},
|
||||
offset{std::exchange(rhs.offset, 0)},
|
||||
size{std::exchange(rhs.size, 0)},
|
||||
mapped_ptr{std::exchange(rhs.mapped_ptr, nullptr)} {}
|
||||
|
||||
MemoryCommit &MemoryCommit::operator=(MemoryCommit &&rhs) noexcept {
|
||||
if (this != &rhs) {
|
||||
Release();
|
||||
allocator = std::exchange(rhs.allocator, nullptr);
|
||||
allocation = std::exchange(rhs.allocation, nullptr);
|
||||
memory = rhs.memory;
|
||||
begin = rhs.begin;
|
||||
end = rhs.end;
|
||||
span = std::exchange(rhs.span, std::span<u8>{});
|
||||
memory = std::exchange(rhs.memory, VK_NULL_HANDLE);
|
||||
offset = std::exchange(rhs.offset, 0);
|
||||
size = std::exchange(rhs.size, 0);
|
||||
mapped_ptr = std::exchange(rhs.mapped_ptr, nullptr);
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
MemoryCommit::MemoryCommit(MemoryCommit&& rhs) noexcept
|
||||
: allocation{std::exchange(rhs.allocation, nullptr)}, memory{rhs.memory}, begin{rhs.begin},
|
||||
end{rhs.end}, span{std::exchange(rhs.span, std::span<u8>{})} {}
|
||||
|
||||
std::span<u8> MemoryCommit::Map() {
|
||||
if (span.empty()) {
|
||||
span = allocation->Map().subspan(begin, end - begin);
|
||||
}
|
||||
return span;
|
||||
}
|
||||
|
||||
void MemoryCommit::Release() {
|
||||
if (allocation) {
|
||||
allocation->Free(begin);
|
||||
/// Returns a writable view over the allocation's host mapping.
///
/// If no pointer is cached yet, the allocation is mapped through vmaMapMemory and the
/// pointer is kept for later calls.
///
/// @returns A span of `size` bytes (clamped to what size_t can hold) starting at the
///          mapped pointer, or an empty span if there is no allocation or mapping fails.
std::span<u8> MemoryCommit::Map() {
    if (!allocation) {
        return {};
    }
    if (!mapped_ptr && vmaMapMemory(allocator, allocation, &mapped_ptr) != VK_SUCCESS) {
        return {};
    }
    // VkDeviceSize may be wider than size_t; clamp before narrowing.
    const VkDeviceSize clamped =
        std::min<VkDeviceSize>(size, std::numeric_limits<size_t>::max());
    const auto byte_count = static_cast<size_t>(clamped);
    auto* const bytes = static_cast<u8*>(mapped_ptr);
    return std::span<u8>{bytes, byte_count};
}
|
||||
}
|
||||
|
||||
MemoryAllocator::MemoryAllocator(const Device& device_)
|
||||
/// Returns a read-only view over the allocation's host mapping.
///
/// If no pointer is cached yet, the allocation is mapped through vmaMapMemory and the
/// resulting pointer is cached on this object despite the const qualifier.
///
/// @returns A span of `size` bytes (clamped to what size_t can hold) starting at the
///          mapped pointer, or an empty span if there is no allocation or mapping fails.
std::span<const u8> MemoryCommit::Map() const {
    if (!allocation) {
        return {};
    }
    if (!mapped_ptr) {
        void* fresh_mapping = nullptr;
        if (vmaMapMemory(allocator, allocation, &fresh_mapping) != VK_SUCCESS) {
            return {};
        }
        // NOTE(review): this writes through const_cast; that is undefined behavior if the
        // MemoryCommit object itself was declared const. Consider making the cached
        // pointer `mutable` instead.
        const_cast<MemoryCommit*>(this)->mapped_ptr = fresh_mapping;
    }
    // VkDeviceSize may be wider than size_t; clamp before narrowing.
    const VkDeviceSize clamped =
        std::min<VkDeviceSize>(size, std::numeric_limits<size_t>::max());
    const auto byte_count = static_cast<size_t>(clamped);
    const auto* const bytes = static_cast<const u8*>(mapped_ptr);
    return std::span<const u8>{bytes, byte_count};
}
|
||||
|
||||
void MemoryCommit::Unmap()
|
||||
{
|
||||
if (allocation && mapped_ptr) {
|
||||
vmaUnmapMemory(allocator, allocation);
|
||||
mapped_ptr = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
void MemoryCommit::Release() {
|
||||
if (allocation && allocator) {
|
||||
if (mapped_ptr) {
|
||||
vmaUnmapMemory(allocator, allocation);
|
||||
mapped_ptr = nullptr;
|
||||
}
|
||||
vmaFreeMemory(allocator, allocation);
|
||||
}
|
||||
allocation = nullptr;
|
||||
allocator = nullptr;
|
||||
memory = VK_NULL_HANDLE;
|
||||
offset = 0;
|
||||
size = 0;
|
||||
}
|
||||
|
||||
MemoryAllocator::MemoryAllocator(const Device &device_)
|
||||
: device{device_}, allocator{device.GetAllocator()},
|
||||
properties{device_.GetPhysical().GetMemoryProperties().memoryProperties},
|
||||
buffer_image_granularity{
|
||||
device_.GetPhysical().GetProperties().limits.bufferImageGranularity} {
|
||||
// GPUs not supporting rebar may only have a region with less than 256MB host visible/device
|
||||
// local memory. In that case, opening 2 RenderDoc captures side-by-side is not possible due to
|
||||
// the heap running out of memory. With RenderDoc attached and only a small host/device region,
|
||||
// only allow the stream buffer in this memory heap.
|
||||
if (device.HasDebuggingToolAttached()) {
|
||||
|
||||
// Preserve the previous "RenderDoc small heap" trimming behavior that we had in original vma minus the heap bug
|
||||
if (device.HasDebuggingToolAttached())
|
||||
{
|
||||
using namespace Common::Literals;
|
||||
ForEachDeviceLocalHostVisibleHeap(device, [this](size_t index, VkMemoryHeap& heap) {
|
||||
ForEachDeviceLocalHostVisibleHeap(device, [this](size_t heap_idx, VkMemoryHeap &heap) {
|
||||
if (heap.size <= 256_MiB) {
|
||||
valid_memory_types &= ~(1u << index);
|
||||
for (u32 t = 0; t < properties.memoryTypeCount; ++t) {
|
||||
if (properties.memoryTypes[t].heapIndex == heap_idx) {
|
||||
valid_memory_types &= ~(1u << t);
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
MemoryAllocator::~MemoryAllocator() = default;
|
||||
MemoryAllocator::~MemoryAllocator() = default;
|
||||
|
||||
vk::Image MemoryAllocator::CreateImage(const VkImageCreateInfo& ci) const {
|
||||
vk::Image MemoryAllocator::CreateImage(const VkImageCreateInfo &ci) const
|
||||
{
|
||||
const VmaAllocationCreateInfo alloc_ci = {
|
||||
.flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT,
|
||||
.usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE,
|
||||
|
@ -248,14 +215,14 @@ vk::Image MemoryAllocator::CreateImage(const VkImageCreateInfo& ci) const {
|
|||
|
||||
VkImage handle{};
|
||||
VmaAllocation allocation{};
|
||||
|
||||
vk::Check(vmaCreateImage(allocator, &ci, &alloc_ci, &handle, &allocation, nullptr));
|
||||
|
||||
return vk::Image(handle, ci.usage, *device.GetLogical(), allocator, allocation,
|
||||
device.GetDispatchLoader());
|
||||
}
|
||||
}
|
||||
|
||||
vk::Buffer MemoryAllocator::CreateBuffer(const VkBufferCreateInfo& ci, MemoryUsage usage) const {
|
||||
vk::Buffer
|
||||
MemoryAllocator::CreateBuffer(const VkBufferCreateInfo &ci, MemoryUsage usage) const
|
||||
{
|
||||
const VmaAllocationCreateInfo alloc_ci = {
|
||||
.flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT | MemoryUsageVmaFlags(usage),
|
||||
.usage = MemoryUsageVma(usage),
|
||||
|
@ -275,125 +242,84 @@ vk::Buffer MemoryAllocator::CreateBuffer(const VkBufferCreateInfo& ci, MemoryUsa
|
|||
vk::Check(vmaCreateBuffer(allocator, &ci, &alloc_ci, &handle, &allocation, &alloc_info));
|
||||
vmaGetAllocationMemoryProperties(allocator, allocation, &property_flags);
|
||||
|
||||
u8* data = reinterpret_cast<u8*>(alloc_info.pMappedData);
|
||||
u8 *data = reinterpret_cast<u8 *>(alloc_info.pMappedData);
|
||||
const std::span<u8> mapped_data = data ? std::span<u8>{data, ci.size} : std::span<u8>{};
|
||||
const bool is_coherent = property_flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
|
||||
const bool is_coherent = (property_flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) != 0;
|
||||
|
||||
return vk::Buffer(handle, *device.GetLogical(), allocator, allocation, mapped_data, is_coherent,
|
||||
return vk::Buffer(handle, *device.GetLogical(), allocator, allocation, mapped_data,
|
||||
is_coherent,
|
||||
device.GetDispatchLoader());
|
||||
}
|
||||
|
||||
MemoryCommit MemoryAllocator::Commit(const VkMemoryRequirements& requirements, MemoryUsage usage) {
|
||||
// Find the fastest memory flags we can afford with the current requirements
|
||||
const u32 type_mask = requirements.memoryTypeBits;
|
||||
const VkMemoryPropertyFlags usage_flags = MemoryUsagePropertyFlags(usage);
|
||||
const VkMemoryPropertyFlags flags = MemoryPropertyFlags(type_mask, usage_flags);
|
||||
if (std::optional<MemoryCommit> commit = TryCommit(requirements, flags)) {
|
||||
return std::move(*commit);
|
||||
}
|
||||
// Commit has failed, allocate more memory.
|
||||
const u64 chunk_size = AllocationChunkSize(requirements.size);
|
||||
if (!TryAllocMemory(flags, type_mask, chunk_size)) {
|
||||
// TODO(Rodrigo): Handle out of memory situations in some way like flushing to guest memory.
|
||||
throw vk::Exception(VK_ERROR_OUT_OF_DEVICE_MEMORY);
|
||||
}
|
||||
// Commit again, this time it won't fail since there's a fresh allocation above.
|
||||
// If it does, there's a bug.
|
||||
return TryCommit(requirements, flags).value();
|
||||
}
|
||||
|
||||
bool MemoryAllocator::TryAllocMemory(VkMemoryPropertyFlags flags, u32 type_mask, u64 size) {
|
||||
const auto type_opt = FindType(flags, type_mask);
|
||||
if (!type_opt) {
|
||||
return false;
|
||||
MemoryCommit MemoryAllocator::Commit(const VkMemoryRequirements &reqs, MemoryUsage usage)
|
||||
{
|
||||
const auto vma_usage = MemoryUsageVma(usage);
|
||||
VmaAllocationCreateInfo ci{};
|
||||
ci.flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT | MemoryUsageVmaFlags(usage);
|
||||
ci.usage = vma_usage;
|
||||
ci.memoryTypeBits = reqs.memoryTypeBits & valid_memory_types;
|
||||
ci.requiredFlags = 0;
|
||||
ci.preferredFlags = MemoryUsagePreferredVmaFlags(usage);
|
||||
|
||||
VmaAllocation a{};
|
||||
VmaAllocationInfo info{};
|
||||
|
||||
VkResult res = vmaAllocateMemory(allocator, &reqs, &ci, &a, &info);
|
||||
|
||||
if (res != VK_SUCCESS) {
|
||||
// Relax 1: drop budget constraint
|
||||
auto ci2 = ci;
|
||||
ci2.flags &= ~VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT;
|
||||
res = vmaAllocateMemory(allocator, &reqs, &ci2, &a, &info);
|
||||
|
||||
// Relax 2: if we preferred DEVICE_LOCAL, drop that preference
|
||||
if (res != VK_SUCCESS && (ci.preferredFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)) {
|
||||
auto ci3 = ci2;
|
||||
ci3.preferredFlags &= ~VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
|
||||
res = vmaAllocateMemory(allocator, &reqs, &ci3, &a, &info);
|
||||
}
|
||||
}
|
||||
|
||||
// Adreno stands firm
|
||||
const u64 aligned_size = (device.GetDriverID() == VK_DRIVER_ID_QUALCOMM_PROPRIETARY) ?
|
||||
Common::AlignUp(size, 4096) :
|
||||
size;
|
||||
|
||||
vk::DeviceMemory memory = device.GetLogical().TryAllocateMemory({
|
||||
.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
|
||||
.pNext = nullptr,
|
||||
.allocationSize = aligned_size,
|
||||
.memoryTypeIndex = *type_opt,
|
||||
});
|
||||
|
||||
if (!memory) {
|
||||
return false;
|
||||
vk::Check(res);
|
||||
return MemoryCommit(allocator, a, info);
|
||||
}
|
||||
|
||||
allocations.push_back(
|
||||
std::make_unique<MemoryAllocation>(this, std::move(memory), flags, aligned_size, *type_opt));
|
||||
return true;
|
||||
}
|
||||
MemoryCommit MemoryAllocator::Commit(const vk::Buffer &buffer, MemoryUsage usage) {
|
||||
// Allocate memory appropriate for this buffer automatically
|
||||
const auto vma_usage = MemoryUsageVma(usage);
|
||||
|
||||
void MemoryAllocator::ReleaseMemory(MemoryAllocation* alloc) {
|
||||
const auto it = std::ranges::find(allocations, alloc, &std::unique_ptr<MemoryAllocation>::get);
|
||||
ASSERT(it != allocations.end());
|
||||
allocations.erase(it);
|
||||
}
|
||||
VmaAllocationCreateInfo ci{};
|
||||
ci.flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT | MemoryUsageVmaFlags(usage);
|
||||
ci.usage = vma_usage;
|
||||
ci.requiredFlags = 0;
|
||||
ci.preferredFlags = MemoryUsagePreferredVmaFlags(usage);
|
||||
ci.pool = VK_NULL_HANDLE;
|
||||
ci.pUserData = nullptr;
|
||||
ci.priority = 0.0f;
|
||||
|
||||
std::optional<MemoryCommit> MemoryAllocator::TryCommit(const VkMemoryRequirements& requirements,
|
||||
VkMemoryPropertyFlags flags) {
|
||||
// Conservative, spec-compliant alignment for suballocation
|
||||
VkDeviceSize eff_align = requirements.alignment;
|
||||
const auto& limits = device.GetPhysical().GetProperties().limits;
|
||||
if ((flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) &&
|
||||
!(flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
|
||||
// Non-coherent memory must be invalidated on atom boundary
|
||||
if (limits.nonCoherentAtomSize > eff_align) eff_align = limits.nonCoherentAtomSize;
|
||||
}
|
||||
// Separate buffers to avoid stalls on tilers
|
||||
if (buffer_image_granularity > eff_align) {
|
||||
eff_align = buffer_image_granularity;
|
||||
}
|
||||
eff_align = std::bit_ceil(eff_align);
|
||||
const VkBuffer raw = *buffer;
|
||||
|
||||
for (auto& allocation : allocations) {
|
||||
if (!allocation->IsCompatible(flags, requirements.memoryTypeBits)) {
|
||||
continue;
|
||||
}
|
||||
if (auto commit = allocation->Commit(requirements.size, eff_align)) {
|
||||
return commit;
|
||||
}
|
||||
}
|
||||
if ((flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) != 0) {
|
||||
// Look for non device local commits on failure
|
||||
return TryCommit(requirements, flags & ~VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
|
||||
}
|
||||
return std::nullopt;
|
||||
}
|
||||
VmaAllocation a{};
|
||||
VmaAllocationInfo info{};
|
||||
|
||||
VkMemoryPropertyFlags MemoryAllocator::MemoryPropertyFlags(u32 type_mask,
|
||||
VkMemoryPropertyFlags flags) const {
|
||||
if (FindType(flags, type_mask)) {
|
||||
// Found a memory type with those requirements
|
||||
return flags;
|
||||
}
|
||||
if ((flags & VK_MEMORY_PROPERTY_HOST_CACHED_BIT) != 0) {
|
||||
// Remove host cached bit in case it's not supported
|
||||
return MemoryPropertyFlags(type_mask, flags & ~VK_MEMORY_PROPERTY_HOST_CACHED_BIT);
|
||||
}
|
||||
if ((flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) != 0) {
|
||||
// Remove device local, if it's not supported by the requested resource
|
||||
return MemoryPropertyFlags(type_mask, flags & ~VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
|
||||
}
|
||||
ASSERT_MSG(false, "No compatible memory types found");
|
||||
return 0;
|
||||
}
|
||||
// Let VMA infer memory requirements from the buffer
|
||||
VkResult res = vmaAllocateMemoryForBuffer(allocator, raw, &ci, &a, &info);
|
||||
|
||||
std::optional<u32> MemoryAllocator::FindType(VkMemoryPropertyFlags flags, u32 type_mask) const {
|
||||
for (u32 type_index = 0; type_index < properties.memoryTypeCount; ++type_index) {
|
||||
const VkMemoryPropertyFlags type_flags = properties.memoryTypes[type_index].propertyFlags;
|
||||
if ((type_mask & (1U << type_index)) != 0 && (type_flags & flags) == flags) {
|
||||
// The type matches in type and in the wanted properties.
|
||||
return type_index;
|
||||
if (res != VK_SUCCESS) {
|
||||
auto ci2 = ci;
|
||||
ci2.flags &= ~VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT;
|
||||
res = vmaAllocateMemoryForBuffer(allocator, raw, &ci2, &a, &info);
|
||||
|
||||
if (res != VK_SUCCESS && (ci.preferredFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)) {
|
||||
auto ci3 = ci2;
|
||||
ci3.preferredFlags &= ~VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
|
||||
res = vmaAllocateMemoryForBuffer(allocator, raw, &ci3, &a, &info);
|
||||
}
|
||||
}
|
||||
// Failed to find index
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
vk::Check(res);
|
||||
vk::Check(vmaBindBufferMemory2(allocator, a, 0, raw, nullptr));
|
||||
return MemoryCommit(allocator, a, info);
|
||||
}
|
||||
|
||||
} // namespace Vulkan
|
||||
|
|
|
@ -1,3 +1,6 @@
|
|||
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||
|
||||
// SPDX-FileCopyrightText: Copyright 2019 yuzu Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
|
@ -6,84 +9,85 @@
|
|||
#include <memory>
|
||||
#include <span>
|
||||
#include <vector>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "video_core/vulkan_common/vulkan_device.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
VK_DEFINE_HANDLE(VmaAllocator)
|
||||
#include "video_core/vulkan_common/vma.h"
|
||||
|
||||
namespace Vulkan {
|
||||
|
||||
class Device;
|
||||
class MemoryMap;
|
||||
class MemoryAllocation;
|
||||
class Device;
|
||||
|
||||
/// Hints and requirements for the backing memory type of a commit
|
||||
enum class MemoryUsage {
|
||||
DeviceLocal, ///< Requests device local host visible buffer, falling back to device local
|
||||
///< memory.
|
||||
enum class MemoryUsage {
|
||||
DeviceLocal, ///< Requests device local host visible buffer, falling back to device local memory.
|
||||
Upload, ///< Requires a host visible memory type optimized for CPU to GPU uploads
|
||||
Download, ///< Requires a host visible memory type optimized for GPU to CPU readbacks
|
||||
Stream, ///< Requests device local host visible buffer, falling back host memory.
|
||||
};
|
||||
};
|
||||
|
||||
template <typename F>
|
||||
void ForEachDeviceLocalHostVisibleHeap(const Device& device, F&& f) {
|
||||
template<typename F>
|
||||
void ForEachDeviceLocalHostVisibleHeap(const Device &device, F &&f) {
|
||||
auto memory_props = device.GetPhysical().GetMemoryProperties().memoryProperties;
|
||||
for (size_t i = 0; i < memory_props.memoryTypeCount; i++) {
|
||||
auto& memory_type = memory_props.memoryTypes[i];
|
||||
auto &memory_type = memory_props.memoryTypes[i];
|
||||
if ((memory_type.propertyFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) &&
|
||||
(memory_type.propertyFlags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)) {
|
||||
f(memory_type.heapIndex, memory_props.memoryHeaps[memory_type.heapIndex]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Ownership handle of a memory commitment (real VMA allocation).
|
||||
class MemoryCommit {
|
||||
public:
|
||||
MemoryCommit() noexcept = default;
|
||||
|
||||
MemoryCommit(VmaAllocator allocator, VmaAllocation allocation,
|
||||
const VmaAllocationInfo &info) noexcept;
|
||||
|
||||
/// Ownership handle of a memory commitment.
|
||||
/// Points to a subregion of a memory allocation.
|
||||
class MemoryCommit {
|
||||
public:
|
||||
explicit MemoryCommit() noexcept = default;
|
||||
explicit MemoryCommit(MemoryAllocation* allocation_, VkDeviceMemory memory_, u64 begin_,
|
||||
u64 end_) noexcept;
|
||||
~MemoryCommit();
|
||||
|
||||
MemoryCommit& operator=(MemoryCommit&&) noexcept;
|
||||
MemoryCommit(MemoryCommit&&) noexcept;
|
||||
MemoryCommit(const MemoryCommit &) = delete;
|
||||
|
||||
MemoryCommit& operator=(const MemoryCommit&) = delete;
|
||||
MemoryCommit(const MemoryCommit&) = delete;
|
||||
MemoryCommit &operator=(const MemoryCommit &) = delete;
|
||||
|
||||
/// Returns a host visible memory map.
|
||||
/// It will map the backing allocation if it hasn't been mapped before.
|
||||
std::span<u8> Map();
|
||||
MemoryCommit(MemoryCommit &&) noexcept;
|
||||
|
||||
/// Returns the Vulkan memory handler.
|
||||
VkDeviceMemory Memory() const {
|
||||
return memory;
|
||||
}
|
||||
MemoryCommit &operator=(MemoryCommit &&) noexcept;
|
||||
|
||||
/// Returns the start position of the commit relative to the allocation.
|
||||
VkDeviceSize Offset() const {
|
||||
return static_cast<VkDeviceSize>(begin);
|
||||
}
|
||||
[[nodiscard]] std::span<u8> Map();
|
||||
|
||||
private:
|
||||
[[nodiscard]] std::span<const u8> Map() const;
|
||||
|
||||
void Unmap();
|
||||
|
||||
explicit operator bool() const noexcept { return allocation != nullptr; }
|
||||
|
||||
VkDeviceMemory Memory() const noexcept { return memory; }
|
||||
|
||||
VkDeviceSize Offset() const noexcept { return offset; }
|
||||
|
||||
VkDeviceSize Size() const noexcept { return size; }
|
||||
|
||||
VmaAllocation Allocation() const noexcept { return allocation; }
|
||||
|
||||
private:
|
||||
void Release();
|
||||
|
||||
MemoryAllocation* allocation{}; ///< Pointer to the large memory allocation.
|
||||
VkDeviceMemory memory{}; ///< Vulkan device memory handler.
|
||||
u64 begin{}; ///< Beginning offset in bytes to where the commit exists.
|
||||
u64 end{}; ///< Offset in bytes where the commit ends.
|
||||
std::span<u8> span; ///< Host visible memory span. Empty if not queried before.
|
||||
};
|
||||
VmaAllocator allocator{}; ///< VMA allocator
|
||||
VmaAllocation allocation{}; ///< VMA allocation handle
|
||||
VkDeviceMemory memory{}; ///< Underlying VkDeviceMemory chosen by VMA
|
||||
VkDeviceSize offset{}; ///< Offset of this allocation inside VkDeviceMemory
|
||||
VkDeviceSize size{}; ///< Size of the allocation
|
||||
void *mapped_ptr{}; ///< Optional persistent mapped pointer
|
||||
};
|
||||
|
||||
/// Memory allocator container.
|
||||
/// Allocates and releases memory allocations on demand.
|
||||
class MemoryAllocator {
|
||||
friend MemoryAllocation;
|
||||
|
||||
public:
|
||||
class MemoryAllocator {
|
||||
public:
|
||||
/**
|
||||
* Construct memory allocator
|
||||
*
|
||||
|
@ -91,15 +95,17 @@ public:
|
|||
*
|
||||
* @throw vk::Exception on failure
|
||||
*/
|
||||
explicit MemoryAllocator(const Device& device_);
|
||||
explicit MemoryAllocator(const Device &device_);
|
||||
|
||||
~MemoryAllocator();
|
||||
|
||||
MemoryAllocator& operator=(const MemoryAllocator&) = delete;
|
||||
MemoryAllocator(const MemoryAllocator&) = delete;
|
||||
MemoryAllocator &operator=(const MemoryAllocator &) = delete;
|
||||
|
||||
vk::Image CreateImage(const VkImageCreateInfo& ci) const;
|
||||
MemoryAllocator(const MemoryAllocator &) = delete;
|
||||
|
||||
vk::Buffer CreateBuffer(const VkBufferCreateInfo& ci, MemoryUsage usage) const;
|
||||
vk::Image CreateImage(const VkImageCreateInfo &ci) const;
|
||||
|
||||
vk::Buffer CreateBuffer(const VkBufferCreateInfo &ci, MemoryUsage usage) const;
|
||||
|
||||
/**
|
||||
* Commits a memory with the specified requirements.
|
||||
|
@ -109,35 +115,28 @@ public:
|
|||
*
|
||||
* @returns A memory commit.
|
||||
*/
|
||||
MemoryCommit Commit(const VkMemoryRequirements& requirements, MemoryUsage usage);
|
||||
MemoryCommit Commit(const VkMemoryRequirements &requirements, MemoryUsage usage);
|
||||
|
||||
/// Commits memory required by the buffer and binds it.
|
||||
MemoryCommit Commit(const vk::Buffer& buffer, MemoryUsage usage);
|
||||
/// Commits memory required by the buffer and binds it (for buffers created outside VMA).
|
||||
MemoryCommit Commit(const vk::Buffer &buffer, MemoryUsage usage);
|
||||
|
||||
private:
|
||||
/// Tries to allocate a chunk of memory.
|
||||
bool TryAllocMemory(VkMemoryPropertyFlags flags, u32 type_mask, u64 size);
|
||||
private:
|
||||
/// Returns true when `u` is one of the VMA_MEMORY_USAGE_AUTO* values
/// (AUTO, AUTO_PREFER_DEVICE or AUTO_PREFER_HOST), false for any other usage.
static bool IsAutoUsage(VmaMemoryUsage u) noexcept {
    return u == VMA_MEMORY_USAGE_AUTO || u == VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE ||
           u == VMA_MEMORY_USAGE_AUTO_PREFER_HOST;
}
|
||||
|
||||
/// Releases a chunk of memory.
|
||||
void ReleaseMemory(MemoryAllocation* alloc);
|
||||
|
||||
/// Tries to allocate a memory commit.
|
||||
std::optional<MemoryCommit> TryCommit(const VkMemoryRequirements& requirements,
|
||||
VkMemoryPropertyFlags flags);
|
||||
|
||||
/// Returns the fastest compatible memory property flags from the wanted flags.
|
||||
VkMemoryPropertyFlags MemoryPropertyFlags(u32 type_mask, VkMemoryPropertyFlags flags) const;
|
||||
|
||||
/// Returns index to the fastest memory type compatible with the passed requirements.
|
||||
std::optional<u32> FindType(VkMemoryPropertyFlags flags, u32 type_mask) const;
|
||||
|
||||
const Device& device; ///< Device handle.
|
||||
VmaAllocator allocator; ///< Vma allocator.
|
||||
const VkPhysicalDeviceMemoryProperties properties; ///< Physical device properties.
|
||||
std::vector<std::unique_ptr<MemoryAllocation>> allocations; ///< Current allocations.
|
||||
VkDeviceSize buffer_image_granularity; // The granularity for adjacent offsets between buffers
|
||||
// and optimal images
|
||||
const Device &device; ///< Device handle.
|
||||
VmaAllocator allocator; ///< VMA allocator.
|
||||
const VkPhysicalDeviceMemoryProperties properties; ///< Physical device memory properties.
|
||||
VkDeviceSize buffer_image_granularity; ///< Adjacent buffer/image granularity
|
||||
u32 valid_memory_types{~0u};
|
||||
};
|
||||
};
|
||||
|
||||
} // namespace Vulkan
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue