Fix conflicts after new VMA, restore some lost fixes for A6XX (#387)

There were a lot of duplicate definitions and conflicts after the new VMA changes in master.
All of them have been removed or deduplicated.
Some A6XX-specific fixes that would have been lost in translation have been restored.
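For reference, the restored A6XX-specific behaviour is the 4 KiB alignment applied on the Qualcomm proprietary driver before memory requirements are handed to VMA. A minimal sketch of the pattern, mirroring the Commit() hunk in the diff below (Common::AlignUp is the existing align-up helper):

    // Round the requested size up to 4 KiB on Qualcomm's proprietary driver (Adreno/A6XX)
    // before allocating through VMA; other drivers keep the original requirements.
    VkMemoryRequirements adjusted_reqs = reqs;
    if (device.GetDriverID() == VK_DRIVER_ID_QUALCOMM_PROPRIETARY) {
        adjusted_reqs.size = Common::AlignUp(reqs.size, 4096);
    }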

Reviewed-on: #387
Reviewed-by: crueter <crueter@eden-emu.dev>
Co-authored-by: Calchan <denis.dupeyron@gmail.com>
Co-committed-by: Calchan <denis.dupeyron@gmail.com>
Calchan 2025-09-05 19:43:35 +02:00 committed by crueter
parent 60415ad1db
commit f8f5abefb1

@@ -242,7 +242,11 @@ namespace Vulkan {
VmaAllocation allocation{};
VkMemoryPropertyFlags property_flags{};
vk::Check(vmaCreateBuffer(allocator, &ci, &alloc_ci, &handle, &allocation, &alloc_info));
VkResult result = vmaCreateBuffer(allocator, &ci, &alloc_ci, &handle, &allocation, &alloc_info);
if (result == VK_ERROR_OUT_OF_DEVICE_MEMORY) {
LOG_ERROR(Render_Vulkan, "Out of memory creating buffer (size: {})", ci.size);
}
vk::Check(result);
vmaGetAllocationMemoryProperties(allocator, allocation, &property_flags);
u8 *data = reinterpret_cast<u8 *>(alloc_info.pMappedData);
@@ -256,30 +260,36 @@ namespace Vulkan {
MemoryCommit MemoryAllocator::Commit(const VkMemoryRequirements &reqs, MemoryUsage usage)
{
// Adreno stands firm - ensure 4KB alignment for Qualcomm GPUs
VkMemoryRequirements adjusted_reqs = reqs;
if (device.GetDriverID() == VK_DRIVER_ID_QUALCOMM_PROPRIETARY) {
adjusted_reqs.size = Common::AlignUp(reqs.size, 4096);
}
const auto vma_usage = MemoryUsageVma(usage);
VmaAllocationCreateInfo ci{};
ci.flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT | MemoryUsageVmaFlags(usage);
ci.usage = vma_usage;
ci.memoryTypeBits = reqs.memoryTypeBits & valid_memory_types;
ci.memoryTypeBits = adjusted_reqs.memoryTypeBits & valid_memory_types;
ci.requiredFlags = 0;
ci.preferredFlags = MemoryUsagePreferredVmaFlags(usage);
VmaAllocation a{};
VmaAllocationInfo info{};
VkResult res = vmaAllocateMemory(allocator, &reqs, &ci, &a, &info);
VkResult res = vmaAllocateMemory(allocator, &adjusted_reqs, &ci, &a, &info);
if (res != VK_SUCCESS) {
// Relax 1: drop budget constraint
auto ci2 = ci;
ci2.flags &= ~VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT;
res = vmaAllocateMemory(allocator, &reqs, &ci2, &a, &info);
res = vmaAllocateMemory(allocator, &adjusted_reqs, &ci2, &a, &info);
// Relax 2: if we preferred DEVICE_LOCAL, drop that preference
if (res != VK_SUCCESS && (ci.preferredFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT)) {
auto ci3 = ci2;
ci3.preferredFlags &= ~VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
res = vmaAllocateMemory(allocator, &reqs, &ci3, &a, &info);
res = vmaAllocateMemory(allocator, &adjusted_reqs, &ci3, &a, &info);
}
}
@@ -325,234 +335,6 @@ namespace Vulkan {
return MemoryCommit(allocator, a, info);
}
MemoryAllocator* const allocator; ///< Parent memory allocation.
const vk::DeviceMemory memory; ///< Vulkan memory allocation handler.
const u64 allocation_size; ///< Size of this allocation.
const VkMemoryPropertyFlags property_flags; ///< Vulkan memory property flags.
const u32 shifted_memory_type; ///< Shifted Vulkan memory type.
std::vector<Range> commits; ///< All commit ranges done from this allocation.
std::span<u8> memory_mapped_span; ///< Memory mapped span. Empty if not queried before.
};
MemoryCommit::MemoryCommit(MemoryAllocation* allocation_, VkDeviceMemory memory_, u64 begin_,
u64 end_) noexcept
: allocation{allocation_}, memory{memory_}, begin{begin_}, end{end_} {}
MemoryCommit::~MemoryCommit() {
Release();
}
MemoryCommit& MemoryCommit::operator=(MemoryCommit&& rhs) noexcept {
Release();
allocation = std::exchange(rhs.allocation, nullptr);
memory = rhs.memory;
begin = rhs.begin;
end = rhs.end;
span = std::exchange(rhs.span, std::span<u8>{});
return *this;
}
MemoryCommit::MemoryCommit(MemoryCommit&& rhs) noexcept
: allocation{std::exchange(rhs.allocation, nullptr)}, memory{rhs.memory}, begin{rhs.begin},
end{rhs.end}, span{std::exchange(rhs.span, std::span<u8>{})} {}
std::span<u8> MemoryCommit::Map() {
if (span.empty()) {
span = allocation->Map().subspan(begin, end - begin);
}
return span;
}
void MemoryCommit::Release() {
if (allocation) {
allocation->Free(begin);
}
}
MemoryAllocator::MemoryAllocator(const Device& device_)
: device{device_}, allocator{device.GetAllocator()},
properties{device_.GetPhysical().GetMemoryProperties().memoryProperties},
buffer_image_granularity{
device_.GetPhysical().GetProperties().limits.bufferImageGranularity} {
// GPUs not supporting rebar may only have a region with less than 256MB host visible/device
// local memory. In that case, opening 2 RenderDoc captures side-by-side is not possible due to
// the heap running out of memory. With RenderDoc attached and only a small host/device region,
// only allow the stream buffer in this memory heap.
if (device.HasDebuggingToolAttached()) {
using namespace Common::Literals;
ForEachDeviceLocalHostVisibleHeap(device, [this](size_t index, VkMemoryHeap& heap) {
if (heap.size <= 256_MiB) {
valid_memory_types &= ~(1u << index);
}
});
}
}
MemoryAllocator::~MemoryAllocator() = default;
vk::Image MemoryAllocator::CreateImage(const VkImageCreateInfo& ci) const {
const VmaAllocationCreateInfo alloc_ci = {
.flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT,
.usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE,
.requiredFlags = 0,
.preferredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
.memoryTypeBits = 0,
.pool = VK_NULL_HANDLE,
.pUserData = nullptr,
.priority = 0.f,
};
VkImage handle{};
VmaAllocation allocation{};
vk::Check(vmaCreateImage(allocator, &ci, &alloc_ci, &handle, &allocation, nullptr));
return vk::Image(handle, ci.usage, *device.GetLogical(), allocator, allocation,
device.GetDispatchLoader());
}
vk::Buffer MemoryAllocator::CreateBuffer(const VkBufferCreateInfo& ci, MemoryUsage usage) const {
const VmaAllocationCreateInfo alloc_ci = {
.flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT | MemoryUsageVmaFlags(usage),
.usage = MemoryUsageVma(usage),
.requiredFlags = 0,
.preferredFlags = MemoryUsagePreferredVmaFlags(usage),
.memoryTypeBits = usage == MemoryUsage::Stream ? 0u : valid_memory_types,
.pool = VK_NULL_HANDLE,
.pUserData = nullptr,
.priority = 0.f,
};
VkBuffer handle{};
VmaAllocationInfo alloc_info{};
VmaAllocation allocation{};
VkMemoryPropertyFlags property_flags{};
VkResult result = vmaCreateBuffer(allocator, &ci, &alloc_ci, &handle, &allocation, &alloc_info);
if (result == VK_ERROR_OUT_OF_DEVICE_MEMORY) {
LOG_ERROR(Render_Vulkan, "Out of memory creating buffer (size: {})", ci.size);
}
vmaGetAllocationMemoryProperties(allocator, allocation, &property_flags);
u8* data = reinterpret_cast<u8*>(alloc_info.pMappedData);
const std::span<u8> mapped_data = data ? std::span<u8>{data, ci.size} : std::span<u8>{};
const bool is_coherent = property_flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
return vk::Buffer(handle, *device.GetLogical(), allocator, allocation, mapped_data, is_coherent,
device.GetDispatchLoader());
}
MemoryCommit MemoryAllocator::Commit(const VkMemoryRequirements& requirements, MemoryUsage usage) {
// Find the fastest memory flags we can afford with the current requirements
const u32 type_mask = requirements.memoryTypeBits;
const VkMemoryPropertyFlags usage_flags = MemoryUsagePropertyFlags(usage);
const VkMemoryPropertyFlags flags = MemoryPropertyFlags(type_mask, usage_flags);
if (std::optional<MemoryCommit> commit = TryCommit(requirements, flags)) {
return std::move(*commit);
}
// Commit has failed, allocate more memory.
const u64 chunk_size = AllocationChunkSize(requirements.size);
if (!TryAllocMemory(flags, type_mask, chunk_size)) {
// TODO(Rodrigo): Handle out of memory situations in some way like flushing to guest memory.
throw vk::Exception(VK_ERROR_OUT_OF_DEVICE_MEMORY);
}
// Commit again, this time it won't fail since there's a fresh allocation above.
// If it does, there's a bug.
return TryCommit(requirements, flags).value();
}
bool MemoryAllocator::TryAllocMemory(VkMemoryPropertyFlags flags, u32 type_mask, u64 size) {
const auto type_opt = FindType(flags, type_mask);
if (!type_opt) {
return false;
}
// Adreno stands firm
const u64 aligned_size = (device.GetDriverID() == VK_DRIVER_ID_QUALCOMM_PROPRIETARY) ?
Common::AlignUp(size, 4096) :
size;
vk::DeviceMemory memory = device.GetLogical().TryAllocateMemory({
.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
.pNext = nullptr,
.allocationSize = aligned_size,
.memoryTypeIndex = *type_opt,
});
if (!memory) {
return false;
}
allocations.push_back(
std::make_unique<MemoryAllocation>(this, std::move(memory), flags, aligned_size, *type_opt));
return true;
}
void MemoryAllocator::ReleaseMemory(MemoryAllocation* alloc) {
const auto it = std::ranges::find(allocations, alloc, &std::unique_ptr<MemoryAllocation>::get);
ASSERT(it != allocations.end());
allocations.erase(it);
}
std::optional<MemoryCommit> MemoryAllocator::TryCommit(const VkMemoryRequirements& requirements,
VkMemoryPropertyFlags flags) {
// Conservative, spec-compliant alignment for suballocation
VkDeviceSize eff_align = requirements.alignment;
const auto& limits = device.GetPhysical().GetProperties().limits;
if ((flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) &&
!(flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
// Non-coherent memory must be invalidated on atom boundary
if (limits.nonCoherentAtomSize > eff_align) eff_align = limits.nonCoherentAtomSize;
}
// Separate buffers to avoid stalls on tilers
if (buffer_image_granularity > eff_align) {
eff_align = buffer_image_granularity;
}
eff_align = std::bit_ceil(eff_align);
for (auto& allocation : allocations) {
if (!allocation->IsCompatible(flags, requirements.memoryTypeBits)) {
continue;
}
if (auto commit = allocation->Commit(requirements.size, eff_align)) {
return commit;
}
}
if ((flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) != 0) {
// Look for non device local commits on failure
return TryCommit(requirements, flags & ~VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
}
return std::nullopt;
}
VkMemoryPropertyFlags MemoryAllocator::MemoryPropertyFlags(u32 type_mask,
VkMemoryPropertyFlags flags) const {
if (FindType(flags, type_mask)) {
// Found a memory type with those requirements
return flags;
}
if ((flags & VK_MEMORY_PROPERTY_HOST_CACHED_BIT) != 0) {
// Remove host cached bit in case it's not supported
return MemoryPropertyFlags(type_mask, flags & ~VK_MEMORY_PROPERTY_HOST_CACHED_BIT);
}
if ((flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) != 0) {
// Remove device local, if it's not supported by the requested resource
return MemoryPropertyFlags(type_mask, flags & ~VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
}
ASSERT_MSG(false, "No compatible memory types found");
return 0;
}
std::optional<u32> MemoryAllocator::FindType(VkMemoryPropertyFlags flags, u32 type_mask) const {
for (u32 type_index = 0; type_index < properties.memoryTypeCount; ++type_index) {
const VkMemoryPropertyFlags type_flags = properties.memoryTypes[type_index].propertyFlags;
if ((type_mask & (1U << type_index)) != 0 && (type_flags & flags) == flags) {
// The type matches in type and in the wanted properties.
return type_index;
}
}
// Failed to find index
return std::nullopt;
}
} // namespace Vulkan
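For context, the allocation fallback added in Commit() can be read as a standalone pattern: try within budget first, then drop the budget constraint, and finally drop the DEVICE_LOCAL preference. A minimal sketch, assuming a valid VmaAllocator; the helper name is illustrative and not part of this commit:

    // Progressive relaxation: budget-constrained first, then ignore the budget,
    // finally give up on the DEVICE_LOCAL preference.
    VkResult AllocateWithFallback(VmaAllocator allocator, const VkMemoryRequirements& reqs,
                                  VmaAllocationCreateInfo ci, VmaAllocation* allocation,
                                  VmaAllocationInfo* info) {
        VkResult res = vmaAllocateMemory(allocator, &reqs, &ci, allocation, info);
        if (res != VK_SUCCESS) {
            ci.flags &= ~VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT;
            res = vmaAllocateMemory(allocator, &reqs, &ci, allocation, info);
        }
        if (res != VK_SUCCESS && (ci.preferredFlags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) != 0) {
            ci.preferredFlags &= ~VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
            res = vmaAllocateMemory(allocator, &reqs, &ci, allocation, info);
        }
        return res;
    }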