From 84b8e7bd50c4838c872f2c563f1170ea83466f13 Mon Sep 17 00:00:00 2001 From: Bix Date: Sat, 12 Jul 2025 20:15:19 +0200 Subject: [PATCH] revert cd394fc40f3847878e3646c07fa78a13df4cce30 revert revert [android] Snapdragon 865 patches (#23) revert [android] Snapdragon 865 patches (#23) Co-authored-by: Aleksandr Popovich Reviewed-on: https://git.bixed.xyz/Bix/eden/pulls/23 Reverted due to heavy performance hits on Android devices with higher specifications. These changes will be adjusted and included in a specific build for older A6XX devices, such as the 855, 860, 865 and 870. While they do fix critical issues with certain games crashing due to memory and VRAM usage, they hurt performance on SoCs that can run those games without these special flags. --- src/core/arm/nce/arm_nce.cpp | 2 +- src/core/hle/service/sockets/sfdnsres.cpp | 15 +- .../maxwell/translate/impl/warp_shuffle.cpp | 17 +- src/video_core/buffer_cache/buffer_cache.h | 10 +- .../buffer_cache/buffer_cache_base.h | 18 +- src/video_core/host1x/host1x.cpp | 5 + src/video_core/host1x/host1x.h | 5 + .../texture_cache/texture_cache_base.h | 11 + .../vulkan_common/vulkan_memory_allocator.cpp | 230 ++++++++++++++++++ 9 files changed, 305 insertions(+), 8 deletions(-) diff --git a/src/core/arm/nce/arm_nce.cpp b/src/core/arm/nce/arm_nce.cpp index 877e8ac3c7..0e0d72fc8a 100644 --- a/src/core/arm/nce/arm_nce.cpp +++ b/src/core/arm/nce/arm_nce.cpp @@ -227,7 +227,7 @@ HaltReason ArmNce::RunThread(Kernel::KThread* thread) { if (auto it = post_handlers.find(m_guest_ctx.pc); it != post_handlers.end()) { hr = ReturnToRunCodeByTrampoline(thread_params, &m_guest_ctx, it->second); } else { - hr = ReturnToRunCodeByExceptionLevelChange(m_thread_id, thread_params); + hr = ReturnToRunCodeByExceptionLevelChange(m_thread_id, thread_params); // Android: Use "process handle SIGUSR2 -n true -p true -s false" (and SIGURG) in LLDB when debugging } // Critical section for thread cleanup diff --git a/src/core/hle/service/sockets/sfdnsres.cpp 
b/src/core/hle/service/sockets/sfdnsres.cpp index b07bd3e58e..d9f01a65b8 100644 --- a/src/core/hle/service/sockets/sfdnsres.cpp +++ b/src/core/hle/service/sockets/sfdnsres.cpp @@ -53,6 +53,16 @@ enum class NetDbError : s32 { NoData = 4, }; +const std::vector blockedDomains = {"srv.nintendo.net", "battle.net", + "microsoft.com", "mojang.com", + "xboxlive.com", "minecraftservices.com"}; + +static bool IsBlockedHost(const std::string& host) { + return std::any_of( + blockedDomains.begin(), blockedDomains.end(), + [&host](const std::string& domain) { return host.find(domain) != std::string::npos; }); +} + static NetDbError GetAddrInfoErrorToNetDbError(GetAddrInfoError result) { // These combinations have been verified on console (but are not // exhaustive). @@ -154,7 +164,7 @@ static std::pair GetHostByNameRequestImpl(HLERequestConte // For now, ignore options, which are in input buffer 1 for GetHostByNameRequestWithOptions. // Prevent resolution of Nintendo servers - if (host.find("srv.nintendo.net") != std::string::npos) { + if (IsBlockedHost(host)) { LOG_WARNING(Network, "Resolution of hostname {} requested, returning EAI_AGAIN", host); return {0, GetAddrInfoError::AGAIN}; } @@ -271,7 +281,7 @@ static std::pair GetAddrInfoRequestImpl(HLERequestContext const std::string host = Common::StringFromBuffer(host_buffer); // Prevent resolution of Nintendo servers - if (host.find("srv.nintendo.net") != std::string::npos) { + if (IsBlockedHost(host)) { LOG_WARNING(Network, "Resolution of hostname {} requested, returning EAI_AGAIN", host); return {0, GetAddrInfoError::AGAIN}; } @@ -359,5 +369,4 @@ void SFDNSRES::ResolverSetOptionRequest(HLERequestContext& ctx) { rb.Push(ResultSuccess); rb.Push(0); // bsd errno } - } // namespace Service::Sockets diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp index 972eec8276..b904821619 100644 --- 
a/src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp @@ -7,6 +7,7 @@ #include "common/bit_field.h" #include "common/common_types.h" #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" +#include namespace Shader::Maxwell { namespace { @@ -36,6 +37,17 @@ enum class ShuffleMode : u64 { } } +bool IsKONA() { + std::ifstream machineFile("/sys/devices/soc0/machine"); + if (machineFile.is_open()) { + std::string line; + std::getline(machineFile, line); + if (line == "KONA") + return true; + } + return false; +} + void Shuffle(TranslatorVisitor& v, u64 insn, const IR::U32& index, const IR::U32& mask) { union { u64 insn; @@ -47,7 +59,10 @@ void Shuffle(TranslatorVisitor& v, u64 insn, const IR::U32& index, const IR::U32 const IR::U32 result{ShuffleOperation(v.ir, v.X(shfl.src_reg), index, mask, shfl.mode)}; v.ir.SetPred(shfl.pred, v.ir.GetInBoundsFromOp(result)); - v.X(shfl.dest_reg, result); + if (IsKONA()) + v.X(shfl.dest_reg, v.ir.Imm32(0xffffffff)); // This fixes the freeze for Retroid / Snapdragon SD865 + else + v.X(shfl.dest_reg, result); } } // Anonymous namespace diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 0cd6861b6d..42510f166f 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -26,7 +26,9 @@ BufferCache

::BufferCache(Tegra::MaxwellDeviceMemoryManager& device_memory_, R void(slot_buffers.insert(runtime, NullBufferParams{})); gpu_modified_ranges.Clear(); inline_buffer_id = NULL_BUFFER_ID; - +#ifdef ANDROID + immediately_free = (Settings::values.vram_usage_mode.GetValue() == Settings::VramUsageMode::Aggressive); +#endif if (!runtime.CanReportMemoryUsage()) { minimum_memory = DEFAULT_EXPECTED_MEMORY; critical_memory = DEFAULT_CRITICAL_MEMORY; @@ -1381,6 +1383,8 @@ void BufferCache

::JoinOverlap(BufferId new_buffer_id, BufferId overlap_id, }); new_buffer.MarkUsage(copies[0].dst_offset, copies[0].size); runtime.CopyBuffer(new_buffer, overlap, copies, true); + if (immediately_free) + runtime.Finish(); DeleteBuffer(overlap_id, true); } @@ -1671,7 +1675,9 @@ void BufferCache

::DeleteBuffer(BufferId buffer_id, bool do_not_mark) { } Unregister(buffer_id); - delayed_destruction_ring.Push(std::move(slot_buffers[buffer_id])); + + if (!do_not_mark || !immediately_free) + delayed_destruction_ring.Push(std::move(slot_buffers[buffer_id])); slot_buffers.erase(buffer_id); if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) { diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h index a45e9b35f1..eef21a18e5 100644 --- a/src/video_core/buffer_cache/buffer_cache_base.h +++ b/src/video_core/buffer_cache/buffer_cache_base.h @@ -154,7 +154,11 @@ template class BufferCache : public VideoCommon::ChannelSetupCaches { // Page size for caching purposes. // This is unrelated to the CPU page size and it can be changed as it seems optimal. +#ifdef ANDROID + static constexpr u32 CACHING_PAGEBITS = 12; +#else static constexpr u32 CACHING_PAGEBITS = 16; +#endif static constexpr u64 CACHING_PAGESIZE = u64{1} << CACHING_PAGEBITS; static constexpr bool IS_OPENGL = P::IS_OPENGL; @@ -168,9 +172,15 @@ class BufferCache : public VideoCommon::ChannelSetupCaches slot_buffers; - DelayedDestructionRing delayed_destruction_ring; +#ifdef ANDROID + static constexpr size_t TICKS_TO_DESTROY = 6; +#else + static constexpr size_t TICKS_TO_DESTROY = 8; +#endif + DelayedDestructionRing delayed_destruction_ring; const Tegra::Engines::DrawManager::IndirectParams* current_draw_indirect{}; @@ -478,6 +493,7 @@ private: u64 minimum_memory = 0; u64 critical_memory = 0; BufferId inline_buffer_id; + bool immediately_free = false; std::array> CACHING_PAGEBITS)> page_table; Common::ScratchBuffer tmp_buffer; diff --git a/src/video_core/host1x/host1x.cpp b/src/video_core/host1x/host1x.cpp index 293bca6d79..652d387031 100644 --- a/src/video_core/host1x/host1x.cpp +++ b/src/video_core/host1x/host1x.cpp @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // 
SPDX-FileCopyrightText: 2021 yuzu Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later @@ -18,9 +21,11 @@ Host1x::~Host1x() = default; void Host1x::StartDevice(s32 fd, ChannelType type, u32 syncpt) { switch (type) { case ChannelType::NvDec: + std::call_once(nvdec_first_init, []() {std::this_thread::sleep_for(std::chrono::milliseconds{500});}); // HACK: For Astroneer devices[fd] = std::make_unique(*this, fd, syncpt, frame_queue); break; case ChannelType::VIC: + std::call_once(vic_first_init, []() {std::this_thread::sleep_for(std::chrono::milliseconds{500});}); // HACK: For Astroneer devices[fd] = std::make_unique(*this, fd, syncpt, frame_queue); break; default: diff --git a/src/video_core/host1x/host1x.h b/src/video_core/host1x/host1x.h index 8debac93dd..5ecffa442c 100644 --- a/src/video_core/host1x/host1x.h +++ b/src/video_core/host1x/host1x.h @@ -1,3 +1,6 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + // SPDX-FileCopyrightText: 2021 yuzu Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later @@ -201,6 +204,8 @@ private: std::unique_ptr> allocator; FrameQueue frame_queue; std::unordered_map> devices; + std::once_flag nvdec_first_init; + std::once_flag vic_first_init; }; } // namespace Tegra::Host1x diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h index cbc27344b0..3f67828b86 100644 --- a/src/video_core/texture_cache/texture_cache_base.h +++ b/src/video_core/texture_cache/texture_cache_base.h @@ -110,10 +110,17 @@ class TextureCache : public VideoCommon::ChannelSetupCaches::max()}; + #ifdef ANDROID + static constexpr s64 TARGET_THRESHOLD = 3_GiB; + static constexpr s64 DEFAULT_EXPECTED_MEMORY = 1_GiB + 125_MiB; + static constexpr s64 DEFAULT_CRITICAL_MEMORY = 1_GiB + 625_MiB; + static constexpr size_t GC_EMERGENCY_COUNTS = 2; + #else static constexpr s64 TARGET_THRESHOLD = 4_GiB; static constexpr s64 
DEFAULT_EXPECTED_MEMORY = 1_GiB + 125_MiB; static constexpr s64 DEFAULT_CRITICAL_MEMORY = 1_GiB + 625_MiB; static constexpr size_t GC_EMERGENCY_COUNTS = 2; + #endif using Runtime = typename P::Runtime; using Image = typename P::Image; @@ -479,7 +486,11 @@ private: }; Common::LeastRecentlyUsedCache lru_cache; + #ifdef ANDROID + static constexpr size_t TICKS_TO_DESTROY = 6; + #else static constexpr size_t TICKS_TO_DESTROY = 8; +#endif DelayedDestructionRing sentenced_images; DelayedDestructionRing sentenced_image_view; DelayedDestructionRing sentenced_framebuffers; diff --git a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp index 4ab420afea..83e4857672 100644 --- a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp +++ b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp @@ -322,4 +322,234 @@ namespace Vulkan { return MemoryCommit(allocator, a, info); } + MemoryAllocator* const allocator; ///< Parent memory allocation. + const vk::DeviceMemory memory; ///< Vulkan memory allocation handler. + const u64 allocation_size; ///< Size of this allocation. + const VkMemoryPropertyFlags property_flags; ///< Vulkan memory property flags. + const u32 shifted_memory_type; ///< Shifted Vulkan memory type. + std::vector commits; ///< All commit ranges done from this allocation. + std::span memory_mapped_span; ///< Memory mapped span. Empty if not queried before. 
+}; + +MemoryCommit::MemoryCommit(MemoryAllocation* allocation_, VkDeviceMemory memory_, u64 begin_, + u64 end_) noexcept + : allocation{allocation_}, memory{memory_}, begin{begin_}, end{end_} {} + +MemoryCommit::~MemoryCommit() { + Release(); +} + +MemoryCommit& MemoryCommit::operator=(MemoryCommit&& rhs) noexcept { + Release(); + allocation = std::exchange(rhs.allocation, nullptr); + memory = rhs.memory; + begin = rhs.begin; + end = rhs.end; + span = std::exchange(rhs.span, std::span{}); + return *this; +} + +MemoryCommit::MemoryCommit(MemoryCommit&& rhs) noexcept + : allocation{std::exchange(rhs.allocation, nullptr)}, memory{rhs.memory}, begin{rhs.begin}, + end{rhs.end}, span{std::exchange(rhs.span, std::span{})} {} + +std::span MemoryCommit::Map() { + if (span.empty()) { + span = allocation->Map().subspan(begin, end - begin); + } + return span; +} + +void MemoryCommit::Release() { + if (allocation) { + allocation->Free(begin); + } +} + +MemoryAllocator::MemoryAllocator(const Device& device_) + : device{device_}, allocator{device.GetAllocator()}, + properties{device_.GetPhysical().GetMemoryProperties().memoryProperties}, + buffer_image_granularity{ + device_.GetPhysical().GetProperties().limits.bufferImageGranularity} { + // GPUs not supporting rebar may only have a region with less than 256MB host visible/device + // local memory. In that case, opening 2 RenderDoc captures side-by-side is not possible due to + // the heap running out of memory. With RenderDoc attached and only a small host/device region, + // only allow the stream buffer in this memory heap. 
+ if (device.HasDebuggingToolAttached()) { + using namespace Common::Literals; + ForEachDeviceLocalHostVisibleHeap(device, [this](size_t index, VkMemoryHeap& heap) { + if (heap.size <= 256_MiB) { + valid_memory_types &= ~(1u << index); + } + }); + } +} + +MemoryAllocator::~MemoryAllocator() = default; + +vk::Image MemoryAllocator::CreateImage(const VkImageCreateInfo& ci) const { + const VmaAllocationCreateInfo alloc_ci = { + .flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT, + .usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE, + .requiredFlags = 0, + .preferredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, + .memoryTypeBits = 0, + .pool = VK_NULL_HANDLE, + .pUserData = nullptr, + .priority = 0.f, + }; + + VkImage handle{}; + VmaAllocation allocation{}; + + vk::Check(vmaCreateImage(allocator, &ci, &alloc_ci, &handle, &allocation, nullptr)); + + return vk::Image(handle, ci.usage, *device.GetLogical(), allocator, allocation, + device.GetDispatchLoader()); +} + +vk::Buffer MemoryAllocator::CreateBuffer(const VkBufferCreateInfo& ci, MemoryUsage usage) const { + const VmaAllocationCreateInfo alloc_ci = { + .flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT | MemoryUsageVmaFlags(usage), + .usage = MemoryUsageVma(usage), + .requiredFlags = 0, + .preferredFlags = MemoryUsagePreferredVmaFlags(usage), + .memoryTypeBits = usage == MemoryUsage::Stream ? 0u : valid_memory_types, + .pool = VK_NULL_HANDLE, + .pUserData = nullptr, + .priority = 0.f, + }; + + VkBuffer handle{}; + VmaAllocationInfo alloc_info{}; + VmaAllocation allocation{}; + VkMemoryPropertyFlags property_flags{}; + + VkResult result = vmaCreateBuffer(allocator, &ci, &alloc_ci, &handle, &allocation, &alloc_info); + if (result == VK_ERROR_OUT_OF_DEVICE_MEMORY) { + LOG_ERROR(Render_Vulkan, "Out of memory creating buffer (size: {})", ci.size); + } + vmaGetAllocationMemoryProperties(allocator, allocation, &property_flags); + + u8* data = reinterpret_cast(alloc_info.pMappedData); + const std::span mapped_data = data ? 
std::span{data, ci.size} : std::span{}; + const bool is_coherent = property_flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; + + return vk::Buffer(handle, *device.GetLogical(), allocator, allocation, mapped_data, is_coherent, + device.GetDispatchLoader()); +} + +MemoryCommit MemoryAllocator::Commit(const VkMemoryRequirements& requirements, MemoryUsage usage) { + // Find the fastest memory flags we can afford with the current requirements + const u32 type_mask = requirements.memoryTypeBits; + const VkMemoryPropertyFlags usage_flags = MemoryUsagePropertyFlags(usage); + const VkMemoryPropertyFlags flags = MemoryPropertyFlags(type_mask, usage_flags); + if (std::optional commit = TryCommit(requirements, flags)) { + return std::move(*commit); + } + // Commit has failed, allocate more memory. + const u64 chunk_size = AllocationChunkSize(requirements.size); + if (!TryAllocMemory(flags, type_mask, chunk_size)) { + // TODO(Rodrigo): Handle out of memory situations in some way like flushing to guest memory. + throw vk::Exception(VK_ERROR_OUT_OF_DEVICE_MEMORY); + } + // Commit again, this time it won't fail since there's a fresh allocation above. + // If it does, there's a bug. + return TryCommit(requirements, flags).value(); + } + +bool MemoryAllocator::TryAllocMemory(VkMemoryPropertyFlags flags, u32 type_mask, u64 size) { + const auto type_opt = FindType(flags, type_mask); + if (!type_opt) { + return false; + } + + // Adreno stands firm + const u64 aligned_size = (device.GetDriverID() == VK_DRIVER_ID_QUALCOMM_PROPRIETARY) ? 
+ Common::AlignUp(size, 4096) : + size; + + vk::DeviceMemory memory = device.GetLogical().TryAllocateMemory({ + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, + .pNext = nullptr, + .allocationSize = aligned_size, + .memoryTypeIndex = *type_opt, + }); + + if (!memory) { + return false; + } + + allocations.push_back( + std::make_unique(this, std::move(memory), flags, aligned_size, *type_opt)); + return true; +} + +void MemoryAllocator::ReleaseMemory(MemoryAllocation* alloc) { + const auto it = std::ranges::find(allocations, alloc, &std::unique_ptr::get); + ASSERT(it != allocations.end()); + allocations.erase(it); +} + +std::optional MemoryAllocator::TryCommit(const VkMemoryRequirements& requirements, + VkMemoryPropertyFlags flags) { + // Conservative, spec-compliant alignment for suballocation + VkDeviceSize eff_align = requirements.alignment; + const auto& limits = device.GetPhysical().GetProperties().limits; + if ((flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) && + !(flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) { + // Non-coherent memory must be invalidated on atom boundary + if (limits.nonCoherentAtomSize > eff_align) eff_align = limits.nonCoherentAtomSize; + } + // Separate buffers to avoid stalls on tilers + if (buffer_image_granularity > eff_align) { + eff_align = buffer_image_granularity; + } + eff_align = std::bit_ceil(eff_align); + + for (auto& allocation : allocations) { + if (!allocation->IsCompatible(flags, requirements.memoryTypeBits)) { + continue; + } + if (auto commit = allocation->Commit(requirements.size, eff_align)) { + return commit; + } + } + if ((flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) != 0) { + // Look for non device local commits on failure + return TryCommit(requirements, flags & ~VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + } + return std::nullopt; +} + +VkMemoryPropertyFlags MemoryAllocator::MemoryPropertyFlags(u32 type_mask, + VkMemoryPropertyFlags flags) const { + if (FindType(flags, type_mask)) { + // Found a memory type with those 
requirements + return flags; + } + if ((flags & VK_MEMORY_PROPERTY_HOST_CACHED_BIT) != 0) { + // Remove host cached bit in case it's not supported + return MemoryPropertyFlags(type_mask, flags & ~VK_MEMORY_PROPERTY_HOST_CACHED_BIT); + } + if ((flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) != 0) { + // Remove device local, if it's not supported by the requested resource + return MemoryPropertyFlags(type_mask, flags & ~VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT); + } + ASSERT_MSG(false, "No compatible memory types found"); + return 0; +} + +std::optional MemoryAllocator::FindType(VkMemoryPropertyFlags flags, u32 type_mask) const { + for (u32 type_index = 0; type_index < properties.memoryTypeCount; ++type_index) { + const VkMemoryPropertyFlags type_flags = properties.memoryTypes[type_index].propertyFlags; + if ((type_mask & (1U << type_index)) != 0 && (type_flags & flags) == flags) { + // The type matches in type and in the wanted properties. + return type_index; + } + } + // Failed to find index + return std::nullopt; +} + } // namespace Vulkan