diff --git a/src/core/arm/nce/arm_nce.cpp b/src/core/arm/nce/arm_nce.cpp
index 877e8ac3c7..0e0d72fc8a 100644
--- a/src/core/arm/nce/arm_nce.cpp
+++ b/src/core/arm/nce/arm_nce.cpp
@@ -227,7 +227,7 @@ HaltReason ArmNce::RunThread(Kernel::KThread* thread) {
     if (auto it = post_handlers.find(m_guest_ctx.pc); it != post_handlers.end()) {
         hr = ReturnToRunCodeByTrampoline(thread_params, &m_guest_ctx, it->second);
     } else {
-        hr = ReturnToRunCodeByExceptionLevelChange(m_thread_id, thread_params);
+        hr = ReturnToRunCodeByExceptionLevelChange(m_thread_id, thread_params); // Android: use "process handle SIGUSR2 -n true -p true -s false" (and the same for SIGURG) in LLDB when debugging
    }
 
     // Critical section for thread cleanup
diff --git a/src/core/hle/service/sockets/sfdnsres.cpp b/src/core/hle/service/sockets/sfdnsres.cpp
index b07bd3e58e..d9f01a65b8 100644
--- a/src/core/hle/service/sockets/sfdnsres.cpp
+++ b/src/core/hle/service/sockets/sfdnsres.cpp
@@ -53,6 +53,16 @@ enum class NetDbError : s32 {
     NoData = 4,
 };
 
+const std::vector<std::string> blockedDomains = {"srv.nintendo.net", "battle.net",
+                                                 "microsoft.com",    "mojang.com",
+                                                 "xboxlive.com",     "minecraftservices.com"};
+
+static bool IsBlockedHost(const std::string& host) {
+    return std::any_of(
+        blockedDomains.begin(), blockedDomains.end(),
+        [&host](const std::string& domain) { return host.find(domain) != std::string::npos; });
+}
+
 static NetDbError GetAddrInfoErrorToNetDbError(GetAddrInfoError result) {
     // These combinations have been verified on console (but are not
     // exhaustive).
@@ -154,7 +164,7 @@ static std::pair<u32, GetAddrInfoError> GetHostByNameRequestImpl(HLERequestContext& ctx) {
     // For now, ignore options, which are in input buffer 1 for GetHostByNameRequestWithOptions.
 
     // Prevent resolution of Nintendo servers
-    if (host.find("srv.nintendo.net") != std::string::npos) {
+    if (IsBlockedHost(host)) {
         LOG_WARNING(Network, "Resolution of hostname {} requested, returning EAI_AGAIN", host);
         return {0, GetAddrInfoError::AGAIN};
     }
@@ -271,7 +281,7 @@ static std::pair<u32, GetAddrInfoError> GetAddrInfoRequestImpl(HLERequestContext& ctx) {
     const std::string host = Common::StringFromBuffer(host_buffer);
 
     // Prevent resolution of Nintendo servers
-    if (host.find("srv.nintendo.net") != std::string::npos) {
+    if (IsBlockedHost(host)) {
         LOG_WARNING(Network, "Resolution of hostname {} requested, returning EAI_AGAIN", host);
         return {0, GetAddrInfoError::AGAIN};
     }
@@ -359,5 +369,4 @@ void SFDNSRES::ResolverSetOptionRequest(HLERequestContext& ctx) {
     rb.Push(ResultSuccess);
     rb.Push(0); // bsd errno
 }
-
 } // namespace Service::Sockets
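IsBlockedHost() uses substring matching, so any hostname that merely contains one of the listed domains is rejected, subdomains included. A standalone sketch of the same semantics (a hypothetical re-creation for illustration, not the service code itself):

#include <algorithm>
#include <cassert>
#include <string>
#include <vector>

static const std::vector<std::string> blockedDomains = {"srv.nintendo.net", "battle.net"};

static bool IsBlockedHost(const std::string& host) {
    return std::any_of(blockedDomains.begin(), blockedDomains.end(),
                       [&host](const std::string& domain) {
                           return host.find(domain) != std::string::npos;
                       });
}

int main() {
    assert(IsBlockedHost("conntest.srv.nintendo.net")); // subdomains match
    assert(IsBlockedHost("eu.battle.net"));
    assert(!IsBlockedHost("example.com")); // unrelated hosts still resolve
}

Note that the hunk itself adds no includes; it relies on <algorithm>, <string>, and <vector> already being reachable from sfdnsres.cpp.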
diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp
index 972eec8276..b904821619 100644
--- a/src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp
+++ b/src/shader_recompiler/frontend/maxwell/translate/impl/warp_shuffle.cpp
@@ -7,6 +7,7 @@
 #include "common/bit_field.h"
 #include "common/common_types.h"
 #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
+#include <fstream>
 
 namespace Shader::Maxwell {
 namespace {
@@ -36,6 +37,19 @@ enum class ShuffleMode : u64 {
     }
 }
 
+bool IsKONA() {
+    // Cached in a static local: this runs per translated instruction, but the SoC name cannot change.
+    static const bool is_kona = [] {
+        std::ifstream machineFile("/sys/devices/soc0/machine");
+        std::string line;
+        if (machineFile.is_open()) {
+            std::getline(machineFile, line);
+        }
+        return line == "KONA";
+    }();
+    return is_kona;
+}
+
 void Shuffle(TranslatorVisitor& v, u64 insn, const IR::U32& index, const IR::U32& mask) {
     union {
         u64 insn;
@@ -47,7 +61,10 @@ void Shuffle(TranslatorVisitor& v, u64 insn, const IR::U32& index, const IR::U32
     const IR::U32 result{ShuffleOperation(v.ir, v.X(shfl.src_reg), index, mask, shfl.mode)};
     v.ir.SetPred(shfl.pred, v.ir.GetInBoundsFromOp(result));
-    v.X(shfl.dest_reg, result);
+    if (IsKONA())
+        v.X(shfl.dest_reg, v.ir.Imm32(0xffffffff)); // This fixes the freeze on Retroid / Snapdragon 865 ("KONA")
+    else
+        v.X(shfl.dest_reg, result);
 }
 } // Anonymous namespace
diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h
index 0cd6861b6d..42510f166f 100644
--- a/src/video_core/buffer_cache/buffer_cache.h
+++ b/src/video_core/buffer_cache/buffer_cache.h
@@ -26,7 +26,9 @@ BufferCache<P>::BufferCache(Tegra::MaxwellDeviceMemoryManager& device_memory_, Runtime& runtime_)
     void(slot_buffers.insert(runtime, NullBufferParams{}));
     gpu_modified_ranges.Clear();
     inline_buffer_id = NULL_BUFFER_ID;
-
+#ifdef ANDROID
+    immediately_free = (Settings::values.vram_usage_mode.GetValue() == Settings::VramUsageMode::Aggressive);
+#endif
     if (!runtime.CanReportMemoryUsage()) {
         minimum_memory = DEFAULT_EXPECTED_MEMORY;
         critical_memory = DEFAULT_CRITICAL_MEMORY;
@@ -1381,6 +1383,8 @@ void BufferCache<P>::JoinOverlap(BufferId new_buffer_id, BufferId overlap_id,
     });
     new_buffer.MarkUsage(copies[0].dst_offset, copies[0].size);
     runtime.CopyBuffer(new_buffer, overlap, copies, true);
+    if (immediately_free)
+        runtime.Finish();
     DeleteBuffer(overlap_id, true);
 }
 
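The new runtime.Finish() is a full GPU drain: without it, immediately destroying the overlapped buffer in DeleteBuffer below could free memory that the just-recorded copy still reads. A reduced sketch of the pattern, with hypothetical GpuDevice/Buffer stand-ins for the real runtime types:

#include <memory>
#include <vector>

// Hypothetical stand-ins; the real code calls runtime.Finish() and pushes into
// a DelayedDestructionRing rather than a plain vector.
struct GpuDevice {
    void WaitIdle() {} // blocks until all submitted GPU work completes
};
struct Buffer {}; // owns the VkBuffer and its device memory

void FreeOverlap(GpuDevice& gpu, std::unique_ptr<Buffer> overlap,
                 std::vector<std::unique_ptr<Buffer>>& deferred, bool immediately_free) {
    if (immediately_free) {
        gpu.WaitIdle();  // runtime.Finish(): the pending copy out of `overlap` has landed
        overlap.reset(); // safe to destroy and reclaim the memory right away
    } else {
        deferred.push_back(std::move(overlap)); // destroyed several frames later
    }
}

Finish() stalls the whole queue, so this trades frame time for VRAM headroom, which is why it is gated behind the Aggressive VRAM-usage mode.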
@@ -1671,7 +1675,9 @@ void BufferCache<P>::DeleteBuffer(BufferId buffer_id, bool do_not_mark) {
     }
 
     Unregister(buffer_id);
-    delayed_destruction_ring.Push(std::move(slot_buffers[buffer_id]));
+
+    if (!do_not_mark || !immediately_free)
+        delayed_destruction_ring.Push(std::move(slot_buffers[buffer_id]));
     slot_buffers.erase(buffer_id);
 
     if constexpr (HAS_PERSISTENT_UNIFORM_BUFFER_BINDINGS) {
diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h
index a45e9b35f1..eef21a18e5 100644
--- a/src/video_core/buffer_cache/buffer_cache_base.h
+++ b/src/video_core/buffer_cache/buffer_cache_base.h
@@ -154,7 +154,11 @@ template <class P>
 class BufferCache : public VideoCommon::ChannelSetupCaches<BufferCacheChannelInfo> {
     // Page size for caching purposes.
     // This is unrelated to the CPU page size and it can be changed as it seems optimal.
+#ifdef ANDROID
+    static constexpr u32 CACHING_PAGEBITS = 12;
+#else
     static constexpr u32 CACHING_PAGEBITS = 16;
+#endif
     static constexpr u64 CACHING_PAGESIZE = u64{1} << CACHING_PAGEBITS;
 
     static constexpr bool IS_OPENGL = P::IS_OPENGL;
@@ -168,9 +172,15 @@ class BufferCache : public VideoCommon::ChannelSetupCaches<BufferCacheChannelInfo> {
 
     SlotVector<Buffer> slot_buffers;
-    DelayedDestructionRing<Buffer, 8> delayed_destruction_ring;
+#ifdef ANDROID
+    static constexpr size_t TICKS_TO_DESTROY = 6;
+#else
+    static constexpr size_t TICKS_TO_DESTROY = 8;
+#endif
+    DelayedDestructionRing<Buffer, TICKS_TO_DESTROY> delayed_destruction_ring;
 
     const Tegra::Engines::DrawManager::IndirectParams* current_draw_indirect{};
@@ -478,6 +493,7 @@ private:
     u64 minimum_memory = 0;
     u64 critical_memory = 0;
     BufferId inline_buffer_id;
+    bool immediately_free = false;
 
     std::array<BufferId, ((1ULL << 39) >> CACHING_PAGEBITS)> page_table;
     Common::ScratchBuffer<u8> tmp_buffer;
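Both caches keep dead GPU objects alive for TICKS_TO_DESTROY frames before really destroying them, so command buffers still in flight can reference them safely; on Android that window shrinks from 8 to 6 frames, returning memory sooner. The CACHING_PAGEBITS change is a separate trade: 4 KiB pages (1 << 12) instead of 64 KiB (1 << 16) give finer dirty tracking but a page table with 16x more entries. A minimal sketch of how such a ring behaves (illustrative; the project's real class lives in video_core):

#include <array>
#include <cstddef>
#include <utility>
#include <vector>

// Objects pushed during frame N are destroyed when Tick() wraps around,
// TICKS frames later.
template <typename T, std::size_t TICKS>
class DelayedDestructionRing {
public:
    void Tick() {
        index = (index + 1) % TICKS;
        elements[index].clear(); // runs destructors of the oldest batch
    }
    void Push(T&& object) {
        elements[index].push_back(std::move(object));
    }

private:
    std::size_t index = 0;
    std::array<std::vector<T>, TICKS> elements;
};

Shrinking TICKS is only safe while it stays at or above the maximum number of frames in flight.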
diff --git a/src/video_core/host1x/host1x.cpp b/src/video_core/host1x/host1x.cpp
index 293bca6d79..652d387031 100644
--- a/src/video_core/host1x/host1x.cpp
+++ b/src/video_core/host1x/host1x.cpp
@@ -1,3 +1,6 @@
+// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
 // SPDX-FileCopyrightText: 2021 yuzu Emulator Project
 // SPDX-License-Identifier: GPL-3.0-or-later
 
@@ -18,9 +21,11 @@ Host1x::~Host1x() = default;
 void Host1x::StartDevice(s32 fd, ChannelType type, u32 syncpt) {
     switch (type) {
     case ChannelType::NvDec:
+        std::call_once(nvdec_first_init, [] { std::this_thread::sleep_for(std::chrono::milliseconds{500}); }); // HACK: For Astroneer
         devices[fd] = std::make_unique<Nvdec>(*this, fd, syncpt, frame_queue);
         break;
     case ChannelType::VIC:
+        std::call_once(vic_first_init, [] { std::this_thread::sleep_for(std::chrono::milliseconds{500}); }); // HACK: For Astroneer
         devices[fd] = std::make_unique<Vic>(*this, fd, syncpt, frame_queue);
         break;
     default:
diff --git a/src/video_core/host1x/host1x.h b/src/video_core/host1x/host1x.h
index 8debac93dd..5ecffa442c 100644
--- a/src/video_core/host1x/host1x.h
+++ b/src/video_core/host1x/host1x.h
@@ -1,3 +1,6 @@
+// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
 // SPDX-FileCopyrightText: 2021 yuzu Emulator Project
 // SPDX-License-Identifier: GPL-3.0-or-later
 
@@ -201,6 +204,8 @@ private:
     std::unique_ptr<Common::FlatAllocator<u32, 0, 32>> allocator;
     FrameQueue frame_queue;
     std::unordered_map<s32, std::unique_ptr<CDmaPusher>> devices;
+    std::once_flag nvdec_first_init;
+    std::once_flag vic_first_init;
 };
 
 } // namespace Tegra::Host1x
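std::call_once with per-channel std::once_flag members means the 500 ms pause is paid only for the first NvDec and the first VIC channel of an emulation session, not for every stream. A minimal sketch of the idiom (StartNvdec and the flag placement are illustrative):

#include <chrono>
#include <iostream>
#include <mutex>
#include <thread>

std::once_flag nvdec_first_init; // one flag per device type, as in the diff

void StartNvdec() {
    // The lambda runs exactly once; subsequent calls return immediately.
    std::call_once(nvdec_first_init, [] {
        std::this_thread::sleep_for(std::chrono::milliseconds{500});
    });
    std::cout << "nvdec channel started\n";
}

int main() {
    StartNvdec(); // pays the one-time 500 ms delay
    StartNvdec(); // returns immediately
}

The diff assumes <mutex>, <thread>, and <chrono> are already reachable from host1x.h/host1x.cpp; if they are not, they would need to be added.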
diff --git a/src/video_core/texture_cache/texture_cache_base.h b/src/video_core/texture_cache/texture_cache_base.h
index cbc27344b0..3f67828b86 100644
--- a/src/video_core/texture_cache/texture_cache_base.h
+++ b/src/video_core/texture_cache/texture_cache_base.h
@@ -110,10 +110,17 @@ class TextureCache : public VideoCommon::ChannelSetupCaches<TextureCacheChannelInfo> {
     static constexpr size_t UNSET_CHANNEL{std::numeric_limits<size_t>::max()};
 
+#ifdef ANDROID
+    static constexpr s64 TARGET_THRESHOLD = 3_GiB;
+    static constexpr s64 DEFAULT_EXPECTED_MEMORY = 1_GiB + 125_MiB;
+    static constexpr s64 DEFAULT_CRITICAL_MEMORY = 1_GiB + 625_MiB;
+    static constexpr size_t GC_EMERGENCY_COUNTS = 2;
+#else
     static constexpr s64 TARGET_THRESHOLD = 4_GiB;
     static constexpr s64 DEFAULT_EXPECTED_MEMORY = 1_GiB + 125_MiB;
     static constexpr s64 DEFAULT_CRITICAL_MEMORY = 1_GiB + 625_MiB;
     static constexpr size_t GC_EMERGENCY_COUNTS = 2;
+#endif
 
     using Runtime = typename P::Runtime;
     using Image = typename P::Image;
@@ -479,7 +486,11 @@ private:
     };
     Common::LeastRecentlyUsedCache<ImageId, u64> lru_cache;
 
+#ifdef ANDROID
+    static constexpr size_t TICKS_TO_DESTROY = 6;
+#else
     static constexpr size_t TICKS_TO_DESTROY = 8;
+#endif
     DelayedDestructionRing<Image, TICKS_TO_DESTROY> sentenced_images;
     DelayedDestructionRing<ImageView, TICKS_TO_DESTROY> sentenced_image_view;
     DelayedDestructionRing<Framebuffer, TICKS_TO_DESTROY> sentenced_framebuffers;
diff --git a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp
index 4ab420afea..83e4857672 100644
--- a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp
+++ b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp
@@ -322,4 +322,234 @@ namespace Vulkan {
     return MemoryCommit(allocator, a, info);
 }
 
+    MemoryAllocator* const allocator;           ///< Parent memory allocation.
+    const vk::DeviceMemory memory;              ///< Vulkan memory allocation handler.
+    const u64 allocation_size;                  ///< Size of this allocation.
+    const VkMemoryPropertyFlags property_flags; ///< Vulkan memory property flags.
+    const u32 shifted_memory_type;              ///< Shifted Vulkan memory type.
+    std::vector<std::pair<u64, u64>> commits;   ///< All commit ranges done from this allocation.
+    std::span<u8> memory_mapped_span;           ///< Memory mapped span. Empty if not queried before.
+};
+
+MemoryCommit::MemoryCommit(MemoryAllocation* allocation_, VkDeviceMemory memory_, u64 begin_,
+                           u64 end_) noexcept
+    : allocation{allocation_}, memory{memory_}, begin{begin_}, end{end_} {}
+
+MemoryCommit::~MemoryCommit() {
+    Release();
+}
+
+MemoryCommit& MemoryCommit::operator=(MemoryCommit&& rhs) noexcept {
+    Release();
+    allocation = std::exchange(rhs.allocation, nullptr);
+    memory = rhs.memory;
+    begin = rhs.begin;
+    end = rhs.end;
+    span = std::exchange(rhs.span, std::span<u8>{});
+    return *this;
+}
+
+MemoryCommit::MemoryCommit(MemoryCommit&& rhs) noexcept
+    : allocation{std::exchange(rhs.allocation, nullptr)}, memory{rhs.memory}, begin{rhs.begin},
+      end{rhs.end}, span{std::exchange(rhs.span, std::span<u8>{})} {}
+
+std::span<u8> MemoryCommit::Map() {
+    if (span.empty()) {
+        span = allocation->Map().subspan(begin, end - begin);
+    }
+    return span;
+}
+
+void MemoryCommit::Release() {
+    if (allocation) {
+        allocation->Free(begin);
+    }
+}
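MemoryCommit above is a move-only RAII handle over a [begin, end) suballocation: the destructor and move-assignment both return the range to the parent MemoryAllocation, so a commit cannot leak or double-free. A reduced sketch of that ownership pattern (FreeList and RangeHandle are illustrative stand-ins):

#include <cstdint>
#include <utility>
#include <vector>

// Stand-in for MemoryAllocation's commit bookkeeping.
struct FreeList {
    std::vector<std::pair<std::uint64_t, std::uint64_t>> commits;
    void Free(std::uint64_t begin) {
        std::erase_if(commits, [begin](const auto& c) { return c.first == begin; });
    }
};

// Move-only handle mirroring MemoryCommit: releases its range exactly once.
class RangeHandle {
public:
    RangeHandle(FreeList* owner_, std::uint64_t begin_) : owner{owner_}, begin{begin_} {}
    ~RangeHandle() { Release(); }
    RangeHandle(RangeHandle&& rhs) noexcept
        : owner{std::exchange(rhs.owner, nullptr)}, begin{rhs.begin} {}
    RangeHandle& operator=(RangeHandle&& rhs) noexcept {
        Release(); // give back the range we currently hold
        owner = std::exchange(rhs.owner, nullptr);
        begin = rhs.begin;
        return *this;
    }

private:
    void Release() {
        if (owner) {
            owner->Free(begin);
        }
    }
    FreeList* owner;
    std::uint64_t begin;
};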
+MemoryAllocator::MemoryAllocator(const Device& device_)
+    : device{device_}, allocator{device.GetAllocator()},
+      properties{device_.GetPhysical().GetMemoryProperties().memoryProperties},
+      buffer_image_granularity{
+          device_.GetPhysical().GetProperties().limits.bufferImageGranularity} {
+    // GPUs not supporting rebar may only have a region with less than 256MB host visible/device
+    // local memory. In that case, opening 2 RenderDoc captures side-by-side is not possible due to
+    // the heap running out of memory. With RenderDoc attached and only a small host/device region,
+    // only allow the stream buffer in this memory heap.
+    if (device.HasDebuggingToolAttached()) {
+        using namespace Common::Literals;
+        ForEachDeviceLocalHostVisibleHeap(device, [this](size_t index, VkMemoryHeap& heap) {
+            if (heap.size <= 256_MiB) {
+                valid_memory_types &= ~(1u << index);
+            }
+        });
+    }
+}
+
+MemoryAllocator::~MemoryAllocator() = default;
+
+vk::Image MemoryAllocator::CreateImage(const VkImageCreateInfo& ci) const {
+    const VmaAllocationCreateInfo alloc_ci = {
+        .flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT,
+        .usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE,
+        .requiredFlags = 0,
+        .preferredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
+        .memoryTypeBits = 0,
+        .pool = VK_NULL_HANDLE,
+        .pUserData = nullptr,
+        .priority = 0.f,
+    };
+
+    VkImage handle{};
+    VmaAllocation allocation{};
+
+    vk::Check(vmaCreateImage(allocator, &ci, &alloc_ci, &handle, &allocation, nullptr));
+
+    return vk::Image(handle, ci.usage, *device.GetLogical(), allocator, allocation,
+                     device.GetDispatchLoader());
+}
+
+vk::Buffer MemoryAllocator::CreateBuffer(const VkBufferCreateInfo& ci, MemoryUsage usage) const {
+    const VmaAllocationCreateInfo alloc_ci = {
+        .flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT | MemoryUsageVmaFlags(usage),
+        .usage = MemoryUsageVma(usage),
+        .requiredFlags = 0,
+        .preferredFlags = MemoryUsagePreferredVmaFlags(usage),
+        .memoryTypeBits = usage == MemoryUsage::Stream ? 0u : valid_memory_types,
+        .pool = VK_NULL_HANDLE,
+        .pUserData = nullptr,
+        .priority = 0.f,
+    };
+
+    VkBuffer handle{};
+    VmaAllocationInfo alloc_info{};
+    VmaAllocation allocation{};
+    VkMemoryPropertyFlags property_flags{};
+
+    const VkResult result = vmaCreateBuffer(allocator, &ci, &alloc_ci, &handle, &allocation, &alloc_info);
+    if (result == VK_ERROR_OUT_OF_DEVICE_MEMORY) {
+        LOG_ERROR(Render_Vulkan, "Out of memory creating buffer (size: {})", ci.size);
+    }
+    vk::Check(result); // do not continue with a null allocation
+    vmaGetAllocationMemoryProperties(allocator, allocation, &property_flags);
+
+    u8* data = reinterpret_cast<u8*>(alloc_info.pMappedData);
+    const std::span<u8> mapped_data = data ? std::span<u8>{data, ci.size} : std::span<u8>{};
+    const bool is_coherent = property_flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
+
+    return vk::Buffer(handle, *device.GetLogical(), allocator, allocation, mapped_data, is_coherent,
+                      device.GetDispatchLoader());
+}
+
+MemoryCommit MemoryAllocator::Commit(const VkMemoryRequirements& requirements, MemoryUsage usage) {
+    // Find the fastest memory flags we can afford with the current requirements
+    const u32 type_mask = requirements.memoryTypeBits;
+    const VkMemoryPropertyFlags usage_flags = MemoryUsagePropertyFlags(usage);
+    const VkMemoryPropertyFlags flags = MemoryPropertyFlags(type_mask, usage_flags);
+    if (std::optional<MemoryCommit> commit = TryCommit(requirements, flags)) {
+        return std::move(*commit);
+    }
+    // Commit has failed, allocate more memory.
+    const u64 chunk_size = AllocationChunkSize(requirements.size);
+    if (!TryAllocMemory(flags, type_mask, chunk_size)) {
+        // TODO(Rodrigo): Handle out of memory situations in some way like flushing to guest memory.
+        throw vk::Exception(VK_ERROR_OUT_OF_DEVICE_MEMORY);
+    }
+    // Commit again, this time it won't fail since there's a fresh allocation above.
+    // If it does, there's a bug.
+    return TryCommit(requirements, flags).value();
+}
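CreateBuffer keeps the allocation persistently mapped when VMA picks a host-visible type, and is_coherent records whether CPU writes become visible to the GPU automatically. A short sketch of the distinction that flag encodes (standalone, using only core Vulkan types):

#include <vulkan/vulkan_core.h>

// Given the property flags VMA reported for the memory backing a mapped buffer:
// coherent memory needs no explicit synchronization of host writes, while
// non-coherent memory must be flushed with vkFlushMappedMemoryRanges (and
// invalidated before reads), in ranges aligned to nonCoherentAtomSize.
bool NeedsManualFlush(VkMemoryPropertyFlags flags) {
    const bool host_visible = (flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) != 0;
    const bool coherent = (flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) != 0;
    return host_visible && !coherent;
}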
+bool MemoryAllocator::TryAllocMemory(VkMemoryPropertyFlags flags, u32 type_mask, u64 size) {
+    const auto type_opt = FindType(flags, type_mask);
+    if (!type_opt) {
+        return false;
+    }
+
+    // "Adreno stands firm": the Qualcomm proprietary driver insists on allocation
+    // sizes aligned to 4 KiB.
+    const u64 aligned_size = (device.GetDriverID() == VK_DRIVER_ID_QUALCOMM_PROPRIETARY)
+                                 ? Common::AlignUp(size, 4096)
+                                 : size;
+
+    vk::DeviceMemory memory = device.GetLogical().TryAllocateMemory({
+        .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
+        .pNext = nullptr,
+        .allocationSize = aligned_size,
+        .memoryTypeIndex = *type_opt,
+    });
+
+    if (!memory) {
+        return false;
+    }
+
+    allocations.push_back(
+        std::make_unique<MemoryAllocation>(this, std::move(memory), flags, aligned_size, *type_opt));
+    return true;
+}
+
+void MemoryAllocator::ReleaseMemory(MemoryAllocation* alloc) {
+    const auto it = std::ranges::find(allocations, alloc, &std::unique_ptr<MemoryAllocation>::get);
+    ASSERT(it != allocations.end());
+    allocations.erase(it);
+}
+
+std::optional<MemoryCommit> MemoryAllocator::TryCommit(const VkMemoryRequirements& requirements,
+                                                       VkMemoryPropertyFlags flags) {
+    // Conservative, spec-compliant alignment for suballocation
+    VkDeviceSize eff_align = requirements.alignment;
+    const auto& limits = device.GetPhysical().GetProperties().limits;
+    if ((flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT) &&
+        !(flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)) {
+        // Non-coherent memory must be invalidated on atom boundary
+        if (limits.nonCoherentAtomSize > eff_align) {
+            eff_align = limits.nonCoherentAtomSize;
+        }
+    }
+    // Separate buffers from images to avoid stalls on tilers
+    if (buffer_image_granularity > eff_align) {
+        eff_align = buffer_image_granularity;
+    }
+    eff_align = std::bit_ceil(eff_align);
+
+    for (auto& allocation : allocations) {
+        if (!allocation->IsCompatible(flags, requirements.memoryTypeBits)) {
+            continue;
+        }
+        if (auto commit = allocation->Commit(requirements.size, eff_align)) {
+            return commit;
+        }
+    }
+    if ((flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) != 0) {
+        // Look for non device local commits on failure
+        return TryCommit(requirements, flags & ~VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
+    }
+    return std::nullopt;
+}
+
+VkMemoryPropertyFlags MemoryAllocator::MemoryPropertyFlags(u32 type_mask,
+                                                           VkMemoryPropertyFlags flags) const {
+    if (FindType(flags, type_mask)) {
+        // Found a memory type with those requirements
+        return flags;
+    }
+    if ((flags & VK_MEMORY_PROPERTY_HOST_CACHED_BIT) != 0) {
+        // Remove host cached bit in case it's not supported
+        return MemoryPropertyFlags(type_mask, flags & ~VK_MEMORY_PROPERTY_HOST_CACHED_BIT);
+    }
+    if ((flags & VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) != 0) {
+        // Remove device local, if it's not supported by the requested resource
+        return MemoryPropertyFlags(type_mask, flags & ~VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
+    }
+    ASSERT_MSG(false, "No compatible memory types found");
+    return 0;
+}
+
+std::optional<u32> MemoryAllocator::FindType(VkMemoryPropertyFlags flags, u32 type_mask) const {
+    for (u32 type_index = 0; type_index < properties.memoryTypeCount; ++type_index) {
+        const VkMemoryPropertyFlags type_flags = properties.memoryTypes[type_index].propertyFlags;
+        if ((type_mask & (1U << type_index)) != 0 && (type_flags & flags) == flags) {
+            // The type matches in type and in the wanted properties.
+            return type_index;
+        }
+    }
+    // Failed to find index
+    return std::nullopt;
+}
+
 } // namespace Vulkan
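TryCommit widens the suballocation alignment to the strictest of the resource's own requirement, nonCoherentAtomSize (only for host-visible, non-coherent types), and bufferImageGranularity, then rounds up to a power of two so offsets stay cheap to compute. A standalone check of that arithmetic (in practice both Vulkan limits are powers of two):

#include <algorithm>
#include <bit>
#include <cstdint>

using VkDeviceSize = std::uint64_t; // matches the Vulkan typedef

// Mirrors the effective-alignment logic in MemoryAllocator::TryCommit above.
constexpr VkDeviceSize EffectiveAlignment(VkDeviceSize required, VkDeviceSize non_coherent_atom,
                                          VkDeviceSize buffer_image_granularity,
                                          bool host_visible_non_coherent) {
    VkDeviceSize align = required;
    if (host_visible_non_coherent) {
        align = std::max(align, non_coherent_atom);
    }
    align = std::max(align, buffer_image_granularity);
    return std::bit_ceil(align); // power of two, never smaller than align
}

// e.g. a 16-byte requirement on a device with 64-byte atoms and 1 KiB granularity:
static_assert(EffectiveAlignment(16, 64, 1024, true) == 1024);
static_assert(EffectiveAlignment(48, 64, 1, false) == 64); // bit_ceil rounds 48 up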