diff --git a/src/core/arm/nce/patcher.h b/src/core/arm/nce/patcher.h index 53a923138c..7f54608e3f 100644 --- a/src/core/arm/nce/patcher.h +++ b/src/core/arm/nce/patcher.h @@ -16,6 +16,24 @@ #include "core/hle/kernel/physical_memory.h" #include "lru_cache.h" #include +using ModuleID = std::array; // NSO build ID +struct PatchCacheKey { + ModuleID module_id; + uintptr_t offset; + bool operator==(const PatchCacheKey&) const = default; +}; + +template <> +struct std::hash { + size_t operator()(const PatchCacheKey& key) const { + // Simple XOR hash of first few bytes + size_t hash = 0; + for (size_t i = 0; i < key.module_id.size(); ++i) { + hash ^= static_cast(key.module_id[i]) << ((i % sizeof(size_t)) * 8); + } + return hash ^ std::hash{}(key.offset); + } +}; namespace Core::NCE { @@ -31,13 +49,15 @@ using EntryTrampolines = std::unordered_map class Patcher { public: + void SetModuleID(const ModuleID& id) { + module_id = id; + } Patcher(const Patcher&) = delete; Patcher& operator=(const Patcher&) = delete; Patcher(Patcher&& other) noexcept; Patcher& operator=(Patcher&&) noexcept = delete; explicit Patcher(); ~Patcher(); - bool PatchText(const Kernel::PhysicalMemory& program_image, const Kernel::CodeSet::Segment& code); bool RelocateAndCopy(Common::ProcessAddress load_base, const Kernel::CodeSet::Segment& code, @@ -50,7 +70,7 @@ public: private: using ModuleDestLabel = uintptr_t; - + ModuleID module_id{}; struct Trampoline { ptrdiff_t patch_offset; uintptr_t module_offset; @@ -68,26 +88,25 @@ private: private: static constexpr size_t CACHE_SIZE = 16384; // Cache size for patch entries - LRUCache patch_cache{CACHE_SIZE, Settings::values.lru_cache_enabled.GetValue()}; + LRUCache patch_cache{CACHE_SIZE, Settings::values.lru_cache_enabled.GetValue()}; void BranchToPatch(uintptr_t module_dest) { if (patch_cache.isEnabled()) { - LOG_DEBUG(Core_ARM, "LRU cache lookup for address {:#x}", module_dest); + PatchCacheKey key{module_id, module_dest}; + LOG_DEBUG(Core_ARM, "LRU cache lookup for module={}, offset={:#x}", fmt::ptr(module_id.data()), module_dest); // Try to get existing patch entry from cache - if (auto* cached_patch = patch_cache.get(module_dest)) { - LOG_WARNING(Core_ARM, "LRU cache hit for address {:#x}", module_dest); + if (auto* cached_patch = patch_cache.get(key)) { + LOG_WARNING(Core_ARM, "LRU cache hit for module offset {:#x}", module_dest); curr_patch->m_branch_to_patch_relocations.push_back({c.offset(), *cached_patch}); return; } - LOG_DEBUG(Core_ARM, "LRU cache miss for address {:#x}, creating new patch", module_dest); - - // If not in cache, create new entry and cache it + LOG_DEBUG(Core_ARM, "LRU cache miss for module offset {:#x}, creating new patch", module_dest); + // Not in cache: create and store const auto patch_addr = c.offset(); curr_patch->m_branch_to_patch_relocations.push_back({patch_addr, module_dest}); - patch_cache.put(module_dest, patch_addr); + patch_cache.put(key, patch_addr); } else { - LOG_DEBUG(Core_ARM, "LRU cache disabled - creating direct patch for address {:#x}", module_dest); - // LRU disabled - use pre-LRU approach + LOG_DEBUG(Core_ARM, "LRU cache disabled - direct patch for offset {:#x}", module_dest); curr_patch->m_branch_to_patch_relocations.push_back({c.offset(), module_dest}); } } diff --git a/src/core/loader/nso.cpp b/src/core/loader/nso.cpp index 583b7e9270..92370f115b 100644 --- a/src/core/loader/nso.cpp +++ b/src/core/loader/nso.cpp @@ -166,6 +166,8 @@ std::optional AppLoader_NSO::LoadModule(Kernel::KProcess& process, Core:: const auto& code = codeset.CodeSegment(); auto* patch = patches ? &patches->operator[](patch_index) : nullptr; if (patch && !load_into_process) { + //Set module ID using build_id from the NSO header + patch->SetModuleID(nso_header.build_id); // Patch SVCs and MRS calls in the guest code while (!patch->PatchText(program_image, code)) { patch = &patches->emplace_back(); diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 9259639107..963e7b1c40 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -507,58 +507,84 @@ TransformBufferCopies(std::span copies, size_t bu return value; } } +struct RangedBarrierRange { + u32 min_mip = std::numeric_limits::max(); + u32 max_mip = std::numeric_limits::min(); + u32 min_layer = std::numeric_limits::max(); + u32 max_layer = std::numeric_limits::min(); + void AddLayers(const VkImageSubresourceLayers& layers) { + min_mip = std::min(min_mip, layers.mipLevel); + max_mip = std::max(max_mip, layers.mipLevel + 1); + min_layer = std::min(min_layer, layers.baseArrayLayer); + max_layer = std::max(max_layer, layers.baseArrayLayer + layers.layerCount); + } + + VkImageSubresourceRange SubresourceRange(VkImageAspectFlags aspect_mask) const noexcept { + return VkImageSubresourceRange{ + .aspectMask = aspect_mask, + .baseMipLevel = min_mip, + .levelCount = max_mip - min_mip, + .baseArrayLayer = min_layer, + .layerCount = max_layer - min_layer, + }; + } +}; void CopyBufferToImage(vk::CommandBuffer cmdbuf, VkBuffer src_buffer, VkImage image, VkImageAspectFlags aspect_mask, bool is_initialized, std::span copies) { static constexpr VkAccessFlags WRITE_ACCESS_FLAGS = - VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | - VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; static constexpr VkAccessFlags READ_ACCESS_FLAGS = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT; + + // Compute exact mip/layer range being written to + RangedBarrierRange range; + for (const auto& region : copies) { + range.AddLayers(region.imageSubresource); + } + const VkImageSubresourceRange subresource_range = range.SubresourceRange(aspect_mask); + const VkImageMemoryBarrier read_barrier{ - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .pNext = nullptr, - .srcAccessMask = WRITE_ACCESS_FLAGS, - .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, - .oldLayout = is_initialized ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_UNDEFINED, - .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = image, - .subresourceRange{ - .aspectMask = aspect_mask, - .baseMipLevel = 0, - .levelCount = VK_REMAINING_MIP_LEVELS, - .baseArrayLayer = 0, - .layerCount = VK_REMAINING_ARRAY_LAYERS, - }, + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = WRITE_ACCESS_FLAGS, + .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .oldLayout = is_initialized ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_UNDEFINED, + .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = image, + .subresourceRange = subresource_range, }; + const VkImageMemoryBarrier write_barrier{ - .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, - .pNext = nullptr, - .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, - .dstAccessMask = WRITE_ACCESS_FLAGS | READ_ACCESS_FLAGS, - .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, - .newLayout = VK_IMAGE_LAYOUT_GENERAL, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = image, - .subresourceRange{ - .aspectMask = aspect_mask, - .baseMipLevel = 0, - .levelCount = VK_REMAINING_MIP_LEVELS, - .baseArrayLayer = 0, - .layerCount = VK_REMAINING_ARRAY_LAYERS, - }, + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .pNext = nullptr, + .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .dstAccessMask = WRITE_ACCESS_FLAGS | READ_ACCESS_FLAGS, + .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + .newLayout = VK_IMAGE_LAYOUT_GENERAL, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = image, + .subresourceRange = subresource_range, }; - cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, + + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0, read_barrier); cmdbuf.CopyBufferToImage(src_buffer, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, copies); // TODO: Move this to another API - cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, - write_barrier); + cmdbuf.PipelineBarrier( + VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, + 0, nullptr, nullptr, write_barrier); } [[nodiscard]] VkImageBlit MakeImageBlit(const Region2D& dst_region, const Region2D& src_region, @@ -651,29 +677,7 @@ void TryTransformSwizzleIfNeeded(PixelFormat format, std::array::max(); - u32 max_mip = std::numeric_limits::min(); - u32 min_layer = std::numeric_limits::max(); - u32 max_layer = std::numeric_limits::min(); - void AddLayers(const VkImageSubresourceLayers& layers) { - min_mip = std::min(min_mip, layers.mipLevel); - max_mip = std::max(max_mip, layers.mipLevel + 1); - min_layer = std::min(min_layer, layers.baseArrayLayer); - max_layer = std::max(max_layer, layers.baseArrayLayer + layers.layerCount); - } - - VkImageSubresourceRange SubresourceRange(VkImageAspectFlags aspect_mask) const noexcept { - return VkImageSubresourceRange{ - .aspectMask = aspect_mask, - .baseMipLevel = min_mip, - .levelCount = max_mip - min_mip, - .baseArrayLayer = min_layer, - .layerCount = max_layer - min_layer, - }; - } -}; [[nodiscard]] VkFormat Format(Shader::ImageFormat format) { switch (format) { @@ -1457,12 +1461,18 @@ void TextureCacheRuntime::CopyImage(Image& dst, Image& src, .subresourceRange = dst_range.SubresourceRange(aspect_mask), }, }; - cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, - 0, {}, {}, pre_barriers); + cmdbuf.PipelineBarrier( + VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, + 0, nullptr, nullptr, pre_barriers); cmdbuf.CopyImage(src_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst_image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, vk_copies); - cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, - 0, {}, {}, post_barriers); + cmdbuf.PipelineBarrier( + VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT, + 0, nullptr, nullptr, post_barriers); }); } @@ -1546,7 +1556,7 @@ void Image::UploadMemory(VkBuffer buffer, VkDeviceSize offset, } // Handle MSAA upload if necessary - /* WARNING, TODO: This code uses some hacks, besides being fundamentally ugly + /* WARNING, TODO: This code uses some hacks, besides being fundamentally ugly since tropic didn't want to touch it for a long time, so it needs a rewrite from someone better than me at vulkan.*/ if (info.num_samples > 1 && runtime->CanUploadMSAA()) { // Only use MSAA copy pass for color formats @@ -2352,7 +2362,7 @@ void TextureCacheRuntime::TransitionImageLayout(Image& image) { }; scheduler.RequestOutsideRenderPassOperationContext(); scheduler.Record([barrier](vk::CommandBuffer cmdbuf) { - cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, barrier); }); }