Compare commits

6 commits

f14db9b220  revert 7b9798f842  2025-08-03 04:33:24 +02:00
    revert Revert "[shader_recompiler/Maxwell] Unstub ISBERD completely (#160)"

    This reverts commit 3f12ae1e6e.

    Found that this was not the cause of the performance issue in this PR. Reverting so it can be tested again once the fix for the slowdown is merged and this PR is rebased.

a2f410f9da  revert c267178e84  2025-08-03 04:33:24 +02:00
    revert Revert "[vk] Remove improper check for image depth in texture cache. (#164)"

    This reverts commit 6fd10fd85e.

    Confirmed that this was not the cause of the slowdown, so it can be reverted for proper testing with the other changes after a rebase.

d81deeb775  Revert "[shader_recompiler/Maxwell] Unstub ISBERD completely (#160)"  2025-08-03 04:33:24 +02:00
    This reverts commit 3f12ae1e6e.

8c6018e378  Revert "[vk] Remove improper check for image depth in texture cache. (#164)"  2025-08-03 04:33:24 +02:00
    This reverts commit 6fd10fd85e.

a4c4f37d72  add ranged barrier range, by wildcard  2025-08-03 04:33:24 +02:00

fdc9c7b811  lru fix wildcard  2025-08-03 04:33:24 +02:00
3 changed files with 108 additions and 77 deletions

View file

@@ -16,6 +16,24 @@
 #include "core/hle/kernel/physical_memory.h"
 #include "lru_cache.h"
+#include <utility>
+using ModuleID = std::array<u8, 32>; // NSO build ID
+struct PatchCacheKey {
+    ModuleID module_id;
+    uintptr_t offset;
+    bool operator==(const PatchCacheKey&) const = default;
+};
+template <>
+struct std::hash<PatchCacheKey> {
+    size_t operator()(const PatchCacheKey& key) const {
+        // XOR every build-ID byte into the size_t's byte lanes, then mix in the offset
+        size_t hash = 0;
+        for (size_t i = 0; i < key.module_id.size(); ++i) {
+            hash ^= static_cast<size_t>(key.module_id[i]) << ((i % sizeof(size_t)) * 8);
+        }
+        return hash ^ std::hash<uintptr_t>{}(key.offset);
+    }
+};
 namespace Core::NCE {
@@ -31,13 +49,15 @@ using EntryTrampolines = std::unordered_map<ModuleTextAddress, PatchTextAddress>
 class Patcher {
 public:
+    void SetModuleID(const ModuleID& id) {
+        module_id = id;
+    }
     Patcher(const Patcher&) = delete;
     Patcher& operator=(const Patcher&) = delete;
     Patcher(Patcher&& other) noexcept;
     Patcher& operator=(Patcher&&) noexcept = delete;
     explicit Patcher();
     ~Patcher();
     bool PatchText(const Kernel::PhysicalMemory& program_image,
                    const Kernel::CodeSet::Segment& code);
     bool RelocateAndCopy(Common::ProcessAddress load_base, const Kernel::CodeSet::Segment& code,
@@ -50,7 +70,7 @@ public:
 private:
     using ModuleDestLabel = uintptr_t;
+    ModuleID module_id{};
     struct Trampoline {
         ptrdiff_t patch_offset;
         uintptr_t module_offset;
@@ -68,26 +88,25 @@
 private:
     static constexpr size_t CACHE_SIZE = 16384; // Cache size for patch entries
-    LRUCache<uintptr_t, PatchTextAddress> patch_cache{CACHE_SIZE, Settings::values.lru_cache_enabled.GetValue()};
+    LRUCache<PatchCacheKey, PatchTextAddress> patch_cache{CACHE_SIZE, Settings::values.lru_cache_enabled.GetValue()};
     void BranchToPatch(uintptr_t module_dest) {
         if (patch_cache.isEnabled()) {
-            LOG_DEBUG(Core_ARM, "LRU cache lookup for address {:#x}", module_dest);
+            PatchCacheKey key{module_id, module_dest};
+            LOG_DEBUG(Core_ARM, "LRU cache lookup for module={}, offset={:#x}", fmt::ptr(module_id.data()), module_dest);
             // Try to get existing patch entry from cache
-            if (auto* cached_patch = patch_cache.get(module_dest)) {
-                LOG_WARNING(Core_ARM, "LRU cache hit for address {:#x}", module_dest);
+            if (auto* cached_patch = patch_cache.get(key)) {
+                LOG_WARNING(Core_ARM, "LRU cache hit for module offset {:#x}", module_dest);
                 curr_patch->m_branch_to_patch_relocations.push_back({c.offset(), *cached_patch});
                 return;
             }
-            LOG_DEBUG(Core_ARM, "LRU cache miss for address {:#x}, creating new patch", module_dest);
-            // If not in cache, create new entry and cache it
+            LOG_DEBUG(Core_ARM, "LRU cache miss for module offset {:#x}, creating new patch", module_dest);
+            // Not in cache: create and store
             const auto patch_addr = c.offset();
             curr_patch->m_branch_to_patch_relocations.push_back({patch_addr, module_dest});
-            patch_cache.put(module_dest, patch_addr);
+            patch_cache.put(key, patch_addr);
         } else {
-            LOG_DEBUG(Core_ARM, "LRU cache disabled - creating direct patch for address {:#x}", module_dest);
-            // LRU disabled - use pre-LRU approach
+            LOG_DEBUG(Core_ARM, "LRU cache disabled - direct patch for offset {:#x}", module_dest);
             curr_patch->m_branch_to_patch_relocations.push_back({c.offset(), module_dest});
         }
     }
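Note: lru_cache.h itself is not part of this compare. As context, a minimal sketch of the interface BranchToPatch relies on — isEnabled(), get() returning a pointer that is null on a miss, and put() — could look like the following. This is an illustrative reconstruction under those assumptions, not the project's actual implementation:

#include <cstddef>
#include <list>
#include <unordered_map>
#include <utility>

template <typename Key, typename Value>
class LRUCache {
public:
    LRUCache(std::size_t capacity, bool enabled) : capacity_{capacity}, enabled_{enabled} {}

    // Callers (like BranchToPatch) check this before touching the cache at all.
    bool isEnabled() const { return enabled_; }

    // Returns a pointer to the cached value and marks it most-recently-used,
    // or nullptr on a miss, matching the `if (auto* cached = get(key))` usage.
    Value* get(const Key& key) {
        const auto it = map_.find(key);
        if (it == map_.end()) {
            return nullptr;
        }
        // Move the node to the front of the recency list; list iterators stay valid.
        order_.splice(order_.begin(), order_, it->second);
        return &it->second->second;
    }

    void put(const Key& key, Value value) {
        if (auto* existing = get(key)) {
            *existing = std::move(value);
            return;
        }
        if (order_.size() >= capacity_) {
            // Evict the least-recently-used entry from the back.
            map_.erase(order_.back().first);
            order_.pop_back();
        }
        order_.emplace_front(key, std::move(value));
        map_[key] = order_.begin();
    }

private:
    std::size_t capacity_;
    bool enabled_;
    std::list<std::pair<Key, Value>> order_;
    std::unordered_map<Key, typename std::list<std::pair<Key, Value>>::iterator> map_;
};

Storing list iterators in the map keeps both hit and insert O(1); std::list::splice moves a node to the front without invalidating the iterators the map holds. Note this also requires std::hash<Key>, which is exactly what the std::hash<PatchCacheKey> specialization above provides.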

View file

@@ -166,6 +166,8 @@ std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::KProcess& process, Core::
     const auto& code = codeset.CodeSegment();
     auto* patch = patches ? &patches->operator[](patch_index) : nullptr;
     if (patch && !load_into_process) {
+        // Set the module ID using the build_id from the NSO header
+        patch->SetModuleID(nso_header.build_id);
         // Patch SVCs and MRS calls in the guest code
         while (!patch->PatchText(program_image, code)) {
             patch = &patches->emplace_back();
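Why the loader has to seed the module ID: the patch cache used to be keyed on the code offset alone, so two modules patched at the same offset shared (stale) entries. A self-contained toy — PatchCacheKey and its hash copied from the patcher header above, with an invented main() and sample build IDs — shows the composite key keeping them distinct:

#include <array>
#include <cstdint>
#include <cstdio>
#include <functional>

using u8 = std::uint8_t;
using ModuleID = std::array<u8, 32>; // NSO build ID, as in the patcher header

struct PatchCacheKey {
    ModuleID module_id;
    std::uintptr_t offset;
    bool operator==(const PatchCacheKey&) const = default;
};

template <>
struct std::hash<PatchCacheKey> {
    std::size_t operator()(const PatchCacheKey& key) const {
        std::size_t hash = 0;
        for (std::size_t i = 0; i < key.module_id.size(); ++i) {
            hash ^= static_cast<std::size_t>(key.module_id[i]) << ((i % sizeof(std::size_t)) * 8);
        }
        return hash ^ std::hash<std::uintptr_t>{}(key.offset);
    }
};

int main() {
    // Two different modules (build IDs) patching the same code offset.
    const PatchCacheKey a{ModuleID{0x01}, 0x1000};
    const PatchCacheKey b{ModuleID{0x02}, 0x1000};

    // Under the old offset-only key these collided; the composite key separates them.
    std::printf("equal: %d\n", static_cast<int>(a == b)); // prints 0
    std::printf("hash a: %zx\nhash b: %zx\n",
                std::hash<PatchCacheKey>{}(a), std::hash<PatchCacheKey>{}(b));
}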

View file

@@ -507,7 +507,29 @@ TransformBufferCopies(std::span<const VideoCommon::BufferCopy> copies, size_t bu
         return value;
     }
 }
+struct RangedBarrierRange {
+    u32 min_mip = std::numeric_limits<u32>::max();
+    u32 max_mip = std::numeric_limits<u32>::min();
+    u32 min_layer = std::numeric_limits<u32>::max();
+    u32 max_layer = std::numeric_limits<u32>::min();
+
+    void AddLayers(const VkImageSubresourceLayers& layers) {
+        min_mip = std::min(min_mip, layers.mipLevel);
+        max_mip = std::max(max_mip, layers.mipLevel + 1);
+        min_layer = std::min(min_layer, layers.baseArrayLayer);
+        max_layer = std::max(max_layer, layers.baseArrayLayer + layers.layerCount);
+    }
+
+    VkImageSubresourceRange SubresourceRange(VkImageAspectFlags aspect_mask) const noexcept {
+        return VkImageSubresourceRange{
+            .aspectMask = aspect_mask,
+            .baseMipLevel = min_mip,
+            .levelCount = max_mip - min_mip,
+            .baseArrayLayer = min_layer,
+            .layerCount = max_layer - min_layer,
+        };
+    }
+};
 void CopyBufferToImage(vk::CommandBuffer cmdbuf, VkBuffer src_buffer, VkImage image,
                        VkImageAspectFlags aspect_mask, bool is_initialized,
                        std::span<const VkBufferImageCopy> copies) {
@@ -517,6 +539,14 @@ void CopyBufferToImage(vk::CommandBuffer cmdbuf, VkBuffer src_buffer, VkImage im
     static constexpr VkAccessFlags READ_ACCESS_FLAGS = VK_ACCESS_SHADER_READ_BIT |
                                                        VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
                                                        VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT;
+
+    // Compute exact mip/layer range being written to
+    RangedBarrierRange range;
+    for (const auto& region : copies) {
+        range.AddLayers(region.imageSubresource);
+    }
+    const VkImageSubresourceRange subresource_range = range.SubresourceRange(aspect_mask);
+
     const VkImageMemoryBarrier read_barrier{
         .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
         .pNext = nullptr,
@@ -527,14 +557,9 @@ void CopyBufferToImage(vk::CommandBuffer cmdbuf, VkBuffer src_buffer, VkImage im
         .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
         .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
         .image = image,
-        .subresourceRange{
-            .aspectMask = aspect_mask,
-            .baseMipLevel = 0,
-            .levelCount = VK_REMAINING_MIP_LEVELS,
-            .baseArrayLayer = 0,
-            .layerCount = VK_REMAINING_ARRAY_LAYERS,
-        },
+        .subresourceRange = subresource_range,
     };
     const VkImageMemoryBarrier write_barrier{
         .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
         .pNext = nullptr,
@@ -545,20 +570,21 @@ void CopyBufferToImage(vk::CommandBuffer cmdbuf, VkBuffer src_buffer, VkImage im
         .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
         .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
         .image = image,
-        .subresourceRange{
-            .aspectMask = aspect_mask,
-            .baseMipLevel = 0,
-            .levelCount = VK_REMAINING_MIP_LEVELS,
-            .baseArrayLayer = 0,
-            .layerCount = VK_REMAINING_ARRAY_LAYERS,
-        },
+        .subresourceRange = subresource_range,
     };
-    cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0,
+    cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT |
+                               VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT |
+                               VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0,
                            read_barrier);
     cmdbuf.CopyBufferToImage(src_buffer, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, copies);
     // TODO: Move this to another API
-    cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0,
-                           write_barrier);
+    cmdbuf.PipelineBarrier(
+        VK_PIPELINE_STAGE_TRANSFER_BIT,
+        VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT |
+            VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT |
+            VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
+        0, nullptr, nullptr, write_barrier);
 }

 [[nodiscard]] VkImageBlit MakeImageBlit(const Region2D& dst_region, const Region2D& src_region,
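For intuition on the new barriers, here is a standalone sketch (Vulkan headers only; main() and the sample regions are invented) of how RangedBarrierRange folds the copy regions into the tightest subresource range, which replaces the old VK_REMAINING_MIP_LEVELS / VK_REMAINING_ARRAY_LAYERS whole-image barriers:

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <limits>
#include <vulkan/vulkan_core.h>

using u32 = std::uint32_t;

// Copied from the diff above so this compiles standalone.
struct RangedBarrierRange {
    u32 min_mip = std::numeric_limits<u32>::max();
    u32 max_mip = std::numeric_limits<u32>::min();
    u32 min_layer = std::numeric_limits<u32>::max();
    u32 max_layer = std::numeric_limits<u32>::min();

    void AddLayers(const VkImageSubresourceLayers& layers) {
        min_mip = std::min(min_mip, layers.mipLevel);
        max_mip = std::max(max_mip, layers.mipLevel + 1);
        min_layer = std::min(min_layer, layers.baseArrayLayer);
        max_layer = std::max(max_layer, layers.baseArrayLayer + layers.layerCount);
    }

    VkImageSubresourceRange SubresourceRange(VkImageAspectFlags aspect_mask) const noexcept {
        return VkImageSubresourceRange{
            .aspectMask = aspect_mask,
            .baseMipLevel = min_mip,
            .levelCount = max_mip - min_mip,
            .baseArrayLayer = min_layer,
            .layerCount = max_layer - min_layer,
        };
    }
};

int main() {
    // Two invented copy regions: mip 1 touching layers 0-1, mip 2 touching layer 3.
    const VkImageSubresourceLayers first{VK_IMAGE_ASPECT_COLOR_BIT, 1, 0, 2};
    const VkImageSubresourceLayers second{VK_IMAGE_ASPECT_COLOR_BIT, 2, 3, 1};

    RangedBarrierRange range;
    range.AddLayers(first);
    range.AddLayers(second);

    // The barrier now covers mips [1,3) and layers [0,4) instead of the whole image.
    const VkImageSubresourceRange sub = range.SubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT);
    std::printf("mips: base=%u count=%u, layers: base=%u count=%u\n", sub.baseMipLevel,
                sub.levelCount, sub.baseArrayLayer, sub.layerCount);
}

A tighter range lets the driver limit layout-transition and cache-maintenance work to the subresources actually written by the copy.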
@@ -651,29 +677,7 @@ void TryTransformSwizzleIfNeeded(PixelFormat format, std::array<SwizzleSource, 4
     }
 }

-struct RangedBarrierRange {
-    u32 min_mip = std::numeric_limits<u32>::max();
-    u32 max_mip = std::numeric_limits<u32>::min();
-    u32 min_layer = std::numeric_limits<u32>::max();
-    u32 max_layer = std::numeric_limits<u32>::min();
-
-    void AddLayers(const VkImageSubresourceLayers& layers) {
-        min_mip = std::min(min_mip, layers.mipLevel);
-        max_mip = std::max(max_mip, layers.mipLevel + 1);
-        min_layer = std::min(min_layer, layers.baseArrayLayer);
-        max_layer = std::max(max_layer, layers.baseArrayLayer + layers.layerCount);
-    }
-
-    VkImageSubresourceRange SubresourceRange(VkImageAspectFlags aspect_mask) const noexcept {
-        return VkImageSubresourceRange{
-            .aspectMask = aspect_mask,
-            .baseMipLevel = min_mip,
-            .levelCount = max_mip - min_mip,
-            .baseArrayLayer = min_layer,
-            .layerCount = max_layer - min_layer,
-        };
-    }
-};

 [[nodiscard]] VkFormat Format(Shader::ImageFormat format) {
     switch (format) {
@@ -1457,12 +1461,18 @@ void TextureCacheRuntime::CopyImage(Image& dst, Image& src,
                 .subresourceRange = dst_range.SubresourceRange(aspect_mask),
             },
         };
-        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
-                               0, {}, {}, pre_barriers);
+        cmdbuf.PipelineBarrier(
+            VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT |
+                VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT,
+            VK_PIPELINE_STAGE_TRANSFER_BIT,
+            0, nullptr, nullptr, pre_barriers);
         cmdbuf.CopyImage(src_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst_image,
                          VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, vk_copies);
-        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
-                               0, {}, {}, post_barriers);
+        cmdbuf.PipelineBarrier(
+            VK_PIPELINE_STAGE_TRANSFER_BIT,
+            VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT |
+                VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT,
+            0, nullptr, nullptr, post_barriers);
     });
 }
@@ -2352,7 +2362,7 @@ void TextureCacheRuntime::TransitionImageLayout(Image& image) {
     };
     scheduler.RequestOutsideRenderPassOperationContext();
     scheduler.Record([barrier](vk::CommandBuffer cmdbuf) {
-        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
                                VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, barrier);
     });
 }
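This last hunk narrows only the source stage: VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT as srcStageMask means the barrier waits on nothing, which is sound when the transition starts from an undefined layout with no prior writes to make visible, whereas ALL_COMMANDS forced a full pipeline drain. A raw-Vulkan sketch of the same idea — the barrier's layout and access values are not shown in this diff, so the ones below are assumptions, as are the cmd and image handles:

#include <vulkan/vulkan_core.h>

// Illustrative only: mirrors the shape of the change, not the project's exact barrier.
void TransitionSketch(VkCommandBuffer cmd, VkImage image) {
    const VkImageMemoryBarrier barrier{
        .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
        .srcAccessMask = 0, // nothing to flush: the old layout is UNDEFINED (assumed)
        .dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT,
        .oldLayout = VK_IMAGE_LAYOUT_UNDEFINED, // assumed
        .newLayout = VK_IMAGE_LAYOUT_GENERAL,   // assumed
        .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
        .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
        .image = image,
        .subresourceRange = {VK_IMAGE_ASPECT_COLOR_BIT, 0, VK_REMAINING_MIP_LEVELS, 0,
                             VK_REMAINING_ARRAY_LAYERS},
    };
    // TOP_OF_PIPE as srcStageMask = an empty execution dependency; the previous
    // ALL_COMMANDS srcStageMask made the GPU wait for every earlier command.
    vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
                         VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, 0, nullptr, 0, nullptr, 1,
                         &barrier);
}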