forked from eden-emu/eden
Compare commits
6 commits
Author | SHA1 | Date
---|---|---
 | f14db9b220 |
 | a2f410f9da |
 | d81deeb775 |
 | 8c6018e378 |
 | a4c4f37d72 |
 | fdc9c7b811 |
3 changed files with 108 additions and 77 deletions
@@ -16,6 +16,24 @@
 #include "core/hle/kernel/physical_memory.h"
+#include "lru_cache.h"
+
+#include <utility>
+
+using ModuleID = std::array<u8, 32>; // NSO build ID
+
+struct PatchCacheKey {
+    ModuleID module_id;
+    uintptr_t offset;
+    bool operator==(const PatchCacheKey&) const = default;
+};
+
+template <>
+struct std::hash<PatchCacheKey> {
+    size_t operator()(const PatchCacheKey& key) const {
+        // XOR every build-id byte into the hash, shifted by its position within a word
+        size_t hash = 0;
+        for (size_t i = 0; i < key.module_id.size(); ++i) {
+            hash ^= static_cast<size_t>(key.module_id[i]) << ((i % sizeof(size_t)) * 8);
+        }
+        return hash ^ std::hash<uintptr_t>{}(key.offset);
+    }
+};
 
 namespace Core::NCE {
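Why key on (module, offset)? Two NSOs can request patches at the same offset, and with a raw-address key they would collide in the cache. A standalone sketch of the key and hash defined above; the main() demo is illustrative and not from the codebase:

#include <array>
#include <cstdint>
#include <cstdio>
#include <functional>

using u8 = std::uint8_t;
using ModuleID = std::array<u8, 32>; // NSO build ID

struct PatchCacheKey {
    ModuleID module_id;
    uintptr_t offset;
    bool operator==(const PatchCacheKey&) const = default; // needs C++20
};

template <>
struct std::hash<PatchCacheKey> {
    size_t operator()(const PatchCacheKey& key) const {
        size_t hash = 0;
        for (size_t i = 0; i < key.module_id.size(); ++i) {
            hash ^= static_cast<size_t>(key.module_id[i]) << ((i % sizeof(size_t)) * 8);
        }
        return hash ^ std::hash<uintptr_t>{}(key.offset);
    }
};

int main() {
    // Same offset, different modules: the keys compare unequal and (very likely)
    // hash differently, so each module gets its own cache entry.
    const PatchCacheKey a{.module_id = {0x01}, .offset = 0x1234};
    const PatchCacheKey b{.module_id = {0x02}, .offset = 0x1234};
    std::printf("equal=%d hashes=%zx/%zx\n", a == b,
                std::hash<PatchCacheKey>{}(a), std::hash<PatchCacheKey>{}(b));
}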
@@ -31,13 +49,15 @@ using EntryTrampolines = std::unordered_map<ModuleTextAddress, PatchTextAddress>
 
 class Patcher {
 public:
+    void SetModuleID(const ModuleID& id) {
+        module_id = id;
+    }
     Patcher(const Patcher&) = delete;
     Patcher& operator=(const Patcher&) = delete;
     Patcher(Patcher&& other) noexcept;
     Patcher& operator=(Patcher&&) noexcept = delete;
     explicit Patcher();
     ~Patcher();
 
     bool PatchText(const Kernel::PhysicalMemory& program_image,
                    const Kernel::CodeSet::Segment& code);
     bool RelocateAndCopy(Common::ProcessAddress load_base, const Kernel::CodeSet::Segment& code,
@@ -50,7 +70,7 @@ public:
 
 private:
     using ModuleDestLabel = uintptr_t;
 
+    ModuleID module_id{};
     struct Trampoline {
         ptrdiff_t patch_offset;
         uintptr_t module_offset;
@@ -68,26 +88,25 @@ private:
 
 private:
     static constexpr size_t CACHE_SIZE = 16384; // Cache size for patch entries
-    LRUCache<uintptr_t, PatchTextAddress> patch_cache{CACHE_SIZE, Settings::values.lru_cache_enabled.GetValue()};
+    LRUCache<PatchCacheKey, PatchTextAddress> patch_cache{CACHE_SIZE, Settings::values.lru_cache_enabled.GetValue()};
 
     void BranchToPatch(uintptr_t module_dest) {
         if (patch_cache.isEnabled()) {
-            LOG_DEBUG(Core_ARM, "LRU cache lookup for address {:#x}", module_dest);
+            PatchCacheKey key{module_id, module_dest};
+            LOG_DEBUG(Core_ARM, "LRU cache lookup for module={}, offset={:#x}", fmt::ptr(module_id.data()), module_dest);
             // Try to get existing patch entry from cache
-            if (auto* cached_patch = patch_cache.get(module_dest)) {
-                LOG_WARNING(Core_ARM, "LRU cache hit for address {:#x}", module_dest);
+            if (auto* cached_patch = patch_cache.get(key)) {
+                LOG_WARNING(Core_ARM, "LRU cache hit for module offset {:#x}", module_dest);
                 curr_patch->m_branch_to_patch_relocations.push_back({c.offset(), *cached_patch});
                 return;
             }
-            LOG_DEBUG(Core_ARM, "LRU cache miss for address {:#x}, creating new patch", module_dest);
-
-            // If not in cache, create new entry and cache it
+            LOG_DEBUG(Core_ARM, "LRU cache miss for module offset {:#x}, creating new patch", module_dest);
+            // Not in cache: create and store
             const auto patch_addr = c.offset();
             curr_patch->m_branch_to_patch_relocations.push_back({patch_addr, module_dest});
-            patch_cache.put(module_dest, patch_addr);
+            patch_cache.put(key, patch_addr);
         } else {
-            LOG_DEBUG(Core_ARM, "LRU cache disabled - creating direct patch for address {:#x}", module_dest);
+            // LRU disabled - use pre-LRU approach
+            LOG_DEBUG(Core_ARM, "LRU cache disabled - direct patch for offset {:#x}", module_dest);
             curr_patch->m_branch_to_patch_relocations.push_back({c.offset(), module_dest});
         }
     }
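The compare includes lru_cache.h but the header itself is not shown. From the call sites above, the interface appears to take a capacity and an enabled flag, expose isEnabled(), return a pointer (or nullptr) from get(), and insert with put(); it also relies on the std::hash<PatchCacheKey> specialization for the key. A minimal sketch under those assumptions; the real header may differ:

#include <cstddef>
#include <list>
#include <unordered_map>
#include <utility>

// Hedged sketch of an LRU cache matching the usage in BranchToPatch above.
template <typename Key, typename Value>
class LRUCache {
public:
    LRUCache(std::size_t capacity, bool enabled) : capacity{capacity}, enabled{enabled} {}

    bool isEnabled() const {
        return enabled;
    }

    // Returns a pointer to the cached value, or nullptr on a miss.
    Value* get(const Key& key) {
        const auto it = map.find(key);
        if (it == map.end()) {
            return nullptr;
        }
        // Move the hit entry to the front: it is now the most recently used.
        order.splice(order.begin(), order, it->second);
        return &it->second->second;
    }

    void put(const Key& key, Value value) {
        if (Value* existing = get(key)) {
            *existing = std::move(value);
            return;
        }
        if (map.size() >= capacity && !order.empty()) {
            // Evict the least recently used entry from the back.
            map.erase(order.back().first);
            order.pop_back();
        }
        order.emplace_front(key, std::move(value));
        map[key] = order.begin();
    }

private:
    std::size_t capacity;
    bool enabled;
    std::list<std::pair<Key, Value>> order; // front = most recently used
    std::unordered_map<Key, typename std::list<std::pair<Key, Value>>::iterator> map;
};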
@@ -166,6 +166,8 @@ std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::KProcess& process, Core::
     const auto& code = codeset.CodeSegment();
     auto* patch = patches ? &patches->operator[](patch_index) : nullptr;
     if (patch && !load_into_process) {
+        // Set module ID using build_id from the NSO header
+        patch->SetModuleID(nso_header.build_id);
         // Patch SVCs and MRS calls in the guest code
         while (!patch->PatchText(program_image, code)) {
             patch = &patches->emplace_back();
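One thing worth noting in the lines shown: SetModuleID runs once, before patching, but when PatchText exhausts its patch space the loop continues in a fresh Patcher from emplace_back, and nothing visible in this hunk gives that patcher the module ID, so its cache keys would carry a zeroed ModuleID. Whether that matters depends on code outside this diff; if it does, re-binding the ID inside the loop would keep overflow patchers keyed correctly. A sketch of that assumption:

// Hedged sketch only: re-bind each overflow patcher to the module's build ID.
patch->SetModuleID(nso_header.build_id);
while (!patch->PatchText(program_image, code)) {
    patch = &patches->emplace_back();
    patch->SetModuleID(nso_header.build_id); // keep cache keys bound to this module
}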
@@ -507,58 +507,84 @@ TransformBufferCopies(std::span<const VideoCommon::BufferCopy> copies, size_t bu
         return value;
     }
 }
 
+struct RangedBarrierRange {
+    u32 min_mip = std::numeric_limits<u32>::max();
+    u32 max_mip = std::numeric_limits<u32>::min();
+    u32 min_layer = std::numeric_limits<u32>::max();
+    u32 max_layer = std::numeric_limits<u32>::min();
+
+    void AddLayers(const VkImageSubresourceLayers& layers) {
+        min_mip = std::min(min_mip, layers.mipLevel);
+        max_mip = std::max(max_mip, layers.mipLevel + 1);
+        min_layer = std::min(min_layer, layers.baseArrayLayer);
+        max_layer = std::max(max_layer, layers.baseArrayLayer + layers.layerCount);
+    }
+
+    VkImageSubresourceRange SubresourceRange(VkImageAspectFlags aspect_mask) const noexcept {
+        return VkImageSubresourceRange{
+            .aspectMask = aspect_mask,
+            .baseMipLevel = min_mip,
+            .levelCount = max_mip - min_mip,
+            .baseArrayLayer = min_layer,
+            .layerCount = max_layer - min_layer,
+        };
+    }
+};
+
 void CopyBufferToImage(vk::CommandBuffer cmdbuf, VkBuffer src_buffer, VkImage image,
                        VkImageAspectFlags aspect_mask, bool is_initialized,
                        std::span<const VkBufferImageCopy> copies) {
     static constexpr VkAccessFlags WRITE_ACCESS_FLAGS =
         VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
         VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
     static constexpr VkAccessFlags READ_ACCESS_FLAGS = VK_ACCESS_SHADER_READ_BIT |
                                                        VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
                                                        VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT;
 
+    // Compute exact mip/layer range being written to
+    RangedBarrierRange range;
+    for (const auto& region : copies) {
+        range.AddLayers(region.imageSubresource);
+    }
+    const VkImageSubresourceRange subresource_range = range.SubresourceRange(aspect_mask);
+
     const VkImageMemoryBarrier read_barrier{
         .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
         .pNext = nullptr,
         .srcAccessMask = WRITE_ACCESS_FLAGS,
         .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
         .oldLayout = is_initialized ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_UNDEFINED,
         .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
         .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
         .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
         .image = image,
-        .subresourceRange{
-            .aspectMask = aspect_mask,
-            .baseMipLevel = 0,
-            .levelCount = VK_REMAINING_MIP_LEVELS,
-            .baseArrayLayer = 0,
-            .layerCount = VK_REMAINING_ARRAY_LAYERS,
-        },
+        .subresourceRange = subresource_range,
     };
 
     const VkImageMemoryBarrier write_barrier{
         .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
         .pNext = nullptr,
         .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
         .dstAccessMask = WRITE_ACCESS_FLAGS | READ_ACCESS_FLAGS,
         .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
         .newLayout = VK_IMAGE_LAYOUT_GENERAL,
         .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
         .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
         .image = image,
-        .subresourceRange{
-            .aspectMask = aspect_mask,
-            .baseMipLevel = 0,
-            .levelCount = VK_REMAINING_MIP_LEVELS,
-            .baseArrayLayer = 0,
-            .layerCount = VK_REMAINING_ARRAY_LAYERS,
-        },
+        .subresourceRange = subresource_range,
     };
 
-    cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0,
-                           read_barrier);
+    cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT |
+                           VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT |
+                           VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0,
+                           read_barrier);
     cmdbuf.CopyBufferToImage(src_buffer, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, copies);
     // TODO: Move this to another API
-    cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0,
-                           write_barrier);
+    cmdbuf.PipelineBarrier(
+        VK_PIPELINE_STAGE_TRANSFER_BIT,
+        VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT |
+            VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT |
+            VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
+        0, nullptr, nullptr, write_barrier);
 }
 
 [[nodiscard]] VkImageBlit MakeImageBlit(const Region2D& dst_region, const Region2D& src_region,
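The effect of RangedBarrierRange is that the two barriers now cover exactly the mips and layers the copies touch, instead of VK_REMAINING_MIP_LEVELS / VK_REMAINING_ARRAY_LAYERS over the whole image. A standalone illustration of the union computation, with a plain struct standing in for VkImageSubresourceLayers so it runs without the Vulkan headers:

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <limits>
#include <vector>

using u32 = std::uint32_t;

// Stand-in for VkImageSubresourceLayers (illustration only).
struct SubresourceLayers {
    u32 mipLevel;
    u32 baseArrayLayer;
    u32 layerCount;
};

struct RangedBarrierRange {
    u32 min_mip = std::numeric_limits<u32>::max();
    u32 max_mip = std::numeric_limits<u32>::min();
    u32 min_layer = std::numeric_limits<u32>::max();
    u32 max_layer = std::numeric_limits<u32>::min();

    void AddLayers(const SubresourceLayers& layers) {
        min_mip = std::min(min_mip, layers.mipLevel);
        max_mip = std::max(max_mip, layers.mipLevel + 1); // max is exclusive
        min_layer = std::min(min_layer, layers.baseArrayLayer);
        max_layer = std::max(max_layer, layers.baseArrayLayer + layers.layerCount);
    }
};

int main() {
    // Copies touching mip 2 (layers 0..1) and mip 4 (layer 0).
    const std::vector<SubresourceLayers> copies{{2, 0, 2}, {4, 0, 1}};
    RangedBarrierRange range;
    for (const auto& layers : copies) {
        range.AddLayers(layers);
    }
    // Prints mips [2, 5) layers [0, 2): the barrier scopes to this range only.
    std::printf("mips [%u, %u) layers [%u, %u)\n", range.min_mip, range.max_mip,
                range.min_layer, range.max_layer);
}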
@@ -651,29 +677,7 @@ void TryTransformSwizzleIfNeeded(PixelFormat format, std::array<SwizzleSource, 4
     }
 }
 
-struct RangedBarrierRange {
-    u32 min_mip = std::numeric_limits<u32>::max();
-    u32 max_mip = std::numeric_limits<u32>::min();
-    u32 min_layer = std::numeric_limits<u32>::max();
-    u32 max_layer = std::numeric_limits<u32>::min();
-
-    void AddLayers(const VkImageSubresourceLayers& layers) {
-        min_mip = std::min(min_mip, layers.mipLevel);
-        max_mip = std::max(max_mip, layers.mipLevel + 1);
-        min_layer = std::min(min_layer, layers.baseArrayLayer);
-        max_layer = std::max(max_layer, layers.baseArrayLayer + layers.layerCount);
-    }
-
-    VkImageSubresourceRange SubresourceRange(VkImageAspectFlags aspect_mask) const noexcept {
-        return VkImageSubresourceRange{
-            .aspectMask = aspect_mask,
-            .baseMipLevel = min_mip,
-            .levelCount = max_mip - min_mip,
-            .baseArrayLayer = min_layer,
-            .layerCount = max_layer - min_layer,
-        };
-    }
-};
-
 [[nodiscard]] VkFormat Format(Shader::ImageFormat format) {
     switch (format) {
@@ -1457,12 +1461,18 @@ void TextureCacheRuntime::CopyImage(Image& dst, Image& src,
                 .subresourceRange = dst_range.SubresourceRange(aspect_mask),
             },
         };
-        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
-                               0, {}, {}, pre_barriers);
+        cmdbuf.PipelineBarrier(
+            VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT |
+                VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT,
+            VK_PIPELINE_STAGE_TRANSFER_BIT,
+            0, nullptr, nullptr, pre_barriers);
         cmdbuf.CopyImage(src_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst_image,
                          VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, vk_copies);
-        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
-                               0, {}, {}, post_barriers);
+        cmdbuf.PipelineBarrier(
+            VK_PIPELINE_STAGE_TRANSFER_BIT,
+            VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT |
+                VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT,
+            0, nullptr, nullptr, post_barriers);
     });
 }
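Before these commits, each of these barriers used VK_PIPELINE_STAGE_ALL_COMMANDS_BIT on one side, serializing the transfer against the entire pipeline. The replacement masks name only the stages that can actually produce or consume these images. As a compile-checkable summary (the constant name is mine, for illustration; the code spells the mask out inline):

#include <vulkan/vulkan_core.h>

// The producer/consumer scope used by the new barriers in this diff.
constexpr VkPipelineStageFlags kImageDependencyStages =
    VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT |
    VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT;

// Anything outside this mask (vertex fetch, unrelated draws, host work) no
// longer has to drain before the copy starts, and is not blocked after it.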
@@ -1546,7 +1556,7 @@ void Image::UploadMemory(VkBuffer buffer, VkDeviceSize offset,
     }
 
     // Handle MSAA upload if necessary
     /* WARNING, TODO: This code uses some hacks, besides being fundamentally ugly
        since tropic didn't want to touch it for a long time, so it needs a rewrite from someone better than me at vulkan.*/
     if (info.num_samples > 1 && runtime->CanUploadMSAA()) {
         // Only use MSAA copy pass for color formats
@@ -2352,7 +2362,7 @@ void TextureCacheRuntime::TransitionImageLayout(Image& image) {
     };
     scheduler.RequestOutsideRenderPassOperationContext();
     scheduler.Record([barrier](vk::CommandBuffer cmdbuf) {
-        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
                                VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, barrier);
     });
 }