forked from eden-emu/eden
Compare commits
6 commits
| Author | SHA1 | Date |
| --- | --- | --- |
| | f14db9b220 | |
| | a2f410f9da | |
| | d81deeb775 | |
| | 8c6018e378 | |
| | a4c4f37d72 | |
| | fdc9c7b811 | |

3 changed files with 108 additions and 77 deletions
NCE patcher header:

@@ -16,6 +16,24 @@
 #include "core/hle/kernel/physical_memory.h"
 #include "lru_cache.h"
 #include <utility>
+
+using ModuleID = std::array<u8, 32>; // NSO build ID
+
+struct PatchCacheKey {
+    ModuleID module_id;
+    uintptr_t offset;
+    bool operator==(const PatchCacheKey&) const = default;
+};
+
+template <>
+struct std::hash<PatchCacheKey> {
+    size_t operator()(const PatchCacheKey& key) const {
+        // XOR-fold every build-ID byte into the hash, then mix in the offset
+        size_t hash = 0;
+        for (size_t i = 0; i < key.module_id.size(); ++i) {
+            hash ^= static_cast<size_t>(key.module_id[i]) << ((i % sizeof(size_t)) * 8);
+        }
+        return hash ^ std::hash<uintptr_t>{}(key.offset);
+    }
+};
+
 namespace Core::NCE {
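The hash XOR-folds all 32 build-ID bytes into a `size_t` (cycling through its byte lanes via `i % sizeof(size_t)`), then mixes in the offset. A minimal standalone sketch that exercises it; the definitions from the hunk above are restated so the snippet compiles on its own, and `u8` is assumed to be `std::uint8_t` as in the emulator's common types:

```cpp
#include <array>
#include <cstdint>
#include <cstdio>
#include <functional>

using u8 = std::uint8_t;
using ModuleID = std::array<u8, 32>; // NSO build ID

struct PatchCacheKey {
    ModuleID module_id;
    uintptr_t offset;
    bool operator==(const PatchCacheKey&) const = default;
};

template <>
struct std::hash<PatchCacheKey> {
    size_t operator()(const PatchCacheKey& key) const {
        size_t hash = 0;
        for (size_t i = 0; i < key.module_id.size(); ++i) {
            hash ^= static_cast<size_t>(key.module_id[i]) << ((i % sizeof(size_t)) * 8);
        }
        return hash ^ std::hash<uintptr_t>{}(key.offset);
    }
};

int main() {
    ModuleID id{};
    id[0] = 0xAB;
    // Same module, different offsets: keys compare unequal and hash differently.
    const PatchCacheKey a{id, 0x1000};
    const PatchCacheKey b{id, 0x2000};
    std::printf("hash(a)=%zx hash(b)=%zx equal=%d\n",
                std::hash<PatchCacheKey>{}(a), std::hash<PatchCacheKey>{}(b), a == b);
}
```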
@@ -31,13 +49,15 @@ using EntryTrampolines = std::unordered_map<ModuleTextAddress, PatchTextAddress>
 class Patcher {
 public:
+    void SetModuleID(const ModuleID& id) {
+        module_id = id;
+    }
     Patcher(const Patcher&) = delete;
     Patcher& operator=(const Patcher&) = delete;
     Patcher(Patcher&& other) noexcept;
     Patcher& operator=(Patcher&&) noexcept = delete;
     explicit Patcher();
     ~Patcher();

     bool PatchText(const Kernel::PhysicalMemory& program_image,
                    const Kernel::CodeSet::Segment& code);
     bool RelocateAndCopy(Common::ProcessAddress load_base, const Kernel::CodeSet::Segment& code,

@@ -50,7 +70,7 @@ public:
 private:
     using ModuleDestLabel = uintptr_t;
+    ModuleID module_id{};
     struct Trampoline {
         ptrdiff_t patch_offset;
         uintptr_t module_offset;

@@ -68,26 +88,25 @@ private:
 private:
     static constexpr size_t CACHE_SIZE = 16384; // Cache size for patch entries
-    LRUCache<uintptr_t, PatchTextAddress> patch_cache{CACHE_SIZE, Settings::values.lru_cache_enabled.GetValue()};
+    LRUCache<PatchCacheKey, PatchTextAddress> patch_cache{CACHE_SIZE, Settings::values.lru_cache_enabled.GetValue()};

     void BranchToPatch(uintptr_t module_dest) {
         if (patch_cache.isEnabled()) {
-            LOG_DEBUG(Core_ARM, "LRU cache lookup for address {:#x}", module_dest);
+            PatchCacheKey key{module_id, module_dest};
+            LOG_DEBUG(Core_ARM, "LRU cache lookup for module={}, offset={:#x}", fmt::ptr(module_id.data()), module_dest);
             // Try to get existing patch entry from cache
-            if (auto* cached_patch = patch_cache.get(module_dest)) {
-                LOG_WARNING(Core_ARM, "LRU cache hit for address {:#x}", module_dest);
+            if (auto* cached_patch = patch_cache.get(key)) {
+                LOG_WARNING(Core_ARM, "LRU cache hit for module offset {:#x}", module_dest);
                 curr_patch->m_branch_to_patch_relocations.push_back({c.offset(), *cached_patch});
                 return;
             }
-            LOG_DEBUG(Core_ARM, "LRU cache miss for address {:#x}, creating new patch", module_dest);
-            // If not in cache, create new entry and cache it
+            LOG_DEBUG(Core_ARM, "LRU cache miss for module offset {:#x}, creating new patch", module_dest);
+            // Not in cache: create and store
             const auto patch_addr = c.offset();
             curr_patch->m_branch_to_patch_relocations.push_back({patch_addr, module_dest});
-            patch_cache.put(module_dest, patch_addr);
+            patch_cache.put(key, patch_addr);
         } else {
-            LOG_DEBUG(Core_ARM, "LRU cache disabled - creating direct patch for address {:#x}", module_dest);
-            // LRU disabled - use pre-LRU approach
+            LOG_DEBUG(Core_ARM, "LRU cache disabled - direct patch for offset {:#x}", module_dest);
             curr_patch->m_branch_to_patch_relocations.push_back({c.offset(), module_dest});
         }
     }
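lru_cache.h itself is not part of this diff. From the call sites in BranchToPatch — `isEnabled()`, `get()` returning a pointer or `nullptr`, and `put()` — an interface along the following lines would satisfy them. This is a hypothetical sketch for orientation, not the fork's actual implementation:

```cpp
#include <cstddef>
#include <list>
#include <unordered_map>

// Assumed shape of LRUCache<K, V>: requires std::hash<Key> and Key equality,
// which is exactly what PatchCacheKey provides above.
template <typename Key, typename Value>
class LRUCache {
public:
    LRUCache(std::size_t capacity, bool enabled) : capacity_{capacity}, enabled_{enabled} {}

    bool isEnabled() const { return enabled_; }

    // Returns a pointer to the cached value and marks it most-recently-used,
    // or nullptr on a miss.
    Value* get(const Key& key) {
        const auto it = map_.find(key);
        if (it == map_.end()) {
            return nullptr;
        }
        order_.splice(order_.begin(), order_, it->second); // move to front
        return &it->second->second;
    }

    // Inserts or overwrites, evicting the least-recently-used entry when full.
    void put(const Key& key, const Value& value) {
        if (auto* existing = get(key)) {
            *existing = value;
            return;
        }
        if (map_.size() >= capacity_) {
            map_.erase(order_.back().first);
            order_.pop_back();
        }
        order_.emplace_front(key, value);
        map_[key] = order_.begin();
    }

private:
    using Entry = std::pair<Key, Value>;
    std::size_t capacity_;
    bool enabled_;
    std::list<Entry> order_; // front = most recently used
    std::unordered_map<Key, typename std::list<Entry>::iterator> map_;
};
```

Whatever the real header does, the keying change in this commit is the point: with `PatchCacheKey` instead of a bare `uintptr_t`, two modules that happen to patch the same offset can no longer collide in the cache.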
NSO loader:

@@ -166,6 +166,8 @@ std::optional<VAddr> AppLoader_NSO::LoadModule(Kernel::KProcess& process, Core::
     const auto& code = codeset.CodeSegment();
     auto* patch = patches ? &patches->operator[](patch_index) : nullptr;
     if (patch && !load_into_process) {
+        // Set module ID using the build_id from the NSO header
+        patch->SetModuleID(nso_header.build_id);
         // Patch SVCs and MRS calls in the guest code
         while (!patch->PatchText(program_image, code)) {
             patch = &patches->emplace_back();
Vulkan texture cache:

@@ -507,58 +507,84 @@ TransformBufferCopies(std::span<const VideoCommon::BufferCopy> copies, size_t bu
         return value;
     }
 }

+struct RangedBarrierRange {
+    u32 min_mip = std::numeric_limits<u32>::max();
+    u32 max_mip = std::numeric_limits<u32>::min();
+    u32 min_layer = std::numeric_limits<u32>::max();
+    u32 max_layer = std::numeric_limits<u32>::min();
+
+    void AddLayers(const VkImageSubresourceLayers& layers) {
+        min_mip = std::min(min_mip, layers.mipLevel);
+        max_mip = std::max(max_mip, layers.mipLevel + 1);
+        min_layer = std::min(min_layer, layers.baseArrayLayer);
+        max_layer = std::max(max_layer, layers.baseArrayLayer + layers.layerCount);
+    }
+
+    VkImageSubresourceRange SubresourceRange(VkImageAspectFlags aspect_mask) const noexcept {
+        return VkImageSubresourceRange{
+            .aspectMask = aspect_mask,
+            .baseMipLevel = min_mip,
+            .levelCount = max_mip - min_mip,
+            .baseArrayLayer = min_layer,
+            .layerCount = max_layer - min_layer,
+        };
+    }
+};
+
 void CopyBufferToImage(vk::CommandBuffer cmdbuf, VkBuffer src_buffer, VkImage image,
                        VkImageAspectFlags aspect_mask, bool is_initialized,
                        std::span<const VkBufferImageCopy> copies) {
     static constexpr VkAccessFlags WRITE_ACCESS_FLAGS =
         VK_ACCESS_SHADER_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
         VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
     static constexpr VkAccessFlags READ_ACCESS_FLAGS = VK_ACCESS_SHADER_READ_BIT |
                                                        VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
                                                        VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT;

+    // Compute exact mip/layer range being written to
+    RangedBarrierRange range;
+    for (const auto& region : copies) {
+        range.AddLayers(region.imageSubresource);
+    }
+    const VkImageSubresourceRange subresource_range = range.SubresourceRange(aspect_mask);
+
     const VkImageMemoryBarrier read_barrier{
         .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
         .pNext = nullptr,
         .srcAccessMask = WRITE_ACCESS_FLAGS,
         .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
         .oldLayout = is_initialized ? VK_IMAGE_LAYOUT_GENERAL : VK_IMAGE_LAYOUT_UNDEFINED,
         .newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
         .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
         .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
         .image = image,
-        .subresourceRange{
-            .aspectMask = aspect_mask,
-            .baseMipLevel = 0,
-            .levelCount = VK_REMAINING_MIP_LEVELS,
-            .baseArrayLayer = 0,
-            .layerCount = VK_REMAINING_ARRAY_LAYERS,
-        },
+        .subresourceRange = subresource_range,
     };

     const VkImageMemoryBarrier write_barrier{
         .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,
         .pNext = nullptr,
         .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
         .dstAccessMask = WRITE_ACCESS_FLAGS | READ_ACCESS_FLAGS,
         .oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
         .newLayout = VK_IMAGE_LAYOUT_GENERAL,
         .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
         .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
         .image = image,
-        .subresourceRange{
-            .aspectMask = aspect_mask,
-            .baseMipLevel = 0,
-            .levelCount = VK_REMAINING_MIP_LEVELS,
-            .baseArrayLayer = 0,
-            .layerCount = VK_REMAINING_ARRAY_LAYERS,
-        },
+        .subresourceRange = subresource_range,
     };
-    cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0,
+    cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT |
+                               VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT |
+                               VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0,
                            read_barrier);
     cmdbuf.CopyBufferToImage(src_buffer, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, copies);
     // TODO: Move this to another API
-    cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0,
-                           write_barrier);
+    cmdbuf.PipelineBarrier(
+        VK_PIPELINE_STAGE_TRANSFER_BIT,
+        VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT |
+            VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT |
+            VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
+        0, nullptr, nullptr, write_barrier);
 }

 [[nodiscard]] VkImageBlit MakeImageBlit(const Region2D& dst_region, const Region2D& src_region,
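To see what the exact range computation buys, consider a worked example (illustrative values; assumes vulkan_core.h and the RangedBarrierRange struct from the hunk above are in scope). Two copy regions touching mips 2 and 4 of layer 0 now barrier exactly mips 2-4 of that one layer, where the old code barriered VK_REMAINING_MIP_LEVELS and VK_REMAINING_ARRAY_LAYERS over the whole image:

```cpp
RangedBarrierRange range;
range.AddLayers(VkImageSubresourceLayers{.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
                                         .mipLevel = 2, .baseArrayLayer = 0, .layerCount = 1});
range.AddLayers(VkImageSubresourceLayers{.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
                                         .mipLevel = 4, .baseArrayLayer = 0, .layerCount = 1});
const VkImageSubresourceRange r = range.SubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT);
// r == {COLOR, .baseMipLevel = 2, .levelCount = 3, .baseArrayLayer = 0, .layerCount = 1}
// (mip 3 is conservatively included, since the range is a single min/max interval)
```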
@@ -651,29 +677,7 @@ void TryTransformSwizzleIfNeeded(PixelFormat format, std::array<SwizzleSource, 4
     }
 }

-struct RangedBarrierRange {
-    u32 min_mip = std::numeric_limits<u32>::max();
-    u32 max_mip = std::numeric_limits<u32>::min();
-    u32 min_layer = std::numeric_limits<u32>::max();
-    u32 max_layer = std::numeric_limits<u32>::min();
-
-    void AddLayers(const VkImageSubresourceLayers& layers) {
-        min_mip = std::min(min_mip, layers.mipLevel);
-        max_mip = std::max(max_mip, layers.mipLevel + 1);
-        min_layer = std::min(min_layer, layers.baseArrayLayer);
-        max_layer = std::max(max_layer, layers.baseArrayLayer + layers.layerCount);
-    }
-
-    VkImageSubresourceRange SubresourceRange(VkImageAspectFlags aspect_mask) const noexcept {
-        return VkImageSubresourceRange{
-            .aspectMask = aspect_mask,
-            .baseMipLevel = min_mip,
-            .levelCount = max_mip - min_mip,
-            .baseArrayLayer = min_layer,
-            .layerCount = max_layer - min_layer,
-        };
-    }
-};
-
 [[nodiscard]] VkFormat Format(Shader::ImageFormat format) {
     switch (format) {
@@ -1457,12 +1461,18 @@ void TextureCacheRuntime::CopyImage(Image& dst, Image& src,
             .subresourceRange = dst_range.SubresourceRange(aspect_mask),
         },
     };
-    cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
-                           0, {}, {}, pre_barriers);
+    cmdbuf.PipelineBarrier(
+        VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT |
+            VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT,
+        VK_PIPELINE_STAGE_TRANSFER_BIT,
+        0, nullptr, nullptr, pre_barriers);
     cmdbuf.CopyImage(src_image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, dst_image,
                      VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, vk_copies);
-    cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
-                           0, {}, {}, post_barriers);
+    cmdbuf.PipelineBarrier(
+        VK_PIPELINE_STAGE_TRANSFER_BIT,
+        VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT |
+            VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT,
+        0, nullptr, nullptr, post_barriers);
     });
 }
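The barrier changes in this file follow one pattern: VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, which serializes against every stage on the queue, is replaced with only the stages that can actually produce or consume these images (late fragment tests, color attachment output, compute, plus the transfer bit where transfer-to-transfer ordering matters, as in CopyImage above). If one wanted to factor the shared set out, it would be a one-liner; the constant name here is hypothetical, not something this diff introduces:

```cpp
// Hypothetical helper, not in the diff: the stage set these barriers now use
// in place of VK_PIPELINE_STAGE_ALL_COMMANDS_BIT.
constexpr VkPipelineStageFlags TEXTURE_SYNC_STAGES =
    VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT |
    VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT |
    VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
```

The final TransitionImageLayout hunk below goes further in the same direction: a source stage of VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT means the barrier waits on no earlier work at all, which fits a first-use layout transition.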
@@ -1546,7 +1556,7 @@ void Image::UploadMemory(VkBuffer buffer, VkDeviceSize offset,
     }

     // Handle MSAA upload if necessary
     /* WARNING, TODO: This code uses some hacks, besides being fundamentally ugly
        since tropic didn't want to touch it for a long time, so it needs a rewrite from someone better than me at vulkan.*/
     if (info.num_samples > 1 && runtime->CanUploadMSAA()) {
         // Only use MSAA copy pass for color formats
@@ -2352,7 +2362,7 @@ void TextureCacheRuntime::TransitionImageLayout(Image& image) {
     };
     scheduler.RequestOutsideRenderPassOperationContext();
     scheduler.Record([barrier](vk::CommandBuffer cmdbuf) {
-        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
+        cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
                                VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, barrier);
     });
 }