Compare commits: 2e8d71b406 ... 97933fe7e8 (12 commits)

SHA1: 97933fe7e8, de8dc44ab1, 21b0964df6, 5c00af4a02, 2a47248755, ad472ad288, 8c9cdf0d70, d623e04606, 4b558e5303, 28b8159da1, 87d42cf542, 725407b989

29 changed files with 317 additions and 388 deletions
@@ -166,7 +166,7 @@ ENUM(ResolutionSetup,
Res7X,
Res8X);

ENUM(ScalingFilter, NearestNeighbor, Bilinear, Bicubic, Gaussian, Lanczos, ScaleForce, Fsr, Area, MaxEnum);
ENUM(ScalingFilter, NearestNeighbor, Bilinear, Bicubic, Spline1, Gaussian, Lanczos, ScaleForce, Fsr, Area, MaxEnum);

ENUM(AntiAliasing, None, Fxaa, Smaa, MaxEnum);
@@ -107,8 +107,6 @@ add_library(core STATIC
file_sys/fssystem/fssystem_nca_header.cpp
file_sys/fssystem/fssystem_nca_header.h
file_sys/fssystem/fssystem_nca_reader.cpp
file_sys/fssystem/fssystem_pooled_buffer.cpp
file_sys/fssystem/fssystem_pooled_buffer.h
file_sys/fssystem/fssystem_sparse_storage.cpp
file_sys/fssystem/fssystem_sparse_storage.h
file_sys/fssystem/fssystem_switch_storage.h
@@ -1,10 +1,12 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later

// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include "common/alignment.h"
#include "common/swap.h"
#include "core/file_sys/fssystem/fssystem_aes_ctr_storage.h"
#include "core/file_sys/fssystem/fssystem_pooled_buffer.h"
#include "core/file_sys/fssystem/fssystem_utility.h"

namespace FileSys {
@@ -76,13 +78,6 @@ size_t AesCtrStorage::Write(const u8* buffer, size_t size, size_t offset) {
ASSERT(Common::IsAligned(offset, BlockSize));
ASSERT(Common::IsAligned(size, BlockSize));

// Get a pooled buffer.
PooledBuffer pooled_buffer;
const bool use_work_buffer = true;
if (use_work_buffer) {
pooled_buffer.Allocate(size, BlockSize);
}

// Setup the counter.
std::array<u8, IvSize> ctr;
std::memcpy(ctr.data(), m_iv.data(), IvSize);
@@ -91,25 +86,20 @@ size_t AesCtrStorage::Write(const u8* buffer, size_t size, size_t offset) {
// Loop until all data is written.
size_t remaining = size;
s64 cur_offset = 0;

// Get a pooled buffer.
std::vector<char> pooled_buffer(BlockSize);
while (remaining > 0) {
// Determine data we're writing and where.
const size_t write_size =
use_work_buffer ? (std::min)(pooled_buffer.GetSize(), remaining) : remaining;

void* write_buf;
if (use_work_buffer) {
write_buf = pooled_buffer.GetBuffer();
} else {
write_buf = const_cast<u8*>(buffer);
}
const size_t write_size = std::min(pooled_buffer.size(), remaining);
u8* write_buf = reinterpret_cast<u8*>(pooled_buffer.data());

// Encrypt the data.
m_cipher->SetIV(ctr);
m_cipher->Transcode(buffer, write_size, reinterpret_cast<u8*>(write_buf),
Core::Crypto::Op::Encrypt);
m_cipher->Transcode(buffer, write_size, write_buf, Core::Crypto::Op::Encrypt);

// Write the encrypted data.
m_base_storage->Write(reinterpret_cast<u8*>(write_buf), write_size, offset + cur_offset);
m_base_storage->Write(write_buf, write_size, offset + cur_offset);

// Advance.
cur_offset += write_size;
@@ -1,11 +1,12 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later

// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include "common/alignment.h"
#include "common/swap.h"
#include "core/file_sys/errors.h"
#include "core/file_sys/fssystem/fssystem_aes_xts_storage.h"
#include "core/file_sys/fssystem/fssystem_pooled_buffer.h"
#include "core/file_sys/fssystem/fssystem_utility.h"

namespace FileSys {
@@ -69,17 +70,14 @@ size_t AesXtsStorage::Read(u8* buffer, size_t size, size_t offset) const {

// Decrypt into a pooled buffer.
{
PooledBuffer tmp_buf(m_block_size, m_block_size);
ASSERT(tmp_buf.GetSize() >= m_block_size);

std::memset(tmp_buf.GetBuffer(), 0, skip_size);
std::memcpy(tmp_buf.GetBuffer() + skip_size, buffer, data_size);
std::vector<char> tmp_buf(m_block_size, 0);
std::memcpy(tmp_buf.data() + skip_size, buffer, data_size);

m_cipher->SetIV(ctr);
m_cipher->Transcode(tmp_buf.GetBuffer(), m_block_size, tmp_buf.GetBuffer(),
m_cipher->Transcode(tmp_buf.data(), m_block_size, tmp_buf.data(),
Core::Crypto::Op::Decrypt);

std::memcpy(buffer, tmp_buf.GetBuffer() + skip_size, data_size);
std::memcpy(buffer, tmp_buf.data() + skip_size, data_size);
}

AddCounter(ctr.data(), IvSize, 1);
@@ -1,13 +1,14 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later

// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#pragma once

#include "common/alignment.h"
#include "core/file_sys/errors.h"
#include "core/file_sys/fssystem/fs_i_storage.h"
#include "core/file_sys/fssystem/fssystem_alignment_matching_storage_impl.h"
#include "core/file_sys/fssystem/fssystem_pooled_buffer.h"

namespace FileSys {

@@ -89,10 +90,11 @@ private:
VirtualFile m_base_storage;
s64 m_base_storage_size;
size_t m_data_align;
mutable std::vector<char> work_buffer;

public:
explicit AlignmentMatchingStoragePooledBuffer(VirtualFile bs, size_t da)
: m_base_storage(std::move(bs)), m_data_align(da) {
: m_base_storage(std::move(bs)), m_data_align(da), work_buffer(da) {
ASSERT(Common::IsPowerOfTwo(da));
}

@@ -104,16 +106,10 @@ public:

// Validate arguments.
ASSERT(buffer != nullptr);

s64 bs_size = this->GetSize();
ASSERT(R_SUCCEEDED(IStorage::CheckAccessRange(offset, size, bs_size)));

// Allocate a pooled buffer.
PooledBuffer pooled_buffer;
pooled_buffer.AllocateParticularlyLarge(m_data_align, m_data_align);

return AlignmentMatchingStorageImpl::Read(m_base_storage, pooled_buffer.GetBuffer(),
pooled_buffer.GetSize(), m_data_align,
return AlignmentMatchingStorageImpl::Read(m_base_storage, work_buffer.data(),
work_buffer.size(), m_data_align,
BufferAlign, offset, buffer, size);
}

@@ -125,16 +121,10 @@ public:

// Validate arguments.
ASSERT(buffer != nullptr);

s64 bs_size = this->GetSize();
ASSERT(R_SUCCEEDED(IStorage::CheckAccessRange(offset, size, bs_size)));

// Allocate a pooled buffer.
PooledBuffer pooled_buffer;
pooled_buffer.AllocateParticularlyLarge(m_data_align, m_data_align);

return AlignmentMatchingStorageImpl::Write(m_base_storage, pooled_buffer.GetBuffer(),
pooled_buffer.GetSize(), m_data_align,
return AlignmentMatchingStorageImpl::Write(m_base_storage, work_buffer.data(),
work_buffer.size(), m_data_align,
BufferAlign, offset, buffer, size);
}

@@ -7,7 +7,6 @@
#include "core/file_sys/errors.h"
#include "core/file_sys/fssystem/fssystem_bucket_tree.h"
#include "core/file_sys/fssystem/fssystem_bucket_tree_utils.h"
#include "core/file_sys/fssystem/fssystem_pooled_buffer.h"

namespace FileSys {

@@ -465,16 +464,8 @@ Result BucketTree::Visitor::Find(s64 virtual_address) {
}

Result BucketTree::Visitor::FindEntrySet(s32* out_index, s64 virtual_address, s32 node_index) {
const auto node_size = m_tree->m_node_size;

PooledBuffer pool(node_size, 1);
if (node_size <= pool.GetSize()) {
R_RETURN(
this->FindEntrySetWithBuffer(out_index, virtual_address, node_index, pool.GetBuffer()));
} else {
pool.Deallocate();
R_RETURN(this->FindEntrySetWithoutBuffer(out_index, virtual_address, node_index));
}
std::vector<char> pool(m_tree->m_node_size);
R_RETURN(FindEntrySetWithBuffer(out_index, virtual_address, node_index, pool.data()));
}

Result BucketTree::Visitor::FindEntrySetWithBuffer(s32* out_index, s64 virtual_address,
@@ -525,15 +516,8 @@ Result BucketTree::Visitor::FindEntrySetWithoutBuffer(s32* out_index, s64 virtua
}

Result BucketTree::Visitor::FindEntry(s64 virtual_address, s32 entry_set_index) {
const auto entry_set_size = m_tree->m_node_size;

PooledBuffer pool(entry_set_size, 1);
if (entry_set_size <= pool.GetSize()) {
R_RETURN(this->FindEntryWithBuffer(virtual_address, entry_set_index, pool.GetBuffer()));
} else {
pool.Deallocate();
R_RETURN(this->FindEntryWithoutBuffer(virtual_address, entry_set_index));
}
std::vector<char> pool(m_tree->m_node_size);
R_RETURN(FindEntryWithBuffer(virtual_address, entry_set_index, pool.data()));
}

Result BucketTree::Visitor::FindEntryWithBuffer(s64 virtual_address, s32 entry_set_index,
@@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later

// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

@@ -6,7 +9,6 @@
#include "core/file_sys/errors.h"
#include "core/file_sys/fssystem/fssystem_bucket_tree.h"
#include "core/file_sys/fssystem/fssystem_bucket_tree_utils.h"
#include "core/file_sys/fssystem/fssystem_pooled_buffer.h"

namespace FileSys {

@@ -35,23 +37,19 @@ Result BucketTree::ScanContinuousReading(ContinuousReadingInfo* out_info,
R_UNLESS(entry.GetVirtualOffset() <= cur_offset, ResultOutOfRange);

// Create a pooled buffer for our scan.
PooledBuffer pool(m_node_size, 1);
char* buffer = nullptr;

std::vector<char> pool(m_node_size);
s64 entry_storage_size = m_entry_storage->GetSize();

// Read the node.
if (m_node_size <= pool.GetSize()) {
buffer = pool.GetBuffer();
const auto ofs = param.entry_set.index * static_cast<s64>(m_node_size);
R_UNLESS(m_node_size + ofs <= static_cast<size_t>(entry_storage_size),
ResultInvalidBucketTreeNodeEntryCount);
u8* buffer = reinterpret_cast<u8*>(pool.data());
const auto ofs = param.entry_set.index * s64(m_node_size);
R_UNLESS(m_node_size + ofs <= size_t(entry_storage_size),
ResultInvalidBucketTreeNodeEntryCount);

m_entry_storage->Read(reinterpret_cast<u8*>(buffer), m_node_size, ofs);
}
m_entry_storage->Read(buffer, m_node_size, ofs);

// Calculate extents.
const auto end_offset = cur_offset + static_cast<s64>(param.size);
const auto end_offset = cur_offset + s64(param.size);
s64 phys_offset = entry.GetPhysicalOffset();

// Start merge tracking.
@@ -76,14 +74,8 @@ Result BucketTree::ScanContinuousReading(ContinuousReadingInfo* out_info,
s64 next_entry_offset;

if (entry_index + 1 < entry_count) {
if (buffer != nullptr) {
const auto ofs = impl::GetBucketTreeEntryOffset(0, m_entry_size, entry_index + 1);
std::memcpy(std::addressof(next_entry), buffer + ofs, m_entry_size);
} else {
const auto ofs = impl::GetBucketTreeEntryOffset(param.entry_set.index, m_node_size,
m_entry_size, entry_index + 1);
m_entry_storage->ReadObject(std::addressof(next_entry), ofs);
}
const auto offset = impl::GetBucketTreeEntryOffset(0, m_entry_size, entry_index + 1);
std::memcpy(std::addressof(next_entry), buffer + offset, m_entry_size);

next_entry_offset = next_entry.GetVirtualOffset();
R_UNLESS(param.offsets.IsInclude(next_entry_offset), ResultInvalidIndirectEntryOffset);
@@ -1,3 +1,6 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later

// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

@@ -9,8 +12,6 @@
#include "core/file_sys/fssystem/fs_i_storage.h"
#include "core/file_sys/fssystem/fssystem_bucket_tree.h"
#include "core/file_sys/fssystem/fssystem_compression_common.h"
#include "core/file_sys/fssystem/fssystem_pooled_buffer.h"
#include "core/file_sys/vfs/vfs.h"

namespace FileSys {

@@ -317,23 +318,11 @@ private:
R_SUCCEED_IF(entry_count == 0);

// Get the remaining size in a convenient form.
const size_t total_required_size =
static_cast<size_t>(required_access_physical_size);
const size_t total_required_size = size_t(required_access_physical_size);

// Perform the read based on whether we need to allocate a buffer.
if (will_allocate_pooled_buffer) {
// Allocate a pooled buffer.
PooledBuffer pooled_buffer;
if (pooled_buffer.GetAllocatableSizeMax() >= total_required_size) {
pooled_buffer.Allocate(total_required_size, m_block_size_max);
} else {
pooled_buffer.AllocateParticularlyLarge(
std::min<size_t>(
total_required_size,
PooledBuffer::GetAllocatableParticularlyLargeSizeMax()),
m_block_size_max);
}

std::vector<char> pooled_buffer(std::max(m_block_size_max, total_required_size));
// Read each of the entries.
for (s32 entry_idx = 0; entry_idx < entry_count; ++entry_idx) {
// Determine the current read size.
@@ -342,13 +331,13 @@ private:
if (const size_t target_entry_size =
static_cast<size_t>(entries[entry_idx].physical_size) +
static_cast<size_t>(entries[entry_idx].gap_from_prev);
target_entry_size <= pooled_buffer.GetSize()) {
target_entry_size <= pooled_buffer.size()) {
// We'll be using the pooled buffer.
will_use_pooled_buffer = true;

// Determine how much we can read.
const size_t max_size = std::min<size_t>(
required_access_physical_size, pooled_buffer.GetSize());
required_access_physical_size, pooled_buffer.size());

size_t read_size = 0;
for (auto n = entry_idx; n < entry_count; ++n) {
@@ -376,7 +365,7 @@ private:
// Perform the read based on whether or not we'll use the pooled buffer.
if (will_use_pooled_buffer) {
// Read the compressed data into the pooled buffer.
auto* const buffer = pooled_buffer.GetBuffer();
auto* const buffer = pooled_buffer.data();
m_data_storage->Read(reinterpret_cast<u8*>(buffer), cur_read_size,
required_access_physical_offset);

@@ -863,11 +852,9 @@ private:
static_cast<size_t>(unaligned_range->virtual_size));

// Get a pooled buffer for our read.
PooledBuffer pooled_buffer;
pooled_buffer.Allocate(size_buffer_required, size_buffer_required);

std::vector<char> pooled_buffer(size_buffer_required);
// Perform read.
Result rc = read_impl(pooled_buffer.GetBuffer(), size_buffer_required);
Result rc = read_impl(pooled_buffer.data(), size_buffer_required);
if (R_FAILED(rc)) {
R_THROW(rc);
}
@@ -876,8 +863,7 @@ private:
const size_t skip_size = cur_offset - unaligned_range->virtual_offset;
const size_t copy_size = std::min<size_t>(
cur_size, unaligned_range->GetEndVirtualOffset() - cur_offset);

std::memcpy(cur_dst, pooled_buffer.GetBuffer() + skip_size, copy_size);
std::memcpy(cur_dst, pooled_buffer.data() + skip_size, copy_size);

// Advance.
cur_dst += copy_size;
@@ -1,61 +0,0 @@
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include "common/alignment.h"
#include "core/file_sys/fssystem/fssystem_pooled_buffer.h"

namespace FileSys {

namespace {

constexpr size_t HeapBlockSize = BufferPoolAlignment;
static_assert(HeapBlockSize == 4_KiB);

// A heap block is 4KiB. An order is a power of two.
// This gives blocks of the order 32KiB, 512KiB, 4MiB.
constexpr s32 HeapOrderMax = 7;
constexpr s32 HeapOrderMaxForLarge = HeapOrderMax + 3;

constexpr size_t HeapAllocatableSizeMax = HeapBlockSize * (static_cast<size_t>(1) << HeapOrderMax);
constexpr size_t HeapAllocatableSizeMaxForLarge =
HeapBlockSize * (static_cast<size_t>(1) << HeapOrderMaxForLarge);

} // namespace

size_t PooledBuffer::GetAllocatableSizeMaxCore(bool large) {
return large ? HeapAllocatableSizeMaxForLarge : HeapAllocatableSizeMax;
}

void PooledBuffer::AllocateCore(size_t ideal_size, size_t required_size, bool large) {
// Ensure preconditions.
ASSERT(m_buffer == nullptr);

// Check that we can allocate this size.
ASSERT(required_size <= GetAllocatableSizeMaxCore(large));

const size_t target_size =
(std::min)((std::max)(ideal_size, required_size), GetAllocatableSizeMaxCore(large));

// Dummy implementation for allocate.
if (target_size > 0) {
m_buffer =
reinterpret_cast<char*>(::operator new(target_size, std::align_val_t{HeapBlockSize}));
m_size = target_size;

// Ensure postconditions.
ASSERT(m_buffer != nullptr);
}
}

void PooledBuffer::Shrink(size_t ideal_size) {
ASSERT(ideal_size <= GetAllocatableSizeMaxCore(true));

// Shrinking to zero means that we have no buffer.
if (ideal_size == 0) {
::operator delete(m_buffer, std::align_val_t{HeapBlockSize});
m_buffer = nullptr;
m_size = ideal_size;
}
}

} // namespace FileSys
@@ -1,95 +0,0 @@
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#pragma once

#include "common/common_funcs.h"
#include "common/common_types.h"
#include "common/literals.h"
#include "core/hle/result.h"

namespace FileSys {

using namespace Common::Literals;

constexpr inline size_t BufferPoolAlignment = 4_KiB;
constexpr inline size_t BufferPoolWorkSize = 320;

class PooledBuffer {
YUZU_NON_COPYABLE(PooledBuffer);

public:
// Constructor/Destructor.
constexpr PooledBuffer() : m_buffer(), m_size() {}

PooledBuffer(size_t ideal_size, size_t required_size) : m_buffer(), m_size() {
this->Allocate(ideal_size, required_size);
}

~PooledBuffer() {
this->Deallocate();
}

// Move and assignment.
explicit PooledBuffer(PooledBuffer&& rhs) : m_buffer(rhs.m_buffer), m_size(rhs.m_size) {
rhs.m_buffer = nullptr;
rhs.m_size = 0;
}

PooledBuffer& operator=(PooledBuffer&& rhs) {
PooledBuffer(std::move(rhs)).Swap(*this);
return *this;
}

// Allocation API.
void Allocate(size_t ideal_size, size_t required_size) {
return this->AllocateCore(ideal_size, required_size, false);
}

void AllocateParticularlyLarge(size_t ideal_size, size_t required_size) {
return this->AllocateCore(ideal_size, required_size, true);
}

void Shrink(size_t ideal_size);

void Deallocate() {
// Shrink the buffer to empty.
this->Shrink(0);
ASSERT(m_buffer == nullptr);
}

char* GetBuffer() const {
ASSERT(m_buffer != nullptr);
return m_buffer;
}

size_t GetSize() const {
ASSERT(m_buffer != nullptr);
return m_size;
}

public:
static size_t GetAllocatableSizeMax() {
return GetAllocatableSizeMaxCore(false);
}
static size_t GetAllocatableParticularlyLargeSizeMax() {
return GetAllocatableSizeMaxCore(true);
}

private:
static size_t GetAllocatableSizeMaxCore(bool large);

private:
void Swap(PooledBuffer& rhs) {
std::swap(m_buffer, rhs.m_buffer);
std::swap(m_size, rhs.m_size);
}

void AllocateCore(size_t ideal_size, size_t required_size, bool large);

private:
char* m_buffer;
size_t m_size;
};

} // namespace FileSys
@@ -53,6 +53,19 @@ u32 HardwareComposer::ComposeLocked(f32* out_speed_scale, Display& display,
// Set default speed limit to 100%.
*out_speed_scale = 1.0f;

// If no layers are available, skip the logic.
bool any_visible = false;
for (auto& layer : display.stack.layers) {
if (layer->visible) {
any_visible = true;
break;
}
}
if (!any_visible) {
*out_speed_scale = 1.0f;
return 1;
}

// Determine the number of vsync periods to wait before composing again.
std::optional<s32> swap_interval{};
bool has_acquired_buffer{};
@@ -110,7 +123,7 @@ u32 HardwareComposer::ComposeLocked(f32* out_speed_scale, Display& display,
}

// If any new buffers were acquired, we can present.
if (has_acquired_buffer) {
if (has_acquired_buffer && !composition_stack.empty()) {
// Sort by Z-index.
std::stable_sort(composition_stack.begin(), composition_stack.end(),
[&](auto& l, auto& r) { return l.z_index < r.z_index; });
@@ -119,6 +132,19 @@ u32 HardwareComposer::ComposeLocked(f32* out_speed_scale, Display& display,
nvdisp.Composite(composition_stack);
}

// Batch framebuffer releases, instead of one-into-one.
std::vector<std::pair<Layer*, Framebuffer*>> to_release;
for (auto& [layer_id, framebuffer] : m_framebuffers) {
if (framebuffer.release_frame_number > m_frame_number || !framebuffer.is_acquired)
continue;
if (auto layer = display.stack.FindLayer(layer_id); layer)
to_release.emplace_back(layer.get(), &framebuffer);
}
for (auto& [layer, framebuffer] : to_release) {
layer->buffer_item_consumer->ReleaseBuffer(framebuffer->item, android::Fence::NoFence());
framebuffer->is_acquired = false;
}

// Advance by at least one frame.
const u32 frame_advance = swap_interval.value_or(1);
m_frame_number += frame_advance;
@@ -273,52 +273,73 @@ Exclusive OR (i.e.: XOR)

### Callback: {Read,Write}Memory{8,16,32,64}

<u8> ReadMemory8(<u32> vaddr)
<u8> ReadMemory16(<u32> vaddr)
<u8> ReadMemory32(<u32> vaddr)
<u8> ReadMemory64(<u32> vaddr)
<void> WriteMemory8(<u32> vaddr, <u8> value_to_store)
<void> WriteMemory16(<u32> vaddr, <u16> value_to_store)
<void> WriteMemory32(<u32> vaddr, <u32> value_to_store)
<void> WriteMemory64(<u32> vaddr, <u64> value_to_store)
```c++
<u8> ReadMemory8(<u32> vaddr)
<u8> ReadMemory16(<u32> vaddr)
<u8> ReadMemory32(<u32> vaddr)
<u8> ReadMemory64(<u32> vaddr)
<void> WriteMemory8(<u32> vaddr, <u8> value_to_store)
<void> WriteMemory16(<u32> vaddr, <u16> value_to_store)
<void> WriteMemory32(<u32> vaddr, <u32> value_to_store)
<void> WriteMemory64(<u32> vaddr, <u64> value_to_store)
```

Memory access.

### Terminal: Interpret

SetTerm(IR::Term::Interpret{next})
```c++
SetTerm(IR::Term::Interpret{next})
```

This terminal instruction calls the interpreter, starting at `next`.
The interpreter must interpret exactly one instruction.

### Terminal: ReturnToDispatch

SetTerm(IR::Term::ReturnToDispatch{})
```c++
SetTerm(IR::Term::ReturnToDispatch{})
```

This terminal instruction returns control to the dispatcher.
The dispatcher will use the value in R15 to determine what comes next.

### Terminal: LinkBlock

SetTerm(IR::Term::LinkBlock{next})
```c++
SetTerm(IR::Term::LinkBlock{next})
```

This terminal instruction jumps to the basic block described by `next` if we have enough
cycles remaining. If we do not have enough cycles remaining, we return to the
dispatcher, which will return control to the host.

### Terminal: LinkBlockFast

```c++
SetTerm(IR::Term::LinkBlockFast{next})
```

This terminal instruction jumps to the basic block described by `next` unconditionally.
This promises guarantees that must be held at runtime - i.e that the program wont hang,

### Terminal: PopRSBHint

SetTerm(IR::Term::PopRSBHint{})
```c++
SetTerm(IR::Term::PopRSBHint{})
```

This terminal instruction checks the top of the Return Stack Buffer against R15.
If RSB lookup fails, control is returned to the dispatcher.
This is an optimization for faster function calls. A backend that doesn't support
this optimization or doesn't have a RSB may choose to implement this exactly as
ReturnToDispatch.
`ReturnToDispatch`.

### Terminal: If

SetTerm(IR::Term::If{cond, term_then, term_else})
```c++
SetTerm(IR::Term::If{cond, term_then, term_else})
```

This terminal instruction conditionally executes one terminal or another depending
on the run-time state of the ARM flags.
src/dynarmic/docs/FastMemory.md (new file, 19 lines)
@@ -0,0 +1,19 @@
# Fast memory (Fastmem)

The main way of accessing memory in JITed programs is via an invoked function, say "Read()" and "Write()". On our translator, such functions usually take a sizable amount of code space (push + call + pop), trash the i-cache (due to an indirect call) and overall make code emission more bloated.

The solution? Delegate invalid accesses to a dedicated arena, similar to a swap. The main idea behind such a mechanism is to allow the OS to transmit page faults from invalid accesses into the JIT translator directly, bypassing address space calls. While this sacrifices i-cache coherency, it allows for smaller code size and "faster" throughput.

Many kernels, however, do not support fast signal dispatching (Solaris, OpenBSD, FreeBSD). Only Linux and Windows support relatively "fast" signal dispatching, hence this feature is better suited to them only.





In x86_64 for example, when a page fault occurs, the CPU will transmit via control registers and the stack (see `IRETQ`) the appropriate arguments for a page fault handler; the OS will then transform that into something that can be sent to userspace.

Most modern OSes implement kernel page-table isolation, which means a set of system calls will invoke a context switch (the not-often-used syscalls), whereas others are handled by the same process address space (the smaller kernel portion, the often-used syscalls) without needing a context switch. This effect can be negated on systems with PCID (up to 4096 unique IDs).

Signal dispatching takes a performance hit from reloading `%cr3` - but Linux does something more clever to avoid reloads: the VDSO will take care of the entire thing in the same address space, making dispatching as costly as an indirect call - without the hazards of increased code size.

The main downside from this is the constant i-cache trashing and pipeline hazards introduced by the VDSO signal handlers. However, on most benchmarks fastmem does perform faster than without it (Linux only). This also abuses the fact of continuous address space emulation by using an arena - which can then potentially be transparently mapped onto a hugepage, reducing TLB walk times.
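A minimal, Linux-only sketch of the mechanism described above (illustrative assumptions only: the arena size, the page-in policy, and all names here are hypothetical, and a real JIT such as dynarmic patches the faulting access back to a slow-path call rather than mapping pages on demand):

```c++
#include <csignal>
#include <cstdint>
#include <cstring>
#include <sys/mman.h>

// Hypothetical guest arena: valid guest pages get backed lazily; everything else
// stays unmapped, so a stray access raises SIGSEGV instead of calling Read()/Write().
static uint8_t* g_arena = nullptr;

static void FaultHandler(int, siginfo_t* info, void*) {
    // A real translator would look up the faulting host PC and redirect to the slow path;
    // here we simply back the faulting page with zeroed memory and let the access retry.
    const auto page = reinterpret_cast<uintptr_t>(info->si_addr) & ~uintptr_t{0xFFF};
    mmap(reinterpret_cast<void*>(page), 0x1000, PROT_READ | PROT_WRITE,
         MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
}

int main() {
    // Reserve a 4 GiB guest address space with no backing memory.
    g_arena = static_cast<uint8_t*>(mmap(nullptr, uint64_t{4} << 30, PROT_NONE,
                                         MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE, -1, 0));

    struct sigaction sa{};
    sa.sa_sigaction = FaultHandler;
    sa.sa_flags = SA_SIGINFO;
    sigaction(SIGSEGV, &sa, nullptr);

    // What emitted code effectively does on the hot path: one load relative to the
    // arena base, with no push/call/pop to a memory callback.
    uint32_t value;
    std::memcpy(&value, g_arena + 0x1000, sizeof(value));  // faults once, then is plain memory
    return value == 0 ? 0 : 1;
}
```

Only genuinely unbacked guest addresses pay the signal-dispatch cost; the common case is a single direct load.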
src/dynarmic/docs/Fastmem.svg (new file; diff suppressed because one or more lines are too long; 128 KiB)
src/dynarmic/docs/HostToGuest.svg (new file; diff suppressed because one or more lines are too long; 98 KiB)
@@ -16,19 +16,34 @@ Note that `Use`ing a value decrements its `use_count` by one. When the `use_coun

The member functions on `RegAlloc` are just a combination of the above concepts.

The following registers are reserved for internal use and should NOT participate in register allocation:
- `%xmm0`, `%xmm1`, `%xmm2`: Used as scratch in exclusive memory access.
- `%rsp`: Stack pointer.
- `%r15`: JIT pointer.
- `%r14`: Page table pointer.
- `%r13`: Fastmem pointer.

The layout designates `%r15` as the JIT state pointer - while it may be tempting to turn it into a synthetic pointer, keeping an entire register (out of 12 available) is preferable over inlining a directly computed immediate.

Never modify `%r15`; we must make it clear that this register is "immutable" for the entire duration of the JIT block.

### `Scratch`

Xbyak::Reg64 ScratchGpr(HostLocList desired_locations = any_gpr)
Xbyak::Xmm ScratchXmm(HostLocList desired_locations = any_xmm)
```c++
Xbyak::Reg64 ScratchGpr(HostLocList desired_locations = any_gpr);
Xbyak::Xmm ScratchXmm(HostLocList desired_locations = any_xmm);
```

At runtime, allocate one of the registers in `desired_locations`. You are free to modify the register. The register is discarded at the end of the allocation scope.

### Pure `Use`

Xbyak::Reg64 UseGpr(Argument& arg);
Xbyak::Xmm UseXmm(Argument& arg);
OpArg UseOpArg(Argument& arg);
void Use(Argument& arg, HostLoc host_loc);
```c++
Xbyak::Reg64 UseGpr(Argument& arg);
Xbyak::Xmm UseXmm(Argument& arg);
OpArg UseOpArg(Argument& arg);
void Use(Argument& arg, HostLoc host_loc);
```

At runtime, the value corresponding to `arg` will be placed in a register. The actual register is determined by
which one of the above functions is called. `UseGpr` places it in an unused GPR, `UseXmm` places it
@@ -39,9 +54,11 @@ This register **must not** have it's value changed.

### `UseScratch`

Xbyak::Reg64 UseScratchGpr(Argument& arg);
Xbyak::Xmm UseScratchXmm(Argument& arg);
void UseScratch(Argument& arg, HostLoc host_loc);
```c++
Xbyak::Reg64 UseScratchGpr(Argument& arg);
Xbyak::Xmm UseScratchXmm(Argument& arg);
void UseScratch(Argument& arg, HostLoc host_loc);
```

At runtime, the value corresponding to `arg` will be placed in a register. The actual register is determined by
which one of the above functions is called. `UseScratchGpr` places it in an unused GPR, `UseScratchXmm` places it
@@ -55,7 +72,9 @@ You are free to modify the value in the register. The register is discarded at t

A `Define` is the definition of a value. This is the only time when a value may be set.

void DefineValue(IR::Inst* inst, const Xbyak::Reg& reg);
```c++
void DefineValue(IR::Inst* inst, const Xbyak::Reg& reg);
```

By calling `DefineValue`, you are stating that you wish to define the value for `inst`, and you have written the
value to the specified register `reg`.
@@ -64,7 +83,9 @@ value to the specified register `reg`.

Adding a `Define` to an existing value.

void DefineValue(IR::Inst* inst, Argument& arg);
```c++
void DefineValue(IR::Inst* inst, Argument& arg);
```

You are declaring that the value for `inst` is the same as the value for `arg`. No host machine instructions are
emitted.
@@ -23,15 +23,17 @@ One complication dynarmic has is that a compiled block is not uniquely identifia
the PC alone, but bits in the FPSCR and CPSR are also relevant. We resolve this by
computing a 64-bit `UniqueHash` that is guaranteed to uniquely identify a block.

u64 LocationDescriptor::UniqueHash() const {
// This value MUST BE UNIQUE.
// This calculation has to match up with EmitX64::EmitTerminalPopRSBHint
u64 pc_u64 = u64(arm_pc) << 32;
u64 fpscr_u64 = u64(fpscr.Value());
u64 t_u64 = cpsr.T() ? 1 : 0;
u64 e_u64 = cpsr.E() ? 2 : 0;
return pc_u64 | fpscr_u64 | t_u64 | e_u64;
}
```c++
u64 LocationDescriptor::UniqueHash() const {
// This value MUST BE UNIQUE.
// This calculation has to match up with EmitX64::EmitTerminalPopRSBHint
u64 pc_u64 = u64(arm_pc) << 32;
u64 fpscr_u64 = u64(fpscr.Value());
u64 t_u64 = cpsr.T() ? 1 : 0;
u64 e_u64 = cpsr.E() ? 2 : 0;
return pc_u64 | fpscr_u64 | t_u64 | e_u64;
}
```

## Our implementation isn't actually a stack

@@ -49,97 +51,107 @@ host addresses for the corresponding the compiled blocks.
size of the real RSB in hardware (which has 3 entries). Larger RSBs than 8
showed degraded performance.

struct JitState {
// ...
```c++
struct JitState {
// ...

static constexpr size_t RSBSize = 8; // MUST be a power of 2.
u32 rsb_ptr = 0;
std::array<u64, RSBSize> rsb_location_descriptors;
std::array<u64, RSBSize> rsb_codeptrs;
void ResetRSB();
static constexpr size_t RSBSize = 8; // MUST be a power of 2.
u32 rsb_ptr = 0;
std::array<u64, RSBSize> rsb_location_descriptors;
std::array<u64, RSBSize> rsb_codeptrs;
void ResetRSB();

// ...
};
// ...
};
```

### RSB Push

We insert our prediction at the insertion point iff the RSB doesn't already
contain a prediction with the same `UniqueHash`.

void EmitX64::EmitPushRSB(IR::Block&, IR::Inst* inst) {
using namespace Xbyak::util;
```c++
void EmitX64::EmitPushRSB(IR::Block&, IR::Inst* inst) {
using namespace Xbyak::util;

ASSERT(inst->GetArg(0).IsImmediate());
u64 imm64 = inst->GetArg(0).GetU64();
ASSERT(inst->GetArg(0).IsImmediate());
u64 imm64 = inst->GetArg(0).GetU64();

Xbyak::Reg64 code_ptr_reg = reg_alloc.ScratchGpr({HostLoc::RCX});
Xbyak::Reg64 loc_desc_reg = reg_alloc.ScratchGpr();
Xbyak::Reg32 index_reg = reg_alloc.ScratchGpr().cvt32();
u64 code_ptr = unique_hash_to_code_ptr.find(imm64) != unique_hash_to_code_ptr.end()
? u64(unique_hash_to_code_ptr[imm64])
: u64(code->GetReturnFromRunCodeAddress());
Xbyak::Reg64 code_ptr_reg = reg_alloc.ScratchGpr({HostLoc::RCX});
Xbyak::Reg64 loc_desc_reg = reg_alloc.ScratchGpr();
Xbyak::Reg32 index_reg = reg_alloc.ScratchGpr().cvt32();
u64 code_ptr = unique_hash_to_code_ptr.find(imm64) != unique_hash_to_code_ptr.end()
? u64(unique_hash_to_code_ptr[imm64])
: u64(code->GetReturnFromRunCodeAddress());

code->mov(index_reg, dword[code.ABI_JIT_PTR + offsetof(JitState, rsb_ptr)]);
code->add(index_reg, 1);
code->and_(index_reg, u32(JitState::RSBSize - 1));
code->mov(index_reg, dword[code.ABI_JIT_PTR + offsetof(JitState, rsb_ptr)]);
code->add(index_reg, 1);
code->and_(index_reg, u32(JitState::RSBSize - 1));

code->mov(loc_desc_reg, u64(imm64));
CodePtr patch_location = code->getCurr<CodePtr>();
patch_unique_hash_locations[imm64].emplace_back(patch_location);
code->mov(code_ptr_reg, u64(code_ptr)); // This line has to match up with EmitX64::Patch.
code->EnsurePatchLocationSize(patch_location, 10);
code->mov(loc_desc_reg, u64(imm64));
CodePtr patch_location = code->getCurr<CodePtr>();
patch_unique_hash_locations[imm64].emplace_back(patch_location);
code->mov(code_ptr_reg, u64(code_ptr)); // This line has to match up with EmitX64::Patch.
code->EnsurePatchLocationSize(patch_location, 10);

Xbyak::Label label;
for (size_t i = 0; i < JitState::RSBSize; ++i) {
code->cmp(loc_desc_reg, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_location_descriptors) + i * sizeof(u64)]);
code->je(label, code->T_SHORT);
}

code->mov(dword[code.ABI_JIT_PTR + offsetof(JitState, rsb_ptr)], index_reg);
code->mov(qword[code.ABI_JIT_PTR + index_reg.cvt64() * 8 + offsetof(JitState, rsb_location_descriptors)], loc_desc_reg);
code->mov(qword[code.ABI_JIT_PTR + index_reg.cvt64() * 8 + offsetof(JitState, rsb_codeptrs)], code_ptr_reg);
code->L(label);
Xbyak::Label label;
for (size_t i = 0; i < JitState::RSBSize; ++i) {
code->cmp(loc_desc_reg, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_location_descriptors) + i * sizeof(u64)]);
code->je(label, code->T_SHORT);
}

code->mov(dword[code.ABI_JIT_PTR + offsetof(JitState, rsb_ptr)], index_reg);
code->mov(qword[code.ABI_JIT_PTR + index_reg.cvt64() * 8 + offsetof(JitState, rsb_location_descriptors)], loc_desc_reg);
code->mov(qword[code.ABI_JIT_PTR + index_reg.cvt64() * 8 + offsetof(JitState, rsb_codeptrs)], code_ptr_reg);
code->L(label);
}
```

In pseudocode:

for (i := 0 .. RSBSize-1)
if (rsb_location_descriptors[i] == imm64)
goto label;
rsb_ptr++;
rsb_ptr %= RSBSize;
rsb_location_desciptors[rsb_ptr] = imm64; //< The UniqueHash
rsb_codeptr[rsb_ptr] = /* codeptr corresponding to the UniqueHash */;
label:
```c++
for (i := 0 .. RSBSize-1)
if (rsb_location_descriptors[i] == imm64)
goto label;
rsb_ptr++;
rsb_ptr %= RSBSize;
rsb_location_desciptors[rsb_ptr] = imm64; //< The UniqueHash
rsb_codeptr[rsb_ptr] = /* codeptr corresponding to the UniqueHash */;
label:
```

## RSB Pop

To check if a prediction is in the RSB, we linearly scan the RSB.

void EmitX64::EmitTerminalPopRSBHint(IR::Term::PopRSBHint, IR::LocationDescriptor initial_location) {
using namespace Xbyak::util;
```c++
void EmitX64::EmitTerminalPopRSBHint(IR::Term::PopRSBHint, IR::LocationDescriptor initial_location) {
using namespace Xbyak::util;

// This calculation has to match up with IREmitter::PushRSB
code->mov(ecx, MJitStateReg(Arm::Reg::PC));
code->shl(rcx, 32);
code->mov(ebx, dword[code.ABI_JIT_PTR + offsetof(JitState, FPSCR_mode)]);
code->or_(ebx, dword[code.ABI_JIT_PTR + offsetof(JitState, CPSR_et)]);
code->or_(rbx, rcx);
// This calculation has to match up with IREmitter::PushRSB
code->mov(ecx, MJitStateReg(Arm::Reg::PC));
code->shl(rcx, 32);
code->mov(ebx, dword[code.ABI_JIT_PTR + offsetof(JitState, FPSCR_mode)]);
code->or_(ebx, dword[code.ABI_JIT_PTR + offsetof(JitState, CPSR_et)]);
code->or_(rbx, rcx);

code->mov(rax, u64(code->GetReturnFromRunCodeAddress()));
for (size_t i = 0; i < JitState::RSBSize; ++i) {
code->cmp(rbx, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_location_descriptors) + i * sizeof(u64)]);
code->cmove(rax, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_codeptrs) + i * sizeof(u64)]);
}

code->jmp(rax);
code->mov(rax, u64(code->GetReturnFromRunCodeAddress()));
for (size_t i = 0; i < JitState::RSBSize; ++i) {
code->cmp(rbx, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_location_descriptors) + i * sizeof(u64)]);
code->cmove(rax, qword[code.ABI_JIT_PTR + offsetof(JitState, rsb_codeptrs) + i * sizeof(u64)]);
}

code->jmp(rax);
}
```

In pseudocode:

rbx := ComputeUniqueHash()
rax := ReturnToDispatch
for (i := 0 .. RSBSize-1)
if (rbx == rsb_location_descriptors[i])
rax = rsb_codeptrs[i]
goto rax
```c++
rbx := ComputeUniqueHash()
rax := ReturnToDispatch
for (i := 0 .. RSBSize-1)
if (rbx == rsb_location_descriptors[i])
rax = rsb_codeptrs[i]
goto rax
```
@@ -246,10 +246,7 @@ std::unique_ptr<TranslationMap> InitializeTranslations(QObject* parent)
INSERT(Settings,
vram_usage_mode,
tr("VRAM Usage Mode:"),
tr("Selects whether the emulator should prefer to conserve memory or make maximum usage "
"of available video memory for performance.\nHas no effect on integrated graphics. "
"Aggressive mode may severely impact the performance of other applications such as "
"recording software."));
tr("Selects whether the emulator should prefer to conserve memory or make maximum usage of available video memory for performance.\nAggressive mode may severely impact the performance of other applications such as recording software."));
INSERT(Settings,
skip_cpu_inner_invalidation,
tr("Skip CPU Inner Invalidation"),
@@ -575,6 +572,7 @@ std::unique_ptr<ComboboxTranslationMap> ComboboxEnumeration(QObject* parent)
PAIR(ScalingFilter, NearestNeighbor, tr("Nearest Neighbor")),
PAIR(ScalingFilter, Bilinear, tr("Bilinear")),
PAIR(ScalingFilter, Bicubic, tr("Bicubic")),
PAIR(ScalingFilter, Spline1, tr("Spline-1")),
PAIR(ScalingFilter, Gaussian, tr("Gaussian")),
PAIR(ScalingFilter, Lanczos, tr("Lanczos")),
PAIR(ScalingFilter, ScaleForce, tr("ScaleForce")),
@@ -38,6 +38,8 @@ static const std::map<Settings::ScalingFilter, QString> scaling_filter_texts_map
{Settings::ScalingFilter::Bilinear,
QStringLiteral(QT_TRANSLATE_NOOP("GMainWindow", "Bilinear"))},
{Settings::ScalingFilter::Bicubic, QStringLiteral(QT_TRANSLATE_NOOP("GMainWindow", "Bicubic"))},
{Settings::ScalingFilter::Spline1,
QStringLiteral(QT_TRANSLATE_NOOP("GMainWindow", "Spline-1"))},
{Settings::ScalingFilter::Gaussian,
QStringLiteral(QT_TRANSLATE_NOOP("GMainWindow", "Gaussian"))},
{Settings::ScalingFilter::Lanczos,
@@ -46,6 +46,7 @@ set(SHADER_FILES
present_bicubic.frag
present_gaussian.frag
present_lanczos.frag
present_spline1.frag
queries_prefix_scan_sum.comp
queries_prefix_scan_sum_nosubgroups.comp
resolve_conditional_render.comp
src/video_core/host_shaders/present_spline1.frag (new file, 24 lines)
@@ -0,0 +1,24 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later

// Spline (smooth linear interpolation) with 1 texel fetch (needs bilinear to work)
// Emulates bicubic without actually doing bicubic
// See https://iquilezles.org/articles/texture, unfortunately there are issues with the original
// where smoothstep "expansion" actually results in worse codegen (SPIRV-Opt does a direct conv to smoothstep)
// TODO: Numerical analysis - fract is sawtooth func and floor, reuse params? Perhaps - no need for precision

#version 460 core

layout (location = 0) in vec2 frag_tex_coord;
layout (location = 0) out vec4 color;
layout (binding = 0) uniform sampler2D color_texture;

vec4 textureSpline1(sampler2D sam, vec2 uv) {
float r = float(textureSize(sam, 0).x);
vec2 x = fract(uv * r + 0.5);
return texture(sam, (floor(uv * r + 0.5) + smoothstep(0.0, 1.0, x) - 0.5) / r);
}

void main() {
color = textureSpline1(color_texture, frag_tex_coord);
}
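As a reading aid for the shader above (not part of the change): a scalar, 1-D C++ model of the trick, under the assumption that hardware bilinear filtering blends the two nearest texels linearly. Warping the coordinate with smoothstep turns that linear blend into a smooth Hermite blend while still issuing a single fetch; all names here are hypothetical.

```c++
#include <algorithm>
#include <cmath>
#include <vector>

// 1-D, single-channel model of hardware bilinear filtering with clamp-to-edge:
// texel centers sit at i + 0.5 and the two nearest texels are blended linearly.
float SampleBilinear(const std::vector<float>& tex, float coord) {
    const float p = coord - 0.5f;
    const float fl = std::floor(p);
    const float w = p - fl;
    const int n = static_cast<int>(tex.size());
    const int i0 = std::clamp(static_cast<int>(fl), 0, n - 1);
    const int i1 = std::clamp(static_cast<int>(fl) + 1, 0, n - 1);
    return tex[i0] * (1.0f - w) + tex[i1] * w;
}

// Equivalent of textureSpline1: snap to the nearest texel boundary, then re-introduce the
// fractional part through smoothstep so the fixed-function bilinear blend becomes smooth.
float SampleSpline1(const std::vector<float>& tex, float u) {
    const float r = static_cast<float>(tex.size());
    const float p = u * r + 0.5f;
    const float x = p - std::floor(p);            // fract(uv * r + 0.5)
    const float s = x * x * (3.0f - 2.0f * x);    // smoothstep(0.0, 1.0, x)
    return SampleBilinear(tex, std::floor(p) + s - 0.5f);
}
```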
@@ -89,6 +89,9 @@ void BlitScreen::CreateWindowAdapt() {
case Settings::ScalingFilter::Gaussian:
window_adapt = MakeGaussian(device);
break;
case Settings::ScalingFilter::Spline1:
window_adapt = MakeSpline1(device);
break;
case Settings::ScalingFilter::Lanczos:
window_adapt = MakeLanczos(device);
break;
@@ -28,6 +28,11 @@ std::unique_ptr<WindowAdaptPass> MakeBilinear(const Device& device) {
HostShaders::OPENGL_PRESENT_FRAG);
}

std::unique_ptr<WindowAdaptPass> MakeSpline1(const Device& device) {
return std::make_unique<WindowAdaptPass>(device, CreateBilinearSampler(),
HostShaders::PRESENT_SPLINE1_FRAG);
}

std::unique_ptr<WindowAdaptPass> MakeBicubic(const Device& device) {
return std::make_unique<WindowAdaptPass>(device, CreateBilinearSampler(),
HostShaders::PRESENT_BICUBIC_FRAG);
@@ -18,6 +18,7 @@ std::unique_ptr<WindowAdaptPass> MakeNearestNeighbor(const Device& device);
std::unique_ptr<WindowAdaptPass> MakeBilinear(const Device& device);
std::unique_ptr<WindowAdaptPass> MakeBicubic(const Device& device);
std::unique_ptr<WindowAdaptPass> MakeGaussian(const Device& device);
std::unique_ptr<WindowAdaptPass> MakeSpline1(const Device& device);
std::unique_ptr<WindowAdaptPass> MakeLanczos(const Device& device);
std::unique_ptr<WindowAdaptPass> MakeScaleForce(const Device& device);
std::unique_ptr<WindowAdaptPass> MakeArea(const Device& device);
@@ -46,6 +46,11 @@ std::unique_ptr<WindowAdaptPass> MakeBilinear(const Device& device, VkFormat fra
BuildShader(device, VULKAN_PRESENT_FRAG_SPV));
}

std::unique_ptr<WindowAdaptPass> MakeSpline1(const Device& device, VkFormat frame_format) {
return std::make_unique<WindowAdaptPass>(device, frame_format, CreateBilinearSampler(device),
BuildShader(device, PRESENT_SPLINE1_FRAG_SPV));
}

std::unique_ptr<WindowAdaptPass> MakeBicubic(const Device& device, VkFormat frame_format) {
// No need for handrolled shader -- if the VK impl can do it for us ;)
if (device.IsExtFilterCubicSupported())
@@ -18,6 +18,7 @@ class MemoryAllocator;
std::unique_ptr<WindowAdaptPass> MakeNearestNeighbor(const Device& device, VkFormat frame_format);
std::unique_ptr<WindowAdaptPass> MakeBilinear(const Device& device, VkFormat frame_format);
std::unique_ptr<WindowAdaptPass> MakeBicubic(const Device& device, VkFormat frame_format);
std::unique_ptr<WindowAdaptPass> MakeSpline1(const Device& device, VkFormat frame_format);
std::unique_ptr<WindowAdaptPass> MakeGaussian(const Device& device, VkFormat frame_format);
std::unique_ptr<WindowAdaptPass> MakeLanczos(const Device& device, VkFormat frame_format);
std::unique_ptr<WindowAdaptPass> MakeScaleForce(const Device& device, VkFormat frame_format);
@@ -43,6 +43,9 @@ void BlitScreen::SetWindowAdaptPass() {
case Settings::ScalingFilter::Bicubic:
window_adapt = MakeBicubic(device, swapchain_view_format);
break;
case Settings::ScalingFilter::Spline1:
window_adapt = MakeSpline1(device, swapchain_view_format);
break;
case Settings::ScalingFilter::Gaussian:
window_adapt = MakeGaussian(device, swapchain_view_format);
break;
@@ -470,8 +470,8 @@ void PresentManager::CopyToSwapchainImpl(Frame* frame) {
const std::array wait_semaphores = {present_semaphore, *frame->render_ready};

static constexpr std::array<VkPipelineStageFlags, 2> wait_stage_masks{
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_PIPELINE_STAGE_TRANSFER_BIT,
};

const VkSubmitInfo submit_info{
@@ -1395,23 +1395,20 @@ void Device::CollectPhysicalMemoryInfo() {
}
device_access_memory += mem_properties.memoryHeaps[element].size;
}
if (!is_integrated) {
if (is_integrated) {
const s64 available_memory = static_cast<s64>(device_access_memory - device_initial_usage);
const u64 memory_size = Settings::values.vram_usage_mode.GetValue() == Settings::VramUsageMode::Aggressive ? 6_GiB : 4_GiB;
device_access_memory = static_cast<u64>(std::max<s64>(std::min<s64>(available_memory - 8_GiB, memory_size), std::min<s64>(local_memory, memory_size)));
} else {
const u64 reserve_memory = std::min<u64>(device_access_memory / 8, 1_GiB);
device_access_memory -= reserve_memory;

if (Settings::values.vram_usage_mode.GetValue() != Settings::VramUsageMode::Aggressive) {
// Account for resolution scaling in memory limits
const size_t normal_memory = 6_GiB;
const size_t scaler_memory = 1_GiB * Settings::values.resolution_info.ScaleUp(1);
device_access_memory =
std::min<u64>(device_access_memory, normal_memory + scaler_memory);
device_access_memory = std::min<u64>(device_access_memory, normal_memory + scaler_memory);
}

return;
}
const s64 available_memory = static_cast<s64>(device_access_memory - device_initial_usage);
device_access_memory = static_cast<u64>(std::max<s64>(
std::min<s64>(available_memory - 8_GiB, 6_GiB), std::min<s64>(local_memory, 6_GiB)));
}

void Device::CollectToolingInfo() {