From ef360352d0d2724fda63b9975f5e8cd50ec90b13 Mon Sep 17 00:00:00 2001 From: lizzie Date: Wed, 1 Oct 2025 08:04:34 +0000 Subject: [PATCH 1/6] [core] use memcpy instead of hand rolling aligned cases Hand rolling memcpy like this is always frowned upon because the compiler has more insight on whats going on (plus the code resolves to a worse version of itself on assembly). This removes some branches that are just straight up redundant. May save stuff especially for systems without fastmem enabled. Signed-off-by: lizzie --- src/core/memory.cpp | 203 ++++++++------------------------------------ 1 file changed, 36 insertions(+), 167 deletions(-) diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 2583aae867..f84507d125 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include "common/assert.h" @@ -681,22 +682,17 @@ struct Memory::Impl { } } - [[nodiscard]] u8* GetPointerImpl(u64 vaddr, auto on_unmapped, auto on_rasterizer) const { + template + [[nodiscard]] u8* GetPointerImpl(u64 vaddr, F&& on_unmapped, G&& on_rasterizer) const { // AARCH64 masks the upper 16 bit of all memory accesses - vaddr = vaddr & 0xffffffffffffULL; - if (!AddressSpaceContains(*current_page_table, vaddr, 1)) [[unlikely]] { - on_unmapped(); - return nullptr; - } else { + vaddr &= 0xffffffffffffULL; + if (AddressSpaceContains(*current_page_table, vaddr, 1)) [[likely]] { // Avoid adding any extra logic to this fast-path block const uintptr_t raw_pointer = current_page_table->pointers[vaddr >> YUZU_PAGEBITS].Raw(); - if (const uintptr_t pointer = Common::PageTable::PageInfo::ExtractPointer(raw_pointer)) { + if (const uintptr_t pointer = Common::PageTable::PageInfo::ExtractPointer(raw_pointer)) [[likely]] { return reinterpret_cast(pointer + vaddr); } else { switch (Common::PageTable::PageInfo::ExtractType(raw_pointer)) { - case Common::PageType::Unmapped: - on_unmapped(); - return nullptr; case Common::PageType::Memory: ASSERT_MSG(false, "Mapped memory page without a pointer @ 0x{:016X}", vaddr); return nullptr; @@ -707,11 +703,18 @@ struct Memory::Impl { on_rasterizer(); return host_ptr; } + case Common::PageType::Unmapped: [[unlikely]] { + on_unmapped(); + return nullptr; + } default: UNREACHABLE(); } return nullptr; } + } else { + on_unmapped(); + return nullptr; } } @@ -729,172 +732,38 @@ struct Memory::Impl { GetInteger(vaddr), []() {}, []() {}); } - /** - * Reads a particular data type out of memory at the given virtual address. - * - * @param vaddr The virtual address to read the data type from. - * - * @tparam T The data type to read out of memory. This type *must* be - * trivially copyable, otherwise the behavior of this function - * is undefined. - * - * @returns The instance of T read from the specified virtual address. - */ + /// @brief Reads a particular data type out of memory at the given virtual address. + /// @param vaddr The virtual address to read the data type from. + /// @tparam T The data type to read out of memory. + /// @returns The instance of T read from the specified virtual address. template - T Read(Common::ProcessAddress vaddr) { - // Fast path for aligned reads of common sizes + inline T Read(Common::ProcessAddress vaddr) requires(std::is_trivially_copyable_v) noexcept { const u64 addr = GetInteger(vaddr); - if constexpr (std::is_same_v || std::is_same_v) { - // 8-bit reads are always aligned - const u8* const ptr = GetPointerImpl( - addr, - [addr]() { - LOG_ERROR(HW_Memory, "Unmapped Read8 @ 0x{:016X}", addr); - }, - [&]() { HandleRasterizerDownload(addr, sizeof(T)); }); - if (ptr) { - return static_cast(*ptr); - } - return 0; - } else if constexpr (std::is_same_v || std::is_same_v) { - // Check alignment for 16-bit reads - if ((addr & 1) == 0) { - const u8* const ptr = GetPointerImpl( - addr, - [addr]() { - LOG_ERROR(HW_Memory, "Unmapped Read16 @ 0x{:016X}", addr); - }, - [&]() { HandleRasterizerDownload(addr, sizeof(T)); }); - if (ptr) { - return static_cast(*reinterpret_cast(ptr)); - } - } - } else if constexpr (std::is_same_v || std::is_same_v) { - // Check alignment for 32-bit reads - if ((addr & 3) == 0) { - const u8* const ptr = GetPointerImpl( - addr, - [addr]() { - LOG_ERROR(HW_Memory, "Unmapped Read32 @ 0x{:016X}", addr); - }, - [&]() { HandleRasterizerDownload(addr, sizeof(T)); }); - if (ptr) { - return static_cast(*reinterpret_cast(ptr)); - } - } - } else if constexpr (std::is_same_v || std::is_same_v) { - // Check alignment for 64-bit reads - if ((addr & 7) == 0) { - const u8* const ptr = GetPointerImpl( - addr, - [addr]() { - LOG_ERROR(HW_Memory, "Unmapped Read64 @ 0x{:016X}", addr); - }, - [&]() { HandleRasterizerDownload(addr, sizeof(T)); }); - if (ptr) { - return static_cast(*reinterpret_cast(ptr)); - } - } - } - - // Fall back to the general case for other types or unaligned access - T result = 0; - const u8* const ptr = GetPointerImpl( - addr, - [addr]() { - LOG_ERROR(HW_Memory, "Unmapped Read{} @ 0x{:016X}", sizeof(T) * 8, addr); - }, - [&]() { HandleRasterizerDownload(addr, sizeof(T)); }); - if (ptr) { + if (auto const ptr = GetPointerImpl(addr, [addr]() { + LOG_ERROR(HW_Memory, "Unmapped Read{} @ 0x{:016X}", sizeof(T) * 8, addr); + }, [&]() { + HandleRasterizerDownload(addr, sizeof(T)); + }); ptr) [[likely]] { + // It may be tempting to rewrite this particular section to use "reinterpret_cast"; + // afterall, it's trivially copyable so surely it can be copied ov- Alignment. + // Remember, alignment. memcpy() will deal with all the alignment extremely fast. + T result{}; std::memcpy(&result, ptr, sizeof(T)); + return result; } - return result; + return T{}; } - /** - * Writes a particular data type to memory at the given virtual address. - * - * @param vaddr The virtual address to write the data type to. - * - * @tparam T The data type to write to memory. This type *must* be - * trivially copyable, otherwise the behavior of this function - * is undefined. - */ + /// @brief Writes a particular data type to memory at the given virtual address. + /// @param vaddr The virtual address to write the data type to. + /// @tparam T The data type to write to memory. template - void Write(Common::ProcessAddress vaddr, const T data) { - // Fast path for aligned writes of common sizes + inline void Write(Common::ProcessAddress vaddr, const T data) requires(std::is_trivially_copyable_v) noexcept { const u64 addr = GetInteger(vaddr); - if constexpr (std::is_same_v || std::is_same_v) { - // 8-bit writes are always aligned - u8* const ptr = GetPointerImpl( - addr, - [addr, data]() { - LOG_ERROR(HW_Memory, "Unmapped Write8 @ 0x{:016X} = 0x{:02X}", addr, - static_cast(data)); - }, - [&]() { HandleRasterizerWrite(addr, sizeof(T)); }); - if (ptr) { - *ptr = static_cast(data); - } - return; - } else if constexpr (std::is_same_v || std::is_same_v) { - // Check alignment for 16-bit writes - if ((addr & 1) == 0) { - u8* const ptr = GetPointerImpl( - addr, - [addr, data]() { - LOG_ERROR(HW_Memory, "Unmapped Write16 @ 0x{:016X} = 0x{:04X}", addr, - static_cast(data)); - }, - [&]() { HandleRasterizerWrite(addr, sizeof(T)); }); - if (ptr) { - *reinterpret_cast(ptr) = static_cast(data); - return; - } - } - } else if constexpr (std::is_same_v || std::is_same_v) { - // Check alignment for 32-bit writes - if ((addr & 3) == 0) { - u8* const ptr = GetPointerImpl( - addr, - [addr, data]() { - LOG_ERROR(HW_Memory, "Unmapped Write32 @ 0x{:016X} = 0x{:08X}", addr, - static_cast(data)); - }, - [&]() { HandleRasterizerWrite(addr, sizeof(T)); }); - if (ptr) { - *reinterpret_cast(ptr) = static_cast(data); - return; - } - } - } else if constexpr (std::is_same_v || std::is_same_v) { - // Check alignment for 64-bit writes - if ((addr & 7) == 0) { - u8* const ptr = GetPointerImpl( - addr, - [addr, data]() { - LOG_ERROR(HW_Memory, "Unmapped Write64 @ 0x{:016X} = 0x{:016X}", addr, - static_cast(data)); - }, - [&]() { HandleRasterizerWrite(addr, sizeof(T)); }); - if (ptr) { - *reinterpret_cast(ptr) = static_cast(data); - return; - } - } - } - - // Fall back to the general case for other types or unaligned access - u8* const ptr = GetPointerImpl( - addr, - [addr, data]() { - LOG_ERROR(HW_Memory, "Unmapped Write{} @ 0x{:016X} = 0x{:016X}", sizeof(T) * 8, - addr, static_cast(data)); - }, - [&]() { HandleRasterizerWrite(addr, sizeof(T)); }); - if (ptr) { + if (auto const ptr = GetPointerImpl(addr, [addr, data]() { + LOG_ERROR(HW_Memory, "Unmapped Write{} @ 0x{:016X} = 0x{:016X}", sizeof(T) * 8, addr, u64(data)); + }, [&]() { HandleRasterizerWrite(addr, sizeof(T)); }); ptr) [[likely]] std::memcpy(ptr, &data, sizeof(T)); - } } template From f58a7f1ca3a735bd7ab9afb9d0ada779a8c42c86 Mon Sep 17 00:00:00 2001 From: lizzie Date: Thu, 2 Oct 2025 05:19:45 +0000 Subject: [PATCH 2/6] fix Signed-off-by: lizzie --- src/core/memory.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core/memory.cpp b/src/core/memory.cpp index f84507d125..e1aa3473fc 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -737,7 +737,7 @@ struct Memory::Impl { /// @tparam T The data type to read out of memory. /// @returns The instance of T read from the specified virtual address. template - inline T Read(Common::ProcessAddress vaddr) requires(std::is_trivially_copyable_v) noexcept { + inline T Read(Common::ProcessAddress vaddr) noexcept requires(std::is_trivially_copyable_v) { const u64 addr = GetInteger(vaddr); if (auto const ptr = GetPointerImpl(addr, [addr]() { LOG_ERROR(HW_Memory, "Unmapped Read{} @ 0x{:016X}", sizeof(T) * 8, addr); @@ -758,7 +758,7 @@ struct Memory::Impl { /// @param vaddr The virtual address to write the data type to. /// @tparam T The data type to write to memory. template - inline void Write(Common::ProcessAddress vaddr, const T data) requires(std::is_trivially_copyable_v) noexcept { + inline void Write(Common::ProcessAddress vaddr, const T data) noexcept requires(std::is_trivially_copyable_v) { const u64 addr = GetInteger(vaddr); if (auto const ptr = GetPointerImpl(addr, [addr, data]() { LOG_ERROR(HW_Memory, "Unmapped Write{} @ 0x{:016X} = 0x{:016X}", sizeof(T) * 8, addr, u64(data)); From dbeae7add0bbc0242be84664641d8d20edbecdac Mon Sep 17 00:00:00 2001 From: lizzie Date: Tue, 7 Oct 2025 06:35:57 +0200 Subject: [PATCH 3/6] [vk, ogl] Add VK_QCOM ZTC, Bspline, Mitchell filter weights, add MMPX filter (#2577) Adds native support for QCOM cubic filter weights, and for devices whom do not support said weights, just implement them in shaders TODO: ZTC filter is wrong!? Signed-off-by: lizzie Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/2577 Reviewed-by: crueter Reviewed-by: MaranBr Reviewed-by: CamilleLaVey Co-authored-by: lizzie Co-committed-by: lizzie --- docs/User.md | 2 +- .../app/src/main/res/values/arrays.xml | 8 ++ .../app/src/main/res/values/strings.xml | 4 + src/common/settings_enums.h | 2 +- src/qt_common/shared_translation.cpp | 4 + src/qt_common/shared_translation.h | 4 + src/video_core/host_shaders/CMakeLists.txt | 4 + .../host_shaders/present_bicubic.frag | 71 ++++------ .../host_shaders/present_bspline.frag | 35 +++++ .../host_shaders/present_mitchell.frag | 35 +++++ src/video_core/host_shaders/present_mmpx.frag | 131 ++++++++++++++++++ .../host_shaders/present_zero_tangent.frag | 35 +++++ .../renderer_opengl/gl_blit_screen.cpp | 13 ++ .../renderer_opengl/present/filters.cpp | 24 ++++ .../renderer_opengl/present/filters.h | 4 + .../renderer_vulkan/present/filters.cpp | 38 ++++- .../renderer_vulkan/present/filters.h | 3 +- .../renderer_vulkan/present/util.cpp | 13 +- src/video_core/renderer_vulkan/present/util.h | 2 +- .../renderer_vulkan/vk_blit_screen.cpp | 15 +- src/video_core/vulkan_common/vulkan_device.h | 8 +- 21 files changed, 395 insertions(+), 60 deletions(-) create mode 100644 src/video_core/host_shaders/present_bspline.frag create mode 100644 src/video_core/host_shaders/present_mitchell.frag create mode 100644 src/video_core/host_shaders/present_mmpx.frag create mode 100644 src/video_core/host_shaders/present_zero_tangent.frag diff --git a/docs/User.md b/docs/User.md index ba5d1b3eb0..67f81eadb6 100644 --- a/docs/User.md +++ b/docs/User.md @@ -7,4 +7,4 @@ This handbook is primarily aimed at the end-user - baking useful knowledge for e - **[The Basics](user/Basics.md)** - **[Audio](user/Audio.md)** - **[Graphics](user/Graphics.md)** -- **[Platforms and Architectures](user/Architectures.md)** \ No newline at end of file +- **[Platforms and Architectures](user/Architectures.md)** diff --git a/src/android/app/src/main/res/values/arrays.xml b/src/android/app/src/main/res/values/arrays.xml index ec94730348..7e44750909 100644 --- a/src/android/app/src/main/res/values/arrays.xml +++ b/src/android/app/src/main/res/values/arrays.xml @@ -253,12 +253,16 @@ @string/scaling_filter_nearest_neighbor @string/scaling_filter_bilinear @string/scaling_filter_bicubic + @string/scaling_filter_zero_tangent + @string/scaling_filter_bspline + @string/scaling_filter_mitchell @string/scaling_filter_spline1 @string/scaling_filter_gaussian @string/scaling_filter_lanczos @string/scaling_filter_scale_force @string/scaling_filter_fsr @string/scaling_filter_area + @string/scaling_filter_mmpx @@ -271,6 +275,10 @@ 6 7 8 + 9 + 10 + 11 + 12 diff --git a/src/android/app/src/main/res/values/strings.xml b/src/android/app/src/main/res/values/strings.xml index 2f6587d136..cff0bab2b5 100644 --- a/src/android/app/src/main/res/values/strings.xml +++ b/src/android/app/src/main/res/values/strings.xml @@ -1018,6 +1018,10 @@ ScaleForce AMD FidelityFX™ Super Resolution Area + Zero-Tangent + B-Spline + Mitchell + MMPX None diff --git a/src/common/settings_enums.h b/src/common/settings_enums.h index 6644bc01cc..3fcdf08256 100644 --- a/src/common/settings_enums.h +++ b/src/common/settings_enums.h @@ -143,7 +143,7 @@ ENUM(ConfirmStop, Ask_Always, Ask_Based_On_Game, Ask_Never); ENUM(FullscreenMode, Borderless, Exclusive); ENUM(NvdecEmulation, Off, Cpu, Gpu); ENUM(ResolutionSetup, Res1_4X, Res1_2X, Res3_4X, Res1X, Res5_4X, Res3_2X, Res2X, Res3X, Res4X, Res5X, Res6X, Res7X, Res8X); -ENUM(ScalingFilter, NearestNeighbor, Bilinear, Bicubic, Spline1, Gaussian, Lanczos, ScaleForce, Fsr, Area, MaxEnum); +ENUM(ScalingFilter, NearestNeighbor, Bilinear, Bicubic, ZeroTangent, BSpline, Mitchell, Spline1, Gaussian, Lanczos, ScaleForce, Fsr, Area, Mmpx, MaxEnum); ENUM(AntiAliasing, None, Fxaa, Smaa, MaxEnum); ENUM(AspectRatio, R16_9, R4_3, R21_9, R16_10, Stretch); ENUM(ConsoleMode, Handheld, Docked); diff --git a/src/qt_common/shared_translation.cpp b/src/qt_common/shared_translation.cpp index 5b8622e00a..f926f506e7 100644 --- a/src/qt_common/shared_translation.cpp +++ b/src/qt_common/shared_translation.cpp @@ -555,12 +555,16 @@ std::unique_ptr ComboboxEnumeration(QObject* parent) PAIR(ScalingFilter, NearestNeighbor, tr("Nearest Neighbor")), PAIR(ScalingFilter, Bilinear, tr("Bilinear")), PAIR(ScalingFilter, Bicubic, tr("Bicubic")), + PAIR(ScalingFilter, ZeroTangent, tr("Zero-Tangent")), + PAIR(ScalingFilter, BSpline, tr("B-Spline")), + PAIR(ScalingFilter, Mitchell, tr("Mitchell")), PAIR(ScalingFilter, Spline1, tr("Spline-1")), PAIR(ScalingFilter, Gaussian, tr("Gaussian")), PAIR(ScalingFilter, Lanczos, tr("Lanczos")), PAIR(ScalingFilter, ScaleForce, tr("ScaleForce")), PAIR(ScalingFilter, Fsr, tr("AMD FidelityFX™️ Super Resolution")), PAIR(ScalingFilter, Area, tr("Area")), + PAIR(ScalingFilter, Mmpx, tr("MMPX")), }}); translations->insert({Settings::EnumMetadata::Index(), { diff --git a/src/qt_common/shared_translation.h b/src/qt_common/shared_translation.h index c9216c2daa..a25887bb87 100644 --- a/src/qt_common/shared_translation.h +++ b/src/qt_common/shared_translation.h @@ -38,6 +38,9 @@ static const std::map scaling_filter_texts_map {Settings::ScalingFilter::Bilinear, QStringLiteral(QT_TRANSLATE_NOOP("GMainWindow", "Bilinear"))}, {Settings::ScalingFilter::Bicubic, QStringLiteral(QT_TRANSLATE_NOOP("GMainWindow", "Bicubic"))}, + {Settings::ScalingFilter::ZeroTangent, QStringLiteral(QT_TRANSLATE_NOOP("GMainWindow", "Zero-Tangent"))}, + {Settings::ScalingFilter::BSpline, QStringLiteral(QT_TRANSLATE_NOOP("GMainWindow", "B-Spline"))}, + {Settings::ScalingFilter::Mitchell, QStringLiteral(QT_TRANSLATE_NOOP("GMainWindow", "Mitchell"))}, {Settings::ScalingFilter::Spline1, QStringLiteral(QT_TRANSLATE_NOOP("GMainWindow", "Spline-1"))}, {Settings::ScalingFilter::Gaussian, @@ -48,6 +51,7 @@ static const std::map scaling_filter_texts_map QStringLiteral(QT_TRANSLATE_NOOP("GMainWindow", "ScaleForce"))}, {Settings::ScalingFilter::Fsr, QStringLiteral(QT_TRANSLATE_NOOP("GMainWindow", "FSR"))}, {Settings::ScalingFilter::Area, QStringLiteral(QT_TRANSLATE_NOOP("GMainWindow", "Area"))}, + {Settings::ScalingFilter::Mmpx, QStringLiteral(QT_TRANSLATE_NOOP("GMainWindow", "MMPX"))}, }; static const std::map use_docked_mode_texts_map = { diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index c14b44a45a..9f7b9edd5a 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -44,9 +44,13 @@ set(SHADER_FILES pitch_unswizzle.comp present_area.frag present_bicubic.frag + present_zero_tangent.frag + present_bspline.frag + present_mitchell.frag present_gaussian.frag present_lanczos.frag present_spline1.frag + present_mmpx.frag queries_prefix_scan_sum.comp queries_prefix_scan_sum_nosubgroups.comp resolve_conditional_render.comp diff --git a/src/video_core/host_shaders/present_bicubic.frag b/src/video_core/host_shaders/present_bicubic.frag index a9d9d40a38..5347fd2ef7 100644 --- a/src/video_core/host_shaders/present_bicubic.frag +++ b/src/video_core/host_shaders/present_bicubic.frag @@ -1,56 +1,37 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later // SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later - #version 460 core - - layout (location = 0) in vec2 frag_tex_coord; - layout (location = 0) out vec4 color; - layout (binding = 0) uniform sampler2D color_texture; - -vec4 cubic(float v) { - vec4 n = vec4(1.0, 2.0, 3.0, 4.0) - v; - vec4 s = n * n * n; - float x = s.x; - float y = s.y - 4.0 * s.x; - float z = s.z - 4.0 * s.y + 6.0 * s.x; - float w = 6.0 - x - y - z; - return vec4(x, y, z, w) * (1.0 / 6.0); +vec4 cubic(float x) { + float x2 = x * x; + float x3 = x2 * x; + return vec4(1.0, x, x2, x3) * transpose(mat4x4( + 0.0, 2.0, 0.0, 0.0, + -1.0, 0.0, 1.0, 0.0, + 2.0, -5.0, 4.0, -1.0, + -1.0, 3.0, -3.0, 1.0 + ) * (1.0 / 2.0)); } - -vec4 textureBicubic( sampler2D textureSampler, vec2 texCoords ) { - - vec2 texSize = textureSize(textureSampler, 0); - vec2 invTexSize = 1.0 / texSize; - - texCoords = texCoords * texSize - 0.5; - - vec2 fxy = fract(texCoords); - texCoords -= fxy; - - vec4 xcubic = cubic(fxy.x); - vec4 ycubic = cubic(fxy.y); - - vec4 c = texCoords.xxyy + vec2(-0.5, +1.5).xyxy; - - vec4 s = vec4(xcubic.xz + xcubic.yw, ycubic.xz + ycubic.yw); - vec4 offset = c + vec4(xcubic.yw, ycubic.yw) / s; - - offset *= invTexSize.xxyy; - - vec4 sample0 = texture(textureSampler, offset.xz); - vec4 sample1 = texture(textureSampler, offset.yz); - vec4 sample2 = texture(textureSampler, offset.xw); - vec4 sample3 = texture(textureSampler, offset.yw); - - float sx = s.x / (s.x + s.y); - float sy = s.z / (s.z + s.w); - - return mix(mix(sample3, sample2, sx), mix(sample1, sample0, sx), sy); +vec4 textureBicubic(sampler2D samp, vec2 uv) { + vec2 tex_size = vec2(textureSize(samp, 0)); + vec2 cc_tex = uv * tex_size - 0.5f; + vec2 fex = cc_tex - floor(cc_tex); + vec4 xcubic = cubic(fex.x); + vec4 ycubic = cubic(fex.y); + vec4 c = floor(cc_tex).xxyy + vec2(-0.5f, 1.5f).xyxy; + vec4 z = vec4(xcubic.yw, ycubic.yw); + vec4 s = vec4(xcubic.xz, ycubic.xz) + z; + vec4 offset = (c + z / s) * (1.0f / tex_size).xxyy; + vec2 n = vec2(s.x / (s.x + s.y), s.z / (s.z + s.w)); + return mix( + mix(texture(samp, offset.yw), texture(samp, offset.xw), n.x), + mix(texture(samp, offset.yz), texture(samp, offset.xz), n.x), + n.y); } - void main() { color = textureBicubic(color_texture, frag_tex_coord); } diff --git a/src/video_core/host_shaders/present_bspline.frag b/src/video_core/host_shaders/present_bspline.frag new file mode 100644 index 0000000000..f229de6030 --- /dev/null +++ b/src/video_core/host_shaders/present_bspline.frag @@ -0,0 +1,35 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later +#version 460 core +layout (location = 0) in vec2 frag_tex_coord; +layout (location = 0) out vec4 color; +layout (binding = 0) uniform sampler2D color_texture; +vec4 cubic(float x) { + float x2 = x * x; + float x3 = x2 * x; + return vec4(1.0, x, x2, x3) * transpose(mat4x4( + 1.0, 4.0, 1.0, 0.0, + -3.0, 0.0, 3.0, 0.0, + 3.0, -6.0, 3.0, 0.0, + -1.0, 3.0, -3.0, 1.0 + ) * (1.0 / 6.0)); +} +vec4 textureBicubic(sampler2D samp, vec2 uv) { + vec2 tex_size = vec2(textureSize(samp, 0)); + vec2 cc_tex = uv * tex_size - 0.5f; + vec2 fex = cc_tex - floor(cc_tex); + vec4 xcubic = cubic(fex.x); + vec4 ycubic = cubic(fex.y); + vec4 c = floor(cc_tex).xxyy + vec2(-0.5f, 1.5f).xyxy; + vec4 z = vec4(xcubic.yw, ycubic.yw); + vec4 s = vec4(xcubic.xz, ycubic.xz) + z; + vec4 offset = (c + z / s) * (1.0f / tex_size).xxyy; + vec2 n = vec2(s.x / (s.x + s.y), s.z / (s.z + s.w)); + return mix( + mix(texture(samp, offset.yw), texture(samp, offset.xw), n.x), + mix(texture(samp, offset.yz), texture(samp, offset.xz), n.x), + n.y); +} +void main() { + color = textureBicubic(color_texture, frag_tex_coord); +} diff --git a/src/video_core/host_shaders/present_mitchell.frag b/src/video_core/host_shaders/present_mitchell.frag new file mode 100644 index 0000000000..4ca65cd6f0 --- /dev/null +++ b/src/video_core/host_shaders/present_mitchell.frag @@ -0,0 +1,35 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later +#version 460 core +layout (location = 0) in vec2 frag_tex_coord; +layout (location = 0) out vec4 color; +layout (binding = 0) uniform sampler2D color_texture; +vec4 cubic(float x) { + float x2 = x * x; + float x3 = x2 * x; + return vec4(1.0, x, x2, x3) * transpose(mat4x4( + 1.0, 16.0, 1.0, 0.0, + -9.0, 0.0, 9.0, 0.0, + 15.0, -36.0, 27.0, -6.0, + -7.0, 21.0, -21.0, 7.0 + ) * (1.0 / 18.0)); +} +vec4 textureBicubic(sampler2D samp, vec2 uv) { + vec2 tex_size = vec2(textureSize(samp, 0)); + vec2 cc_tex = uv * tex_size - 0.5f; + vec2 fex = cc_tex - floor(cc_tex); + vec4 xcubic = cubic(fex.x); + vec4 ycubic = cubic(fex.y); + vec4 c = floor(cc_tex).xxyy + vec2(-0.5f, 1.5f).xyxy; + vec4 z = vec4(xcubic.yw, ycubic.yw); + vec4 s = vec4(xcubic.xz, ycubic.xz) + z; + vec4 offset = (c + z / s) * (1.0f / tex_size).xxyy; + vec2 n = vec2(s.x / (s.x + s.y), s.z / (s.z + s.w)); + return mix( + mix(texture(samp, offset.yw), texture(samp, offset.xw), n.x), + mix(texture(samp, offset.yz), texture(samp, offset.xz), n.x), + n.y); +} +void main() { + color = textureBicubic(color_texture, frag_tex_coord); +} diff --git a/src/video_core/host_shaders/present_mmpx.frag b/src/video_core/host_shaders/present_mmpx.frag new file mode 100644 index 0000000000..6c2c05a63a --- /dev/null +++ b/src/video_core/host_shaders/present_mmpx.frag @@ -0,0 +1,131 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later +// Copyright 2023 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#version 460 core +layout(location = 0) in vec2 tex_coord; +layout(location = 0) out vec4 frag_color; +layout(binding = 0) uniform sampler2D tex; + +#define src(x, y) texture(tex, coord + vec2(x, y) * 1.0 / source_size) + +float luma(vec4 col) { + return dot(col.rgb, vec3(0.2126, 0.7152, 0.0722)) * (1.0 - col.a); +} + +bool same(vec4 B, vec4 A0) { + return all(equal(B, A0)); +} + +bool notsame(vec4 B, vec4 A0) { + return any(notEqual(B, A0)); +} + +bool all_eq2(vec4 B, vec4 A0, vec4 A1) { + return (same(B,A0) && same(B,A1)); +} + +bool all_eq3(vec4 B, vec4 A0, vec4 A1, vec4 A2) { + return (same(B,A0) && same(B,A1) && same(B,A2)); +} + +bool all_eq4(vec4 B, vec4 A0, vec4 A1, vec4 A2, vec4 A3) { + return (same(B,A0) && same(B,A1) && same(B,A2) && same(B,A3)); +} + +bool any_eq3(vec4 B, vec4 A0, vec4 A1, vec4 A2) { + return (same(B,A0) || same(B,A1) || same(B,A2)); +} + +bool none_eq2(vec4 B, vec4 A0, vec4 A1) { + return (notsame(B,A0) && notsame(B,A1)); +} + +bool none_eq4(vec4 B, vec4 A0, vec4 A1, vec4 A2, vec4 A3) { + return (notsame(B,A0) && notsame(B,A1) && notsame(B,A2) && notsame(B,A3)); +} + +void main() +{ + vec2 source_size = vec2(textureSize(tex, 0)); + vec2 pos = fract(tex_coord * source_size) - vec2(0.5, 0.5); + vec2 coord = tex_coord - pos / source_size; + + vec4 E = src(0.0,0.0); + + vec4 A = src(-1.0,-1.0); + vec4 B = src(0.0,-1.0); + vec4 C = src(1.0,-1.0); + + vec4 D = src(-1.0,0.0); + vec4 F = src(1.0,0.0); + + vec4 G = src(-1.0,1.0); + vec4 H = src(0.0,1.0); + vec4 I = src(1.0,1.0); + + vec4 J = E; + vec4 K = E; + vec4 L = E; + vec4 M = E; + + frag_color = E; + + if(same(E,A) && same(E,B) && same(E,C) && same(E,D) && same(E,F) && same(E,G) && same(E,H) && same(E,I)) return; + + vec4 P = src(0.0,2.0); + vec4 Q = src(-2.0,0.0); + vec4 R = src(2.0,0.0); + vec4 S = src(0.0,2.0); + + float Bl = luma(B); + float Dl = luma(D); + float El = luma(E); + float Fl = luma(F); + float Hl = luma(H); + + if (((same(D,B) && notsame(D,H) && notsame(D,F))) && ((El>=Dl) || same(E,A)) && any_eq3(E,A,C,G) && ((El=Bl) || same(E,C)) && any_eq3(E,A,C,I) && ((El=Hl) || same(E,G)) && any_eq3(E,A,G,I) && ((El=Fl) || same(E,I)) && any_eq3(E,C,G,I) && ((El MakeBicubic(const Device& device) { HostShaders::PRESENT_BICUBIC_FRAG); } +std::unique_ptr MakeMitchell(const Device& device) { + return std::make_unique(device, CreateBilinearSampler(), + HostShaders::PRESENT_MITCHELL_FRAG); +} + +std::unique_ptr MakeZeroTangent(const Device& device) { + return std::make_unique(device, CreateBilinearSampler(), + HostShaders::PRESENT_ZERO_TANGENT_FRAG); +} + +std::unique_ptr MakeBSpline(const Device& device) { + return std::make_unique(device, CreateBilinearSampler(), + HostShaders::PRESENT_BSPLINE_FRAG); +} + std::unique_ptr MakeGaussian(const Device& device) { return std::make_unique(device, CreateBilinearSampler(), HostShaders::PRESENT_GAUSSIAN_FRAG); @@ -60,4 +79,9 @@ std::unique_ptr MakeArea(const Device& device) { HostShaders::PRESENT_AREA_FRAG); } +std::unique_ptr MakeMmpx(const Device& device) { + return std::make_unique(device, CreateNearestNeighborSampler(), + HostShaders::PRESENT_MMPX_FRAG); +} + } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/present/filters.h b/src/video_core/renderer_opengl/present/filters.h index 7b38ac56bc..187d0f1298 100644 --- a/src/video_core/renderer_opengl/present/filters.h +++ b/src/video_core/renderer_opengl/present/filters.h @@ -17,10 +17,14 @@ namespace OpenGL { std::unique_ptr MakeNearestNeighbor(const Device& device); std::unique_ptr MakeBilinear(const Device& device); std::unique_ptr MakeBicubic(const Device& device); +std::unique_ptr MakeZeroTangent(const Device& device); +std::unique_ptr MakeMitchell(const Device& device); +std::unique_ptr MakeBSpline(const Device& device); std::unique_ptr MakeGaussian(const Device& device); std::unique_ptr MakeSpline1(const Device& device); std::unique_ptr MakeLanczos(const Device& device); std::unique_ptr MakeScaleForce(const Device& device); std::unique_ptr MakeArea(const Device& device); +std::unique_ptr MakeMmpx(const Device& device); } // namespace OpenGL diff --git a/src/video_core/renderer_vulkan/present/filters.cpp b/src/video_core/renderer_vulkan/present/filters.cpp index e0f2b26f84..0a28ea6349 100644 --- a/src/video_core/renderer_vulkan/present/filters.cpp +++ b/src/video_core/renderer_vulkan/present/filters.cpp @@ -7,6 +7,8 @@ // SPDX-FileCopyrightText: Copyright 2024 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include +#include "common/assert.h" #include "common/common_types.h" #include "video_core/host_shaders/present_area_frag_spv.h" @@ -14,6 +16,10 @@ #include "video_core/host_shaders/present_gaussian_frag_spv.h" #include "video_core/host_shaders/present_lanczos_frag_spv.h" #include "video_core/host_shaders/present_spline1_frag_spv.h" +#include "video_core/host_shaders/present_mitchell_frag_spv.h" +#include "video_core/host_shaders/present_bspline_frag_spv.h" +#include "video_core/host_shaders/present_zero_tangent_frag_spv.h" +#include "video_core/host_shaders/present_mmpx_frag_spv.h" #include "video_core/host_shaders/vulkan_present_frag_spv.h" #include "video_core/host_shaders/vulkan_present_scaleforce_fp16_frag_spv.h" #include "video_core/host_shaders/vulkan_present_scaleforce_fp32_frag_spv.h" @@ -52,13 +58,28 @@ std::unique_ptr MakeSpline1(const Device& device, VkFormat fram BuildShader(device, PRESENT_SPLINE1_FRAG_SPV)); } -std::unique_ptr MakeBicubic(const Device& device, VkFormat frame_format) { +std::unique_ptr MakeBicubic(const Device& device, VkFormat frame_format, VkCubicFilterWeightsQCOM qcom_weights) { // No need for handrolled shader -- if the VK impl can do it for us ;) - if (device.IsExtFilterCubicSupported()) - return std::make_unique(device, frame_format, CreateCubicSampler(device), - BuildShader(device, VULKAN_PRESENT_FRAG_SPV)); - return std::make_unique(device, frame_format, CreateBilinearSampler(device), - BuildShader(device, PRESENT_BICUBIC_FRAG_SPV)); + // Catmull-Rom is default bicubic for all implementations... + if (device.IsExtFilterCubicSupported() && (device.IsQcomFilterCubicWeightsSupported() || qcom_weights == VK_CUBIC_FILTER_WEIGHTS_CATMULL_ROM_QCOM)) { + return std::make_unique(device, frame_format, CreateCubicSampler(device, + qcom_weights), BuildShader(device, VULKAN_PRESENT_FRAG_SPV)); + } else { + return std::make_unique(device, frame_format, CreateBilinearSampler(device), [&](){ + switch (qcom_weights) { + case VK_CUBIC_FILTER_WEIGHTS_CATMULL_ROM_QCOM: + return BuildShader(device, PRESENT_BICUBIC_FRAG_SPV); + case VK_CUBIC_FILTER_WEIGHTS_ZERO_TANGENT_CARDINAL_QCOM: + return BuildShader(device, PRESENT_ZERO_TANGENT_FRAG_SPV); + case VK_CUBIC_FILTER_WEIGHTS_B_SPLINE_QCOM: + return BuildShader(device, PRESENT_BSPLINE_FRAG_SPV); + case VK_CUBIC_FILTER_WEIGHTS_MITCHELL_NETRAVALI_QCOM: + return BuildShader(device, PRESENT_MITCHELL_FRAG_SPV); + default: + UNREACHABLE(); + } + }()); + } } std::unique_ptr MakeGaussian(const Device& device, VkFormat frame_format) { @@ -81,4 +102,9 @@ std::unique_ptr MakeArea(const Device& device, VkFormat frame_f BuildShader(device, PRESENT_AREA_FRAG_SPV)); } +std::unique_ptr MakeMmpx(const Device& device, VkFormat frame_format) { + return std::make_unique(device, frame_format, CreateNearestNeighborSampler(device), + BuildShader(device, PRESENT_MMPX_FRAG_SPV)); +} + } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/present/filters.h b/src/video_core/renderer_vulkan/present/filters.h index 015bffc8a5..afc3ba29a0 100644 --- a/src/video_core/renderer_vulkan/present/filters.h +++ b/src/video_core/renderer_vulkan/present/filters.h @@ -17,11 +17,12 @@ class MemoryAllocator; std::unique_ptr MakeNearestNeighbor(const Device& device, VkFormat frame_format); std::unique_ptr MakeBilinear(const Device& device, VkFormat frame_format); -std::unique_ptr MakeBicubic(const Device& device, VkFormat frame_format); +std::unique_ptr MakeBicubic(const Device& device, VkFormat frame_format, VkCubicFilterWeightsQCOM qcom_weights); std::unique_ptr MakeSpline1(const Device& device, VkFormat frame_format); std::unique_ptr MakeGaussian(const Device& device, VkFormat frame_format); std::unique_ptr MakeLanczos(const Device& device, VkFormat frame_format); std::unique_ptr MakeScaleForce(const Device& device, VkFormat frame_format); std::unique_ptr MakeArea(const Device& device, VkFormat frame_format); +std::unique_ptr MakeMmpx(const Device& device, VkFormat frame_format); } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/present/util.cpp b/src/video_core/renderer_vulkan/present/util.cpp index 0b1a89eec0..29a1c34976 100644 --- a/src/video_core/renderer_vulkan/present/util.cpp +++ b/src/video_core/renderer_vulkan/present/util.cpp @@ -624,8 +624,8 @@ vk::Sampler CreateNearestNeighborSampler(const Device& device) { return device.GetLogical().CreateSampler(ci_nn); } -vk::Sampler CreateCubicSampler(const Device& device) { - const VkSamplerCreateInfo ci_nn{ +vk::Sampler CreateCubicSampler(const Device& device, VkCubicFilterWeightsQCOM qcom_weights) { + VkSamplerCreateInfo ci_nn{ .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, .pNext = nullptr, .flags = 0, @@ -645,7 +645,14 @@ vk::Sampler CreateCubicSampler(const Device& device) { .borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK, .unnormalizedCoordinates = VK_FALSE, }; - + const VkSamplerCubicWeightsCreateInfoQCOM ci_qcom_nn{ + .sType = VK_STRUCTURE_TYPE_SAMPLER_CUBIC_WEIGHTS_CREATE_INFO_QCOM, + .pNext = nullptr, + .cubicWeights = qcom_weights + }; + // If not specified, assume Catmull-Rom + if (qcom_weights != VK_CUBIC_FILTER_WEIGHTS_CATMULL_ROM_QCOM) + ci_nn.pNext = &ci_qcom_nn; return device.GetLogical().CreateSampler(ci_nn); } diff --git a/src/video_core/renderer_vulkan/present/util.h b/src/video_core/renderer_vulkan/present/util.h index 11810352df..38cc6203c5 100644 --- a/src/video_core/renderer_vulkan/present/util.h +++ b/src/video_core/renderer_vulkan/present/util.h @@ -57,7 +57,7 @@ VkWriteDescriptorSet CreateWriteDescriptorSet(std::vector VkDescriptorSet set, u32 binding); vk::Sampler CreateBilinearSampler(const Device& device); vk::Sampler CreateNearestNeighborSampler(const Device& device); -vk::Sampler CreateCubicSampler(const Device& device); +vk::Sampler CreateCubicSampler(const Device& device, VkCubicFilterWeightsQCOM qcom_weights); void BeginRenderPass(vk::CommandBuffer& cmdbuf, VkRenderPass render_pass, VkFramebuffer framebuffer, VkExtent2D extent); diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp index b720bcded3..0f54dd5ade 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp @@ -7,6 +7,7 @@ // SPDX-FileCopyrightText: Copyright 2018 yuzu Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later +#include #include "video_core/framebuffer_config.h" #include "video_core/present.h" #include "video_core/renderer_vulkan/present/filters.h" @@ -41,7 +42,16 @@ void BlitScreen::SetWindowAdaptPass() { window_adapt = MakeNearestNeighbor(device, swapchain_view_format); break; case Settings::ScalingFilter::Bicubic: - window_adapt = MakeBicubic(device, swapchain_view_format); + window_adapt = MakeBicubic(device, swapchain_view_format, VK_CUBIC_FILTER_WEIGHTS_CATMULL_ROM_QCOM); + break; + case Settings::ScalingFilter::ZeroTangent: + window_adapt = MakeBicubic(device, swapchain_view_format, VK_CUBIC_FILTER_WEIGHTS_ZERO_TANGENT_CARDINAL_QCOM); + break; + case Settings::ScalingFilter::BSpline: + window_adapt = MakeBicubic(device, swapchain_view_format, VK_CUBIC_FILTER_WEIGHTS_B_SPLINE_QCOM); + break; + case Settings::ScalingFilter::Mitchell: + window_adapt = MakeBicubic(device, swapchain_view_format, VK_CUBIC_FILTER_WEIGHTS_MITCHELL_NETRAVALI_QCOM); break; case Settings::ScalingFilter::Spline1: window_adapt = MakeSpline1(device, swapchain_view_format); @@ -58,6 +68,9 @@ void BlitScreen::SetWindowAdaptPass() { case Settings::ScalingFilter::Area: window_adapt = MakeArea(device, swapchain_view_format); break; + case Settings::ScalingFilter::Mmpx: + window_adapt = MakeMmpx(device, swapchain_view_format); + break; case Settings::ScalingFilter::Fsr: case Settings::ScalingFilter::Bilinear: default: diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index bd54144480..cb13f28523 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -89,7 +89,8 @@ VK_DEFINE_HANDLE(VmaAllocator) EXTENSION(NV, VIEWPORT_ARRAY2, viewport_array2) \ EXTENSION(NV, VIEWPORT_SWIZZLE, viewport_swizzle) \ EXTENSION(EXT, DESCRIPTOR_INDEXING, descriptor_indexing) \ - EXTENSION(EXT, FILTER_CUBIC, filter_cubic) + EXTENSION(EXT, FILTER_CUBIC, filter_cubic) \ + EXTENSION(QCOM, FILTER_CUBIC_WEIGHTS, filter_cubic_weights) // Define extensions which must be supported. #define FOR_EACH_VK_MANDATORY_EXTENSION(EXTENSION_NAME) \ @@ -558,6 +559,11 @@ public: return extensions.filter_cubic; } + /// Returns true if the device supports VK_QCOM_filter_cubic_weights + bool IsQcomFilterCubicWeightsSupported() const { + return extensions.filter_cubic_weights; + } + /// Returns true if the device supports VK_EXT_line_rasterization. bool IsExtLineRasterizationSupported() const { return extensions.line_rasterization; From cf0628af466f1297c87e73b2bfe973a97f12dd04 Mon Sep 17 00:00:00 2001 From: lizzie Date: Tue, 7 Oct 2025 06:38:22 +0200 Subject: [PATCH 4/6] [compat] improve thread naming logic (#271) Signed-off-by: lizzie Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/271 Reviewed-by: MaranBr Reviewed-by: CamilleLaVey Co-authored-by: lizzie Co-committed-by: lizzie --- src/common/thread.cpp | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/src/common/thread.cpp b/src/common/thread.cpp index da1fa9b6c5..2de7465a22 100644 --- a/src/common/thread.cpp +++ b/src/common/thread.cpp @@ -1,6 +1,5 @@ // SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project // SPDX-License-Identifier: GPL-3.0-or-later - // SPDX-FileCopyrightText: 2013 Dolphin Emulator Project // SPDX-FileCopyrightText: 2014 Citra Emulator Project // SPDX-License-Identifier: GPL-2.0-or-later @@ -18,9 +17,8 @@ #else #if defined(__Bitrig__) || defined(__DragonFly__) || defined(__FreeBSD__) || defined(__OpenBSD__) #include -#else -#include #endif +#include #include #endif #ifndef _WIN32 @@ -93,33 +91,35 @@ void SetCurrentThreadName(const char* name) { #else // !MSVC_VER, so must be POSIX threads // MinGW with the POSIX threading model does not support pthread_setname_np -#if !defined(_WIN32) || defined(_MSC_VER) void SetCurrentThreadName(const char* name) { + // See for reference + // https://gitlab.freedesktop.org/mesa/mesa/-/blame/main/src/util/u_thread.c?ref_type=heads#L75 #ifdef __APPLE__ pthread_setname_np(name); +#elif defined(__HAIKU__) + rename_thread(find_thread(NULL), name); #elif defined(__Bitrig__) || defined(__DragonFly__) || defined(__FreeBSD__) || defined(__OpenBSD__) pthread_set_name_np(pthread_self(), name); #elif defined(__NetBSD__) pthread_setname_np(pthread_self(), "%s", (void*)name); -#elif defined(__linux__) - // Linux limits thread names to 15 characters and will outright reject any - // attempt to set a longer name with ERANGE. - std::string truncated(name, (std::min)(strlen(name), static_cast(15))); - if (int e = pthread_setname_np(pthread_self(), truncated.c_str())) { - errno = e; - LOG_ERROR(Common, "Failed to set thread name to '{}': {}", truncated, GetLastErrorMsg()); +#elif defined(__linux__) || defined(__CYGWIN__) || defined(__sun__) || defined(__glibc__) || defined(__managarm__) + int ret = pthread_setname_np(pthread_self(), name); + if (ret == ERANGE) { + // Linux limits thread names to 15 characters and will outright reject any + // attempt to set a longer name with ERANGE. + char buf[16]; + size_t const len = std::min(std::strlen(name), sizeof(buf) - 1); + std::memcpy(buf, name, len); + buf[len] = '\0'; + pthread_setname_np(pthread_self(), buf); } +#elif !defined(_WIN32) || defined(_MSC_VER) + // mingw stub + (void)name; #else pthread_setname_np(pthread_self(), name); #endif } -#endif - -#if defined(_WIN32) -void SetCurrentThreadName(const char* name) { - // Do Nothing on MingW -} -#endif #endif From 8d8a68dc6d41af6067d9f4c786d8bc4355daa24a Mon Sep 17 00:00:00 2001 From: lizzie Date: Wed, 1 Oct 2025 08:04:34 +0000 Subject: [PATCH 5/6] [core] use memcpy instead of hand rolling aligned cases Hand rolling memcpy like this is always frowned upon because the compiler has more insight on whats going on (plus the code resolves to a worse version of itself on assembly). This removes some branches that are just straight up redundant. May save stuff especially for systems without fastmem enabled. Signed-off-by: lizzie --- src/core/memory.cpp | 203 ++++++++------------------------------------ 1 file changed, 36 insertions(+), 167 deletions(-) diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 2583aae867..f84507d125 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include "common/assert.h" @@ -681,22 +682,17 @@ struct Memory::Impl { } } - [[nodiscard]] u8* GetPointerImpl(u64 vaddr, auto on_unmapped, auto on_rasterizer) const { + template + [[nodiscard]] u8* GetPointerImpl(u64 vaddr, F&& on_unmapped, G&& on_rasterizer) const { // AARCH64 masks the upper 16 bit of all memory accesses - vaddr = vaddr & 0xffffffffffffULL; - if (!AddressSpaceContains(*current_page_table, vaddr, 1)) [[unlikely]] { - on_unmapped(); - return nullptr; - } else { + vaddr &= 0xffffffffffffULL; + if (AddressSpaceContains(*current_page_table, vaddr, 1)) [[likely]] { // Avoid adding any extra logic to this fast-path block const uintptr_t raw_pointer = current_page_table->pointers[vaddr >> YUZU_PAGEBITS].Raw(); - if (const uintptr_t pointer = Common::PageTable::PageInfo::ExtractPointer(raw_pointer)) { + if (const uintptr_t pointer = Common::PageTable::PageInfo::ExtractPointer(raw_pointer)) [[likely]] { return reinterpret_cast(pointer + vaddr); } else { switch (Common::PageTable::PageInfo::ExtractType(raw_pointer)) { - case Common::PageType::Unmapped: - on_unmapped(); - return nullptr; case Common::PageType::Memory: ASSERT_MSG(false, "Mapped memory page without a pointer @ 0x{:016X}", vaddr); return nullptr; @@ -707,11 +703,18 @@ struct Memory::Impl { on_rasterizer(); return host_ptr; } + case Common::PageType::Unmapped: [[unlikely]] { + on_unmapped(); + return nullptr; + } default: UNREACHABLE(); } return nullptr; } + } else { + on_unmapped(); + return nullptr; } } @@ -729,172 +732,38 @@ struct Memory::Impl { GetInteger(vaddr), []() {}, []() {}); } - /** - * Reads a particular data type out of memory at the given virtual address. - * - * @param vaddr The virtual address to read the data type from. - * - * @tparam T The data type to read out of memory. This type *must* be - * trivially copyable, otherwise the behavior of this function - * is undefined. - * - * @returns The instance of T read from the specified virtual address. - */ + /// @brief Reads a particular data type out of memory at the given virtual address. + /// @param vaddr The virtual address to read the data type from. + /// @tparam T The data type to read out of memory. + /// @returns The instance of T read from the specified virtual address. template - T Read(Common::ProcessAddress vaddr) { - // Fast path for aligned reads of common sizes + inline T Read(Common::ProcessAddress vaddr) requires(std::is_trivially_copyable_v) noexcept { const u64 addr = GetInteger(vaddr); - if constexpr (std::is_same_v || std::is_same_v) { - // 8-bit reads are always aligned - const u8* const ptr = GetPointerImpl( - addr, - [addr]() { - LOG_ERROR(HW_Memory, "Unmapped Read8 @ 0x{:016X}", addr); - }, - [&]() { HandleRasterizerDownload(addr, sizeof(T)); }); - if (ptr) { - return static_cast(*ptr); - } - return 0; - } else if constexpr (std::is_same_v || std::is_same_v) { - // Check alignment for 16-bit reads - if ((addr & 1) == 0) { - const u8* const ptr = GetPointerImpl( - addr, - [addr]() { - LOG_ERROR(HW_Memory, "Unmapped Read16 @ 0x{:016X}", addr); - }, - [&]() { HandleRasterizerDownload(addr, sizeof(T)); }); - if (ptr) { - return static_cast(*reinterpret_cast(ptr)); - } - } - } else if constexpr (std::is_same_v || std::is_same_v) { - // Check alignment for 32-bit reads - if ((addr & 3) == 0) { - const u8* const ptr = GetPointerImpl( - addr, - [addr]() { - LOG_ERROR(HW_Memory, "Unmapped Read32 @ 0x{:016X}", addr); - }, - [&]() { HandleRasterizerDownload(addr, sizeof(T)); }); - if (ptr) { - return static_cast(*reinterpret_cast(ptr)); - } - } - } else if constexpr (std::is_same_v || std::is_same_v) { - // Check alignment for 64-bit reads - if ((addr & 7) == 0) { - const u8* const ptr = GetPointerImpl( - addr, - [addr]() { - LOG_ERROR(HW_Memory, "Unmapped Read64 @ 0x{:016X}", addr); - }, - [&]() { HandleRasterizerDownload(addr, sizeof(T)); }); - if (ptr) { - return static_cast(*reinterpret_cast(ptr)); - } - } - } - - // Fall back to the general case for other types or unaligned access - T result = 0; - const u8* const ptr = GetPointerImpl( - addr, - [addr]() { - LOG_ERROR(HW_Memory, "Unmapped Read{} @ 0x{:016X}", sizeof(T) * 8, addr); - }, - [&]() { HandleRasterizerDownload(addr, sizeof(T)); }); - if (ptr) { + if (auto const ptr = GetPointerImpl(addr, [addr]() { + LOG_ERROR(HW_Memory, "Unmapped Read{} @ 0x{:016X}", sizeof(T) * 8, addr); + }, [&]() { + HandleRasterizerDownload(addr, sizeof(T)); + }); ptr) [[likely]] { + // It may be tempting to rewrite this particular section to use "reinterpret_cast"; + // afterall, it's trivially copyable so surely it can be copied ov- Alignment. + // Remember, alignment. memcpy() will deal with all the alignment extremely fast. + T result{}; std::memcpy(&result, ptr, sizeof(T)); + return result; } - return result; + return T{}; } - /** - * Writes a particular data type to memory at the given virtual address. - * - * @param vaddr The virtual address to write the data type to. - * - * @tparam T The data type to write to memory. This type *must* be - * trivially copyable, otherwise the behavior of this function - * is undefined. - */ + /// @brief Writes a particular data type to memory at the given virtual address. + /// @param vaddr The virtual address to write the data type to. + /// @tparam T The data type to write to memory. template - void Write(Common::ProcessAddress vaddr, const T data) { - // Fast path for aligned writes of common sizes + inline void Write(Common::ProcessAddress vaddr, const T data) requires(std::is_trivially_copyable_v) noexcept { const u64 addr = GetInteger(vaddr); - if constexpr (std::is_same_v || std::is_same_v) { - // 8-bit writes are always aligned - u8* const ptr = GetPointerImpl( - addr, - [addr, data]() { - LOG_ERROR(HW_Memory, "Unmapped Write8 @ 0x{:016X} = 0x{:02X}", addr, - static_cast(data)); - }, - [&]() { HandleRasterizerWrite(addr, sizeof(T)); }); - if (ptr) { - *ptr = static_cast(data); - } - return; - } else if constexpr (std::is_same_v || std::is_same_v) { - // Check alignment for 16-bit writes - if ((addr & 1) == 0) { - u8* const ptr = GetPointerImpl( - addr, - [addr, data]() { - LOG_ERROR(HW_Memory, "Unmapped Write16 @ 0x{:016X} = 0x{:04X}", addr, - static_cast(data)); - }, - [&]() { HandleRasterizerWrite(addr, sizeof(T)); }); - if (ptr) { - *reinterpret_cast(ptr) = static_cast(data); - return; - } - } - } else if constexpr (std::is_same_v || std::is_same_v) { - // Check alignment for 32-bit writes - if ((addr & 3) == 0) { - u8* const ptr = GetPointerImpl( - addr, - [addr, data]() { - LOG_ERROR(HW_Memory, "Unmapped Write32 @ 0x{:016X} = 0x{:08X}", addr, - static_cast(data)); - }, - [&]() { HandleRasterizerWrite(addr, sizeof(T)); }); - if (ptr) { - *reinterpret_cast(ptr) = static_cast(data); - return; - } - } - } else if constexpr (std::is_same_v || std::is_same_v) { - // Check alignment for 64-bit writes - if ((addr & 7) == 0) { - u8* const ptr = GetPointerImpl( - addr, - [addr, data]() { - LOG_ERROR(HW_Memory, "Unmapped Write64 @ 0x{:016X} = 0x{:016X}", addr, - static_cast(data)); - }, - [&]() { HandleRasterizerWrite(addr, sizeof(T)); }); - if (ptr) { - *reinterpret_cast(ptr) = static_cast(data); - return; - } - } - } - - // Fall back to the general case for other types or unaligned access - u8* const ptr = GetPointerImpl( - addr, - [addr, data]() { - LOG_ERROR(HW_Memory, "Unmapped Write{} @ 0x{:016X} = 0x{:016X}", sizeof(T) * 8, - addr, static_cast(data)); - }, - [&]() { HandleRasterizerWrite(addr, sizeof(T)); }); - if (ptr) { + if (auto const ptr = GetPointerImpl(addr, [addr, data]() { + LOG_ERROR(HW_Memory, "Unmapped Write{} @ 0x{:016X} = 0x{:016X}", sizeof(T) * 8, addr, u64(data)); + }, [&]() { HandleRasterizerWrite(addr, sizeof(T)); }); ptr) [[likely]] std::memcpy(ptr, &data, sizeof(T)); - } } template From acc93cb564a29dd02fdc7293ce441e6d1bf70043 Mon Sep 17 00:00:00 2001 From: lizzie Date: Thu, 2 Oct 2025 05:19:45 +0000 Subject: [PATCH 6/6] fix Signed-off-by: lizzie --- src/core/memory.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core/memory.cpp b/src/core/memory.cpp index f84507d125..e1aa3473fc 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -737,7 +737,7 @@ struct Memory::Impl { /// @tparam T The data type to read out of memory. /// @returns The instance of T read from the specified virtual address. template - inline T Read(Common::ProcessAddress vaddr) requires(std::is_trivially_copyable_v) noexcept { + inline T Read(Common::ProcessAddress vaddr) noexcept requires(std::is_trivially_copyable_v) { const u64 addr = GetInteger(vaddr); if (auto const ptr = GetPointerImpl(addr, [addr]() { LOG_ERROR(HW_Memory, "Unmapped Read{} @ 0x{:016X}", sizeof(T) * 8, addr); @@ -758,7 +758,7 @@ struct Memory::Impl { /// @param vaddr The virtual address to write the data type to. /// @tparam T The data type to write to memory. template - inline void Write(Common::ProcessAddress vaddr, const T data) requires(std::is_trivially_copyable_v) noexcept { + inline void Write(Common::ProcessAddress vaddr, const T data) noexcept requires(std::is_trivially_copyable_v) { const u64 addr = GetInteger(vaddr); if (auto const ptr = GetPointerImpl(addr, [addr, data]() { LOG_ERROR(HW_Memory, "Unmapped Write{} @ 0x{:016X} = 0x{:016X}", sizeof(T) * 8, addr, u64(data));