From 035f9d80b7fe13c853f6714a3fb18d70d6fd2bab Mon Sep 17 00:00:00 2001 From: SDK Chan Date: Sat, 20 Sep 2025 21:47:07 +0000 Subject: [PATCH 1/5] [shader_recompiler] Rewrite ISBERD --- .../frontend/maxwell/translate/impl/impl.cpp | 36 ----- .../frontend/maxwell/translate/impl/impl.h | 30 ----- .../impl/internal_stage_buffer_entry_read.cpp | 125 +++++++++++++----- 3 files changed, 91 insertions(+), 100 deletions(-) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp index 157e5dfaaf..8b5a103006 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp @@ -271,40 +271,4 @@ void TranslatorVisitor::ResetOFlag() { SetOFlag(ir.Imm1(false)); } -IR::U32 TranslatorVisitor::apply_ISBERD_shift(IR::U32 result, Isberd::Shift shift_value) { - if (shift_value != Isberd::Shift::Default) { - return ir.ShiftLeftLogical(result, ir.Imm32(1)); - } - return result; -} - -IR::U32 TranslatorVisitor::apply_ISBERD_size_read(IR::U32 address, Isberd::SZ sz) { - switch (sz) { - case Isberd::SZ::U8: - return ir.LoadGlobalU8(ir.UConvert(64, address)); - case Isberd::SZ::U16: - return ir.LoadGlobalU16(ir.UConvert(64, address)); - case Isberd::SZ::U32: - case Isberd::SZ::F32: - return ir.LoadGlobal32(ir.UConvert(64, address)); - default: - UNREACHABLE(); - } -} - -IR::U32 TranslatorVisitor::compute_ISBERD_address(IR::Reg src_reg, u32 src_reg_num, u32 imm, u64 skew_value) { - IR::U32 address{}; - if (src_reg_num == 0xFF) { - address = ir.Imm32(imm); - } else { - auto offset = ir.Imm32(imm); - address = ir.IAdd(X(src_reg), offset); - if (skew_value != 0) { - address = ir.IAdd(address, ir.LaneId()); - } - } - - return address; -}; - } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index 1b2547a1bd..37963dc777 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -56,30 +56,6 @@ enum class FPCompareOp : u64 { T, }; -namespace Isberd { -enum class Mode : u64 { - Default, - Patch, - Prim, - Attr, -}; - -enum class Shift : u64 { - Default, - U16, - B32, -}; - -enum class SZ : u64 { - U8, - U16, - U32, - F32, -}; - -} // namespace Isberd - - class TranslatorVisitor { public: explicit TranslatorVisitor(Environment& env_, IR::Block& block) : env{env_}, ir(block) {} @@ -408,12 +384,6 @@ public: void ResetSFlag(); void ResetCFlag(); void ResetOFlag(); - -private: - // Helper functions for various translator visitors - IR::U32 apply_ISBERD_shift(IR::U32 result, Isberd::Shift shift_value); - IR::U32 apply_ISBERD_size_read(IR::U32 address, Isberd::SZ sz_value); - IR::U32 compute_ISBERD_address(IR::Reg src_reg, u32 src_reg_num, u32 imm, u64 skew_value); }; } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp index 2aaf85772d..8dc22282ca 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp @@ -9,9 +9,52 @@ #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" namespace Shader::Maxwell { +namespace { +enum class Mode : u64 { + Default, + Patch, + Prim, + Attr, +}; + +enum class SZ : u64 { + U8, + U16, + U32, + F32 +}; + +enum class Shift : u64 { + Default, + U16, + B32, +}; + +IR::U32 scaleIndex(IR::IREmitter& ir, IR::U32 index, Shift shift) { + switch (shift) { + case Shift::Default: return index; + case Shift::U16: return ir.ShiftLeftLogical(index, ir.Imm32(1)); + case Shift::B32: return ir.ShiftLeftLogical(index, ir.Imm32(2)); + default: UNREACHABLE(); + } +} + +IR::U32 skewBytes(IR::IREmitter& ir, SZ sizeRead) { + const IR::U32 lane = ir.LaneId(); + switch (sizeRead) { + case SZ::U8: return lane; + case SZ::U16: return ir.ShiftLeftLogical(lane, ir.Imm32(1)); + case SZ::U32: + case SZ::F32: return ir.ShiftLeftLogical(lane, ir.Imm32(2)); + default: UNREACHABLE(); + } +} + +} // Anonymous namespace -// Valid only for GS, TI, VS and trap void TranslatorVisitor::ISBERD(u64 insn) { + LOG_DEBUG(Shader, "called with insn={:#X}", insn); + union { u64 raw; BitField<0, 8, IR::Reg> dest_reg; @@ -20,49 +63,63 @@ void TranslatorVisitor::ISBERD(u64 insn) { BitField<24, 8, u32> imm; BitField<31, 1, u64> skew; BitField<32, 1, u64> o; - BitField<33, 2, Isberd::Mode> mode; - BitField<36, 4, Isberd::SZ> sz; - BitField<47, 2, Isberd::Shift> shift; + BitField<33, 2, Mode> mode; + BitField<36, 4, SZ> sz; + BitField<47, 2, Shift> shift; } const isberd{insn}; - auto address = compute_ISBERD_address(isberd.src_reg, isberd.src_reg_num, isberd.imm, isberd.skew); - if (isberd.o != 0) { - auto result = apply_ISBERD_size_read(address, isberd.sz.Value()); - X(isberd.dest_reg, apply_ISBERD_shift(result, isberd.shift.Value())); - - return; + IR::U32 index{}; + if (isberd.src_reg_num.Value() == 0xFF) { + index = ir.Imm32(isberd.imm.Value()); + } else { + const IR::U32 scaledIndex = scaleIndex(ir, X(isberd.src_reg.Value()), isberd.shift.Value()); + index = ir.IAdd(scaledIndex, ir.Imm32(isberd.imm.Value())); } - if (isberd.mode != Isberd::Mode::Default) { - IR::F32 result_f32{}; - switch (isberd.mode.Value()) { - case Isberd::Mode::Patch: - result_f32 = ir.GetPatch(address.Patch()); - break; - case Isberd::Mode::Prim: - result_f32 = ir.GetAttribute(address.Attribute()); - break; - case Isberd::Mode::Attr: - result_f32 = ir.GetAttributeIndexed(address); - break; - default: - UNREACHABLE(); + if (isberd.o.Value()) { + if (isberd.skew.Value()) { + index = ir.IAdd(index, skewBytes(ir, isberd.sz.Value())); } - auto result_u32 = ir.BitCast(result_f32); - X(isberd.dest_reg, apply_ISBERD_shift(result_u32, isberd.shift.Value())); - return; - } - - if (isberd.skew != 0) { - auto result = ir.IAdd(X(isberd.src_reg), ir.LaneId()); - X(isberd.dest_reg, result); + const IR::U64 index64 = ir.UConvert(64, index); + IR::U32 globalLoaded{}; + switch (isberd.sz.Value()) { + case SZ::U8: globalLoaded = ir.LoadGlobalU8 (index64); break; + case SZ::U16: globalLoaded = ir.LoadGlobalU16(index64); break; + case SZ::U32: + case SZ::F32: globalLoaded = ir.LoadGlobal32(index64); break; + default: UNREACHABLE(); + } + X(isberd.dest_reg.Value(), globalLoaded); return; } - // Fallback if nothing else applies - X(isberd.dest_reg, X(isberd.src_reg)); + if (isberd.mode.Value() != Mode::Default) { + if (isberd.skew.Value()) { + index = ir.IAdd(index, skewBytes(ir, SZ::U32)); + } + + IR::F32 float_index{}; + switch (isberd.mode.Value()) { + case Mode::Patch: float_index = ir.GetPatch(index.Patch()); break; + case Mode::Prim: float_index = ir.GetAttribute(index.Attribute()); break; + case Mode::Attr: float_index = ir.GetAttributeIndexed(index); break; + default: UNREACHABLE(); + } + X(isberd.dest_reg.Value(), ir.BitCast(float_index)); + + return; + } + + if (isberd.skew.Value()) { + X(isberd.dest_reg.Value(), ir.IAdd(X(isberd.src_reg.Value()), ir.LaneId())); + + return; + } + + // Fallback copy + X(isberd.dest_reg.Value(), X(isberd.src_reg.Value())); } } // namespace Shader::Maxwell From 2988972107626f83240046dfe03e0b55c3229875 Mon Sep 17 00:00:00 2001 From: SDK Chan Date: Sat, 20 Sep 2025 21:50:48 +0000 Subject: [PATCH 2/5] [shader_recompiler] Refactor some whitespaces --- .../impl/internal_stage_buffer_entry_read.cpp | 31 ++++++++++--------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp index 8dc22282ca..e6345de65e 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp @@ -32,21 +32,21 @@ enum class Shift : u64 { IR::U32 scaleIndex(IR::IREmitter& ir, IR::U32 index, Shift shift) { switch (shift) { - case Shift::Default: return index; - case Shift::U16: return ir.ShiftLeftLogical(index, ir.Imm32(1)); - case Shift::B32: return ir.ShiftLeftLogical(index, ir.Imm32(2)); - default: UNREACHABLE(); + case Shift::Default: return index; + case Shift::U16: return ir.ShiftLeftLogical(index, ir.Imm32(1)); + case Shift::B32: return ir.ShiftLeftLogical(index, ir.Imm32(2)); + default: UNREACHABLE(); } } IR::U32 skewBytes(IR::IREmitter& ir, SZ sizeRead) { const IR::U32 lane = ir.LaneId(); switch (sizeRead) { - case SZ::U8: return lane; - case SZ::U16: return ir.ShiftLeftLogical(lane, ir.Imm32(1)); - case SZ::U32: - case SZ::F32: return ir.ShiftLeftLogical(lane, ir.Imm32(2)); - default: UNREACHABLE(); + case SZ::U8: return lane; + case SZ::U16: return ir.ShiftLeftLogical(lane, ir.Imm32(1)); + case SZ::U32: + case SZ::F32: return ir.ShiftLeftLogical(lane, ir.Imm32(2)); + default: UNREACHABLE(); } } @@ -88,7 +88,7 @@ void TranslatorVisitor::ISBERD(u64 insn) { case SZ::U16: globalLoaded = ir.LoadGlobalU16(index64); break; case SZ::U32: case SZ::F32: globalLoaded = ir.LoadGlobal32(index64); break; - default: UNREACHABLE(); + default: UNREACHABLE(); } X(isberd.dest_reg.Value(), globalLoaded); @@ -102,10 +102,13 @@ void TranslatorVisitor::ISBERD(u64 insn) { IR::F32 float_index{}; switch (isberd.mode.Value()) { - case Mode::Patch: float_index = ir.GetPatch(index.Patch()); break; - case Mode::Prim: float_index = ir.GetAttribute(index.Attribute()); break; - case Mode::Attr: float_index = ir.GetAttributeIndexed(index); break; - default: UNREACHABLE(); + case Mode::Patch: float_index = ir.GetPatch(index.Patch()); + break; + case Mode::Prim: float_index = ir.GetAttribute(index.Attribute()); + break; + case Mode::Attr: float_index = ir.GetAttributeIndexed(index); + break; + default: UNREACHABLE(); } X(isberd.dest_reg.Value(), ir.BitCast(float_index)); From f33a771d587bfb3d7538f719213bb8f8955bed82 Mon Sep 17 00:00:00 2001 From: lizzie Date: Mon, 22 Sep 2025 17:34:55 +0200 Subject: [PATCH 3/5] [vk, opengl] add lanczo and spline-1 filtering (#2534) Signed-off-by: lizzie Reviewed-on: https://git.eden-emu.dev/eden-emu/eden/pulls/2534 Reviewed-by: MaranBr Reviewed-by: Shinmegumi Reviewed-by: Maufeat Co-authored-by: lizzie Co-committed-by: lizzie --- src/common/settings_enums.h | 2 +- src/qt_common/shared_translation.cpp | 2 + src/qt_common/shared_translation.h | 4 ++ src/video_core/host_shaders/CMakeLists.txt | 6 ++- .../host_shaders/present_lanczos.frag | 40 ++++++++++++++++ .../host_shaders/present_spline1.frag | 24 ++++++++++ .../renderer_opengl/gl_blit_screen.cpp | 6 +++ .../renderer_opengl/present/filters.cpp | 11 +++++ .../renderer_opengl/present/filters.h | 2 + .../renderer_vulkan/present/filters.cpp | 11 +++++ .../renderer_vulkan/present/filters.h | 2 + .../renderer_vulkan/vk_blit_screen.cpp | 6 +++ tools/lanczos_gen.c | 48 +++++++++++++++++++ 13 files changed, 161 insertions(+), 3 deletions(-) create mode 100644 src/video_core/host_shaders/present_lanczos.frag create mode 100644 src/video_core/host_shaders/present_spline1.frag create mode 100644 tools/lanczos_gen.c diff --git a/src/common/settings_enums.h b/src/common/settings_enums.h index 41133a7819..ebfa4ceb9e 100644 --- a/src/common/settings_enums.h +++ b/src/common/settings_enums.h @@ -166,7 +166,7 @@ ENUM(ResolutionSetup, Res7X, Res8X); -ENUM(ScalingFilter, NearestNeighbor, Bilinear, Bicubic, Gaussian, ScaleForce, Fsr, Area, MaxEnum); +ENUM(ScalingFilter, NearestNeighbor, Bilinear, Bicubic, Spline1, Gaussian, Lanczos, ScaleForce, Fsr, Area, MaxEnum); ENUM(AntiAliasing, None, Fxaa, Smaa, MaxEnum); diff --git a/src/qt_common/shared_translation.cpp b/src/qt_common/shared_translation.cpp index eb413f28e9..4254253c2f 100644 --- a/src/qt_common/shared_translation.cpp +++ b/src/qt_common/shared_translation.cpp @@ -572,7 +572,9 @@ std::unique_ptr ComboboxEnumeration(QObject* parent) PAIR(ScalingFilter, NearestNeighbor, tr("Nearest Neighbor")), PAIR(ScalingFilter, Bilinear, tr("Bilinear")), PAIR(ScalingFilter, Bicubic, tr("Bicubic")), + PAIR(ScalingFilter, Spline1, tr("Spline-1")), PAIR(ScalingFilter, Gaussian, tr("Gaussian")), + PAIR(ScalingFilter, Lanczos, tr("Lanczos")), PAIR(ScalingFilter, ScaleForce, tr("ScaleForce")), PAIR(ScalingFilter, Fsr, tr("AMD FidelityFX™️ Super Resolution")), PAIR(ScalingFilter, Area, tr("Area")), diff --git a/src/qt_common/shared_translation.h b/src/qt_common/shared_translation.h index 48a2cb5205..c9216c2daa 100644 --- a/src/qt_common/shared_translation.h +++ b/src/qt_common/shared_translation.h @@ -38,8 +38,12 @@ static const std::map scaling_filter_texts_map {Settings::ScalingFilter::Bilinear, QStringLiteral(QT_TRANSLATE_NOOP("GMainWindow", "Bilinear"))}, {Settings::ScalingFilter::Bicubic, QStringLiteral(QT_TRANSLATE_NOOP("GMainWindow", "Bicubic"))}, + {Settings::ScalingFilter::Spline1, + QStringLiteral(QT_TRANSLATE_NOOP("GMainWindow", "Spline-1"))}, {Settings::ScalingFilter::Gaussian, QStringLiteral(QT_TRANSLATE_NOOP("GMainWindow", "Gaussian"))}, + {Settings::ScalingFilter::Lanczos, + QStringLiteral(QT_TRANSLATE_NOOP("GMainWindow", "Lanczos"))}, {Settings::ScalingFilter::ScaleForce, QStringLiteral(QT_TRANSLATE_NOOP("GMainWindow", "ScaleForce"))}, {Settings::ScalingFilter::Fsr, QStringLiteral(QT_TRANSLATE_NOOP("GMainWindow", "FSR"))}, diff --git a/src/video_core/host_shaders/CMakeLists.txt b/src/video_core/host_shaders/CMakeLists.txt index 688e10d2e4..c14b44a45a 100644 --- a/src/video_core/host_shaders/CMakeLists.txt +++ b/src/video_core/host_shaders/CMakeLists.txt @@ -1,5 +1,5 @@ -# SPDX-FileCopyrightText: 2018 yuzu Emulator Project -# SPDX-License-Identifier: GPL-2.0-or-later +# SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +# SPDX-License-Identifier: GPL-3.0-or-later set(FIDELITYFX_INCLUDE_DIR ${CMAKE_SOURCE_DIR}/externals/FidelityFX-FSR/ffx-fsr) @@ -45,6 +45,8 @@ set(SHADER_FILES present_area.frag present_bicubic.frag present_gaussian.frag + present_lanczos.frag + present_spline1.frag queries_prefix_scan_sum.comp queries_prefix_scan_sum_nosubgroups.comp resolve_conditional_render.comp diff --git a/src/video_core/host_shaders/present_lanczos.frag b/src/video_core/host_shaders/present_lanczos.frag new file mode 100644 index 0000000000..b69b329c1b --- /dev/null +++ b/src/video_core/host_shaders/present_lanczos.frag @@ -0,0 +1,40 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + +// https://en.wikipedia.org/wiki/Lanczos_resampling + +#version 460 core + +layout (location = 0) in vec2 frag_tex_coord; +layout (location = 0) out vec4 color; +layout (binding = 0) uniform sampler2D color_texture; + +#define PI 3.1415926535897932384626433 +float sinc(float x) { + return x == 0.0f ? 1.0f : sin(PI * x) / (PI * x); +} +float lanczos(vec2 v, float a) { + float d = length(v); + return sinc(d) / sinc(d / a); +} +vec4 textureLanczos(sampler2D textureSampler, vec2 p) { + vec3 c_sum = vec3(0.0f); + float w_sum = 0.0f; + vec2 res = vec2(textureSize(textureSampler, 0)); + vec2 cc = floor(p * res) / res; + // kernel size = (2r + 1)^2 + const int r = 3; //radius (1 = 3 steps) + for (int x = -r; x <= r; x++) + for (int y = -r; y <= r; y++) { + vec2 kp = 0.5f * (vec2(x, y) / res); // 0.5 = half-pixel level resampling + vec2 uv = cc + kp; + float w = lanczos(kp, float(r)); + c_sum += w * texture(textureSampler, p + kp).rgb; + w_sum += w; + } + return vec4(c_sum / w_sum, 1.0f); +} + +void main() { + color = textureLanczos(color_texture, frag_tex_coord); +} diff --git a/src/video_core/host_shaders/present_spline1.frag b/src/video_core/host_shaders/present_spline1.frag new file mode 100644 index 0000000000..871b47586b --- /dev/null +++ b/src/video_core/host_shaders/present_spline1.frag @@ -0,0 +1,24 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + +// Spline (smooth linear inerpolation) with 1 texel fetch (needs bilinear to work) +// Emulates bicubic without actually doing bicubic +// See https://iquilezles.org/articles/texture, unfortunely there are issues with the original +// where smoothstep "expansion" actually results in worse codegen (SPIRV-Opt does a direct conv to smoothstep) +// TODO: Numerical analysis - fract is sawtooth func and floor, reuse params? Perhaps - no need for precision + +#version 460 core + +layout (location = 0) in vec2 frag_tex_coord; +layout (location = 0) out vec4 color; +layout (binding = 0) uniform sampler2D color_texture; + +vec4 textureSpline1(sampler2D sam, vec2 uv) { + float r = float(textureSize(sam, 0).x); + vec2 x = fract(uv * r + 0.5); + return texture(sam, (floor(uv * r + 0.5) + smoothstep(0.0, 1.0, x) - 0.5) / r); +} + +void main() { + color = textureSpline1(color_texture, frag_tex_coord); +} diff --git a/src/video_core/renderer_opengl/gl_blit_screen.cpp b/src/video_core/renderer_opengl/gl_blit_screen.cpp index 2071fe8d15..65670fcad8 100644 --- a/src/video_core/renderer_opengl/gl_blit_screen.cpp +++ b/src/video_core/renderer_opengl/gl_blit_screen.cpp @@ -89,6 +89,12 @@ void BlitScreen::CreateWindowAdapt() { case Settings::ScalingFilter::Gaussian: window_adapt = MakeGaussian(device); break; + case Settings::ScalingFilter::Spline1: + window_adapt = MakeSpline1(device); + break; + case Settings::ScalingFilter::Lanczos: + window_adapt = MakeLanczos(device); + break; case Settings::ScalingFilter::ScaleForce: window_adapt = MakeScaleForce(device); break; diff --git a/src/video_core/renderer_opengl/present/filters.cpp b/src/video_core/renderer_opengl/present/filters.cpp index c5ac8e7823..a840de304e 100644 --- a/src/video_core/renderer_opengl/present/filters.cpp +++ b/src/video_core/renderer_opengl/present/filters.cpp @@ -12,6 +12,7 @@ #include "video_core/host_shaders/present_area_frag.h" #include "video_core/host_shaders/present_bicubic_frag.h" #include "video_core/host_shaders/present_gaussian_frag.h" +#include "video_core/host_shaders/present_lanczos_frag.h" #include "video_core/renderer_opengl/present/filters.h" #include "video_core/renderer_opengl/present/util.h" @@ -27,6 +28,11 @@ std::unique_ptr MakeBilinear(const Device& device) { HostShaders::OPENGL_PRESENT_FRAG); } +std::unique_ptr MakeSpline1(const Device& device) { + return std::make_unique(device, CreateBilinearSampler(), + HostShaders::PRESENT_SPLINE1_FRAG); +} + std::unique_ptr MakeBicubic(const Device& device) { return std::make_unique(device, CreateBilinearSampler(), HostShaders::PRESENT_BICUBIC_FRAG); @@ -37,6 +43,11 @@ std::unique_ptr MakeGaussian(const Device& device) { HostShaders::PRESENT_GAUSSIAN_FRAG); } +std::unique_ptr MakeLanczos(const Device& device) { + return std::make_unique(device, CreateBilinearSampler(), + HostShaders::PRESENT_LANCZOS_FRAG); +} + std::unique_ptr MakeScaleForce(const Device& device) { return std::make_unique( device, CreateBilinearSampler(), diff --git a/src/video_core/renderer_opengl/present/filters.h b/src/video_core/renderer_opengl/present/filters.h index be2ce24842..7b38ac56bc 100644 --- a/src/video_core/renderer_opengl/present/filters.h +++ b/src/video_core/renderer_opengl/present/filters.h @@ -18,6 +18,8 @@ std::unique_ptr MakeNearestNeighbor(const Device& device); std::unique_ptr MakeBilinear(const Device& device); std::unique_ptr MakeBicubic(const Device& device); std::unique_ptr MakeGaussian(const Device& device); +std::unique_ptr MakeSpline1(const Device& device); +std::unique_ptr MakeLanczos(const Device& device); std::unique_ptr MakeScaleForce(const Device& device); std::unique_ptr MakeArea(const Device& device); diff --git a/src/video_core/renderer_vulkan/present/filters.cpp b/src/video_core/renderer_vulkan/present/filters.cpp index 7843f38d2c..6622b8daea 100644 --- a/src/video_core/renderer_vulkan/present/filters.cpp +++ b/src/video_core/renderer_vulkan/present/filters.cpp @@ -12,6 +12,7 @@ #include "video_core/host_shaders/present_area_frag_spv.h" #include "video_core/host_shaders/present_bicubic_frag_spv.h" #include "video_core/host_shaders/present_gaussian_frag_spv.h" +#include "video_core/host_shaders/present_lanczos_frag_spv.h" #include "video_core/host_shaders/vulkan_present_frag_spv.h" #include "video_core/host_shaders/vulkan_present_scaleforce_fp16_frag_spv.h" #include "video_core/host_shaders/vulkan_present_scaleforce_fp32_frag_spv.h" @@ -45,6 +46,11 @@ std::unique_ptr MakeBilinear(const Device& device, VkFormat fra BuildShader(device, VULKAN_PRESENT_FRAG_SPV)); } +std::unique_ptr MakeSpline1(const Device& device, VkFormat frame_format) { + return std::make_unique(device, frame_format, CreateBilinearSampler(device), + BuildShader(device, PRESENT_SPLINE1_FRAG_SPV)); +} + std::unique_ptr MakeBicubic(const Device& device, VkFormat frame_format) { // No need for handrolled shader -- if the VK impl can do it for us ;) if (device.IsExtFilterCubicSupported()) @@ -59,6 +65,11 @@ std::unique_ptr MakeGaussian(const Device& device, VkFormat fra BuildShader(device, PRESENT_GAUSSIAN_FRAG_SPV)); } +std::unique_ptr MakeLanczos(const Device& device, VkFormat frame_format) { + return std::make_unique(device, frame_format, CreateBilinearSampler(device), + BuildShader(device, PRESENT_LANCZOS_FRAG_SPV)); +} + std::unique_ptr MakeScaleForce(const Device& device, VkFormat frame_format) { return std::make_unique(device, frame_format, CreateBilinearSampler(device), SelectScaleForceShader(device)); diff --git a/src/video_core/renderer_vulkan/present/filters.h b/src/video_core/renderer_vulkan/present/filters.h index c8259487f8..015bffc8a5 100644 --- a/src/video_core/renderer_vulkan/present/filters.h +++ b/src/video_core/renderer_vulkan/present/filters.h @@ -18,7 +18,9 @@ class MemoryAllocator; std::unique_ptr MakeNearestNeighbor(const Device& device, VkFormat frame_format); std::unique_ptr MakeBilinear(const Device& device, VkFormat frame_format); std::unique_ptr MakeBicubic(const Device& device, VkFormat frame_format); +std::unique_ptr MakeSpline1(const Device& device, VkFormat frame_format); std::unique_ptr MakeGaussian(const Device& device, VkFormat frame_format); +std::unique_ptr MakeLanczos(const Device& device, VkFormat frame_format); std::unique_ptr MakeScaleForce(const Device& device, VkFormat frame_format); std::unique_ptr MakeArea(const Device& device, VkFormat frame_format); diff --git a/src/video_core/renderer_vulkan/vk_blit_screen.cpp b/src/video_core/renderer_vulkan/vk_blit_screen.cpp index 39f07b966d..b720bcded3 100644 --- a/src/video_core/renderer_vulkan/vk_blit_screen.cpp +++ b/src/video_core/renderer_vulkan/vk_blit_screen.cpp @@ -43,9 +43,15 @@ void BlitScreen::SetWindowAdaptPass() { case Settings::ScalingFilter::Bicubic: window_adapt = MakeBicubic(device, swapchain_view_format); break; + case Settings::ScalingFilter::Spline1: + window_adapt = MakeSpline1(device, swapchain_view_format); + break; case Settings::ScalingFilter::Gaussian: window_adapt = MakeGaussian(device, swapchain_view_format); break; + case Settings::ScalingFilter::Lanczos: + window_adapt = MakeLanczos(device, swapchain_view_format); + break; case Settings::ScalingFilter::ScaleForce: window_adapt = MakeScaleForce(device, swapchain_view_format); break; diff --git a/tools/lanczos_gen.c b/tools/lanczos_gen.c new file mode 100644 index 0000000000..6d7be3cb0e --- /dev/null +++ b/tools/lanczos_gen.c @@ -0,0 +1,48 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + +// clang -lm tools/lanczos_gen.c -o tools/lanczos_gen && ./tools/lanczos_gen +#include +#include + +double sinc(double x) { + return x == 0.0f ? 1.0f : sin(M_PI * x) / (M_PI * x); +} + +typedef struct vec2 { + double x; + double y; +} vec2; + +double lanczos(vec2 v, float a) { + double d = sqrt(v.x * v.x + v.y * v.y); + return sinc(d) / sinc(d / a); +} + +int main(int argc, char* argv[]) { + const int r = 3; //radius (1 = 3 steps) + const int k_size = (r * 2 + 1) * (r * 2 + 1); + double w_sum = 0.0f; + // kernel size = (r * 2 + 1) ^ 2 + printf("const float w_kernel[%i] = float[] (\n ", k_size); + double factor = 1.0f / ((double)r + 1.0f); + for (int x = -r; x <= r; x++) + for (int y = -r; y <= r; y++) { + double w = lanczos((vec2){ .x = x, .y = y }, (double)r); + printf("%lff, ", w); + w_sum += w; + } + printf("\n);\n"); + printf("const vec2 w_pos[%i] = vec2[] (\n ", k_size); + for (int x = -r; x <= r; x++) + for (int y = -r; y <= r; y++) { + vec2 kp = (vec2){ + .x = x * factor, + .y = y * factor + }; + printf("vec2(%lff, %lff), ", kp.x, kp.y); + } + printf("\n);\n"); + printf("const float w_sum = %lff;\n", w_sum); + return 0; +} From efe9c309976776f666b169fa3cfee913dd8f947f Mon Sep 17 00:00:00 2001 From: SDK Chan Date: Sat, 20 Sep 2025 21:47:07 +0000 Subject: [PATCH 4/5] [shader_recompiler] Rewrite ISBERD --- .../frontend/maxwell/translate/impl/impl.cpp | 36 ----- .../frontend/maxwell/translate/impl/impl.h | 30 ----- .../impl/internal_stage_buffer_entry_read.cpp | 125 +++++++++++++----- 3 files changed, 91 insertions(+), 100 deletions(-) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp index 157e5dfaaf..8b5a103006 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.cpp @@ -271,40 +271,4 @@ void TranslatorVisitor::ResetOFlag() { SetOFlag(ir.Imm1(false)); } -IR::U32 TranslatorVisitor::apply_ISBERD_shift(IR::U32 result, Isberd::Shift shift_value) { - if (shift_value != Isberd::Shift::Default) { - return ir.ShiftLeftLogical(result, ir.Imm32(1)); - } - return result; -} - -IR::U32 TranslatorVisitor::apply_ISBERD_size_read(IR::U32 address, Isberd::SZ sz) { - switch (sz) { - case Isberd::SZ::U8: - return ir.LoadGlobalU8(ir.UConvert(64, address)); - case Isberd::SZ::U16: - return ir.LoadGlobalU16(ir.UConvert(64, address)); - case Isberd::SZ::U32: - case Isberd::SZ::F32: - return ir.LoadGlobal32(ir.UConvert(64, address)); - default: - UNREACHABLE(); - } -} - -IR::U32 TranslatorVisitor::compute_ISBERD_address(IR::Reg src_reg, u32 src_reg_num, u32 imm, u64 skew_value) { - IR::U32 address{}; - if (src_reg_num == 0xFF) { - address = ir.Imm32(imm); - } else { - auto offset = ir.Imm32(imm); - address = ir.IAdd(X(src_reg), offset); - if (skew_value != 0) { - address = ir.IAdd(address, ir.LaneId()); - } - } - - return address; -}; - } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h index 1b2547a1bd..37963dc777 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/impl.h @@ -56,30 +56,6 @@ enum class FPCompareOp : u64 { T, }; -namespace Isberd { -enum class Mode : u64 { - Default, - Patch, - Prim, - Attr, -}; - -enum class Shift : u64 { - Default, - U16, - B32, -}; - -enum class SZ : u64 { - U8, - U16, - U32, - F32, -}; - -} // namespace Isberd - - class TranslatorVisitor { public: explicit TranslatorVisitor(Environment& env_, IR::Block& block) : env{env_}, ir(block) {} @@ -408,12 +384,6 @@ public: void ResetSFlag(); void ResetCFlag(); void ResetOFlag(); - -private: - // Helper functions for various translator visitors - IR::U32 apply_ISBERD_shift(IR::U32 result, Isberd::Shift shift_value); - IR::U32 apply_ISBERD_size_read(IR::U32 address, Isberd::SZ sz_value); - IR::U32 compute_ISBERD_address(IR::Reg src_reg, u32 src_reg_num, u32 imm, u64 skew_value); }; } // namespace Shader::Maxwell diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp index 2aaf85772d..8dc22282ca 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp @@ -9,9 +9,52 @@ #include "shader_recompiler/frontend/maxwell/translate/impl/impl.h" namespace Shader::Maxwell { +namespace { +enum class Mode : u64 { + Default, + Patch, + Prim, + Attr, +}; + +enum class SZ : u64 { + U8, + U16, + U32, + F32 +}; + +enum class Shift : u64 { + Default, + U16, + B32, +}; + +IR::U32 scaleIndex(IR::IREmitter& ir, IR::U32 index, Shift shift) { + switch (shift) { + case Shift::Default: return index; + case Shift::U16: return ir.ShiftLeftLogical(index, ir.Imm32(1)); + case Shift::B32: return ir.ShiftLeftLogical(index, ir.Imm32(2)); + default: UNREACHABLE(); + } +} + +IR::U32 skewBytes(IR::IREmitter& ir, SZ sizeRead) { + const IR::U32 lane = ir.LaneId(); + switch (sizeRead) { + case SZ::U8: return lane; + case SZ::U16: return ir.ShiftLeftLogical(lane, ir.Imm32(1)); + case SZ::U32: + case SZ::F32: return ir.ShiftLeftLogical(lane, ir.Imm32(2)); + default: UNREACHABLE(); + } +} + +} // Anonymous namespace -// Valid only for GS, TI, VS and trap void TranslatorVisitor::ISBERD(u64 insn) { + LOG_DEBUG(Shader, "called with insn={:#X}", insn); + union { u64 raw; BitField<0, 8, IR::Reg> dest_reg; @@ -20,49 +63,63 @@ void TranslatorVisitor::ISBERD(u64 insn) { BitField<24, 8, u32> imm; BitField<31, 1, u64> skew; BitField<32, 1, u64> o; - BitField<33, 2, Isberd::Mode> mode; - BitField<36, 4, Isberd::SZ> sz; - BitField<47, 2, Isberd::Shift> shift; + BitField<33, 2, Mode> mode; + BitField<36, 4, SZ> sz; + BitField<47, 2, Shift> shift; } const isberd{insn}; - auto address = compute_ISBERD_address(isberd.src_reg, isberd.src_reg_num, isberd.imm, isberd.skew); - if (isberd.o != 0) { - auto result = apply_ISBERD_size_read(address, isberd.sz.Value()); - X(isberd.dest_reg, apply_ISBERD_shift(result, isberd.shift.Value())); - - return; + IR::U32 index{}; + if (isberd.src_reg_num.Value() == 0xFF) { + index = ir.Imm32(isberd.imm.Value()); + } else { + const IR::U32 scaledIndex = scaleIndex(ir, X(isberd.src_reg.Value()), isberd.shift.Value()); + index = ir.IAdd(scaledIndex, ir.Imm32(isberd.imm.Value())); } - if (isberd.mode != Isberd::Mode::Default) { - IR::F32 result_f32{}; - switch (isberd.mode.Value()) { - case Isberd::Mode::Patch: - result_f32 = ir.GetPatch(address.Patch()); - break; - case Isberd::Mode::Prim: - result_f32 = ir.GetAttribute(address.Attribute()); - break; - case Isberd::Mode::Attr: - result_f32 = ir.GetAttributeIndexed(address); - break; - default: - UNREACHABLE(); + if (isberd.o.Value()) { + if (isberd.skew.Value()) { + index = ir.IAdd(index, skewBytes(ir, isberd.sz.Value())); } - auto result_u32 = ir.BitCast(result_f32); - X(isberd.dest_reg, apply_ISBERD_shift(result_u32, isberd.shift.Value())); - return; - } - - if (isberd.skew != 0) { - auto result = ir.IAdd(X(isberd.src_reg), ir.LaneId()); - X(isberd.dest_reg, result); + const IR::U64 index64 = ir.UConvert(64, index); + IR::U32 globalLoaded{}; + switch (isberd.sz.Value()) { + case SZ::U8: globalLoaded = ir.LoadGlobalU8 (index64); break; + case SZ::U16: globalLoaded = ir.LoadGlobalU16(index64); break; + case SZ::U32: + case SZ::F32: globalLoaded = ir.LoadGlobal32(index64); break; + default: UNREACHABLE(); + } + X(isberd.dest_reg.Value(), globalLoaded); return; } - // Fallback if nothing else applies - X(isberd.dest_reg, X(isberd.src_reg)); + if (isberd.mode.Value() != Mode::Default) { + if (isberd.skew.Value()) { + index = ir.IAdd(index, skewBytes(ir, SZ::U32)); + } + + IR::F32 float_index{}; + switch (isberd.mode.Value()) { + case Mode::Patch: float_index = ir.GetPatch(index.Patch()); break; + case Mode::Prim: float_index = ir.GetAttribute(index.Attribute()); break; + case Mode::Attr: float_index = ir.GetAttributeIndexed(index); break; + default: UNREACHABLE(); + } + X(isberd.dest_reg.Value(), ir.BitCast(float_index)); + + return; + } + + if (isberd.skew.Value()) { + X(isberd.dest_reg.Value(), ir.IAdd(X(isberd.src_reg.Value()), ir.LaneId())); + + return; + } + + // Fallback copy + X(isberd.dest_reg.Value(), X(isberd.src_reg.Value())); } } // namespace Shader::Maxwell From 08609ab94dd78aa7d3af45085c1617b4156c6fa9 Mon Sep 17 00:00:00 2001 From: SDK Chan Date: Sat, 20 Sep 2025 21:50:48 +0000 Subject: [PATCH 5/5] [shader_recompiler] Refactor some whitespaces --- .../impl/internal_stage_buffer_entry_read.cpp | 31 ++++++++++--------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp b/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp index 8dc22282ca..e6345de65e 100644 --- a/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp +++ b/src/shader_recompiler/frontend/maxwell/translate/impl/internal_stage_buffer_entry_read.cpp @@ -32,21 +32,21 @@ enum class Shift : u64 { IR::U32 scaleIndex(IR::IREmitter& ir, IR::U32 index, Shift shift) { switch (shift) { - case Shift::Default: return index; - case Shift::U16: return ir.ShiftLeftLogical(index, ir.Imm32(1)); - case Shift::B32: return ir.ShiftLeftLogical(index, ir.Imm32(2)); - default: UNREACHABLE(); + case Shift::Default: return index; + case Shift::U16: return ir.ShiftLeftLogical(index, ir.Imm32(1)); + case Shift::B32: return ir.ShiftLeftLogical(index, ir.Imm32(2)); + default: UNREACHABLE(); } } IR::U32 skewBytes(IR::IREmitter& ir, SZ sizeRead) { const IR::U32 lane = ir.LaneId(); switch (sizeRead) { - case SZ::U8: return lane; - case SZ::U16: return ir.ShiftLeftLogical(lane, ir.Imm32(1)); - case SZ::U32: - case SZ::F32: return ir.ShiftLeftLogical(lane, ir.Imm32(2)); - default: UNREACHABLE(); + case SZ::U8: return lane; + case SZ::U16: return ir.ShiftLeftLogical(lane, ir.Imm32(1)); + case SZ::U32: + case SZ::F32: return ir.ShiftLeftLogical(lane, ir.Imm32(2)); + default: UNREACHABLE(); } } @@ -88,7 +88,7 @@ void TranslatorVisitor::ISBERD(u64 insn) { case SZ::U16: globalLoaded = ir.LoadGlobalU16(index64); break; case SZ::U32: case SZ::F32: globalLoaded = ir.LoadGlobal32(index64); break; - default: UNREACHABLE(); + default: UNREACHABLE(); } X(isberd.dest_reg.Value(), globalLoaded); @@ -102,10 +102,13 @@ void TranslatorVisitor::ISBERD(u64 insn) { IR::F32 float_index{}; switch (isberd.mode.Value()) { - case Mode::Patch: float_index = ir.GetPatch(index.Patch()); break; - case Mode::Prim: float_index = ir.GetAttribute(index.Attribute()); break; - case Mode::Attr: float_index = ir.GetAttributeIndexed(index); break; - default: UNREACHABLE(); + case Mode::Patch: float_index = ir.GetPatch(index.Patch()); + break; + case Mode::Prim: float_index = ir.GetAttribute(index.Attribute()); + break; + case Mode::Attr: float_index = ir.GetAttributeIndexed(index); + break; + default: UNREACHABLE(); } X(isberd.dest_reg.Value(), ir.BitCast(float_index));