From 4436a87d31f854ceec1e374b4f5fd60605a0ffec Mon Sep 17 00:00:00 2001 From: lizzie Date: Sat, 20 Sep 2025 13:00:46 +0000 Subject: [PATCH] optimize with precomputed kernel Signed-off-by: lizzie --- .../host_shaders/present_lanczos.frag | 35 +++++--------- tools/lanczos_gen.c | 48 +++++++++++++++++++ 2 files changed, 61 insertions(+), 22 deletions(-) create mode 100644 tools/lanczos_gen.c diff --git a/src/video_core/host_shaders/present_lanczos.frag b/src/video_core/host_shaders/present_lanczos.frag index 5afc985bc3..9501b7ca33 100644 --- a/src/video_core/host_shaders/present_lanczos.frag +++ b/src/video_core/host_shaders/present_lanczos.frag @@ -9,32 +9,23 @@ layout (location = 0) in vec2 frag_tex_coord; layout (location = 0) out vec4 color; layout (binding = 0) uniform sampler2D color_texture; -#define PI 3.1415926535897932384626433 - -float sinc(float x) { - return x == 0.0f ? 1.0f : sin(PI * x) / (PI * x); -} - -float lanczos(vec2 v, float a) { - float d = sqrt(v.x * v.x + v.y * v.y); - return sinc(d) / sinc(d / a); -} - +// precomputed kernel +const float w_kernel[49] = float[] ( + -0.238811f, 0.531959f, 0.961865f, 1.000000f, 0.961865f, 0.531959f, -0.238811f, 0.531959f, 0.957419f, 0.313883f, -0.000000f, 0.313883f, 0.957419f, 0.531959f, 0.961865f, 0.313883f, -0.322602f, 0.000000f, -0.322602f, 0.313883f, 0.961865f, 1.000000f, -0.000000f, 0.000000f, 1.000000f, 0.000000f, -0.000000f, 1.000000f, 0.961865f, 0.313883f, -0.322602f, 0.000000f, -0.322602f, 0.313883f, 0.961865f, 0.531959f, 0.957419f, 0.313883f, -0.000000f, 0.313883f, 0.957419f, 0.531959f, -0.238811f, 0.531959f, 0.961865f, 1.000000f, 0.961865f, 0.531959f, -0.238811f +); +const vec2 w_pos[49] = vec2[] ( + vec2(-0.750000f, -0.750000f), vec2(-0.750000f, -0.500000f), vec2(-0.750000f, -0.250000f), vec2(-0.750000f, 0.000000f), vec2(-0.750000f, 0.250000f), vec2(-0.750000f, 0.500000f), vec2(-0.750000f, 0.750000f), vec2(-0.500000f, -0.750000f), vec2(-0.500000f, -0.500000f), vec2(-0.500000f, -0.250000f), vec2(-0.500000f, 0.000000f), vec2(-0.500000f, 0.250000f), vec2(-0.500000f, 0.500000f), vec2(-0.500000f, 0.750000f), vec2(-0.250000f, -0.750000f), vec2(-0.250000f, -0.500000f), vec2(-0.250000f, -0.250000f), vec2(-0.250000f, 0.000000f), vec2(-0.250000f, 0.250000f), vec2(-0.250000f, 0.500000f), vec2(-0.250000f, 0.750000f), vec2(0.000000f, -0.750000f), vec2(0.000000f, -0.500000f), vec2(0.000000f, -0.250000f), vec2(0.000000f, 0.000000f), vec2(0.000000f, 0.250000f), vec2(0.000000f, 0.500000f), vec2(0.000000f, 0.750000f), vec2(0.250000f, -0.750000f), vec2(0.250000f, -0.500000f), vec2(0.250000f, -0.250000f), vec2(0.250000f, 0.000000f), vec2(0.250000f, 0.250000f), vec2(0.250000f, 0.500000f), vec2(0.250000f, 0.750000f), vec2(0.500000f, -0.750000f), vec2(0.500000f, -0.500000f), vec2(0.500000f, -0.250000f), vec2(0.500000f, 0.000000f), vec2(0.500000f, 0.250000f), vec2(0.500000f, 0.500000f), vec2(0.500000f, 0.750000f), vec2(0.750000f, -0.750000f), vec2(0.750000f, -0.500000f), vec2(0.750000f, -0.250000f), vec2(0.750000f, 0.000000f), vec2(0.750000f, 0.250000f), vec2(0.750000f, 0.500000f), vec2(0.750000f, 0.750000f) +); +const float w_sum = 21.045683f; vec4 textureLanczos(sampler2D textureSampler, vec2 p) { - const int r = 1; //radius (1 = 3 steps) vec3 c_sum = vec3(0.0f); - float w_sum = 0.0f; vec2 res = vec2(textureSize(textureSampler, 0)); vec2 cc = floor(p * res) / res; - // kernel size = (r * 2 + 1) * (r * 2 + 1) - for (int x = -r; x <= r; x++) - for (int y = -r; y <= r; y++) { - vec2 kp = 0.5f * (vec2(x, y) / res); // 0.5 = half-pixel level resampling - vec2 uv = cc + kp; - float w = lanczos(kp, float(r)); - c_sum += w * texture(textureSampler, p + kp).rgb; - w_sum += w; - } + for (int i = 0; i < 49; i++) { // kernel size = (r * 2 + 1) ^ 2 + vec2 kp = w_pos[i] / res; + vec2 uv = cc + kp; + c_sum += w_kernel[i] * texture(textureSampler, p + kp).rgb; + } return vec4(c_sum / w_sum, 1.0f); } diff --git a/tools/lanczos_gen.c b/tools/lanczos_gen.c new file mode 100644 index 0000000000..6d7be3cb0e --- /dev/null +++ b/tools/lanczos_gen.c @@ -0,0 +1,48 @@ +// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project +// SPDX-License-Identifier: GPL-3.0-or-later + +// clang -lm tools/lanczos_gen.c -o tools/lanczos_gen && ./tools/lanczos_gen +#include +#include + +double sinc(double x) { + return x == 0.0f ? 1.0f : sin(M_PI * x) / (M_PI * x); +} + +typedef struct vec2 { + double x; + double y; +} vec2; + +double lanczos(vec2 v, float a) { + double d = sqrt(v.x * v.x + v.y * v.y); + return sinc(d) / sinc(d / a); +} + +int main(int argc, char* argv[]) { + const int r = 3; //radius (1 = 3 steps) + const int k_size = (r * 2 + 1) * (r * 2 + 1); + double w_sum = 0.0f; + // kernel size = (r * 2 + 1) ^ 2 + printf("const float w_kernel[%i] = float[] (\n ", k_size); + double factor = 1.0f / ((double)r + 1.0f); + for (int x = -r; x <= r; x++) + for (int y = -r; y <= r; y++) { + double w = lanczos((vec2){ .x = x, .y = y }, (double)r); + printf("%lff, ", w); + w_sum += w; + } + printf("\n);\n"); + printf("const vec2 w_pos[%i] = vec2[] (\n ", k_size); + for (int x = -r; x <= r; x++) + for (int y = -r; y <= r; y++) { + vec2 kp = (vec2){ + .x = x * factor, + .y = y * factor + }; + printf("vec2(%lff, %lff), ", kp.x, kp.y); + } + printf("\n);\n"); + printf("const float w_sum = %lff;\n", w_sum); + return 0; +}