optimize with precomputed kernel
All checks were successful
eden-license / license-header (pull_request) Successful in 16s

Signed-off-by: lizzie <lizzie@eden-emu.dev>
This commit is contained in:
lizzie 2025-09-20 12:54:56 +00:00
parent 5ec5c5e19b
commit 9cd6c6dc13
Signed by: Lizzie
GPG key ID: 00287378CADCAB13
2 changed files with 62 additions and 22 deletions

View file

@ -9,32 +9,24 @@ layout (location = 0) in vec2 frag_tex_coord;
layout (location = 0) out vec4 color; layout (location = 0) out vec4 color;
layout (binding = 0) uniform sampler2D color_texture; layout (binding = 0) uniform sampler2D color_texture;
#define PI 3.1415926535897932384626433 // precomputed kernel
const float w_kernel[49] = float[] (
float sinc(float x) { -0.238811f, 0.531959f, 0.961865f, 1.000000f, 0.961865f, 0.531959f, -0.238811f, 0.531959f, 0.957419f, 0.313883f, -0.000000f, 0.313883f, 0.957419f, 0.531959f, 0.961865f, 0.313883f, -0.322602f, 0.000000f, -0.322602f, 0.313883f, 0.961865f, 1.000000f, -0.000000f, 0.000000f, 1.000000f, 0.000000f, -0.000000f, 1.000000f, 0.961865f, 0.313883f, -0.322602f, 0.000000f, -0.322602f, 0.313883f, 0.961865f, 0.531959f, 0.957419f, 0.313883f, -0.000000f, 0.313883f, 0.957419f, 0.531959f, -0.238811f, 0.531959f, 0.961865f, 1.000000f, 0.961865f, 0.531959f, -0.238811f
return x == 0.0f ? 1.0f : sin(PI * x) / (PI * x); );
} const vec2 w_pos[49] = vec2[] (
vec2(-0.750000f, -0.750000f), vec2(-0.750000f, -0.500000f), vec2(-0.750000f, -0.250000f), vec2(-0.750000f, 0.000000f), vec2(-0.750000f, 0.250000f), vec2(-0.750000f, 0.500000f), vec2(-0.750000f, 0.750000f), vec2(-0.500000f, -0.750000f), vec2(-0.500000f, -0.500000f), vec2(-0.500000f, -0.250000f), vec2(-0.500000f, 0.000000f), vec2(-0.500000f, 0.250000f), vec2(-0.500000f, 0.500000f), vec2(-0.500000f, 0.750000f), vec2(-0.250000f, -0.750000f), vec2(-0.250000f, -0.500000f), vec2(-0.250000f, -0.250000f), vec2(-0.250000f, 0.000000f), vec2(-0.250000f, 0.250000f), vec2(-0.250000f, 0.500000f), vec2(-0.250000f, 0.750000f), vec2(0.000000f, -0.750000f), vec2(0.000000f, -0.500000f), vec2(0.000000f, -0.250000f), vec2(0.000000f, 0.000000f), vec2(0.000000f, 0.250000f), vec2(0.000000f, 0.500000f), vec2(0.000000f, 0.750000f), vec2(0.250000f, -0.750000f), vec2(0.250000f, -0.500000f), vec2(0.250000f, -0.250000f), vec2(0.250000f, 0.000000f), vec2(0.250000f, 0.250000f), vec2(0.250000f, 0.500000f), vec2(0.250000f, 0.750000f), vec2(0.500000f, -0.750000f), vec2(0.500000f, -0.500000f), vec2(0.500000f, -0.250000f), vec2(0.500000f, 0.000000f), vec2(0.500000f, 0.250000f), vec2(0.500000f, 0.500000f), vec2(0.500000f, 0.750000f), vec2(0.750000f, -0.750000f), vec2(0.750000f, -0.500000f), vec2(0.750000f, -0.250000f), vec2(0.750000f, 0.000000f), vec2(0.750000f, 0.250000f), vec2(0.750000f, 0.500000f), vec2(0.750000f, 0.750000f)
float lanczos(vec2 v, float a) { );
float d = sqrt(v.x * v.x + v.y * v.y); const float w_sum = 21.045683f;
return sinc(d) / sinc(d / a);
}
vec4 textureLanczos(sampler2D textureSampler, vec2 p) { vec4 textureLanczos(sampler2D textureSampler, vec2 p) {
const int r = 1; //radius (1 = 3 steps) const int r = 3; //radius (1 = 3 steps)
vec3 c_sum = vec3(0.0f); vec3 c_sum = vec3(0.0f);
float w_sum = 0.0f;
vec2 res = vec2(textureSize(textureSampler, 0)); vec2 res = vec2(textureSize(textureSampler, 0));
vec2 cc = floor(p * res) / res; vec2 cc = floor(p * res) / res;
// kernel size = (r * 2 + 1) * (r * 2 + 1) for (int i = 0; i < 49; i++) { // kernel size = (r * 2 + 1) ^ 2
for (int x = -r; x <= r; x++) vec2 kp = w_pos[i] / res;
for (int y = -r; y <= r; y++) { vec2 uv = cc + kp;
vec2 kp = 0.5f * (vec2(x, y) / res); // 0.5 = half-pixel level resampling c_sum += w_kernel[i] * texture(textureSampler, p + kp).rgb;
vec2 uv = cc + kp; }
float w = lanczos(kp, float(r));
c_sum += w * texture(textureSampler, p + kp).rgb;
w_sum += w;
}
return vec4(c_sum / w_sum, 1.0f); return vec4(c_sum / w_sum, 1.0f);
} }

48
tools/lanczos_gen.c Normal file
View file

@ -0,0 +1,48 @@
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
// SPDX-License-Identifier: GPL-3.0-or-later
// Build and run: clang tools/lanczos_gen.c -lm -o tools/lanczos_gen && ./tools/lanczos_gen
#include <stdio.h>
#include <math.h>
double sinc(double x) {
return x == 0.0f ? 1.0f : sin(M_PI * x) / (M_PI * x);
}
// Two-component vector of doubles (x, y).
typedef struct vec2 {
    double x, y;
} vec2;
// Radial Lanczos weight for the offset `v` with window size `a`:
//   L(d) = sinc(d) * sinc(d / a)   for d < a
//   L(d) = 0                       otherwise
// where d is the Euclidean length of `v`.
double lanczos(vec2 v, float a) {
    const double d = sqrt(v.x * v.x + v.y * v.y);
    // The kernel has finite support; taps at or beyond the window edge
    // contribute nothing.
    if (d >= (double)a) {
        return 0.0;
    }
    // The window is the *product* of the two sinc terms. Dividing them yields
    // ratios of near-zero rounding residue (e.g. ~1.0 at d == a) instead of
    // the expected decay towards zero.
    return sinc(d) * sinc(d / (double)a);
}
int main(int argc, char* argv[]) {
const int r = 3; //radius (1 = 3 steps)
const int k_size = (r * 2 + 1) * (r * 2 + 1);
double w_sum = 0.0f;
// kernel size = (r * 2 + 1) ^ 2
printf("const float w_kernel[%i] = float[] (\n ", k_size);
double factor = 1.0f / ((double)r + 1.0f);
for (int x = -r; x <= r; x++)
for (int y = -r; y <= r; y++) {
double w = lanczos((vec2){ .x = x, .y = y }, (double)r);
printf("%lff, ", w);
w_sum += w;
}
printf("\n);\n");
printf("const vec2 w_pos[%i] = vec2[] (\n ", k_size);
for (int x = -r; x <= r; x++)
for (int y = -r; y <= r; y++) {
vec2 kp = (vec2){
.x = x * factor,
.y = y * factor
};
printf("vec2(%lff, %lff), ", kp.x, kp.y);
}
printf("\n);\n");
printf("const float w_sum = %lff;\n", w_sum);
return 0;
}