optimize with precomputed kernel
All checks were successful
eden-license / license-header (pull_request) Successful in 16s
All checks were successful
eden-license / license-header (pull_request) Successful in 16s
Signed-off-by: lizzie <lizzie@eden-emu.dev>
This commit is contained in:
parent
5ec5c5e19b
commit
9cd6c6dc13
2 changed files with 62 additions and 22 deletions
|
@ -9,32 +9,24 @@ layout (location = 0) in vec2 frag_tex_coord;
|
||||||
layout (location = 0) out vec4 color;
|
layout (location = 0) out vec4 color;
|
||||||
layout (binding = 0) uniform sampler2D color_texture;
|
layout (binding = 0) uniform sampler2D color_texture;
|
||||||
|
|
||||||
#define PI 3.1415926535897932384626433
|
// precomputed kernel
|
||||||
|
const float w_kernel[49] = float[] (
|
||||||
float sinc(float x) {
|
-0.238811f, 0.531959f, 0.961865f, 1.000000f, 0.961865f, 0.531959f, -0.238811f, 0.531959f, 0.957419f, 0.313883f, -0.000000f, 0.313883f, 0.957419f, 0.531959f, 0.961865f, 0.313883f, -0.322602f, 0.000000f, -0.322602f, 0.313883f, 0.961865f, 1.000000f, -0.000000f, 0.000000f, 1.000000f, 0.000000f, -0.000000f, 1.000000f, 0.961865f, 0.313883f, -0.322602f, 0.000000f, -0.322602f, 0.313883f, 0.961865f, 0.531959f, 0.957419f, 0.313883f, -0.000000f, 0.313883f, 0.957419f, 0.531959f, -0.238811f, 0.531959f, 0.961865f, 1.000000f, 0.961865f, 0.531959f, -0.238811f
|
||||||
return x == 0.0f ? 1.0f : sin(PI * x) / (PI * x);
|
);
|
||||||
}
|
const vec2 w_pos[49] = vec2[] (
|
||||||
|
vec2(-0.750000f, -0.750000f), vec2(-0.750000f, -0.500000f), vec2(-0.750000f, -0.250000f), vec2(-0.750000f, 0.000000f), vec2(-0.750000f, 0.250000f), vec2(-0.750000f, 0.500000f), vec2(-0.750000f, 0.750000f), vec2(-0.500000f, -0.750000f), vec2(-0.500000f, -0.500000f), vec2(-0.500000f, -0.250000f), vec2(-0.500000f, 0.000000f), vec2(-0.500000f, 0.250000f), vec2(-0.500000f, 0.500000f), vec2(-0.500000f, 0.750000f), vec2(-0.250000f, -0.750000f), vec2(-0.250000f, -0.500000f), vec2(-0.250000f, -0.250000f), vec2(-0.250000f, 0.000000f), vec2(-0.250000f, 0.250000f), vec2(-0.250000f, 0.500000f), vec2(-0.250000f, 0.750000f), vec2(0.000000f, -0.750000f), vec2(0.000000f, -0.500000f), vec2(0.000000f, -0.250000f), vec2(0.000000f, 0.000000f), vec2(0.000000f, 0.250000f), vec2(0.000000f, 0.500000f), vec2(0.000000f, 0.750000f), vec2(0.250000f, -0.750000f), vec2(0.250000f, -0.500000f), vec2(0.250000f, -0.250000f), vec2(0.250000f, 0.000000f), vec2(0.250000f, 0.250000f), vec2(0.250000f, 0.500000f), vec2(0.250000f, 0.750000f), vec2(0.500000f, -0.750000f), vec2(0.500000f, -0.500000f), vec2(0.500000f, -0.250000f), vec2(0.500000f, 0.000000f), vec2(0.500000f, 0.250000f), vec2(0.500000f, 0.500000f), vec2(0.500000f, 0.750000f), vec2(0.750000f, -0.750000f), vec2(0.750000f, -0.500000f), vec2(0.750000f, -0.250000f), vec2(0.750000f, 0.000000f), vec2(0.750000f, 0.250000f), vec2(0.750000f, 0.500000f), vec2(0.750000f, 0.750000f)
|
||||||
float lanczos(vec2 v, float a) {
|
);
|
||||||
float d = sqrt(v.x * v.x + v.y * v.y);
|
const float w_sum = 21.045683f;
|
||||||
return sinc(d) / sinc(d / a);
|
|
||||||
}
|
|
||||||
|
|
||||||
vec4 textureLanczos(sampler2D textureSampler, vec2 p) {
|
vec4 textureLanczos(sampler2D textureSampler, vec2 p) {
|
||||||
const int r = 1; //radius (1 = 3 steps)
|
const int r = 3; //radius (1 = 3 steps)
|
||||||
vec3 c_sum = vec3(0.0f);
|
vec3 c_sum = vec3(0.0f);
|
||||||
float w_sum = 0.0f;
|
|
||||||
vec2 res = vec2(textureSize(textureSampler, 0));
|
vec2 res = vec2(textureSize(textureSampler, 0));
|
||||||
vec2 cc = floor(p * res) / res;
|
vec2 cc = floor(p * res) / res;
|
||||||
// kernel size = (r * 2 + 1) * (r * 2 + 1)
|
for (int i = 0; i < 49; i++) { // kernel size = (r * 2 + 1) ^ 2
|
||||||
for (int x = -r; x <= r; x++)
|
vec2 kp = w_pos[i] / res;
|
||||||
for (int y = -r; y <= r; y++) {
|
vec2 uv = cc + kp;
|
||||||
vec2 kp = 0.5f * (vec2(x, y) / res); // 0.5 = half-pixel level resampling
|
c_sum += w_kernel[i] * texture(textureSampler, p + kp).rgb;
|
||||||
vec2 uv = cc + kp;
|
}
|
||||||
float w = lanczos(kp, float(r));
|
|
||||||
c_sum += w * texture(textureSampler, p + kp).rgb;
|
|
||||||
w_sum += w;
|
|
||||||
}
|
|
||||||
return vec4(c_sum / w_sum, 1.0f);
|
return vec4(c_sum / w_sum, 1.0f);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
48
tools/lanczos_gen.c
Normal file
48
tools/lanczos_gen.c
Normal file
|
@ -0,0 +1,48 @@
|
||||||
|
// SPDX-FileCopyrightText: Copyright 2025 Eden Emulator Project
|
||||||
|
// SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
|
|
||||||
|
// Build and run: clang tools/lanczos_gen.c -lm -o tools/lanczos_gen && ./tools/lanczos_gen
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <math.h>
|
||||||
|
|
||||||
|
double sinc(double x) {
|
||||||
|
return x == 0.0f ? 1.0f : sin(M_PI * x) / (M_PI * x);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Two-component vector of doubles. It shares its name with the vec2(...)
 * constructor text that this tool prints for the position table.
 */
struct vec2 {
    double x; /* first component */
    double y; /* second component */
};
typedef struct vec2 vec2;
|
||||||
|
|
||||||
|
/*
 * Radial Lanczos weight for the offset v with window radius a:
 *
 *     L(d) = sinc(d) * sinc(d / a)   for d < a
 *     L(d) = 0                       otherwise
 *
 * where d is the Euclidean length of v.
 */
double lanczos(vec2 v, float a) {
    const double d = sqrt(v.x * v.x + v.y * v.y);

    /*
     * The kernel is zero outside its window. This guard also keeps a == 0
     * from reaching the d / a division below.
     */
    if (d >= (double)a) {
        return 0.0;
    }

    /*
     * The window is a product, not a quotient: dividing by sinc(d / a)
     * blows up near d == a, where both sinc terms are rounding noise around
     * zero and the ratio comes out as a spurious weight of 1.
     */
    return sinc(d) * sinc(d / a);
}
|
||||||
|
|
||||||
|
int main(int argc, char* argv[]) {
|
||||||
|
const int r = 3; //radius (1 = 3 steps)
|
||||||
|
const int k_size = (r * 2 + 1) * (r * 2 + 1);
|
||||||
|
double w_sum = 0.0f;
|
||||||
|
// kernel size = (r * 2 + 1) ^ 2
|
||||||
|
printf("const float w_kernel[%i] = float[] (\n ", k_size);
|
||||||
|
double factor = 1.0f / ((double)r + 1.0f);
|
||||||
|
for (int x = -r; x <= r; x++)
|
||||||
|
for (int y = -r; y <= r; y++) {
|
||||||
|
double w = lanczos((vec2){ .x = x, .y = y }, (double)r);
|
||||||
|
printf("%lff, ", w);
|
||||||
|
w_sum += w;
|
||||||
|
}
|
||||||
|
printf("\n);\n");
|
||||||
|
printf("const vec2 w_pos[%i] = vec2[] (\n ", k_size);
|
||||||
|
for (int x = -r; x <= r; x++)
|
||||||
|
for (int y = -r; y <= r; y++) {
|
||||||
|
vec2 kp = (vec2){
|
||||||
|
.x = x * factor,
|
||||||
|
.y = y * factor
|
||||||
|
};
|
||||||
|
printf("vec2(%lff, %lff), ", kp.x, kp.y);
|
||||||
|
}
|
||||||
|
printf("\n);\n");
|
||||||
|
printf("const float w_sum = %lff;\n", w_sum);
|
||||||
|
return 0;
|
||||||
|
}
|
Loading…
Add table
Add a link
Reference in a new issue