GPU: format and cast float

This commit is contained in:
Owain van Brakel
2020-01-19 00:34:34 +01:00
parent 952e8e8a10
commit aff45b6bbe
5 changed files with 281 additions and 304 deletions

View File

@@ -45,12 +45,13 @@ out vec4 FragColor;
void main() { void main() {
vec4 c; vec4 c;
if (samplingMode == SAMPLING_DEFAULT) if (samplingMode == SAMPLING_DEFAULT) {
c = texture(tex, TexCoord); c = texture(tex, TexCoord);
else if (samplingMode == SAMPLING_CATROM || samplingMode == SAMPLING_MITCHELL) } else if (samplingMode == SAMPLING_CATROM || samplingMode == SAMPLING_MITCHELL) {
c = textureCubic(tex, TexCoord, samplingMode); c = textureCubic(tex, TexCoord, samplingMode);
else if (samplingMode == SAMPLING_XBR) } else if (samplingMode == SAMPLING_XBR) {
c = textureXBR(tex, TexCoord, xbrTable, ceil(1.0 * targetDimensions.x / sourceDimensions.x)); c = textureXBR(tex, TexCoord, xbrTable, ceil(1.0 * targetDimensions.x / sourceDimensions.x));
}
FragColor = c; FragColor = c;
} }

View File

@@ -24,8 +24,7 @@
*/ */
// General case cubic filter // General case cubic filter
float cubic_custom(float x, float b, float c) float cubic_custom(float x, float b, float c) {
{
/* A generalized cubic filter as described by Mitchell and Netravali is defined by the piecewise equation: /* A generalized cubic filter as described by Mitchell and Netravali is defined by the piecewise equation:
* if abs(x) < 1 * if abs(x) < 1
* y = 1/6 * ( (12 - 9b - 6c) * abs(x)^3 + (-18 + 12b + 6c) * abs(x)^2 + (6 - 2b) ) * y = 1/6 * ( (12 - 9b - 6c) * abs(x)^3 + (-18 + 12b + 6c) * abs(x)^2 + (6 - 2b) )
@@ -40,17 +39,18 @@ float cubic_custom(float x, float b, float c)
float t2 = t * t; // t squared float t2 = t * t; // t squared
float t3 = t * t * t; // t cubed float t3 = t * t * t; // t cubed
if (t < 1) // This part defines the [-1,1] region of the curve. if (t < 1) { // This part defines the [-1,1] region of the curve.
return 1.0/6 * ((12 - 9 * b - 6 * c) * t3 + (-18 + 12 * b + 6 * c) * t2 + (6 - 2 * b)); return 1.0/6 * ((12 - 9 * b - 6 * c) * t3 + (-18 + 12 * b + 6 * c) * t2 + (6 - 2 * b));
else if (t < 2) // This part defines the [-2,-1] and [1,2] regions. } else if (t < 2) { // This part defines the [-2,-1] and [1,2] regions.
return 1.0/6 * ((-1 * b - 6 * c) * t3 + (6 * b + 30 * c) * t2 + (-12 * b - 48 * c) * t + (8 * b + 24 * c)); return 1.0/6 * ((-1 * b - 6 * c) * t3 + (6 * b + 30 * c) * t2 + (-12 * b - 48 * c) * t + (8 * b + 24 * c));
else // Outside of [-2,2], the value is 0. }
return 0;
// Outside of [-2,2], the value is 0.
return float(0);
} }
// Cubic filter with Catmull-Rom parameters // Cubic filter with Catmull-Rom parameters
float catmull_rom(float x) float catmull_rom(float x) {
{
/* /*
* Generally favorable results in image upscaling are given by a cubic filter with the values b = 0 and c = 0.5. * Generally favorable results in image upscaling are given by a cubic filter with the values b = 0 and c = 0.5.
* This is known as the Catmull-Rom filter, and it closely approximates Jinc upscaling with Lanczos input values. * This is known as the Catmull-Rom filter, and it closely approximates Jinc upscaling with Lanczos input values.
@@ -63,16 +63,16 @@ float catmull_rom(float x)
float t2 = t * t; float t2 = t * t;
float t3 = t * t * t; float t3 = t * t * t;
if (t < 1) if (t < 1) {
return 1.5 * t3 - 2.5 * t2 + 1; return 1.5 * t3 - 2.5 * t2 + 1;
else if (t < 2) } else if (t < 2) {
return -0.5 * t3 + 2.5 * t2 - 4 * t + 2; return -0.5 * t3 + 2.5 * t2 - 4 * t + 2;
else
return 0;
} }
float mitchell(float x) return float(0);
{ }
float mitchell(float x) {
/* /*
* This is another cubic filter with less aggressive sharpening than Catmull-Rom, which some users may prefer. * This is another cubic filter with less aggressive sharpening than Catmull-Rom, which some users may prefer.
* B = 1/3, C = 1/3. * B = 1/3, C = 1/3.
@@ -82,12 +82,13 @@ float mitchell(float x)
float t2 = t * t; float t2 = t * t;
float t3 = t * t * t; float t3 = t * t * t;
if (t < 1) if (t < 1) {
return 7.0/6 * t3 + -2 * t2 + 8.0/9; return 7.0/6 * t3 + -2 * t2 + 8.0/9;
else if (t < 2) } else if (t < 2) {
return -7.0/18 * t3 + 2 * t2 - 10.0/3 * t + 16.0/9; return -7.0/18 * t3 + 2 * t2 - 10.0/3 * t + 16.0/9;
else }
return 0;
return float(0);
} }
#define CR_AR_STRENGTH 0.9 #define CR_AR_STRENGTH 0.9
@@ -96,8 +97,7 @@ float mitchell(float x)
#define FLT_MIN 1.175494351e-38 #define FLT_MIN 1.175494351e-38
// Calculates the distance between two points // Calculates the distance between two points
float d(vec2 pt1, vec2 pt2) float d(vec2 pt1, vec2 pt2) {
{
vec2 v = pt2 - pt1; vec2 v = pt2 - pt1;
return sqrt(dot(v,v)); return sqrt(dot(v,v));
} }
@@ -117,15 +117,12 @@ vec4 textureCubic(sampler2D sampler, vec2 texCoords, int mode){
vec4 c; vec4 c;
if (mode == SAMPLING_CATROM) if (mode == SAMPLING_CATROM) {
{
// catrom benefits from anti-ringing, which requires knowledge of the minimum and maximum samples in the kernel // catrom benefits from anti-ringing, which requires knowledge of the minimum and maximum samples in the kernel
vec4 min_sample = vec4(FLT_MAX); vec4 min_sample = vec4(FLT_MAX);
vec4 max_sample = vec4(FLT_MIN); vec4 max_sample = vec4(FLT_MIN);
for (int m = -1; m <= 2; m++) for (int m = -1; m <= 2; m++) {
{ for (int n = -1; n <= 2; n++) {
for (int n = -1; n <= 2; n++)
{
// get the raw texel, bypassing any other filters // get the raw texel, bypassing any other filters
vec4 vecData = texelFetch(sampler, texelCoords + ivec2(m, n), 0); vec4 vecData = texelFetch(sampler, texelCoords + ivec2(m, n), 0);
@@ -150,13 +147,9 @@ vec4 textureCubic(sampler2D sampler, vec2 texCoords, int mode){
c = clamp(c, min_sample, max_sample); c = clamp(c, min_sample, max_sample);
// mix according to anti-ringing strength // mix according to anti-ringing strength
c = mix(aux, c, CR_AR_STRENGTH); c = mix(aux, c, CR_AR_STRENGTH);
} } else if (mode == SAMPLING_MITCHELL) {
else if (mode == SAMPLING_MITCHELL) for (int m = -1; m <= 2; m++) {
{ for (int n = -1; n <= 2; n++) {
for (int m = -1; m <= 2; m++)
{
for (int n = -1; n <= 2; n++)
{
// get the raw texel, bypassing any other filters // get the raw texel, bypassing any other filters
vec4 vecData = texelFetch(sampler, texelCoords + ivec2(m, n), 0); vec4 vecData = texelFetch(sampler, texelCoords + ivec2(m, n), 0);

View File

@@ -24,8 +24,7 @@
Incorporates some of the ideas from SABR shader. Thanks to Joshua Street. Incorporates some of the ideas from SABR shader. Thanks to Joshua Street.
*/ */
struct XBRTable struct XBRTable {
{
vec2 texCoord; vec2 texCoord;
vec4 t1; vec4 t1;
vec4 t2; vec4 t2;

View File

@@ -62,53 +62,44 @@ const vec4 Ci = vec4(0.25, 0.25, 0.25, 0.25);
const vec3 Y = vec3(0.2126, 0.7152, 0.0722); // rec.709 luma weights const vec3 Y = vec3(0.2126, 0.7152, 0.0722); // rec.709 luma weights
// Difference between vector components. // Difference between vector components.
vec4 df(vec4 A, vec4 B) vec4 df(vec4 A, vec4 B) {
{
return vec4(abs(A-B)); return vec4(abs(A-B));
} }
// Compare two vectors and return which of their components are different. // Compare two vectors and return which of their components are different.
vec4 diff(vec4 A, vec4 B) vec4 diff(vec4 A, vec4 B) {
{
return vec4(notEqual(A, B)); return vec4(notEqual(A, B));
} }
// Determine if two vector components are equal based on a threshold. // Determine if two vector components are equal based on a threshold.
vec4 eq(vec4 A, vec4 B) vec4 eq(vec4 A, vec4 B) {
{
return (step(df(A, B), vec4(XBR_EQ_THRESHOLD))); return (step(df(A, B), vec4(XBR_EQ_THRESHOLD)));
} }
// Determine if two vector components are NOT equal based on a threshold. // Determine if two vector components are NOT equal based on a threshold.
vec4 neq(vec4 A, vec4 B) vec4 neq(vec4 A, vec4 B) {
{
return (vec4(1.0, 1.0, 1.0, 1.0) - eq(A, B)); return (vec4(1.0, 1.0, 1.0, 1.0) - eq(A, B));
} }
// Weighted distance. // Weighted distance.
vec4 wd(vec4 a, vec4 b, vec4 c, vec4 d, vec4 e, vec4 f, vec4 g, vec4 h) vec4 wd(vec4 a, vec4 b, vec4 c, vec4 d, vec4 e, vec4 f, vec4 g, vec4 h) {
{
return (df(a,b) + df(a,c) + df(d,e) + df(d,f) + 4.0*df(g,h)); return (df(a,b) + df(a,c) + df(d,e) + df(d,f) + 4.0*df(g,h));
} }
vec4 weighted_distance(vec4 a, vec4 b, vec4 c, vec4 d, vec4 e, vec4 f, vec4 g, vec4 h, vec4 i, vec4 j, vec4 k, vec4 l) vec4 weighted_distance(vec4 a, vec4 b, vec4 c, vec4 d, vec4 e, vec4 f, vec4 g, vec4 h, vec4 i, vec4 j, vec4 k, vec4 l) {
{
return (df(a,b) + df(a,c) + df(d,e) + df(d,f) + df(i,j) + df(k,l) + 2.0*df(g,h)); return (df(a,b) + df(a,c) + df(d,e) + df(d,f) + df(i,j) + df(k,l) + 2.0*df(g,h));
} }
float c_df(vec3 c1, vec3 c2) float c_df(vec3 c1, vec3 c2) {
{
vec3 df = abs(c1 - c2); vec3 df = abs(c1 - c2);
return df.r + df.g + df.b; return df.r + df.g + df.b;
} }
#include scale/xbr_lv2_common.glsl #include scale/xbr_lv2_common.glsl
// xBR-level2 upscaler. Level 2 means it detects edges in 2 directions, instead of just 1 in the most basic form of the algorithm. // xBR-level2 upscaler. Level 2 means it detects edges in 2 directions, instead of just 1 in the most basic form of the algorithm.
// This improves quality by a good bit without adding too much complexity compared to available level-3 and level-4 algorithms. // This improves quality by a good bit without adding too much complexity compared to available level-3 and level-4 algorithms.
vec4 textureXBR(sampler2D image, vec2 texCoord, XBRTable t, float scale) vec4 textureXBR(sampler2D image, vec2 texCoord, XBRTable t, float scale) {
{
vec4 delta = vec4(1.0/scale, 1.0/scale, 1.0/scale, 1.0/scale); vec4 delta = vec4(1.0/scale, 1.0/scale, 1.0/scale, 1.0/scale);
vec4 delta_l = vec4(0.5/scale, 1.0/scale, 0.5/scale, 1.0/scale); vec4 delta_l = vec4(0.5/scale, 1.0/scale, 0.5/scale, 1.0/scale);
vec4 delta_u = delta_l.yxwz; vec4 delta_u = delta_l.yxwz;
@@ -161,14 +152,11 @@ vec4 textureXBR(sampler2D image, vec2 texCoord, XBRTable t, float scale)
float y_weight = XBR_Y_WEIGHT; float y_weight = XBR_Y_WEIGHT;
if (small_details < 0.5) if (small_details < 0.5) {
{
i4 = vec4(dot(I4.xyz,rgbw), dot(C1.xyz,rgbw), dot(A0.xyz,rgbw), dot(G5.xyz,rgbw)); i4 = vec4(dot(I4.xyz,rgbw), dot(C1.xyz,rgbw), dot(A0.xyz,rgbw), dot(G5.xyz,rgbw));
i5 = vec4(dot(I5.xyz,rgbw), dot(C4.xyz,rgbw), dot(A1.xyz,rgbw), dot(G0.xyz,rgbw)); i5 = vec4(dot(I5.xyz,rgbw), dot(C4.xyz,rgbw), dot(A1.xyz,rgbw), dot(G0.xyz,rgbw));
h5 = vec4(dot(H5.xyz,rgbw), dot(F4.xyz,rgbw), dot(B1.xyz,rgbw), dot(D0.xyz,rgbw)); h5 = vec4(dot(H5.xyz,rgbw), dot(F4.xyz,rgbw), dot(B1.xyz,rgbw), dot(D0.xyz,rgbw));
} } else {
else
{
i4 = mul(mat4x3(I4.xyz, C1.xyz, A0.xyz, G5.xyz), y_weight * Y); i4 = mul(mat4x3(I4.xyz, C1.xyz, A0.xyz, G5.xyz), y_weight * Y);
i5 = mul(mat4x3(I5.xyz, C4.xyz, A1.xyz, G0.xyz), y_weight * Y); i5 = mul(mat4x3(I5.xyz, C4.xyz, A1.xyz, G0.xyz), y_weight * Y);
h5 = mul(mat4x3(H5.xyz, F4.xyz, B1.xyz, D0.xyz), y_weight * Y); h5 = mul(mat4x3(H5.xyz, F4.xyz, B1.xyz, D0.xyz), y_weight * Y);
@@ -203,13 +191,10 @@ vec4 textureXBR(sampler2D image, vec2 texCoord, XBRTable t, float scale)
vec4 fx60 = clamp((fx_u + delta_u -Cy )/(2.0*delta_u), 0.0, 1.0); vec4 fx60 = clamp((fx_u + delta_u -Cy )/(2.0*delta_u), 0.0, 1.0);
vec4 wd1, wd2; vec4 wd1, wd2;
if (small_details < 0.5) if (small_details < 0.5) {
{
wd1 = wd( e, c, g, i, h5, f4, h, f); wd1 = wd( e, c, g, i, h5, f4, h, f);
wd2 = wd( h, d, i5, f, i4, b, e, i); wd2 = wd( h, d, i5, f, i4, b, e, i);
} } else {
else
{
wd1 = weighted_distance( e, c, g, i, f4, h5, h, f, b, d, i4, i5); wd1 = weighted_distance( e, c, g, i, f4, h5, h, f, b, d, i4, i5);
wd2 = weighted_distance( h, d, i5, f, b, i4, e, i, g, h5, c, f4); wd2 = weighted_distance( h, d, i5, f, b, i4, e, i, g, h5, c, f4);
} }

View File

@@ -26,8 +26,7 @@
#include scale/xbr_lv2_common.glsl #include scale/xbr_lv2_common.glsl
XBRTable xbr_vert(vec2 texCoord, ivec2 sourceDimensions) XBRTable xbr_vert(vec2 texCoord, ivec2 sourceDimensions) {
{
float dx = (1.0/sourceDimensions.x); float dx = (1.0/sourceDimensions.x);
float dy = (1.0/sourceDimensions.y); float dy = (1.0/sourceDimensions.y);