GPU: format and cast float

This commit is contained in:
Owain van Brakel
2020-01-19 00:34:34 +01:00
parent 952e8e8a10
commit aff45b6bbe
5 changed files with 281 additions and 304 deletions

View File

@@ -43,14 +43,15 @@ in XBRTable xbrTable;
out vec4 FragColor; out vec4 FragColor;
void main() { void main() {
vec4 c; vec4 c;
if (samplingMode == SAMPLING_DEFAULT) if (samplingMode == SAMPLING_DEFAULT) {
c = texture(tex, TexCoord); c = texture(tex, TexCoord);
else if (samplingMode == SAMPLING_CATROM || samplingMode == SAMPLING_MITCHELL) } else if (samplingMode == SAMPLING_CATROM || samplingMode == SAMPLING_MITCHELL) {
c = textureCubic(tex, TexCoord, samplingMode); c = textureCubic(tex, TexCoord, samplingMode);
else if (samplingMode == SAMPLING_XBR) } else if (samplingMode == SAMPLING_XBR) {
c = textureXBR(tex, TexCoord, xbrTable, ceil(1.0 * targetDimensions.x / sourceDimensions.x)); c = textureXBR(tex, TexCoord, xbrTable, ceil(1.0 * targetDimensions.x / sourceDimensions.x));
}
FragColor = c; FragColor = c;
} }

View File

@@ -24,70 +24,71 @@
*/ */
// General case cubic filter // General case cubic filter
float cubic_custom(float x, float b, float c) float cubic_custom(float x, float b, float c) {
{ /* A generalized cubic filter as described by Mitchell and Netravali is defined by the piecewise equation:
/* A generalized cubic filter as described by Mitchell and Netravali is defined by the piecewise equation: * if abs(x) < 1
* if abs(x) < 1 * y = 1/6 * ( (12 - 9b - 6c) * abs(x)^3 + (-18 + 12b + 6c) * abs(x)^2 + (6 - 2b) )
* y = 1/6 * ( (12 - 9b - 6c) * abs(x)^3 + (-18 + 12b + 6c) * abs(x)^2 + (6 - 2b) ) * if abs(x) >= 1 and < 2
* if abs(x) >= 1 and < 2 * y = 1/6 * ( (-1b - 6c) * abs(x)^3 + (6b + 30c) * abs(x)^2 + (-12b - 48c) * abs(x) + (8b + 24c) )
* y = 1/6 * ( (-1b - 6c) * abs(x)^3 + (6b + 30c) * abs(x)^2 + (-12b - 48c) * abs(x) + (8b + 24c) ) * otherwise
* otherwise * y = 0
* y = 0 * This produces a bell curve centered on 0 with a width of 2.
* This produces a bell curve centered on 0 with a width of 2. */
*/
float t = abs(x); // absolute value of the x coordinate float t = abs(x); // absolute value of the x coordinate
float t2 = t * t; // t squared float t2 = t * t; // t squared
float t3 = t * t * t; // t cubed float t3 = t * t * t; // t cubed
if (t < 1) // This part defines the [-1,1] region of the curve. if (t < 1) { // This part defines the [-1,1] region of the curve.
return 1.0/6 * ( (12 - 9 * b - 6 * c) * t3 + (-18 + 12 * b + 6 * c) * t2 + (6 - 2 * b) ); return 1.0/6 * ((12 - 9 * b - 6 * c) * t3 + (-18 + 12 * b + 6 * c) * t2 + (6 - 2 * b));
else if (t < 2) // This part defines the [-2,-1] and [1,2] regions. } else if (t < 2) { // This part defines the [-2,-1] and [1,2] regions.
return 1.0/6 * ( (-1 * b - 6 * c) * t3 + (6 * b + 30 * c) * t2 + (-12 * b - 48 * c) * t + (8 * b + 24 * c) ); return 1.0/6 * ((-1 * b - 6 * c) * t3 + (6 * b + 30 * c) * t2 + (-12 * b - 48 * c) * t + (8 * b + 24 * c));
else // Outside of [-2,2], the value is 0. }
return 0;
// Outside of [-2,2], the value is 0.
return float(0);
} }
// Cubic filter with Catmull-Rom parameters // Cubic filter with Catmull-Rom parameters
float catmull_rom(float x) float catmull_rom(float x) {
{ /*
/* * Generally favorable results in image upscaling are given by a cubic filter with the values b = 0 and c = 0.5.
* Generally favorable results in image upscaling are given by a cubic filter with the values b = 0 and c = 0.5. * This is known as the Catmull-Rom filter, and it closely approximates Jinc upscaling with Lanczos input values.
* This is known as the Catmull-Rom filter, and it closely approximates Jinc upscaling with Lanczos input values. * Placing these values into the piecewise equation gives us a more compact representation of:
* Placing these values into the piecewise equation gives us a more compact representation of: * y = 1.5 * abs(x)^3 - 2.5 * abs(x)^2 + 1 // abs(x) < 1
* y = 1.5 * abs(x)^3 - 2.5 * abs(x)^2 + 1 // abs(x) < 1 * y = -0.5 * abs(x)^3 + 2.5 * abs(x)^2 - 4 * abs(x) + 2 // 1 <= abs(x) < 2
* y = -0.5 * abs(x)^3 + 2.5 * abs(x)^2 - 4 * abs(x) + 2 // 1 <= abs(x) < 2 */
*/
float t = abs(x); float t = abs(x);
float t2 = t * t; float t2 = t * t;
float t3 = t * t * t; float t3 = t * t * t;
if (t < 1) if (t < 1) {
return 1.5 * t3 - 2.5 * t2 + 1; return 1.5 * t3 - 2.5 * t2 + 1;
else if (t < 2) } else if (t < 2) {
return -0.5 * t3 + 2.5 * t2 - 4 * t + 2; return -0.5 * t3 + 2.5 * t2 - 4 * t + 2;
else }
return 0;
return float(0);
} }
float mitchell(float x) float mitchell(float x) {
{ /*
/* * This is another cubic filter with less aggressive sharpening than Catmull-Rom, which some users may prefer.
* This is another cubic filter with less aggressive sharpening than Catmull-Rom, which some users may prefer. * B = 1/3, C = 1/3.
* B = 1/3, C = 1/3. */
*/
float t = abs(x); float t = abs(x);
float t2 = t * t; float t2 = t * t;
float t3 = t * t * t; float t3 = t * t * t;
if (t < 1) if (t < 1) {
return 7.0/6 * t3 + -2 * t2 + 8.0/9; return 7.0/6 * t3 + -2 * t2 + 8.0/9;
else if (t < 2) } else if (t < 2) {
return -7.0/18 * t3 + 2 * t2 - 10.0/3 * t + 16.0/9; return -7.0/18 * t3 + 2 * t2 - 10.0/3 * t + 16.0/9;
else }
return 0;
return float(0);
} }
#define CR_AR_STRENGTH 0.9 #define CR_AR_STRENGTH 0.9
@@ -96,82 +97,74 @@ float mitchell(float x)
#define FLT_MIN 1.175494351e-38 #define FLT_MIN 1.175494351e-38
// Calculates the distance between two points // Calculates the distance between two points
float d(vec2 pt1, vec2 pt2) float d(vec2 pt1, vec2 pt2) {
{ vec2 v = pt2 - pt1;
vec2 v = pt2 - pt1; return sqrt(dot(v,v));
return sqrt(dot(v,v));
} }
// Samples a texture using a 4x4 kernel. // Samples a texture using a 4x4 kernel.
vec4 textureCubic(sampler2D sampler, vec2 texCoords, int mode){ vec4 textureCubic(sampler2D sampler, vec2 texCoords, int mode){
vec2 texSize = textureSize(sampler, 0); vec2 texSize = textureSize(sampler, 0);
vec2 texelSize = 1.0 / texSize; vec2 texelSize = 1.0 / texSize;
texCoords *= texSize; texCoords *= texSize;
texCoords -= 0.5; texCoords -= 0.5;
vec4 nSum = vec4( 0.0, 0.0, 0.0, 0.0 ); vec4 nSum = vec4( 0.0, 0.0, 0.0, 0.0 );
vec4 nDenom = vec4( 0.0, 0.0, 0.0, 0.0 ); vec4 nDenom = vec4( 0.0, 0.0, 0.0, 0.0 );
ivec2 texelCoords = ivec2(floor(texCoords)); ivec2 texelCoords = ivec2(floor(texCoords));
vec2 coordFract = fract(texCoords); vec2 coordFract = fract(texCoords);
vec4 c; vec4 c;
if (mode == SAMPLING_CATROM) if (mode == SAMPLING_CATROM) {
{ // catrom benefits from anti-ringing, which requires knowledge of the minimum and maximum samples in the kernel
// catrom benefits from anti-ringing, which requires knowledge of the minimum and maximum samples in the kernel vec4 min_sample = vec4(FLT_MAX);
vec4 min_sample = vec4(FLT_MAX); vec4 max_sample = vec4(FLT_MIN);
vec4 max_sample = vec4(FLT_MIN); for (int m = -1; m <= 2; m++) {
for (int m = -1; m <= 2; m++) for (int n = -1; n <= 2; n++) {
{ // get the raw texel, bypassing any other filters
for (int n = -1; n <= 2; n++) vec4 vecData = texelFetch(sampler, texelCoords + ivec2(m, n), 0);
{
// get the raw texel, bypassing any other filters
vec4 vecData = texelFetch(sampler, texelCoords + ivec2(m, n), 0);
// update min and max as we go // update min and max as we go
min_sample = min(min_sample, vecData); min_sample = min(min_sample, vecData);
max_sample = max(max_sample, vecData); max_sample = max(max_sample, vecData);
// calculate weight based on distance of the current texel offset from the sub-texel position of the sampling location // calculate weight based on distance of the current texel offset from the sub-texel position of the sampling location
float w = catmull_rom( d(vec2(m, n), coordFract) ); float w = catmull_rom( d(vec2(m, n), coordFract) );
// build the weighted average // build the weighted average
nSum += vecData * w; nSum += vecData * w;
nDenom += w; nDenom += w;
} }
}
// calculate weighted average
c = nSum / nDenom;
// store value before anti-ringing
vec4 aux = c;
// anti-ringing: clamp the color value so that it cannot exceed values already present in the kernel area
c = clamp(c, min_sample, max_sample);
// mix according to anti-ringing strength
c = mix(aux, c, CR_AR_STRENGTH);
} }
else if (mode == SAMPLING_MITCHELL) // calculate weighted average
{ c = nSum / nDenom;
for (int m = -1; m <= 2; m++)
{
for (int n = -1; n <= 2; n++)
{
// get the raw texel, bypassing any other filters
vec4 vecData = texelFetch(sampler, texelCoords + ivec2(m, n), 0);
// calculate weight based on distance of the current texel offset from the sub-texel position of the sampling location // store value before anti-ringing
float w = mitchell( d(vec2(m, n), coordFract) ); vec4 aux = c;
// anti-ringing: clamp the color value so that it cannot exceed values already present in the kernel area
c = clamp(c, min_sample, max_sample);
// mix according to anti-ringing strength
c = mix(aux, c, CR_AR_STRENGTH);
} else if (mode == SAMPLING_MITCHELL) {
for (int m = -1; m <= 2; m++) {
for (int n = -1; n <= 2; n++) {
// get the raw texel, bypassing any other filters
vec4 vecData = texelFetch(sampler, texelCoords + ivec2(m, n), 0);
// build the weighted average // calculate weight based on distance of the current texel offset from the sub-texel position of the sampling location
nSum += vecData * w; float w = mitchell( d(vec2(m, n), coordFract) );
nDenom += w;
} // build the weighted average
} nSum += vecData * w;
// calculate weighted average nDenom += w;
c = nSum / nDenom; }
} }
// calculate weighted average
c = nSum / nDenom;
}
// return the weighted average // return the weighted average
return c; return c;
} }

View File

@@ -24,14 +24,13 @@
Incorporates some of the ideas from SABR shader. Thanks to Joshua Street. Incorporates some of the ideas from SABR shader. Thanks to Joshua Street.
*/ */
struct XBRTable struct XBRTable {
{ vec2 texCoord;
vec2 texCoord; vec4 t1;
vec4 t1; vec4 t2;
vec4 t2; vec4 t3;
vec4 t3; vec4 t4;
vec4 t4; vec4 t5;
vec4 t5; vec4 t6;
vec4 t6; vec4 t7;
vec4 t7;
}; };

View File

@@ -48,200 +48,185 @@
const vec3 rgbw = vec3(14.352, 28.176, 5.472); // rgb weights const vec3 rgbw = vec3(14.352, 28.176, 5.472); // rgb weights
//const vec4 eq_threshold = vec4(15.0, 15.0, 15.0, 15.0); // unused //const vec4 eq_threshold = vec4(15.0, 15.0, 15.0, 15.0); // unused
const vec4 Ao = vec4( 1.0, -1.0, -1.0, 1.0 ); const vec4 Ao = vec4( 1.0, -1.0, -1.0, 1.0 );
const vec4 Bo = vec4( 1.0, 1.0, -1.0,-1.0 ); const vec4 Bo = vec4( 1.0, 1.0, -1.0,-1.0 );
const vec4 Co = vec4( 1.5, 0.5, -0.5, 0.5 ); const vec4 Co = vec4( 1.5, 0.5, -0.5, 0.5 );
const vec4 Ax = vec4( 1.0, -1.0, -1.0, 1.0 ); const vec4 Ax = vec4( 1.0, -1.0, -1.0, 1.0 );
const vec4 Bx = vec4( 0.5, 2.0, -0.5,-2.0 ); const vec4 Bx = vec4( 0.5, 2.0, -0.5,-2.0 );
const vec4 Cx = vec4( 1.0, 1.0, -0.5, 0.0 ); const vec4 Cx = vec4( 1.0, 1.0, -0.5, 0.0 );
const vec4 Ay = vec4( 1.0, -1.0, -1.0, 1.0 ); const vec4 Ay = vec4( 1.0, -1.0, -1.0, 1.0 );
const vec4 By = vec4( 2.0, 0.5, -2.0,-0.5 ); const vec4 By = vec4( 2.0, 0.5, -2.0,-0.5 );
const vec4 Cy = vec4( 2.0, 0.0, -1.0, 0.5 ); const vec4 Cy = vec4( 2.0, 0.0, -1.0, 0.5 );
const vec4 Ci = vec4(0.25, 0.25, 0.25, 0.25); const vec4 Ci = vec4(0.25, 0.25, 0.25, 0.25);
const vec3 Y = vec3(0.2126, 0.7152, 0.0722); // rec.709 luma weights const vec3 Y = vec3(0.2126, 0.7152, 0.0722); // rec.709 luma weights
// Difference between vector components. // Difference between vector components.
vec4 df(vec4 A, vec4 B) vec4 df(vec4 A, vec4 B) {
{ return vec4(abs(A-B));
return vec4(abs(A-B));
} }
// Compare two vectors and return their components are different. // Compare two vectors and return their components are different.
vec4 diff(vec4 A, vec4 B) vec4 diff(vec4 A, vec4 B) {
{ return vec4(notEqual(A, B));
return vec4(notEqual(A, B));
} }
// Determine if two vector components are equal based on a threshold. // Determine if two vector components are equal based on a threshold.
vec4 eq(vec4 A, vec4 B) vec4 eq(vec4 A, vec4 B) {
{ return (step(df(A, B), vec4(XBR_EQ_THRESHOLD)));
return (step(df(A, B), vec4(XBR_EQ_THRESHOLD)));
} }
// Determine if two vector components are NOT equal based on a threshold. // Determine if two vector components are NOT equal based on a threshold.
vec4 neq(vec4 A, vec4 B) vec4 neq(vec4 A, vec4 B) {
{ return (vec4(1.0, 1.0, 1.0, 1.0) - eq(A, B));
return (vec4(1.0, 1.0, 1.0, 1.0) - eq(A, B));
} }
// Weighted distance. // Weighted distance.
vec4 wd(vec4 a, vec4 b, vec4 c, vec4 d, vec4 e, vec4 f, vec4 g, vec4 h) vec4 wd(vec4 a, vec4 b, vec4 c, vec4 d, vec4 e, vec4 f, vec4 g, vec4 h) {
{ return (df(a,b) + df(a,c) + df(d,e) + df(d,f) + 4.0*df(g,h));
return (df(a,b) + df(a,c) + df(d,e) + df(d,f) + 4.0*df(g,h));
} }
vec4 weighted_distance(vec4 a, vec4 b, vec4 c, vec4 d, vec4 e, vec4 f, vec4 g, vec4 h, vec4 i, vec4 j, vec4 k, vec4 l) vec4 weighted_distance(vec4 a, vec4 b, vec4 c, vec4 d, vec4 e, vec4 f, vec4 g, vec4 h, vec4 i, vec4 j, vec4 k, vec4 l) {
{ return (df(a,b) + df(a,c) + df(d,e) + df(d,f) + df(i,j) + df(k,l) + 2.0*df(g,h));
return (df(a,b) + df(a,c) + df(d,e) + df(d,f) + df(i,j) + df(k,l) + 2.0*df(g,h));
} }
float c_df(vec3 c1, vec3 c2) float c_df(vec3 c1, vec3 c2) {
{ vec3 df = abs(c1 - c2);
vec3 df = abs(c1 - c2); return df.r + df.g + df.b;
return df.r + df.g + df.b;
} }
#include scale/xbr_lv2_common.glsl #include scale/xbr_lv2_common.glsl
// xBR-level2 upscaler. Level 2 means it detects edges in 2 directions, instead of just 1 in the most basic form of the algorithm. // xBR-level2 upscaler. Level 2 means it detects edges in 2 directions, instead of just 1 in the most basic form of the algorithm.
// This improves quality by a good bit without adding too much complexity compared to available level-3 and level-4 algorithms. // This improves quality by a good bit without adding too much complexity compared to available level-3 and level-4 algorithms.
vec4 textureXBR(sampler2D image, vec2 texCoord, XBRTable t, float scale) vec4 textureXBR(sampler2D image, vec2 texCoord, XBRTable t, float scale) {
{ vec4 delta = vec4(1.0/scale, 1.0/scale, 1.0/scale, 1.0/scale);
vec4 delta = vec4(1.0/scale, 1.0/scale, 1.0/scale, 1.0/scale); vec4 delta_l = vec4(0.5/scale, 1.0/scale, 0.5/scale, 1.0/scale);
vec4 delta_l = vec4(0.5/scale, 1.0/scale, 0.5/scale, 1.0/scale); vec4 delta_u = delta_l.yxwz;
vec4 delta_u = delta_l.yxwz;
vec2 textureDimensions = textureSize(image, 0); vec2 textureDimensions = textureSize(image, 0);
vec4 edri, edr, edr_l, edr_u, px; // px = pixel, edr = edge detection rule vec4 edri, edr, edr_l, edr_u, px; // px = pixel, edr = edge detection rule
vec4 irlv0, irlv1, irlv2l, irlv2u, block_3d; vec4 irlv0, irlv1, irlv2l, irlv2u, block_3d;
vec4 fx, fx_l, fx_u; // inequations of straight lines. vec4 fx, fx_l, fx_u; // inequations of straight lines.
vec2 fp = fract(texCoord*textureDimensions); vec2 fp = fract(texCoord*textureDimensions);
// A1 B1 C1 // A1 B1 C1
// A0 A B C C4 // A0 A B C C4
// D0 D E F F4 // D0 D E F F4
// G0 G H I I4 // G0 G H I I4
// G5 H5 I5 // G5 H5 I5
vec4 A1 = texture(image, t.t1.xw ); vec4 A1 = texture(image, t.t1.xw );
vec4 B1 = texture(image, t.t1.yw ); vec4 B1 = texture(image, t.t1.yw );
vec4 C1 = texture(image, t.t1.zw ); vec4 C1 = texture(image, t.t1.zw );
vec4 A = texture(image, t.t2.xw ); vec4 A = texture(image, t.t2.xw );
vec4 B = texture(image, t.t2.yw ); vec4 B = texture(image, t.t2.yw );
vec4 C = texture(image, t.t2.zw ); vec4 C = texture(image, t.t2.zw );
vec4 D = texture(image, t.t3.xw ); vec4 D = texture(image, t.t3.xw );
vec4 E = texture(image, t.t3.yw ); vec4 E = texture(image, t.t3.yw );
vec4 F = texture(image, t.t3.zw ); vec4 F = texture(image, t.t3.zw );
vec4 G = texture(image, t.t4.xw ); vec4 G = texture(image, t.t4.xw );
vec4 H = texture(image, t.t4.yw ); vec4 H = texture(image, t.t4.yw );
vec4 I = texture(image, t.t4.zw ); vec4 I = texture(image, t.t4.zw );
vec4 G5 = texture(image, t.t5.xw ); vec4 G5 = texture(image, t.t5.xw );
vec4 H5 = texture(image, t.t5.yw ); vec4 H5 = texture(image, t.t5.yw );
vec4 I5 = texture(image, t.t5.zw ); vec4 I5 = texture(image, t.t5.zw );
vec4 A0 = texture(image, t.t6.xy ); vec4 A0 = texture(image, t.t6.xy );
vec4 D0 = texture(image, t.t6.xz ); vec4 D0 = texture(image, t.t6.xz );
vec4 G0 = texture(image, t.t6.xw ); vec4 G0 = texture(image, t.t6.xw );
vec4 C4 = texture(image, t.t7.xy ); vec4 C4 = texture(image, t.t7.xy );
vec4 F4 = texture(image, t.t7.xz ); vec4 F4 = texture(image, t.t7.xz );
vec4 I4 = texture(image, t.t7.xw ); vec4 I4 = texture(image, t.t7.xw );
vec4 b = vec4(dot(B.xyz ,rgbw), dot(D.xyz ,rgbw), dot(H.xyz ,rgbw), dot(F.xyz ,rgbw)); vec4 b = vec4(dot(B.xyz ,rgbw), dot(D.xyz ,rgbw), dot(H.xyz ,rgbw), dot(F.xyz ,rgbw));
vec4 c = vec4(dot(C.xyz ,rgbw), dot(A.xyz ,rgbw), dot(G.xyz ,rgbw), dot(I.xyz ,rgbw)); vec4 c = vec4(dot(C.xyz ,rgbw), dot(A.xyz ,rgbw), dot(G.xyz ,rgbw), dot(I.xyz ,rgbw));
vec4 d = b.yzwx; vec4 d = b.yzwx;
vec4 e = vec4(dot(E.xyz,rgbw)); vec4 e = vec4(dot(E.xyz,rgbw));
vec4 f = b.wxyz; vec4 f = b.wxyz;
vec4 g = c.zwxy; vec4 g = c.zwxy;
vec4 h = b.zwxy; vec4 h = b.zwxy;
vec4 i = c.wxyz; vec4 i = c.wxyz;
vec4 i4, i5, h5, f4; vec4 i4, i5, h5, f4;
float y_weight = XBR_Y_WEIGHT; float y_weight = XBR_Y_WEIGHT;
if (small_details < 0.5) if (small_details < 0.5) {
{ i4 = vec4(dot(I4.xyz,rgbw), dot(C1.xyz,rgbw), dot(A0.xyz,rgbw), dot(G5.xyz,rgbw));
i4 = vec4(dot(I4.xyz,rgbw), dot(C1.xyz,rgbw), dot(A0.xyz,rgbw), dot(G5.xyz,rgbw)); i5 = vec4(dot(I5.xyz,rgbw), dot(C4.xyz,rgbw), dot(A1.xyz,rgbw), dot(G0.xyz,rgbw));
i5 = vec4(dot(I5.xyz,rgbw), dot(C4.xyz,rgbw), dot(A1.xyz,rgbw), dot(G0.xyz,rgbw)); h5 = vec4(dot(H5.xyz,rgbw), dot(F4.xyz,rgbw), dot(B1.xyz,rgbw), dot(D0.xyz,rgbw));
h5 = vec4(dot(H5.xyz,rgbw), dot(F4.xyz,rgbw), dot(B1.xyz,rgbw), dot(D0.xyz,rgbw)); } else {
} i4 = mul(mat4x3(I4.xyz, C1.xyz, A0.xyz, G5.xyz), y_weight * Y);
else i5 = mul(mat4x3(I5.xyz, C4.xyz, A1.xyz, G0.xyz), y_weight * Y);
{ h5 = mul(mat4x3(H5.xyz, F4.xyz, B1.xyz, D0.xyz), y_weight * Y);
i4 = mul( mat4x3(I4.xyz, C1.xyz, A0.xyz, G5.xyz), y_weight * Y ); }
i5 = mul( mat4x3(I5.xyz, C4.xyz, A1.xyz, G0.xyz), y_weight * Y );
h5 = mul( mat4x3(H5.xyz, F4.xyz, B1.xyz, D0.xyz), y_weight * Y );
}
// These inequations define the line below which interpolation occurs. // These inequations define the line below which interpolation occurs.
fx = (Ao*fp.y+Bo*fp.x); fx = (Ao*fp.y+Bo*fp.x);
fx_l = (Ax*fp.y+Bx*fp.x); fx_l = (Ax*fp.y+Bx*fp.x);
fx_u = (Ay*fp.y+By*fp.x); fx_u = (Ay*fp.y+By*fp.x);
// corner detection // corner detection
irlv1 = irlv0 = diff(e,f) * diff(e,h); irlv1 = irlv0 = diff(e,f) * diff(e,h);
#ifdef CORNER_B #ifdef CORNER_B
irlv1 = (irlv0 * ( neq(f,b) * neq(h,d) + eq(e,i) * neq(f,i4) * neq(h,i5) + eq(e,g) + eq(e,c) ) ); irlv1 = (irlv0 * ( neq(f,b) * neq(h,d) + eq(e,i) * neq(f,i4) * neq(h,i5) + eq(e,g) + eq(e,c) ) );
#endif #endif
#ifdef CORNER_D #ifdef CORNER_D
vec4 c1 = i4.yzwx; vec4 c1 = i4.yzwx;
vec4 g0 = i5.wxyz; vec4 g0 = i5.wxyz;
irlv1 = (irlv0 * ( neq(f,b) * neq(h,d) + eq(e,i) * neq(f,i4) * neq(h,i5) + eq(e,g) + eq(e,c) ) * (diff(f,f4) * diff(f,i) + diff(h,h5) * diff(h,i) + diff(h,g) + diff(f,c) + eq(b,c1) * eq(d,g0))); irlv1 = (irlv0 * ( neq(f,b) * neq(h,d) + eq(e,i) * neq(f,i4) * neq(h,i5) + eq(e,g) + eq(e,c) ) * (diff(f,f4) * diff(f,i) + diff(h,h5) * diff(h,i) + diff(h,g) + diff(f,c) + eq(b,c1) * eq(d,g0)));
#endif #endif
#ifdef CORNER_C #ifdef CORNER_C
irlv1 = (irlv0 * ( neq(f,b) * neq(f,c) + neq(h,d) * neq(h,g) + eq(e,i) * (neq(f,f4) * neq(f,i4) + neq(h,h5) * neq(h,i5)) + eq(e,g) + eq(e,c)) ); irlv1 = (irlv0 * ( neq(f,b) * neq(f,c) + neq(h,d) * neq(h,g) + eq(e,i) * (neq(f,f4) * neq(f,i4) + neq(h,h5) * neq(h,i5)) + eq(e,g) + eq(e,c)) );
#endif #endif
// corner detection in the other direction // corner detection in the other direction
irlv2l = diff(e,g) * diff(d,g); irlv2l = diff(e,g) * diff(d,g);
irlv2u = diff(e,c) * diff(b,c); irlv2u = diff(e,c) * diff(b,c);
vec4 fx45i = clamp((fx + delta -Co - Ci)/(2.0*delta ), 0.0, 1.0); vec4 fx45i = clamp((fx + delta -Co - Ci)/(2.0*delta ), 0.0, 1.0);
vec4 fx45 = clamp((fx + delta -Co )/(2.0*delta ), 0.0, 1.0); vec4 fx45 = clamp((fx + delta -Co )/(2.0*delta ), 0.0, 1.0);
vec4 fx30 = clamp((fx_l + delta_l -Cx )/(2.0*delta_l), 0.0, 1.0); vec4 fx30 = clamp((fx_l + delta_l -Cx )/(2.0*delta_l), 0.0, 1.0);
vec4 fx60 = clamp((fx_u + delta_u -Cy )/(2.0*delta_u), 0.0, 1.0); vec4 fx60 = clamp((fx_u + delta_u -Cy )/(2.0*delta_u), 0.0, 1.0);
vec4 wd1, wd2; vec4 wd1, wd2;
if (small_details < 0.5) if (small_details < 0.5) {
{ wd1 = wd( e, c, g, i, h5, f4, h, f);
wd1 = wd( e, c, g, i, h5, f4, h, f); wd2 = wd( h, d, i5, f, i4, b, e, i);
wd2 = wd( h, d, i5, f, i4, b, e, i); } else {
} wd1 = weighted_distance( e, c, g, i, f4, h5, h, f, b, d, i4, i5);
else wd2 = weighted_distance( h, d, i5, f, b, i4, e, i, g, h5, c, f4);
{ }
wd1 = weighted_distance( e, c, g, i, f4, h5, h, f, b, d, i4, i5);
wd2 = weighted_distance( h, d, i5, f, b, i4, e, i, g, h5, c, f4);
}
edri = step(wd1, wd2) * irlv0; edri = step(wd1, wd2) * irlv0;
edr = step(wd1 + vec4(0.1, 0.1, 0.1, 0.1), wd2) * step(vec4(0.5, 0.5, 0.5, 0.5), irlv1); edr = step(wd1 + vec4(0.1, 0.1, 0.1, 0.1), wd2) * step(vec4(0.5, 0.5, 0.5, 0.5), irlv1);
edr_l = step( lv2_cf*df(f,g), df(h,c) ) * irlv2l * edr; edr_l = step( lv2_cf*df(f,g), df(h,c) ) * irlv2l * edr;
edr_u = step( lv2_cf*df(h,c), df(f,g) ) * irlv2u * edr; edr_u = step( lv2_cf*df(h,c), df(f,g) ) * irlv2u * edr;
fx45 = edr * fx45; fx45 = edr * fx45;
fx30 = edr_l * fx30; fx30 = edr_l * fx30;
fx60 = edr_u * fx60; fx60 = edr_u * fx60;
fx45i = edri * fx45i; fx45i = edri * fx45i;
px = step(df(e,f), df(e,h)); px = step(df(e,f), df(e,h));
#ifdef SMOOTH_TIPS #ifdef SMOOTH_TIPS
vec4 maximos = max(max(fx30, fx60), max(fx45, fx45i)); vec4 maximos = max(max(fx30, fx60), max(fx45, fx45i));
#endif #endif
#ifndef SMOOTH_TIPS #ifndef SMOOTH_TIPS
vec4 maximos = max(max(fx30, fx60), fx45); vec4 maximos = max(max(fx30, fx60), fx45);
#endif #endif
vec4 res1 = E; vec4 res1 = E;
res1 = mix(res1, mix(H, F, px.x), maximos.x); res1 = mix(res1, mix(H, F, px.x), maximos.x);
res1 = mix(res1, mix(B, D, px.z), maximos.z); res1 = mix(res1, mix(B, D, px.z), maximos.z);
vec4 res2 = E; vec4 res2 = E;
res2 = mix(res2, mix(F, B, px.y), maximos.y); res2 = mix(res2, mix(F, B, px.y), maximos.y);
res2 = mix(res2, mix(D, H, px.w), maximos.w); res2 = mix(res2, mix(D, H, px.w), maximos.w);
vec4 res = mix(res1, res2, step(c_df(E.xyz, res1.xyz), c_df(E.xyz, res2.xyz))); vec4 res = mix(res1, res2, step(c_df(E.xyz, res1.xyz), c_df(E.xyz, res2.xyz)));
return res; return res;
} }

View File

@@ -26,29 +26,28 @@
#include scale/xbr_lv2_common.glsl #include scale/xbr_lv2_common.glsl
XBRTable xbr_vert(vec2 texCoord, ivec2 sourceDimensions) XBRTable xbr_vert(vec2 texCoord, ivec2 sourceDimensions) {
{ float dx = (1.0/sourceDimensions.x);
float dx = (1.0/sourceDimensions.x); float dy = (1.0/sourceDimensions.y);
float dy = (1.0/sourceDimensions.y);
// Define coordinates to optimize later fetching of adjacent pixels // Define coordinates to optimize later fetching of adjacent pixels
// A1 B1 C1 // A1 B1 C1
// A0 A B C C4 // A0 A B C C4
// D0 D E F F4 // D0 D E F F4
// G0 G H I I4 // G0 G H I I4
// G5 H5 I5 // G5 H5 I5
XBRTable tab = XBRTable( XBRTable tab = XBRTable(
texCoord, texCoord,
texCoord.xxxy + vec4( -dx, 0, dx,-2.0*dy), // A1 B1 C1 texCoord.xxxy + vec4( -dx, 0, dx,-2.0*dy), // A1 B1 C1
texCoord.xxxy + vec4( -dx, 0, dx, -dy), // A B C texCoord.xxxy + vec4( -dx, 0, dx, -dy), // A B C
texCoord.xxxy + vec4( -dx, 0, dx, 0), // D E F texCoord.xxxy + vec4( -dx, 0, dx, 0), // D E F
texCoord.xxxy + vec4( -dx, 0, dx, dy), // G H I texCoord.xxxy + vec4( -dx, 0, dx, dy), // G H I
texCoord.xxxy + vec4( -dx, 0, dx, 2.0*dy), // G5 H5 I5 texCoord.xxxy + vec4( -dx, 0, dx, 2.0*dy), // G5 H5 I5
texCoord.xyyy + vec4(-2.0*dx,-dy, 0, dy), // A0 D0 G0 texCoord.xyyy + vec4(-2.0*dx,-dy, 0, dy), // A0 D0 G0
texCoord.xyyy + vec4( 2.0*dx,-dy, 0, dy) // C4 F4 I4 texCoord.xyyy + vec4( 2.0*dx,-dy, 0, dy) // C4 F4 I4
); );
tab.texCoord.x *= 1.00000001; tab.texCoord.x *= 1.00000001;
return tab; return tab;
} }