-
-
Save agyild/bbb4e58298b2f86aa24da3032a0d2ee6 to your computer and use it in GitHub Desktop.
// LICENSE | |
// ======= | |
// Copyright (c) 2017-2019 Advanced Micro Devices, Inc. All rights reserved. | |
// ------- | |
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation | |
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, | |
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the | |
// Software is furnished to do so, subject to the following conditions: | |
// ------- | |
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the | |
// Software. | |
// ------- | |
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE | |
// WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR | |
// COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, | |
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |
// FidelityFX CAS v1.0.2 by AMD | |
// ported to mpv by agyild | |
// Changelog | |
// Optimized texture lookups for OpenGL 4.0+, DirectX 10+, and OpenGL ES 3.1+ | |
// Changed rcp + mul operations to div for better clarity when CAS_GO_SLOWER is set to 1, since the compiler should automatically | |
// optimize those instructions anyway. | |
// Made it directly operate on LUMA plane, since the original shader was operating on LUMA by deriving it from RGB. This should | |
// cause a major increase in performance, especially on OpenGL 4.0+ renderers (4 texture lookups vs. 16) | |
// Removed transparency preservation mechanism since the alpha channel is a separate source plane from LUMA
// Added custom gamma curve support for relinearization | |
// Removed final blending between the original and the sharpened pixels since it was redundant | |
// | |
// Notes | |
// Per AMD's guidelines only upscales content up to 4x (e.g., 1080p -> 2160p, 720p -> 1440p etc.) and everything else in between, | |
// that means CAS will scale up to 4x at maximum, and any further scaling will be processed by mpv's scalers | |
// | |
// The filter is designed to run in linear light, and has an optional relinearization and delinearization pass which
// assumes BT.1886 content by default. Do not forget to change the SOURCE_TRC and TARGET_TRC variables depending
// on what kind of content the filter is running on. You might want to create separate versions of the file with different
// colorspace values, and apply them via autoprofiles. Note that running in non-linear light will result in oversharpening.
//!HOOK LUMA | |
//!BIND HOOKED | |
//!DESC FidelityFX Upsampling and Sharpening v1.0.2 (Relinearization) | |
//!WHEN OUTPUT.w OUTPUT.h * LUMA.w LUMA.h * / 1.0 > | |
// User variables - Relinearization | |
// Compatibility | |
#define SOURCE_TRC 4 // Transfer curve of the incoming LUMA; hook() inverts it to obtain linear light. 0 = None (skip conversion), 1 = Rec709, 2 = PQ, 3 = sRGB, 4 = BT.1886 (pure 2.4 power), 5 = HLG, 6 = Custom
#define CUSTOM_GAMMA 2.2 // Power-law exponent applied by hook() only when SOURCE_TRC is 6.
// Shader code | |
// Decodes a Rec.709-encoded value to linear light (inverse of the Rec.709 OETF).
//   rec709: encoded signal; hook() clamps it to [0, 1] before calling.
//   returns: linear-light value.
// The two segments are selected explicitly. The earlier max(min(...), pow(...))
// form always picked the power branch, because the inverse power curve lies
// above the linear segment, so black decoded to ~0.0048 instead of 0.
float From709(float rec709) {
    // 0.081 == 4.5 * 0.018 is the encoded-domain break point of the curve.
    if (rec709 < 0.081) {
        return rec709 / 4.5;
    }
    return pow((rec709 + 0.099) / 1.099, 1.0 / 0.45);
}
// Decodes a PQ-encoded value to linear light.
//   pq: encoded signal (hook() passes a value clamped to [0, 1]).
//   returns: linear value produced by the inverse PQ curve below.
float FromPq(float pq) {
    float root = pow(pq, float(0.0126833));
    // Numerator is saturated so values below the curve's offset decode to 0.
    float numerator = clamp(root - float(0.835938), 0.0, 1.0);
    float denominator = float(18.8516) - float(18.6875) * root;
    return pow(numerator / denominator, float(6.27739));
}
// Decodes an sRGB-encoded value to linear light (inverse sRGB transfer function).
//   srgb: encoded signal; hook() clamps it to [0, 1] before calling.
//   returns: linear-light value.
// Uses an explicit piecewise selection: the previous max(min(...), pow(...))
// expression always returned the power branch, so an input of 0 decoded to
// ~0.0008 instead of 0.
float FromSrgb(float srgb) {
    // 0.04045 is the encoded-domain break point between the two segments.
    if (srgb <= 0.04045) {
        return srgb / 12.92;
    }
    return pow((srgb + 0.055) / 1.055, 2.4);
}
// Decodes an HLG-encoded value to linear light.
//   hlg: encoded signal.
//   returns: hlg^2 / 3 on the square-root segment ([0, 0.5]), otherwise the
//            exponential segment (exp((hlg - c) / a) + b) / 12.
float FromHlg(float hlg) {
    const float kA = 0.17883277;
    const float kB = 0.28466892;
    const float kC = 0.55991073;
    bool onSqrtSegment = (hlg >= 0.0 && hlg <= 0.5);
    return onSqrtSegment
        ? pow(hlg, 2.0) / 3.0
        : (exp((hlg - kC) / kA) + kB) / 12.0;
}
// Relinearization pass: samples the hooked texture at the current position,
// clamps its first channel to [0, 1] and converts it to linear light using
// the curve selected by SOURCE_TRC. All other channels are passed through.
vec4 hook() {
    vec4 texel = HOOKED_tex(HOOKED_pos);
    // Clamp first so the decoding curves never see out-of-range input.
    float luma = clamp(texel.r, 0.0, 1.0);
#if (SOURCE_TRC == 1)
    luma = From709(luma);
#elif (SOURCE_TRC == 2)
    luma = FromPq(luma);
#elif (SOURCE_TRC == 3)
    luma = FromSrgb(luma);
#elif (SOURCE_TRC == 4)
    luma = pow(luma, float(2.4));
#elif (SOURCE_TRC == 5)
    luma = FromHlg(luma);
#elif (SOURCE_TRC == 6)
    luma = pow(luma, float(CUSTOM_GAMMA));
#endif
    // With SOURCE_TRC == 0 only the clamp above is applied.
    texel.r = luma;
    return texel;
}
//!HOOK LUMA | |
//!BIND HOOKED | |
//!DESC FidelityFX Upsampling and Sharpening v1.0.2 | |
//!WHEN OUTPUT.w OUTPUT.h * LUMA.w LUMA.h * / 1.0 > | |
//!WIDTH OUTPUT.w OUTPUT.w LUMA.w 2 * < * LUMA.w 2 * OUTPUT.w LUMA.w 2 * > * + OUTPUT.w OUTPUT.w LUMA.w 2 * = * + | |
//!HEIGHT OUTPUT.h OUTPUT.h LUMA.h 2 * < * LUMA.h 2 * OUTPUT.h LUMA.h 2 * > * + OUTPUT.h OUTPUT.h LUMA.h 2 * = * + | |
// User variables - Upsampling and Sharpening | |
// Intensity | |
#define SHARPENING 0.0 // Sharpening strength; hook() clamps it to 0.0-1.0 and uses it to blend the filter's ring-weight divisor from 8.0 (at 0.0) to 5.0 (at 1.0), so 0.0 is the mildest setting, not "off". Higher values = more high contrast sharpening.
// Performance
#define CAS_BETTER_DIAGONALS 1 // If 1, the diagonal neighbours are included in the soft min/max. If set to 0, drops those math and texture lookup operations for better performance. This is only useful on pre-OpenGL 4.0 renderers and there is no need to disable it otherwise. 0 or 1.
#define CAS_GO_SLOWER 0 // If set to 1, uses exact division and sqrt() instead of the APrx* bit-trick approximations, which might slightly increase accuracy in exchange of performance. 0 or 1.
// Compatibility
#define TARGET_TRC 4 // Transfer curve applied to the sharpened linear-light result before hook() returns it. 0 = None (skip conversion), 1 = Rec709, 2 = PQ, 3 = sRGB, 4 = BT.1886 (pure 1/2.4 power), 5 = HLG, 6 = Custom
#define CUSTOM_GAMMA 2.2 // Power-law exponent whose inverse is applied by hook() only when TARGET_TRC is 6.
// Shader code | |
// Encodes a linear-light value with the Rec.709 OETF.
//   linear: linear-light value; hook() clamps it to [0, 1] before calling.
//   returns: Rec.709-encoded signal.
// The previous branchless form clamped the linear segment at 0.018, which is
// the linear-domain break point rather than the encoded one (0.081), so
// dark values were encoded too low (e.g. 0.01 -> ~0.039 instead of 0.045).
float To709(float linear) {
    // 0.018 is the linear-domain break point between the two segments.
    if (linear < 0.018) {
        return linear * 4.5;
    }
    return 1.099 * pow(linear, 0.45) - 0.099;
}
// Encodes a linear-light value with the PQ curve.
//   linear: linear-light value (hook() passes a value clamped to [0, 1]).
//   returns: PQ-encoded signal.
float ToPq(float linear) {
    float root = pow(linear, float(0.159302));
    float numerator = float(0.835938) + float(18.8516) * root;
    float denominator = float(1.0) + float(18.6875) * root;
    return pow(numerator / denominator, float(78.8438));
}
// Encodes a linear-light value with the sRGB transfer function.
//   linear: linear-light value; hook() clamps it to [0, 1] before calling.
//   returns: sRGB-encoded signal.
// The previous branchless form clamped the linear segment at 0.0031308 (the
// linear-domain break point) instead of 0.0031308 * 12.92, so dark values
// were encoded too low (e.g. 0.001 -> ~0.0043 instead of 0.01292).
float ToSrgb(float linear) {
    if (linear <= 0.0031308) {
        return linear * 12.92;
    }
    return 1.055 * pow(linear, 1.0 / 2.4) - 0.055;
}
// Encodes a linear-light value with the HLG curve.
//   linear: linear-light value.
//   returns: sqrt(3 * linear) up to 1/12, otherwise a * log(12 * linear - b) + c.
float ToHlg(float linear) {
    const float kA = 0.17883277;
    const float kB = 0.28466892;
    const float kC = 0.55991073;
    bool onSqrtSegment = (linear <= 1.0 / 12.0);
    return onSqrtSegment
        ? sqrt(3.0 * linear)
        : kA * log(12.0 * linear - kB) + kC;
}
#if (CAS_GO_SLOWER == 0) | |
// Low-precision square-root approximation done purely on the float's bit
// pattern: shift the bits right by one and add a fixed bias.
float APrxLoSqrtF1(float a) {
    uint bits = floatBitsToUint(a);
    uint approx = (bits >> uint(1)) + uint(0x1fbc4639);
    return uintBitsToFloat(approx);
}
// Low-precision reciprocal approximation: subtract the float's bit pattern
// from a fixed constant and reinterpret the result as a float.
float APrxLoRcpF1(float a) {
    uint bits = floatBitsToUint(a);
    uint approx = uint(0x7ef07ebb) - bits;
    return uintBitsToFloat(approx);
}
// Medium-precision reciprocal approximation: a bit-pattern initial guess
// followed by the refinement step guess * (2 - guess * a).
float APrxMedRcpF1(float a) {
    uint bits = floatBitsToUint(a);
    float guess = uintBitsToFloat(uint(0x7ef19fff) - bits);
    return guess * (-guess * a + float(2.0));
}
#endif | |
// Upsampling + sharpening pass.
// For each output position, the non-scaling CAS filter is evaluated at the
// four nearest source texels (f, g, j, k) and the four results are blended
// adaptively. The result is written to the first channel after being
// converted with the curve selected by TARGET_TRC.
//
// Source neighbourhood (texel f is at integer index `fp`):
//   a b c d
//   e f g h
//   i j k l
//   m n o p
vec4 hook()
{
    // pp: position in texel units relative to texel centres; after the
    // subtraction below it holds the fractional offset inside the f/g/j/k quad.
    vec2 pp = HOOKED_pos * HOOKED_size - 0.5;
    vec2 fp = floor(pp);
    pp -= fp;
#if (defined(HOOKED_gather) && (__VERSION__ >= 400 || (GL_ES && __VERSION__ >= 310)))
    // Four gathers cover the 4x4 neighbourhood, one per 2x2 quadrant.
    vec4 abef = HOOKED_gather(vec2((fp - vec2(0.5)) * HOOKED_pt), 0);
    float b = abef.z;
    float e = abef.x;
    float f = abef.y;
    vec4 cdgh = HOOKED_gather(vec2((fp + vec2(1.5, -0.5)) * HOOKED_pt), 0);
    float c = cdgh.w;
    float g = cdgh.x;
    float h = cdgh.y;
    vec4 ijmn = HOOKED_gather(vec2((fp + vec2(-0.5, 1.5)) * HOOKED_pt), 0);
    float i = ijmn.w;
    float j = ijmn.z;
    float n = ijmn.y;
    vec4 klop = HOOKED_gather(vec2((fp + vec2(1.5)) * HOOKED_pt), 0);
    float k = klop.w;
    float l = klop.z;
    float o = klop.x;
#if (CAS_BETTER_DIAGONALS == 1)
    float a = abef.w;
    float d = cdgh.z;
    float m = ijmn.x;
    float p = klop.y;
#endif
#else
    ivec2 sp = ivec2(fp);
    // texelFetch results are undefined for coordinates outside the texture,
    // and sp - 1 / sp + 2 leave it at the image borders, so every fetch is
    // clamped to the valid texel range (equivalent to edge clamping).
    ivec2 spMax = ivec2(HOOKED_size) - ivec2(1);
#define CAS_FETCH(dx, dy) (texelFetch(HOOKED_raw, clamp(sp + ivec2(dx, dy), ivec2(0), spMax), 0).r * HOOKED_mul)
#if (CAS_BETTER_DIAGONALS == 1)
    float a = CAS_FETCH(-1, -1);
    float d = CAS_FETCH( 2, -1);
    float m = CAS_FETCH(-1,  2);
    float p = CAS_FETCH( 2,  2);
#endif
    float b = CAS_FETCH( 0, -1);
    float e = CAS_FETCH(-1,  0);
    float f = CAS_FETCH( 0,  0);
    float c = CAS_FETCH( 1, -1);
    float g = CAS_FETCH( 1,  0);
    float h = CAS_FETCH( 2,  0);
    float i = CAS_FETCH(-1,  1);
    float j = CAS_FETCH( 0,  1);
    float n = CAS_FETCH( 0,  2);
    float k = CAS_FETCH( 1,  1);
    float l = CAS_FETCH( 2,  1);
    float o = CAS_FETCH( 1,  2);
#undef CAS_FETCH
#endif
    // Soft min and max.
    // With CAS_BETTER_DIAGONALS these are 2.0x bigger (the 0.5 factors of the
    // cross and 3x3 terms are folded into `bdval` below).
    //  a b c             b
    //  e f g * 0.5  +  e f g * 0.5  [F]
    //  i j k             j
    float mnfL = min(min(b, min(e, f)), min(g, j));
    float mxfL = max(max(b, max(e, f)), max(g, j));
#if (CAS_BETTER_DIAGONALS == 1)
    float mnfL2 = min(min(mnfL, min(a, c)), min(i, k));
    mnfL += mnfL2;
    float mxfL2 = max(max(mxfL, max(a, c)), max(i, k));
    mxfL += mxfL2;
#endif
    //  b c d             c
    //  f g h * 0.5  +  f g h * 0.5  [G]
    //  j k l             k
    float mngL = min(min(c, min(f, g)), min(h, k));
    float mxgL = max(max(c, max(f, g)), max(h, k));
#if (CAS_BETTER_DIAGONALS == 1)
    float mngL2 = min(min(mngL, min(b, d)), min(j, l));
    mngL += mngL2;
    float mxgL2 = max(max(mxgL, max(b, d)), max(j, l));
    mxgL += mxgL2;
#endif
    //  e f g             f
    //  i j k * 0.5  +  i j k * 0.5  [J]
    //  m n o             n
    float mnjL = min(min(f, min(i, j)), min(k, n));
    float mxjL = max(max(f, max(i, j)), max(k, n));
#if (CAS_BETTER_DIAGONALS == 1)
    float mnjL2 = min(min(mnjL, min(e, g)), min(m, o));
    mnjL += mnjL2;
    float mxjL2 = max(max(mxjL, max(e, g)), max(m, o));
    mxjL += mxjL2;
#endif
    //  f g h             g
    //  j k l * 0.5  +  j k l * 0.5  [K]
    //  n o p             o
    float mnkL = min(min(g, min(j, k)), min(l, o));
    float mxkL = max(max(g, max(j, k)), max(l, o));
#if (CAS_BETTER_DIAGONALS == 1)
    float mnkL2 = min(min(mnkL, min(f, h)), min(n, p));
    mnkL += mnkL2;
    float mxkL2 = max(max(mxkL, max(f, h)), max(n, p));
    mxkL += mxkL2;
#endif
    // Smooth minimum distance to signal limit divided by smooth max.
    // The signal limit is 2.0 when min/max were doubled above, 1.0 otherwise.
    const float bdval = bool(CAS_BETTER_DIAGONALS) ? 2.0 : 1.0;
#if (CAS_GO_SLOWER == 1)
    float ampfL = clamp(min(mnfL, bdval - mxfL) / mxfL, 0.0, 1.0);
    float ampgL = clamp(min(mngL, bdval - mxgL) / mxgL, 0.0, 1.0);
    float ampjL = clamp(min(mnjL, bdval - mxjL) / mxjL, 0.0, 1.0);
    float ampkL = clamp(min(mnkL, bdval - mxkL) / mxkL, 0.0, 1.0);
#else
    float ampfL = clamp(min(mnfL, bdval - mxfL) * APrxLoRcpF1(mxfL), 0.0, 1.0);
    float ampgL = clamp(min(mngL, bdval - mxgL) * APrxLoRcpF1(mxgL), 0.0, 1.0);
    float ampjL = clamp(min(mnjL, bdval - mxjL) * APrxLoRcpF1(mxjL), 0.0, 1.0);
    float ampkL = clamp(min(mnkL, bdval - mxkL) * APrxLoRcpF1(mxkL), 0.0, 1.0);
#endif
    // Shaping amount of sharpening.
#if (CAS_GO_SLOWER == 1)
    ampfL = sqrt(ampfL);
    ampgL = sqrt(ampgL);
    ampjL = sqrt(ampjL);
    ampkL = sqrt(ampkL);
#else
    ampfL = APrxLoSqrtF1(ampfL);
    ampgL = APrxLoSqrtF1(ampgL);
    ampjL = APrxLoSqrtF1(ampjL);
    ampkL = APrxLoSqrtF1(ampkL);
#endif
    // Filter shape.
    //  0 w 0
    //  w 1 w
    //  0 w 0
    // `peak` is negative, so each w below is a negative ring weight.
    const float peak = -(mix(8.0, 5.0, clamp(SHARPENING, 0.0, 1.0)));
    float wfL = ampfL / peak;
    float wgL = ampgL / peak;
    float wjL = ampjL / peak;
    float wkL = ampkL / peak;
    // Blend between 4 results (bilinear weights).
    //  s t
    //  u v
    float s = (1.0 - pp.x) * (1.0 - pp.y);
    float t = pp.x * (1.0 - pp.y);
    float u = (1.0 - pp.x) * pp.y;
    float v = pp.x * pp.y;
    // Thin edges to hide bilinear interpolation (helps diagonals).
    const float thinB = 0.03125; // 1.0 / 32.0
#if (CAS_GO_SLOWER == 1)
    s /= thinB + mxfL - mnfL;
    t /= thinB + mxgL - mngL;
    u /= thinB + mxjL - mnjL;
    v /= thinB + mxkL - mnkL;
#else
    s *= APrxLoRcpF1(thinB + mxfL - mnfL);
    t *= APrxLoRcpF1(thinB + mxgL - mngL);
    u *= APrxLoRcpF1(thinB + mxjL - mnjL);
    v *= APrxLoRcpF1(thinB + mxkL - mnkL);
#endif
    // Final weighting: combined per-texel weights of the four blended filters.
    //    b c
    //  e f g h
    //  i j k l
    //    n o
    // b and e share one weight (qbeL), as do c/h (qchL), i/n (qinL), l/o (qloL).
    float qbeL = wfL * s;
    float qchL = wgL * t;
    float qfL = wgL * t + wjL * u + s;
    float qgL = wfL * s + wkL * v + t;
    float qjL = wfL * s + wkL * v + u;
    float qkL = wgL * t + wjL * u + v;
    float qinL = wjL * u;
    float qloL = wkL * v;
    // Filter: weighted sum normalised by the total weight W.
    vec4 pix = vec4(0.0, 0.0, 0.0, 1.0);
    float W = 2.0 * qbeL + 2.0 * qchL + 2.0 * qinL + 2.0 * qloL + qfL + qgL + qjL + qkL;
    pix.r = b * qbeL + e * qbeL + c * qchL + h * qchL + i * qinL + n * qinL + l * qloL + o * qloL + f * qfL + g * qgL + j * qjL + k * qkL;
#if (CAS_GO_SLOWER == 1)
    pix.r /= W;
#else
    pix.r *= APrxMedRcpF1(W);
#endif
    // Clamp before re-encoding so the transfer curves see in-range input.
    pix.r = clamp(pix.r, 0.0, 1.0);
#if (TARGET_TRC == 1)
    pix.r = To709(pix.r);
#elif (TARGET_TRC == 2)
    pix.r = ToPq(pix.r);
#elif (TARGET_TRC == 3)
    pix.r = ToSrgb(pix.r);
#elif (TARGET_TRC == 4)
    pix.r = pow(pix.r, float(1.0 / 2.4));
#elif (TARGET_TRC == 5)
    pix.r = ToHlg(pix.r);
#elif (TARGET_TRC == 6)
    pix.r = pow(pix.r, float(1.0 / CUSTOM_GAMMA));
#endif
    return pix;
}
// LICENSE | |
// ======= | |
// Copyright (c) 2017-2019 Advanced Micro Devices, Inc. All rights reserved. | |
// ------- | |
// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation | |
// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, | |
// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the | |
// Software is furnished to do so, subject to the following conditions: | |
// ------- | |
// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the | |
// Software. | |
// ------- | |
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE | |
// WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR | |
// COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, | |
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | |
// FidelityFX CAS by AMD | |
// ported to mpv by agyild | |
// Changelog | |
// Optimized texture lookups for OpenGL 4.0+, DirectX 10+, and OpenGL ES 3.1+ (9 -> 4). | |
// Changed rcp + mul operations to div for better clarity when CAS_GO_SLOWER is set to 1, since the compiler should automatically | |
// optimize those instructions anyway. | |
// Made it directly operate on LUMA plane, since the original shader was operating on LUMA by deriving it from RGB. This should | |
// cause a major increase in performance, especially on OpenGL 4.0+ renderers (4 texture lookups vs. 9) | |
// Removed transparency preservation mechanism since the alpha channel is a separate source plane from LUMA
// Added custom gamma curve support for relinearization | |
// Removed final blending between the original and the sharpened pixels since it was redundant | |
// | |
// Notes | |
// The filter is designed to run in linear light, and has an optional relinearization and delinearization pass which
// assumes BT.1886 content by default. Do not forget to change the SOURCE_TRC and TARGET_TRC variables depending
// on what kind of content the filter is running on. You might want to create separate versions of the file with different
// colorspace values, and apply them via autoprofiles. Note that running in non-linear light will result in oversharpening.
// | |
// By default the shader only runs on non-scaled content since it is designed for use without scaling; if the content is
// scaled you should probably use CAS-scaled.glsl instead. However, this behavior can be overridden by changing both WHEN
// directives to "OUTPUT.w OUTPUT.h * LUMA.w LUMA.h * / 1.0 < !", which allows it to be used as a pre-upscale sharpener.
//!HOOK LUMA | |
//!BIND HOOKED | |
//!DESC FidelityFX Sharpening (Relinearization) | |
//!WHEN OUTPUT.w OUTPUT.h * LUMA.w LUMA.h * / 1.0 > ! OUTPUT.w OUTPUT.h * LUMA.w LUMA.h * / 1.0 < ! * | |
// User variables - Relinearization | |
// Compatibility | |
#define SOURCE_TRC 4 // Transfer curve of the incoming LUMA; hook() inverts it to obtain linear light. 0 = None (skip conversion), 1 = Rec709, 2 = PQ, 3 = sRGB, 4 = BT.1886 (pure 2.4 power), 5 = HLG, 6 = Custom
#define CUSTOM_GAMMA 2.2 // Power-law exponent applied by hook() only when SOURCE_TRC is 6.
// Shader code | |
// Decodes a Rec.709-encoded value to linear light (inverse of the Rec.709 OETF).
//   rec709: encoded signal; hook() clamps it to [0, 1] before calling.
//   returns: linear-light value.
// The two segments are selected explicitly. The earlier max(min(...), pow(...))
// form always picked the power branch, because the inverse power curve lies
// above the linear segment, so black decoded to ~0.0048 instead of 0.
float From709(float rec709) {
    // 0.081 == 4.5 * 0.018 is the encoded-domain break point of the curve.
    if (rec709 < 0.081) {
        return rec709 / 4.5;
    }
    return pow((rec709 + 0.099) / 1.099, 1.0 / 0.45);
}
// Decodes a PQ-encoded value to linear light.
//   pq: encoded signal (hook() passes a value clamped to [0, 1]).
//   returns: linear value produced by the inverse PQ curve below.
float FromPq(float pq) {
    float root = pow(pq, float(0.0126833));
    // Numerator is saturated so values below the curve's offset decode to 0.
    float numerator = clamp(root - float(0.835938), 0.0, 1.0);
    float denominator = float(18.8516) - float(18.6875) * root;
    return pow(numerator / denominator, float(6.27739));
}
// Decodes an sRGB-encoded value to linear light (inverse sRGB transfer function).
//   srgb: encoded signal; hook() clamps it to [0, 1] before calling.
//   returns: linear-light value.
// Uses an explicit piecewise selection: the previous max(min(...), pow(...))
// expression always returned the power branch, so an input of 0 decoded to
// ~0.0008 instead of 0.
float FromSrgb(float srgb) {
    // 0.04045 is the encoded-domain break point between the two segments.
    if (srgb <= 0.04045) {
        return srgb / 12.92;
    }
    return pow((srgb + 0.055) / 1.055, 2.4);
}
// Decodes an HLG-encoded value to linear light.
//   hlg: encoded signal.
//   returns: hlg^2 / 3 on the square-root segment ([0, 0.5]), otherwise the
//            exponential segment (exp((hlg - c) / a) + b) / 12.
float FromHlg(float hlg) {
    const float kA = 0.17883277;
    const float kB = 0.28466892;
    const float kC = 0.55991073;
    bool onSqrtSegment = (hlg >= 0.0 && hlg <= 0.5);
    return onSqrtSegment
        ? pow(hlg, 2.0) / 3.0
        : (exp((hlg - kC) / kA) + kB) / 12.0;
}
// Relinearization pass: samples the hooked texture at the current position,
// clamps its first channel to [0, 1] and converts it to linear light using
// the curve selected by SOURCE_TRC. All other channels are passed through.
vec4 hook() {
    vec4 texel = HOOKED_tex(HOOKED_pos);
    // Clamp first so the decoding curves never see out-of-range input.
    float luma = clamp(texel.r, 0.0, 1.0);
#if (SOURCE_TRC == 1)
    luma = From709(luma);
#elif (SOURCE_TRC == 2)
    luma = FromPq(luma);
#elif (SOURCE_TRC == 3)
    luma = FromSrgb(luma);
#elif (SOURCE_TRC == 4)
    luma = pow(luma, float(2.4));
#elif (SOURCE_TRC == 5)
    luma = FromHlg(luma);
#elif (SOURCE_TRC == 6)
    luma = pow(luma, float(CUSTOM_GAMMA));
#endif
    // With SOURCE_TRC == 0 only the clamp above is applied.
    texel.r = luma;
    return texel;
}
//!HOOK LUMA | |
//!BIND HOOKED | |
//!DESC FidelityFX Sharpening | |
//!WHEN OUTPUT.w OUTPUT.h * LUMA.w LUMA.h * / 1.0 > ! OUTPUT.w OUTPUT.h * LUMA.w LUMA.h * / 1.0 < ! * | |
// User variables | |
// Intensity | |
#define SHARPENING 0.0 // Sharpening strength; hook() clamps it to 0.0-1.0 and uses it to blend the filter's ring-weight divisor from 8.0 (at 0.0) to 5.0 (at 1.0), so 0.0 is the mildest setting, not "off". Higher values = more high contrast sharpening.
// Performance
#define CAS_BETTER_DIAGONALS 1 // If 1, the diagonal neighbours are included in the soft min/max. If set to 0, drops those math and texture lookup operations for better performance. 0 or 1.
#define CAS_GO_SLOWER 0 // If set to 1, uses exact division and sqrt() instead of the APrx* bit-trick approximations, which might slightly increase accuracy in exchange of performance. 0 or 1.
// Compatibility
#define TARGET_TRC 4 // Transfer curve applied to the sharpened linear-light result before hook() returns it. 0 = None (skip conversion), 1 = Rec709, 2 = PQ, 3 = sRGB, 4 = BT.1886 (pure 1/2.4 power), 5 = HLG, 6 = Custom
#define CUSTOM_GAMMA 2.2 // Power-law exponent whose inverse is applied by hook() only when TARGET_TRC is 6.
// Shader code | |
// Encodes a linear-light value with the Rec.709 OETF.
//   linear: linear-light value; hook() clamps it to [0, 1] before calling.
//   returns: Rec.709-encoded signal.
// The previous branchless form clamped the linear segment at 0.018, which is
// the linear-domain break point rather than the encoded one (0.081), so
// dark values were encoded too low (e.g. 0.01 -> ~0.039 instead of 0.045).
float To709(float linear) {
    // 0.018 is the linear-domain break point between the two segments.
    if (linear < 0.018) {
        return linear * 4.5;
    }
    return 1.099 * pow(linear, 0.45) - 0.099;
}
// Encodes a linear-light value with the PQ curve.
//   linear: linear-light value (hook() passes a value clamped to [0, 1]).
//   returns: PQ-encoded signal.
float ToPq(float linear) {
    float root = pow(linear, float(0.159302));
    float numerator = float(0.835938) + float(18.8516) * root;
    float denominator = float(1.0) + float(18.6875) * root;
    return pow(numerator / denominator, float(78.8438));
}
// Encodes a linear-light value with the sRGB transfer function.
//   linear: linear-light value; hook() clamps it to [0, 1] before calling.
//   returns: sRGB-encoded signal.
// The previous branchless form clamped the linear segment at 0.0031308 (the
// linear-domain break point) instead of 0.0031308 * 12.92, so dark values
// were encoded too low (e.g. 0.001 -> ~0.0043 instead of 0.01292).
float ToSrgb(float linear) {
    if (linear <= 0.0031308) {
        return linear * 12.92;
    }
    return 1.055 * pow(linear, 1.0 / 2.4) - 0.055;
}
// Encodes a linear-light value with the HLG curve.
//   linear: linear-light value.
//   returns: sqrt(3 * linear) up to 1/12, otherwise a * log(12 * linear - b) + c.
float ToHlg(float linear) {
    const float kA = 0.17883277;
    const float kB = 0.28466892;
    const float kC = 0.55991073;
    bool onSqrtSegment = (linear <= 1.0 / 12.0);
    return onSqrtSegment
        ? sqrt(3.0 * linear)
        : kA * log(12.0 * linear - kB) + kC;
}
#if (CAS_GO_SLOWER == 0) | |
// Low-precision square-root approximation done purely on the float's bit
// pattern: shift the bits right by one and add a fixed bias.
float APrxLoSqrtF1(float a) {
    uint bits = floatBitsToUint(a);
    uint approx = (bits >> uint(1)) + uint(0x1fbc4639);
    return uintBitsToFloat(approx);
}
// Low-precision reciprocal approximation: subtract the float's bit pattern
// from a fixed constant and reinterpret the result as a float.
float APrxLoRcpF1(float a) {
    uint bits = floatBitsToUint(a);
    uint approx = uint(0x7ef07ebb) - bits;
    return uintBitsToFloat(approx);
}
// Medium-precision reciprocal approximation: a bit-pattern initial guess
// followed by the refinement step guess * (2 - guess * a).
float APrxMedRcpF1(float a) {
    uint bits = floatBitsToUint(a);
    float guess = uintBitsToFloat(uint(0x7ef19fff) - bits);
    return guess * (-guess * a + float(2.0));
}
#endif | |
// Sharpening pass (no scaling).
// Reads the 3x3 neighbourhood around the current texel, derives an adaptive
// negative ring weight from the local min/max, applies the cross-shaped
// filter and re-encodes the result with the curve selected by TARGET_TRC.
//
// Neighbourhood naming:
//   northWest  north   northEast
//   west       center  east
//   southWest  south   southEast
vec4 hook()
{
#if (defined(HOOKED_gather) && (__VERSION__ >= 400 || (GL_ES && __VERSION__ >= 310)))
    // Two gathers provide the lower-right and upper-left 2x2 quads.
    vec4 lowerRight = HOOKED_gather(vec2(HOOKED_pos + vec2(0.5) * HOOKED_pt), 0);
    vec4 upperLeft = HOOKED_gather(vec2(HOOKED_pos - vec2(0.5) * HOOKED_pt), 0);
    float center = lowerRight.w;
    float east = lowerRight.z;
    float south = lowerRight.x;
    float north = upperLeft.z;
    float west = upperLeft.x;
#if (CAS_BETTER_DIAGONALS == 1)
    float northWest = upperLeft.w;
    float southEast = lowerRight.y;
#endif
#else
    float center = HOOKED_tex(HOOKED_pos).r;
    float east = HOOKED_texOff(vec2(1.0, 0.0)).r;
    float south = HOOKED_texOff(vec2(0.0, 1.0)).r;
    float north = HOOKED_texOff(vec2(0.0, -1.0)).r;
    float west = HOOKED_texOff(vec2(-1.0, 0.0)).r;
#if (CAS_BETTER_DIAGONALS == 1)
    float northWest = HOOKED_texOff(vec2(-1.0, -1.0)).r;
    float southEast = HOOKED_texOff(vec2(1.0, 1.0)).r;
#endif
#endif
#if (CAS_BETTER_DIAGONALS == 1)
    // The remaining two corners are not covered by either gather.
    float northEast = HOOKED_texOff(vec2(1.0, -1.0)).r;
    float southWest = HOOKED_texOff(vec2(-1.0, 1.0)).r;
#endif

    // Soft min and max: cross-shaped extremum, plus (optionally) the full
    // 3x3 extremum. With diagonals enabled the sums are 2.0x bigger; the
    // signal limit below accounts for that instead of halving here.
    float softMin = min(min(min(west, center), min(east, north)), south);
    float softMax = max(max(max(west, center), max(east, north)), south);
#if (CAS_BETTER_DIAGONALS == 1)
    softMin += min(softMin, min(min(northWest, northEast), min(southWest, southEast)));
    softMax += max(softMax, max(max(northWest, northEast), max(southWest, southEast)));
#endif

    // Any non-zero CAS_BETTER_DIAGONALS selects the doubled limit.
#if (CAS_BETTER_DIAGONALS != 0)
    const float signalLimit = 2.0;
#else
    const float signalLimit = 1.0;
#endif

    // Smooth minimum distance to the signal limit divided by the smooth max,
    // then shaped with a square root.
    float headroom = min(softMin, signalLimit - softMax);
#if (CAS_GO_SLOWER == 1)
    float amount = sqrt(clamp(headroom / softMax, 0.0, 1.0));
#else
    float amount = APrxLoSqrtF1(clamp(headroom * APrxLoRcpF1(softMax), 0.0, 1.0));
#endif

    // Filter shape:
    //   0 w 0
    //   w 1 w
    //   0 w 0
    // The divisor is negative, so ringWeight is a negative lobe.
    const float negativePeak = -(mix(8.0, 5.0, clamp(SHARPENING, 0.0, 1.0)));
    float ringWeight = amount / negativePeak;

    // Apply the filter and normalise by the total weight.
    float totalWeight = 1.0 + 4.0 * ringWeight;
    float sharpened = ((north + west + east + south) * ringWeight) + center;
#if (CAS_GO_SLOWER == 1)
    sharpened /= totalWeight;
#else
    sharpened *= APrxMedRcpF1(totalWeight);
#endif

    // Clamp before re-encoding so the transfer curves see in-range input.
    sharpened = clamp(sharpened, 0.0, 1.0);
#if (TARGET_TRC == 1)
    sharpened = To709(sharpened);
#elif (TARGET_TRC == 2)
    sharpened = ToPq(sharpened);
#elif (TARGET_TRC == 3)
    sharpened = ToSrgb(sharpened);
#elif (TARGET_TRC == 4)
    sharpened = pow(sharpened, float(1.0 / 2.4));
#elif (TARGET_TRC == 5)
    sharpened = ToHlg(sharpened);
#elif (TARGET_TRC == 6)
    sharpened = pow(sharpened, float(1.0 / CUSTOM_GAMMA));
#endif
    return vec4(sharpened, 0.0, 0.0, 1.0);
}
Think I will just skip conversion altogether setting SOURCE_TRC and TARGET_TRC to 0.
I'm reluctant to use such baked-in options, which assume a source TRC and do a double conversion for the target TRC. By the time the video frame reaches the CAS shader, doesn't mpv already convert the frame to some TRC based on the source, and then convert to a target TRC when presenting? In that case, what is the point of the internal conversion in the shader?
See Issue #9818.
@agyild Can you please add the same `SOURCE_TRC` and `TARGET_TRC` `6` option to `CAS.glsl`?
@agyild Can you please add the same `SOURCE_TRC` and `TARGET_TRC` `6` option to `CAS.glsl`?
Done.
In addition, `CAS.glsl` also hooks `LUMA`, and only runs if the content is non-scaled. However, this behavior can be disabled by following the instructions in the comments to make it also work as a pre-upscale sharpener.
Did some testing and here are my findings:
After the change to make CAS.glsl use LUMA it does not work anymore:
And sharpening without scaling is IMO not recommended because all the sharpening artifacts get scaled at a later stage, why make CAS a pre-scale filter?:
Scaling should always happen before sharpening most of the time. With sharpening you want to restore some missing information in the signal, but you don't want any mistakes made by your sharpening algorithm to be made more visible by scaling. This is why IMO most sharpeners use RGB.
The "RGB" version of CAS was actually worse because it was directly making sharpening calculations on the green channel by default. The shaders don't do anything at all with RGB data, it's all done on LUMA. They are designed to derive LUMA from RGB, because video game renderers do not have separate CHROMA and LUMA channels like in video content. The most recent version of CAS.glsl
doesn't do pre-upscale sharpening, unless you modify the WHEN
directive to do so. CAS.glsl
should be used for 1:1 scaling, for upscaling you should use CAS-scaled.glsl
.
Also in your screenshot, you have only modified the first WHEN directive in the relinearization pass. You have to modify both WHEN directives for it to properly work as a pre-upscale sharpener.
I did miss the second when condition, but if I did not modify any WHEN condition the CAS was not applied at all. Even so I see CAS as something applied after scaling not as a pre-scaling sharpening shader. FSR does scaling first RCAS after and not without a reason, you want to sharpen after scaling for better results. Can you make CAS be applied after other LUMA scalers? One such example FSRCNN or any other LUMA scalers?
I did miss the second when condition, but if I did not modify any WHEN condition the CAS was not applied at all. Even so I see CAS as something applied after scaling not as a pre-scaling sharpening shader. FSR does scaling first RCAS after and not without a reason, you want to sharpen after scaling for better results. Can you make CAS be applied after other LUMA scalers? One such example FSRCNN or any other LUMA scalers?
It is already possible to do it in cases where the video resolution is 50% of the output resolution at maximum (considering the luma upscaler does 4x area scaling). Make sure that CAS.glsl
is applied after the other luma scaler. For example:
glsl-shader=~~/shaders/FSRCNNX_x2_16-0-4-1.glsl
glsl-shader=~~/shaders/CAS.glsl
So in this case by the time CAS.glsl
runs the luma texture will be 1:1 to OUTPUT
which will trigger CAS. In other cases where you have a smaller video, you have to apply the RGB version of CAS since scale
works on RGB texture, and LUMA
remains unscaled, in which case if sharpened would result in pre-scale sharpening. Or if you don't care about that just modify the WHEN
directive as shown in the comments, and in that case CAS will always sharpen the pre-scaled LUMA
.
Can you elaborate a bit more on why CAS can't be used as a post scaling sharpener, removing the WHEN condition completely from CAS.glsl and adding it after FSRCNNX I see CAS is being applied at the last stage of LUMA processing:
Why does CAS care if the LUMA is upscaled or not? All it should work on is a LUMA you give it scaled or not scaled.
But interesting enough pre-scale sharpening is way more pronounced than post-scale sharpening with CAS:
CAS+FSRCNNX
FSRCNNX+CAS
Does CAS use some info on how much sharpening it should do based on image size vs. display size?
Because this is a reference implementation of CAS algorithm and it is originally not designed to be used as pre-scaling sharpener. In video game implementations it either sharpens the native rendered output or it upscales and sharpens the lower resolution output. By default I am trying to emulate this behavior.
The directives are there so the users get this intended effect. It does not stop one from modifying the code to remove this limit or create their own fork. If you know what you are doing you can use it in any way that you would like. Note that due to how mpv works, it is only possible to use CAS as a post-scaling sharpener when the LUMA channel is resized to the exact size of the output beforehand (e.g., FSRCNNX(720p) -> 1440p on 1440p output).
Makes more sense now. FSRCNNX is a double-er the LUMA at the stage CAS has access to it is a LUMA bigger than even the display size for most content like 1080p on 1440p display (because I forced CAS to be used after FSRCNNX and before downscaling with lanczos). For the post-scale sharpening CAS-rgb would be needed to be able to process the upscaled+downscaled final frame.
I did propose that FSR.glsl should be changed to work on LUMA because it made sense there, but I'm still not convinced that the change to LUMA in CAS.glsl makes sense. The unchanged CAS.glsl that is a 1:1 sharpener has a very limited use-case and as soon as you insert another shader it is most likely not used anymore. The changed CAS.glsl (by modifying the WHEN condition) used as a pre-scale sharpener makes the result have a lot of sharpening artifacts even more than FSR.
IMO and correct me if I'm wrong, but to make CAS.glsl more useful it would make more sense to have a step in the shader that saves LUMA information after scale/dscale (not sure if its possible because internal scalers are rgb) and use that info in CAS-rgb instead of GREEN channel. This would make CAS a post-scale sharpener useful with any combination of other shaders.
This is what I would find useful for this CAS.glsl:
any-scaler + CAS-rgb sharpener
Aren't any of these shaders doing it like this?
Or maybe recreate LUMA from RGB if all else fails (instead of using GREEN channel), something similar to adaptive-sharpen.glsl
// RGB to luma
float luma[25] = float[](CtL(c[0]), CtL(c[1]), CtL(c[2]), CtL(c[3]), CtL(c[4]), CtL(c[5]), CtL(c[6]),
CtL(c[7]), CtL(c[8]), CtL(c[9]), CtL(c[10]), CtL(c[11]), CtL(c[12]),
CtL(c[13]), CtL(c[14]), CtL(c[15]), CtL(c[16]), CtL(c[17]), CtL(c[18]),
CtL(c[19]), CtL(c[20]), CtL(c[21]), CtL(c[22]), CtL(c[23]), CtL(c[24]));
Hello, I liked this shader so much since it is not that aggressive (not scaled one). How can I make this work on all resolutions. Probably I have to change this but dunno change to what.
//!WHEN OUTPUT.w OUTPUT.h * LUMA.w LUMA.h * / 1.0 > ! OUTPUT.w OUTPUT.h * LUMA.w LUMA.h * / 1.0 < ! *
@fideliochan You can remove all !WHEN
lines to make it resolution independent.
@christoph-heinrich it worked, thank you so much!
Think I will just skip conversion altogether setting SOURCE_TRC and TARGET_TRC to 0.
I'm reluctant to use such baked-in options, which assume a source TRC and do a double conversion for the target TRC. By the time the video frame reaches the CAS shader, doesn't mpv already convert the frame to some TRC based on the source, and then convert to a target TRC when presenting? In that case, what is the point of the internal conversion in the shader?