Skip to content

Instantly share code, notes, and snippets.

@igv
Last active September 17, 2024 10:39
Show Gist options
  • Save igv/8a77e4eb8276753b54bb94c1c50c317e to your computer and use it in GitHub Desktop.
Save igv/8a77e4eb8276753b54bb94c1c50c317e to your computer and use it in GitHub Desktop.
Optimal sharpening strength (according to objective metrics) - 0.5. Can be applied only to luma channel (change OUTPUT to LUMA). To use it on-demand add the following line to input.conf: n change-list glsl-shaders toggle "~~/adaptive-sharpen.glsl"
// Copyright (c) 2015-2021, bacondither
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer
// in this position and unchanged.
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// Adaptive sharpen - version 2021-10-17
// Tuned for use post-resize
//!HOOK OUTPUT
//!BIND HOOKED
//!DESC adaptive-sharpen
//--------------------------------------- Settings ------------------------------------------------

// Main control of sharpening strength [>0].
// 0.3 <-> 2.0 is a reasonable range of values.
#define curve_height    1.0

// Set to true to allow for higher overshoot if the current edge pixel
// is surrounded by similar edge pixels.
#define overshoot_ctrl  false

// Every value below this line is considered "optimal".
// DO NOT CHANGE THEM IF YOU DO NOT KNOW WHAT YOU ARE DOING!

// Sharpening curve slope, high edge values
#define curveslope      0.5

// Light compression, default (0.167=~6x)
#define L_compr_low     0.167
// Light compression, surrounded by edges (0.334=~3x)
#define L_compr_high    0.334

// Dark compression, default (0.250=4x)
#define D_compr_low     0.250
// Dark compression, surrounded by edges (0.500=2x)
#define D_compr_high    0.500

// Abs max change before compression [>0.01]
#define scale_lim       0.1
// Compression slope above scale_lim
#define scale_cs        0.056

// Power mean p-value [>0-1.0]
#define pm_p            1.0

//-------------------------------------------------------------------------------------------------
// Largest of four values.
#define max4(a,b,c,d)  ( max(max(a, b), max(c, d)) )

// Soft if, fast linear approx.
// NOTE: expands to an expression that reads `maxedge` from the scope of the caller.
#define soft_if(a,b,c) ( sat((a + b + c + 0.056/2.5)/(maxedge + 0.03/2.5) - 0.85) )

// Soft limit, modified tanh approx: limits the magnitude of v to at most s.
// The result is always >= 0 (the magnitude of v is used); the caller re-applies the sign.
//  - The ratio is squared by multiplication instead of pow(x, 2.0): GLSL leaves pow()
//    undefined for a negative base, and v is negative for the dark/undershoot lobe.
//  - The divisor is floored at a tiny epsilon so that s == 0 (centre pixel is the local
//    min or max) yields exactly 0 instead of Inf/NaN.
#define soft_lim_ratio(v,s) ( (v)/max((s), 1.0e-6) )
#define soft_lim_sq(v,s)    ( soft_lim_ratio(v,s)*soft_lim_ratio(v,s) )
#define soft_lim(v,s)  ( sat(abs(soft_lim_ratio(v,s))*(27.0 + soft_lim_sq(v,s))/(27.0 + 9.0*soft_lim_sq(v,s)))*(s) )

// Weighted power mean of |a| and |b| with weight w on a and |1-w| on b (exponent pm_p).
#define wpmean(a,b,w)  ( pow(w*pow(abs(a), pm_p) + abs(1.0-w)*pow(abs(b), pm_p), (1.0/pm_p)) )

// Get destination pixel values (rgb of the hooked texture at a pixel offset)
#define get(x,y)       ( HOOKED_texOff(vec2(x, y)).rgb )

// Saturate: clamp to [0, 1]
#define sat(x)         ( clamp(x, 0.0, 1.0) )

// Edge strength estimate from screen-space derivatives
#define dxdy(val)      ( length(fwidth(val)) ) // =~1/2.5 hq edge without c_comp

// Colour to luma. When the LUMA texture macro is available the first component is used
// directly; otherwise the clamped channels are squared, weighted (the weights match the
// BT.709 luma coefficients) and the square root of the sum is taken.
#ifdef LUMA_tex
#define CtL(RGB)       RGB.x
#else
#define CtL(RGB)       ( sqrt(dot(sat(RGB)*sat(RGB), vec3(0.2126, 0.7152, 0.0722))) )
#endif

// Squared difference between the blurred centre and one luma sample.
// NOTE: reads `blur` and `luma[]` from the scope of the caller.
#define b_diff(pix)    ( (blur-luma[pix])*(blur-luma[pix]) )
// Shader entry point. Samples a 25-tap diamond around the current pixel, derives an
// edge-adaptive sharpening delta on luma, soft-limits it against the local luma range and
// returns the centre colour with that delta added to every channel. Alpha is passed through.
vec4 hook() {
    // Sample layout (index into c[], offsets relative to the current pixel c0):
    //                   [                c22               ]
    //                   [           c24, c9,  c23          ]
    //                   [      c21, c1,  c2,  c3, c18      ]
    //                   [ c19, c10, c4,  c0,  c5, c11, c16 ]
    //                   [      c20, c6,  c7,  c8, c17      ]
    //                   [           c15, c12, c14          ]
    //                   [                c13               ]
    vec3 c[25] = vec3[](get( 0, 0), get(-1,-1), get( 0,-1), get( 1,-1), get(-1, 0),
                        get( 1, 0), get(-1, 1), get( 0, 1), get( 1, 1), get( 0,-2),
                        get(-2, 0), get( 2, 0), get( 0, 2), get( 0, 3), get( 1, 2),
                        get(-1, 2), get( 3, 0), get( 2, 1), get( 2,-1), get(-3, 0),
                        get(-2, 1), get(-2,-1), get( 0,-3), get( 1,-2), get(-1,-2));

    // Derivative-based edge strength for the first 13 samples (c0..c12)
    float e[13] = float[](dxdy(c[0]),  dxdy(c[1]),  dxdy(c[2]),  dxdy(c[3]),  dxdy(c[4]),
                          dxdy(c[5]),  dxdy(c[6]),  dxdy(c[7]),  dxdy(c[8]),  dxdy(c[9]),
                          dxdy(c[10]), dxdy(c[11]), dxdy(c[12]));

    // RGB to luma
    float luma[25] = float[](CtL(c[0]),  CtL(c[1]),  CtL(c[2]),  CtL(c[3]),  CtL(c[4]),  CtL(c[5]),  CtL(c[6]),
                             CtL(c[7]),  CtL(c[8]),  CtL(c[9]),  CtL(c[10]), CtL(c[11]), CtL(c[12]),
                             CtL(c[13]), CtL(c[14]), CtL(c[15]), CtL(c[16]), CtL(c[17]), CtL(c[18]),
                             CtL(c[19]), CtL(c[20]), CtL(c[21]), CtL(c[22]), CtL(c[23]), CtL(c[24]));

    // Luma of the centre pixel; kept separately because luma[] is reordered further down
    float c0_Y = luma[0];

    // Blur, gauss 3x3
    float blur = (2.0 * (luma[2]+luma[4]+luma[5]+luma[7]) + (luma[1]+luma[3]+luma[6]+luma[8]) + 4.0 * luma[0]) / 16.0;

    // Contrast compression, center = 0.5
    float c_comp = sat(0.266666681f + 0.9*exp2(blur * blur * -7.4));

    // Edge detection
    // Relative matrix weights
    // [          1          ]
    // [       4, 5, 4       ]
    // [    1, 5, 6, 5, 1    ]
    // [       4, 5, 4       ]
    // [          1          ]
    float edge = ( 1.38*b_diff(0)
                 + 1.15*(b_diff(2) + b_diff(4)  + b_diff(5)  + b_diff(7))
                 + 0.92*(b_diff(1) + b_diff(3)  + b_diff(6)  + b_diff(8))
                 + 0.23*(b_diff(9) + b_diff(10) + b_diff(11) + b_diff(12)) ) * c_comp;

    // Compression weights: x is used for the light (positive) lobe, y for the dark (negative) lobe
    vec2 cs = vec2(L_compr_low, D_compr_low);

    if (overshoot_ctrl) {
        // soft_if() reads this variable by name
        float maxedge = max4( max4(e[1],e[2],e[3],e[4]), max4(e[5],e[6],e[7],e[8]),
                              max4(e[9],e[10],e[11],e[12]), e[0] );

        // [          x          ]
        // [       z, x, w       ]
        // [    z, z, x, w, w    ]
        // [ y, y, y, 0, y, y, y ]
        // [    w, w, x, z, z    ]
        // [       w, x, z       ]
        // [          x          ]
        float sbe = soft_if(e[2],e[9], dxdy(c[22]))*soft_if(e[7],e[12],dxdy(c[13]))                  // x dir
                  + soft_if(e[4],e[10],dxdy(c[19]))*soft_if(e[5],e[11],dxdy(c[16]))                  // y dir
                  + soft_if(e[1],dxdy(c[24]),dxdy(c[21]))*soft_if(e[8],dxdy(c[14]),dxdy(c[17]))      // z dir
                  + soft_if(e[3],dxdy(c[23]),dxdy(c[18]))*soft_if(e[6],dxdy(c[20]),dxdy(c[15]));     // w dir

        // Blend towards the "surrounded by edges" compression values
        cs = mix(cs, vec2(L_compr_high, D_compr_high), sat(2.4002*sbe - 2.282));
    }

    // Precalculated default squared kernel weights
    const vec3 w1 = vec3(0.5,           1.0, 1.41421356237); // 0.25, 1.0, 2.0
    const vec3 w2 = vec3(0.86602540378, 1.0, 0.54772255751); // 0.75, 1.0, 0.3

    // Transition to a concave kernel if the center edge val is above thr
    vec3 dW = pow(mix( w1, w2, sat(2.4*edge - 0.82)), vec3(2.0));

    // Use lower weights for pixels in a more active area relative to center pixel area
    // This results in narrower and less visible overshoots around sharp edges
    float modif_e0 = 3.0 * e[0] + 0.02/2.5;

    // weights[i] belongs to sample c[i+1]
    float weights[12] = float[](( min(modif_e0/e[1],  dW.y) ),
                                ( dW.x ),
                                ( min(modif_e0/e[3],  dW.y) ),
                                ( dW.x ),
                                ( dW.x ),
                                ( min(modif_e0/e[6],  dW.y) ),
                                ( dW.x ),
                                ( min(modif_e0/e[8],  dW.y) ),
                                ( min(modif_e0/e[9],  dW.z) ),
                                ( min(modif_e0/e[10], dW.z) ),
                                ( min(modif_e0/e[11], dW.z) ),
                                ( min(modif_e0/e[12], dW.z) ));

    // Adjust the four diagonal weights using the two nearest distance-2 weights, floor of 0.25
    weights[0] = (max(max((weights[8]  + weights[9])/4.0,  weights[0]), 0.25) + weights[0])/2.0;
    weights[2] = (max(max((weights[8]  + weights[10])/4.0, weights[2]), 0.25) + weights[2])/2.0;
    weights[5] = (max(max((weights[9]  + weights[11])/4.0, weights[5]), 0.25) + weights[5])/2.0;
    weights[7] = (max(max((weights[10] + weights[11])/4.0, weights[7]), 0.25) + weights[7])/2.0;

    // Calculate the negative part of the laplace kernel and the low threshold weight
    float lowthrsum   = 0.0;
    float weightsum   = 0.0;
    float neg_laplace = 0.0;

    for (int pix = 0; pix < 12; ++pix)
    {
        float lowthr = sat((20.*4.5*c_comp*e[pix + 1] - 0.221));

        neg_laplace += luma[pix+1] * luma[pix+1] * weights[pix] * lowthr;
        weightsum   += weights[pix] * lowthr;
        lowthrsum   += lowthr / 12.0;
    }

    // If no neighbour passed the low threshold (e.g. a flat area) weightsum is 0 and the
    // division below would be 0/0. Fall back to the centre luma so the sharpening diff is 0
    // instead of propagating NaN into min()/max(), whose NaN handling is not defined.
    neg_laplace = (weightsum > 0.0) ? sqrt(neg_laplace / weightsum) : c0_Y;

    // Compute sharpening magnitude function
    float sharpen_val = curve_height/(curve_height*curveslope*edge + 0.625);

    // Calculate sharpening diff and scale
    float sharpdiff = (c0_Y - neg_laplace)*(lowthrsum*sharpen_val + 0.01);

    // Calculate local near min & max, partial sort
    // Pass 1: order each adjacent pair (0,1), (2,3) ... (22,23) as (smaller, larger)
    float temp;

    for (int i1 = 0; i1 < 24; i1 += 2)
    {
        temp = luma[i1];
        luma[i1]   = min(luma[i1], luma[i1+1]);
        luma[i1+1] = max(temp, luma[i1+1]);
    }

    // Pass 2: sweep the smallest value into luma[0] and the largest into luma[24]
    for (int i2 = 24; i2 >  0; i2 -= 2)
    {
        temp = luma[0];
        luma[0]    = min(luma[0], luma[i2]);
        luma[i2]   = max(temp, luma[i2]);

        temp = luma[24];
        luma[24]   = max(luma[24], luma[i2-1]);
        luma[i2-1] = min(temp, luma[i2-1]);
    }

    // Distance from the centre luma to the nearer of the local extremes
    float min_dist = min(abs(luma[24] - c0_Y), abs(c0_Y - luma[0]));
    min_dist = min(min_dist, scale_lim*(1.0 - scale_cs) + min_dist*scale_cs);

    // Soft limited anti-ringing with tanh, wpmean to control compression slope
    sharpdiff = wpmean(max(sharpdiff, 0.0), soft_lim( max(sharpdiff, 0.0), min_dist ), cs.x )
              - wpmean(min(sharpdiff, 0.0), soft_lim( min(sharpdiff, 0.0), min_dist ), cs.y );

    // Keep the sharpened luma inside [0, 1]
    float sharpdiff_lim = sat(c0_Y + sharpdiff) - c0_Y;

    // Disabled alternative output that also scales the colour difference from luma:
    /*float satmul = (c0_Y + max(sharpdiff_lim*0.9, sharpdiff_lim)*1.03 + 0.03)/(c0_Y + 0.03);
    vec3 res = c0_Y + sharpdiff_lim + (c[0] - c0_Y)*satmul;
    */

    // Same delta added to every channel of the centre colour; alpha is taken from the source
    return vec4(sharpdiff_lim + c[0], HOOKED_texOff(0).a);
}
@pbiancardi
Copy link

Thank you kindly for your reply. I should have pointed out that I am well aware of the implications and how wasteful this brute force approach might seem to most. Allow me to give a little background: I have been working for years adapting madVR and its arsenal of filters to all kind of high end home theater setups, I have also actively been involved in progressing its development beta testing with Mathias. Very recently I made the switch to mpv. Beyond adjusting Adaptiv Sharpen parameters for different kind of source/output device combinations (it makes a world of a difference to see the filter in action ruthlessly on a 200nit projection sitting one screen width away from huge screens compared to most other scenarios less revealing) I found the approach to apply sharpening to an internally oversampled stream the by far most "organic" use of adaptive sharpen (as well as a couple of other filters). After pixel peeping for weeks Mathias kind of agreed with me and implemented an option to force internal supersampling for these use cases. So I was applying NGU/FSRCNNX to all input streams (be it 1080p with 4x upscaling or 4k with 2x) and always internally do all the processing on a 8k stream. Sure, the argument less is more when it comes to processing/filtering is something I wholeheartly agree with (DNR!!) but all our 4k content streams have been heavily processed as is - they have been scaled and rescaled, chroma subsampled, heavily compressed with especially h265 "smoothing over information", gamut converted, ... So to take the abstraction of the original content that we have access to and try to recover as much of the pristine information originally present in the DI or even more upstream all the way to what the image sensor captured is a valid purpose in my opinion. 
To internally apply all processing at the highest "precision" (be it 16bit internally for 8bit/10bit RGB output or at the highest resolution that is practical) is something that is being commonly used in one form or another.
So allow me to go back to my original question: being aware and mindful of the implications and the "waste of energy" that comes with it please show me the way to force mpv to internally upscale EVERY content resolution to 8k with FSRCNNX (for me only 1080p and 2160p are relevant as source resolution so its either 2x or 4x), apply all filtering in particular adaptive sharpen to that 8k internal stream and then downscale to whatever output resolution is needed (basically 4k). How may I adjust the config settings to accomplish that? Merci beaucoup

I better understand what you want to do, for 1080p the shader would be applied on a 4k display without a doubt, however for 4k source you would need to force it by changing all WHEN conditions in the FSRCNNX shader itself:

//!WHEN OUTPUT.w LUMA.w / 1.000 > OUTPUT.h LUMA.h / 1.000 > *

In your case, on a 4k display, this would force the shader to be used for any video at or below 4k resolution, but would disable it when the source resolution is larger than 4k (i.e. when downscaling).

I tried this but it didn't work for me.

In my case I use a 1080p projector and would like native 1080p files to be upscaled to 4k and then downscaled with SSIM (this has proven benefits based on MadVR trials). Are you able to offer assistance? Thanks in advance.

@CrHasher
Copy link

CrHasher commented May 22, 2023

Remove the WHEN conditions (all of them with OUTPUT in them not just one) completely and try again. If that does not work then I have no other ideas.

@AziRizvi
Copy link

One question please.
What line do I need to edit in this to control the intensity of this shader?
I like it but I'd like the intensity to be a little less.

@deus0ww
Copy link

deus0ww commented Aug 24, 2023

One question please. What line do I need to edit in this to control the intensity of this shader? I like it but I'd like the intensity to be a little less.

curve_height in https://gist.github.com/igv/8a77e4eb8276753b54bb94c1c50c317e#file-adaptive-sharpen-glsl-L34

@AziRizvi
Copy link

One question please. What line do I need to edit in this to control the intensity of this shader? I like it but I'd like the intensity to be a little less.

curve_height in https://gist.github.com/igv/8a77e4eb8276753b54bb94c1c50c317e#file-adaptive-sharpen-glsl-L34

Thanks a bunch, I feel dumb, wish I had actually taken a look inside the shader file because it's clearly mentioned in there.

@deus0ww
Copy link

deus0ww commented Dec 6, 2023

Could you use gather? In my test using a 4K video on an M1 Mac mini, both luma and rgb are about 25-30% faster.

// Alternative way to fill c[25] inside hook(): when HOOKED_gather is defined, the samples
// are read with 8 textureGatherOffset calls per channel; otherwise the per-sample get()
// fetches are used.
// NOTE(review): presumably HOOKED_gather is only defined when the hooked texture supports
// gather -- confirm against the mpv user-shader documentation.
#ifdef HOOKED_gather
    // Gather coordinate derived from the current position, shifted by half a texel
    vec2 p = (HOOKED_pos * HOOKED_size - vec2(0.5)) * HOOKED_pt;
    // NOTE(review): brace-style aggregate initializers presumably require a GLSL version
    // with initializer-list support -- confirm the minimum version mpv compiles with.
    ivec2 gatherOffsets[8] = {{ 1, 1}, { 0, 0}, { 3, 1}, { 1, 3}, {-1, 2}, {-2, 0}, { 0,-2}, { 2,-1}};
    // g[channel][gather index]; each vec4 holds the four texels returned by one gather
    vec4 g[3][8];
    for (int i = 0; i < 8; i++) {
        g[0][i] = HOOKED_mul * textureGatherOffset(HOOKED_raw, p, gatherOffsets[i], 0);
#ifndef LUMA_tex
        g[1][i] = HOOKED_mul * textureGatherOffset(HOOKED_raw, p, gatherOffsets[i], 1);
        g[2][i] = HOOKED_mul * textureGatherOffset(HOOKED_raw, p, gatherOffsets[i], 2);
#endif
    }
    // Reassemble the 25 samples in the same index order as the get() path below.
    // NOTE(review): when LUMA_tex is defined, g[1] and g[2] are never written but are still
    // read here -- confirm that only the first component of c[] is consumed in that mode.
    vec3 c[25] = {{g[0][0].w, g[1][0].w, g[2][0].w}, {g[0][1].w, g[1][1].w, g[2][1].w}, {g[0][1].z, g[1][1].z, g[2][1].z}, {g[0][7].x, g[1][7].x, g[2][7].x}, {g[0][1].x, g[1][1].x, g[2][1].x},
                  {g[0][0].z, g[1][0].z, g[2][0].z}, {g[0][4].z, g[1][4].z, g[2][4].z}, {g[0][0].x, g[1][0].x, g[2][0].x}, {g[0][0].y, g[1][0].y, g[2][0].y}, {g[0][6].y, g[1][6].y, g[2][6].y},
                  {g[0][5].y, g[1][5].y, g[2][5].y}, {g[0][2].w, g[1][2].w, g[2][2].w}, {g[0][3].w, g[1][3].w, g[2][3].w}, {g[0][3].x, g[1][3].x, g[2][3].x}, {g[0][3].z, g[1][3].z, g[2][3].z},
                  {g[0][4].y, g[1][4].y, g[2][4].y}, {g[0][2].z, g[1][2].z, g[2][2].z}, {g[0][2].x, g[1][2].x, g[2][2].x}, {g[0][7].y, g[1][7].y, g[2][7].y}, {g[0][5].x, g[1][5].x, g[2][5].x},
                  {g[0][4].w, g[1][4].w, g[2][4].w}, {g[0][5].z, g[1][5].z, g[2][5].z}, {g[0][6].z, g[1][6].z, g[2][6].z}, {g[0][7].w, g[1][7].w, g[2][7].w}, {g[0][6].x, g[1][6].x, g[2][6].x}};
#else
    // Fallback: one texture fetch per sample
    vec3 c[25] = vec3[](get( 0, 0), get(-1,-1), get( 0,-1), get( 1,-1), get(-1, 0),
                        get( 1, 0), get(-1, 1), get( 0, 1), get( 1, 1), get( 0,-2),
                        get(-2, 0), get( 2, 0), get( 0, 2), get( 0, 3), get( 1, 2),
                        get(-1, 2), get( 3, 0), get( 2, 1), get( 2,-1), get(-3, 0),
                        get(-2, 1), get(-2,-1), get( 0,-3), get( 1,-2), get(-1,-2));
#endif

@JohnChristianD
Copy link

HOOK OUTPUT with a WHEN condition doesn't seem to work with gpu-next. I'm on Windows. Moved it to POSTKERNEL since this is supposedly tuned for post-resize. I only have MATLAB coding experience so I'm not sure if this solution is ideal or if this is considered a bug.

@JohnChristianD
Copy link

JohnChristianD commented Sep 17, 2024

Tried it on FineSharp and Anime4kDenoise and I found the same problem so it might be a general mpv gpu-next bug. Damn, I wish FineSharp worked fully with gpu-next. Also does anyone know if anti-ringing works on vo=gpu for polar downscaling and if anti-ringing works on vo=gpu-next for orthogonal downscaling? I've read that the latter doesn't work but I've only experienced the former but haven't read about it. This is using the latest chocolatey version of mpv

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment