realmonster/crt-royale-pal-r57shell.cgp

## crt-royale-pal-r57shell.cgp
# IMPORTANT:
# Shader passes need to know details about the image in the mask_texture LUT
# files, so set the following constants in user-cgp-constants.h accordingly:
# 1.) mask_triads_per_tile = (number of horizontal triads in mask texture LUT's)
# 2.) mask_texture_small_size = (texture size of mask*texture_small LUT's)
# 3.) mask_texture_large_size = (texture size of mask*texture_large LUT's)
# 4.) mask_grille_avg_color = (avg. brightness of mask_grille_texture* LUT's, in [0, 1])
# 5.) mask_slot_avg_color = (avg. brightness of mask_slot_texture* LUT's, in [0, 1])
# 6.) mask_shadow_avg_color = (avg. brightness of mask_shadow_texture* LUT's, in [0, 1])
# Shader passes also need to know certain scales set in this .cgp, but their
# compilation model doesn't currently allow the .cgp file to tell them.  Make
# sure to set the following constants in user-cgp-constants.h accordingly too:
# 1.) bloom_approx_scale_x = scale_x3
# 2.) mask_resize_viewport_scale = float2(scale_x7, scale_y6)
# Finally, shader passes need to know the value of geom_max_aspect_ratio used to
# calculate scale_y6 (among other values):
# 1.) geom_max_aspect_ratio = (geom_max_aspect_ratio used to calculate scale_y6)

# Total number of passes in this preset: shader0 (PAL simulation) followed by
# the twelve crt-royale passes shader1..shader12.
shaders = "13"

# Pass0: NES PAL composite signal simulation, run before the crt-royale chain.
shader0 = "../pal/shaders/pal-r57shell.cg"

# Lookup texture for the PAL pass.  pal-r57shell.cg only samples nes_lut when
# both USE_RAW and USE_LUT are enabled in that shader.  It is sampled with
# nearest filtering, repeat wrapping, and no mipmaps.
nes_lut = "../pal/resources/nes_lut.png"
nes_lut_linear = "false"
nes_lut_wrap_mode = "repeat"
nes_lut_mipmap = "false"

# Pass0 output: a fixed (absolute) width of 1024 pixels and the same height as
# the source, sampled with nearest filtering.
# frame_count_mod0 presumably wraps the frame counter passed to the shader
# modulo 2 -- confirm against the shader preset specification.
filter_linear0 = false
scale_type_x0 = absolute
scale_type_y0 = source
scale_x0 = 1024
scale_y0 = 1.0
frame_count_mod0 = 2

# Set an identifier, filename, and sampling traits for the phosphor mask texture.
# Load an aperture grille, slot mask, and an EDP shadow mask, and load a small
# non-mipmapped version and a large mipmapped version.
# TODO: Test masks in other directories.
textures = "nes_lut;mask_grille_texture_small;mask_grille_texture_large;mask_slot_texture_small;mask_slot_texture_large;mask_shadow_texture_small;mask_shadow_texture_large"
mask_grille_texture_small = "shaders/crt-royale/TileableLinearApertureGrille15Wide8And5d5SpacingResizeTo64.png"
mask_grille_texture_large = "shaders/crt-royale/TileableLinearApertureGrille15Wide8And5d5Spacing.png"
mask_slot_texture_small = "shaders/crt-royale/TileableLinearSlotMaskTall15Wide9And4d5Horizontal9d14VerticalSpacingResizeTo64.png"
mask_slot_texture_large = "shaders/crt-royale/TileableLinearSlotMaskTall15Wide9And4d5Horizontal9d14VerticalSpacing.png"
mask_shadow_texture_small = "shaders/crt-royale/TileableLinearShadowMaskEDPResizeTo64.png"
mask_shadow_texture_large = "shaders/crt-royale/TileableLinearShadowMaskEDP.png"
mask_grille_texture_small_wrap_mode = "repeat"
mask_grille_texture_large_wrap_mode = "repeat"
mask_slot_texture_small_wrap_mode = "repeat"
mask_slot_texture_large_wrap_mode = "repeat"
mask_shadow_texture_small_wrap_mode = "repeat"
mask_shadow_texture_large_wrap_mode = "repeat"
mask_grille_texture_small_linear = "true"
mask_grille_texture_large_linear = "true"
mask_slot_texture_small_linear = "true"
mask_slot_texture_large_linear = "true"
mask_shadow_texture_small_linear = "true"
mask_shadow_texture_large_linear = "true"
mask_grille_texture_small_mipmap = "false"  # Mipmapping causes artifacts with manually resized masks without tex2Dlod
mask_grille_texture_large_mipmap = "true"   # Essential for hardware-resized masks
mask_slot_texture_small_mipmap = "false"    # Mipmapping causes artifacts with manually resized masks without tex2Dlod
mask_slot_texture_large_mipmap = "true"     # Essential for hardware-resized masks
mask_shadow_texture_small_mipmap = "false"  # Mipmapping causes artifacts with manually resized masks without tex2Dlod
mask_shadow_texture_large_mipmap = "true"   # Essential for hardware-resized masks


# Pass1: Linearize the input based on CRT gamma and bob interlaced fields.
# (Bobbing ensures we can immediately blur without getting artifacts.)
shader1 = "shaders/crt-royale/src/crt-royale-first-pass-linearize-crt-gamma-bob-fields.cg"
alias1 = "ORIG_LINEARIZED"
filter_linear1 = "false"
scale_type1 = "source"
scale1 = "1.0"
srgb_framebuffer1 = "true"

# Pass2: Resample interlaced (and misconverged) scanlines vertically.
# Separating vertical/horizontal scanline sampling is faster: It lets us
# consider more scanlines while calculating weights for fewer pixels, and
# it reduces our samples from vertical*horizontal to vertical+horizontal.
# This has to come right after ORIG_LINEARIZED, because there's no
# "original_source" scale_type we can use later.
shader2 = "shaders/crt-royale/src/crt-royale-scanlines-vertical-interlacing.cg"
alias2 = "VERTICAL_SCANLINES"
filter_linear2 = "true"
scale_type_x2 = "source"
scale_x2 = "1.0"
scale_type_y2 = "viewport"
scale_y2 = "1.0"
#float_framebuffer2 = "true"
srgb_framebuffer2 = "true"

# Pass3: Do a small resize blur of ORIG_LINEARIZED at an absolute size, and
# account for convergence offsets.  We want to blur a predictable portion of the
# screen to match the phosphor bloom, and absolute scale works best for
# reliable results with a fixed-size bloom.  Picking a scale is tricky:
# a.) 400x300 is a good compromise for the "fake-bloom" version: It's low enough
#     to blur high-res/interlaced sources but high enough that resampling
#     doesn't smear low-res sources too much.
# b.) 320x240 works well for the "real bloom" version: It's 1-1.5% faster, and
#     the only noticeable visual difference is a larger halation spread (which
#     may be a good thing for people who like to crank it up).
# Note the 4:3 aspect ratio assumes the input has cropped geom_overscan (so it's
# *intended* for an ~4:3 aspect ratio).
shader3 = "shaders/crt-royale/src/crt-royale-bloom-approx.cg"
alias3 = "BLOOM_APPROX"
filter_linear3 = "true"
scale_type3 = "absolute"
scale_x3 = "320"
scale_y3 = "240"
srgb_framebuffer3 = "true"

# Pass4: Vertically blur the input for halation and refractive diffusion.
# Base this on BLOOM_APPROX: This blur should be small and fast, and blurring
# a constant portion of the screen is probably physically correct if the
# viewport resolution is proportional to the simulated CRT size.
shader4 = "../blurs/blur9fast-vertical.cg"
filter_linear4 = "true"
scale_type4 = "source"
scale4 = "1.0"
srgb_framebuffer4 = "true"

# Pass5: Horizontally blur the input for halation and refractive diffusion.
# Note: Using a one-pass 9x9 blur is about 1% slower.
shader5 = "../blurs/blur9fast-horizontal.cg"
alias5 = "HALATION_BLUR"
filter_linear5 = "true"
scale_type5 = "source"
scale5 = "1.0"
srgb_framebuffer5 = "true"

# Pass6: Lanczos-resize the phosphor mask vertically.  Set the absolute
# scale_x6 == mask_texture_small_size.x (see IMPORTANT above).  Larger scales
# will blur, and smaller scales could get nasty.  The vertical size must be
# based on the viewport size and calculated carefully to avoid artifacts later.
# First calculate the minimum number of mask tiles we need to draw.
# Since curvature is computed after the scanline masking pass:
#   num_resized_mask_tiles = 2.0;
# If curvature were computed in the scanline masking pass (it's not):
#   max_mask_texel_border = ~3.0 * (1/3.0 + 4.0*sqrt(2.0) + 0.5 + 1.0);
#   max_mask_tile_border = max_mask_texel_border/
#       (min_resized_phosphor_triad_size * mask_triads_per_tile);
#   num_resized_mask_tiles = max(2.0, 1.0 + max_mask_tile_border * 2.0);
#   At typical values (triad_size >= 2.0, mask_triads_per_tile == 8):
#       num_resized_mask_tiles = ~3.8
# Triad sizes are given in horizontal terms, so we need geom_max_aspect_ratio
# to relate them to vertical resolution.  The widest we expect is:
#   geom_max_aspect_ratio = 4.0/3.0  # Note: Shader passes need to know this!
# The fewer triads we tile across the screen, the larger each triad will be as a
# fraction of the viewport size, and the larger scale_y6 must be to draw a full
# num_resized_mask_tiles.  Therefore, we must decide the smallest number of
# triads we'll guarantee can be displayed on screen.  We'll set this according
# to 3-pixel triads at 768p resolution (the lowest anyone's likely to use):
#   min_allowed_viewport_triads = 768.0*geom_max_aspect_ratio / 3.0 = 341.333333
# Now calculate the viewport scale that ensures we can draw resized_mask_tiles:
#   min_scale_x = resized_mask_tiles * mask_triads_per_tile /
#       min_allowed_viewport_triads
#   scale_y6 = geom_max_aspect_ratio * min_scale_x
#   # Some code might depend on equal scales:
#   scale_x7 = scale_y6
# Given our default geom_max_aspect_ratio and min_allowed_viewport_triads:
#   scale_y6 = 4.0/3.0 * 2.0/(341.33333 / 8.0) = 0.0625
# IMPORTANT: The scales MUST be calculated in this way.  If you wish to change
# geom_max_aspect_ratio, update that constant in user-cgp-constants.h!
shader6 = "shaders/crt-royale/src/crt-royale-mask-resize-vertical.cg"
filter_linear6 = "true"
scale_type_x6 = "absolute"
scale_x6 = "64"
scale_type_y6 = "viewport"
scale_y6 = "0.0625" # Safe for >= 341.333 horizontal triads at viewport size
#srgb_framebuffer6 = "false" # mask_texture is already assumed linear

# Pass7: Lanczos-resize the phosphor mask horizontally.  scale_x7 = scale_y6.
# TODO: Check again if the shaders actually require equal scales.
shader7 = "shaders/crt-royale/src/crt-royale-mask-resize-horizontal.cg"
alias7 = "MASK_RESIZE"
filter_linear7 = "false"
scale_type_x7 = "viewport"
scale_x7 = "0.0625"
scale_type_y7 = "source"
scale_y7 = "1.0"
#srgb_framebuffer7 = "false" # mask_texture is already assumed linear

# Pass8: Resample (misconverged) scanlines horizontally, apply halation, and
# apply the phosphor mask.
shader8 = "shaders/crt-royale/src/crt-royale-scanlines-horizontal-apply-mask.cg"
alias8 = "MASKED_SCANLINES"
filter_linear8 = "true" # This could just as easily be nearest neighbor.
scale_type8 = "viewport"
scale8 = "1.0"
#float_framebuffer8 = "true"
srgb_framebuffer8 = "true"

# Pass 9: Compute a brightpass.  This will require reading the final mask.
shader9 = "shaders/crt-royale/src/crt-royale-brightpass.cg"
alias9 = "BRIGHTPASS"
filter_linear9 = "true" # This could just as easily be nearest neighbor.
scale_type9 = "viewport"
scale9 = "1.0"
srgb_framebuffer9 = "true"

# Pass 10: Blur the brightpass vertically
shader10 = "shaders/crt-royale/src/crt-royale-bloom-vertical.cg"
filter_linear10 = "true" # This could just as easily be nearest neighbor.
scale_type10 = "source"
scale10 = "1.0"
srgb_framebuffer10 = "true"

# Pass 11: Blur the brightpass horizontally and combine it with the dimpass:
shader11 = "shaders/crt-royale/src/crt-royale-bloom-horizontal-reconstitute.cg"
filter_linear11 = "true"
scale_type11 = "source"
scale11 = "1.0"
srgb_framebuffer11 = "true"

# Pass 12: Compute curvature/AA:
shader12 = "shaders/crt-royale/src/crt-royale-geometry-aa-last-pass.cg"
filter_linear12 = "true"
scale_type12 = "viewport"
mipmap_input12 = "true"
texture_wrap_mode12 = "clamp_to_edge"


## nes_lut.png

      
    Raw
  

              nes_lut.png
            
          
## pal-r57shell.cg
// NES PAL composite signal simulation for RetroArch
// shader by r57shell
// thanks to feos & HardWareMan & NewRisingSun

// also TV subpixels and scanlines

// LICENSE: PUBLIC DOMAIN

// NOTE: for nice TV subpixels and scanlines I recommend
// disabling these features here and applying a CRT-specialized shader.

// Quality considerations

// there are three main options:
// USE_RAW (R), USE_DELAY_LINE (D), USE_COLORIMETRY (C)
// here is table of quality in decreasing order:
// RDC, RD, RC, DC, D, C

// TWEAKS start
// Each option below is a compile-time switch: it is active when its
// #define line is uncommented.

// Uncomment this to disable dynamic (menu-tunable) settings and use the
// static values instead.
// If the shader fails to compile with dynamic settings and you still want
// to tune parameters in the menu, try lowering Mwidth (defined further
// below), or disable USE_DELAY_LINE or USE_RAW, or do all of these at once.
//#undef PARAMETER_UNIFORM

// Use the delay line technique.
// Without the delay line technique, color would interleave;
// to avoid this, set HueRotation to zero.
#define USE_DELAY_LINE

// Use this if you need to swap the even/odd V sign.
// The sign of V changes on each scanline,
// so if one scanline is positive, the next one is negative.
// If you want to match the picture
// to an actual PAL NES running on a TV,
// you may need this option to change the signs
// when they don't match.
//#define SWAP_VSIGN

// Shift the phase from frame to frame, as an NTSC NES does
// (a PAL NES doesn't).
//#define ANIMATE_PHASE

// Rough simulation of scanlines.
// It is better to use an additional shader instead.
// If you still use it, make sure that SizeY
// is at most half of the output height.
//#define USE_SCANLINES

// This option alternates the active visible fields.
// This is not how an actual NES works:
// it does not alternate fields.
//#define ANIMATE_SCANLINE

// Simulate CRT TV subpixels.
// It is better to use a CRT-specialized shader instead.
//#define USE_SUBPIXELS

// Change the gamma of the virtual TV from 2.2 to something else.
//#define USE_GAMMA

// Use the core's output size. Use this for NES; turn it off for other cores
// and use the "size" tweak instead.
//#define USE_CORE_SIZE

// Use the raw palette. Turn it on if you
// have Nestopia and are using its raw palette.
//#define USE_RAW

// Use a lookup texture: faster but less accurate.
// It works only if USE_RAW is enabled.
//#define USE_LUT

// Compensate for the filter width.
// It makes the picture narrower
// so that the right border of the picture is visible.
#define COMPENSATE_WIDTH

// Use the sampled version. It is a much slower version of the shader
// because it computes four times as many values. NOT RECOMMENDED.
//#define USE_SAMPLED

#ifndef PARAMETER_UNIFORM

// Static settings: this section is compiled only when PARAMETER_UNIFORM is
// not defined.

// NTSC standard gamma = 2.2
// PAL standard gamma = 2.8
// According to many sources, a TV gamma of 2.8 is very unlikely;
// the gamma of a PAL TV is most likely in the range 2.4-2.5.
static const float Gamma_static = 2.5; // gamma of virtual TV

static const float Brightness_static = 0.0;
static const float Contrast_static = 1.0;
static const float Saturation_static = 1.0;

// Filter widths for the Y, U and V sums: monitor() adds sample i to a
// channel's sum only while i is below that channel's width.
static const int
	Ywidth_static = 12,
	Uwidth_static = 23,
	Vwidth_static = 23;

// The correct value is -2.5.
// Works only with USE_RAW.
static const float HueShift = -2.5;

// Rotation of hue depending on the luma level.
static const float HueRotation = 2.;

// Touch these only if you know what you are doing.
static const float Phase_Y = 2.; // mod(341*10,12)
static const float Phase_One = 0.; // alternating phases.
static const float Phase_Two = 8.;

// Screen size; scanlines = y*2: y lines in one field and y in the other.
static const int SizeX = 256;
static const int SizeY = 240;

// Number of pixels of the virtual TV.
// Values close to 1000 produce small artifacts.
static const int TV_Pixels = 400;

// Brightness multiplier applied to the darkened scanline (USE_SCANLINES).
static const float dark_scanline = 0.5; // half

#endif

// This option uses the matrices below.
// It provides a more scientific approach
// by converting into linear XYZ space
// and back to sRGB.
// It uses the Gamma setting too;
// defining USE_GAMMA is not required.
#define USE_COLORIMETRY

// Applied in monitor() to the gamma-expanded RGB value to obtain XYZ.
static const float3x3 RGB_to_XYZ =
{
	0.4306190, 0.3415419, 0.1783091,
	0.2220379, 0.7066384, 0.0713236,
	0.0201853, 0.1295504, 0.9390944
};

// Applied in monitor() to the XYZ value; the result is clamped and then
// encoded with the sRGB transfer curve.
static const float3x3 XYZ_to_sRGB =
{
	 3.2406, -1.5372, -0.4986,
	-0.9689,  1.8758,  0.0415,
	 0.0557, -0.2040,  1.0570
};

// TWEAKS end

#ifdef PARAMETER_UNIFORM

// Dynamic settings: each "#pragma parameter" line declares a menu-tunable
// value as: name, label, default, minimum, maximum, step.
// NOTE(review): the Y/U/V width parameters allow values up to 32, but the
// accumulation loop in monitor() runs only Mwidth (24, below) iterations
// while the sums are still divided by the full width -- confirm whether
// widths above 24 are meant to be selectable.
// NOTE(review): TV_Pixels defaults to 200 here, while the static setting
// used without PARAMETER_UNIFORM is 400 -- confirm which one is intended.
#pragma parameter Gamma "PAL Gamma" 2.5 0.0 10.0 0.03125
#pragma parameter Brightness "PAL Brightness" 0.0 -1.0 2.0 0.03125
#pragma parameter Contrast "PAL Contrast" 1.0 -1.0 2.0 0.03125
#pragma parameter Saturation "PAL Saturation" 1.0 -1.0 2.0 0.03125
#pragma parameter HueShift "PAL Hue Shift" -2.5 -6.0 6.0 0.015625
#pragma parameter HueRotation "PAL Hue Rotation" 2.0 -5.0 5.0 0.015625
#pragma parameter Ywidth "PAL Y Width" 12.0 1.0 32.0 1.0
#pragma parameter Uwidth "PAL U Width" 23.0 1.0 32.0 1.0
#pragma parameter Vwidth "PAL V Width" 23.0 1.0 32.0 1.0
#pragma parameter SizeX "Active Width" 256.0 1.0 4096.0 1.0
#pragma parameter SizeY "Active Height" 240.0 1.0 4096.0 1.0
#pragma parameter TV_Pixels "PAL TV Pixels" 200.0 1.0 2400.0 1.0
#pragma parameter dark_scanline "PAL Scanline" 0.5 0.0 1.0 0.025
#pragma parameter Phase_Y "PAL Phase Y" 2.0 0.0 12.0 0.025
#pragma parameter Phase_One "PAL Phase One" 0.0 0.0 12.0 0.025
#pragma parameter Phase_Two "PAL Phase Two" 8.0 0.0 12.0 0.025

// Uniforms matching the parameters declared above.
uniform float Gamma;
uniform float Brightness;
uniform float Contrast;
uniform float Saturation;
uniform float HueShift;
uniform float HueRotation;
uniform int Ywidth;
uniform int Uwidth;
uniform int Vwidth;
uniform int TV_Pixels;
uniform int SizeX;
uniform int SizeY;
uniform float dark_scanline;
uniform float Phase_Y;
uniform float Phase_One;
uniform float Phase_Two;

// Fixed iteration count of the accumulation loop in monitor().
static const float Mwidth = 24;

// With dynamic settings the *_static values are neutral (1): the constant
// YUV_to_RGB matrix is built from them, and monitor() applies the real
// Contrast, Saturation and width scaling at run time instead.
static const int Ywidth_static = 1;
static const int Uwidth_static = 1;
static const int Vwidth_static = 1;

static const float Contrast_static = 1.;
static const float Saturation_static = 1.;

#else

// Static settings: map the names used by the shader body onto the
// *_static constants.
#define Brightness Brightness_static
#define Gamma Gamma_static

#define Ywidth Ywidth_static
#define Uwidth Uwidth_static
#define Vwidth Vwidth_static

// The accumulation loop must cover the widest of the three filters.
static const int Mwidth = max(float(Ywidth), max(float(Uwidth), float(Vwidth)));

// USE_CORE_SIZE is only consulted in this static branch.
#ifdef USE_CORE_SIZE
// just use core output size.
#define size (IN.video_size.xy)
#else
static const float2 size = float2(SizeX,SizeY);
#endif

#endif

// Scale factors applied to the B-Y and R-Y rows of RGB_to_YUV below.
static const float YUV_u = 0.492;
static const float YUV_v = 0.877;

// Rows: Y, scaled B-Y (U), scaled R-Y (V).
static const float3x3 RGB_to_YUV =
{
	float3( 0.299, 0.587, 0.114), //Y
	float3(-0.299,-0.587, 0.886)*YUV_u, //B-Y
	float3( 0.701,-0.587,-0.114)*YUV_v //R-Y
};

// Signal range used to normalise the decoded values.
// With USE_RAW, monitor() subtracts Voltage_0 from every generated signal
// value, and DeltaV (Voltage_1 - Voltage_0) divides the YUV_to_RGB factors.
// Without USE_RAW the signal is built from RGB directly and DeltaV is 1.
#ifdef USE_RAW
#ifndef USE_LUT
// Same values as the "black" and "white" levels inside NTSCsignal().
static const float Voltage_0 = 0.518;
static const float Voltage_1 = 1.962;
static const float DeltaV = (Voltage_1-Voltage_0);
#else
// Levels for the lookup-texture variant of NTSCsignal().
static const float Voltage_0 = 0.15103768593097774;
static const float Voltage_1 = 1.;
static const float DeltaV = (Voltage_1-Voltage_0);
#endif

#else
static const float DeltaV = 1.;
#endif

// Chroma gain factor: 1 with the delay line, where monitor() combines two
// signal values (sig and sig1) per chroma sample, and 2 without it.
#ifdef USE_DELAY_LINE
static const float comb_line = 1.;
#else
static const float comb_line = 2.;
#endif

// Per-channel gains folded into YUV_to_RGB: contrast and saturation, divided
// by the filter width, the YUV scale factor, and the signal range.
static const double RGB_y = Contrast_static/Ywidth_static/DeltaV;
static const double RGB_u = comb_line*Contrast_static*Saturation_static/YUV_u/Uwidth_static/DeltaV;
static const double RGB_v = comb_line*Contrast_static*Saturation_static/YUV_v/Vwidth_static/DeltaV;

// monitor() multiplies the row vector (ysum, usum, vsum) by this matrix, so
// each row holds the R, G, B contribution of one of the three sums.
static const float3x3 YUV_to_RGB =
{
	float3(1., 1., 1.)*RGB_y,
	float3(0., -0.114/0.587, 1.)*RGB_u,
	float3(1., -0.299/0.587, 0.)*RGB_v
};

static const float pi = 3.1415926535897932384626433832795;

// Vertex stage: transforms the incoming position by modelViewProj and
// forwards the texture coordinate to the fragment stage unchanged.
void main_vertex
(
	float4 position : POSITION,
	out float4 oPosition : POSITION,
	uniform float4x4 modelViewProj,

	float2 tex : TEXCOORD,
	out float2 oTex : TEXCOORD
)
{
	// Texture coordinate is passed straight through.
	oTex = tex;
	// Clip-space position of the vertex.
	oPosition = mul(modelViewProj, position);
}

// Per-pass values handed to the fragment stage as the uniform "IN".
// The code in this file reads video_size, texture_size, output_size and
// frame_count; frame_direction and frame_rotation are not read here.
struct input
{
	float2 video_size;   // used together with texture_size to rescale coordinates
	float2 texture_size; // size of the sampled texture, in texels
	float2 output_size;  // used by the scanline/subpixel code and USE_SAMPLED
	float  frame_count;  // used by ANIMATE_PHASE and ANIMATE_SCANLINE
	float  frame_direction;
	float frame_rotation;
};

#ifdef USE_RAW

// Returns true while the square wave of the given color is in the first
// half (0..12) of its 24-step cycle at the given phase.
bool InColorPhase(int color, float phase)
{
	float wrapped = mod((color*2. + phase),24.);
	return wrapped < 12.;
}

#ifndef USE_LUT
// from nesdev wiki page NTSC_video
// Computes the composite signal voltage for one raw-palette pixel at the
// given phase.
//   pixel: raw palette value; r carries the color index (0..15), g the
//          level (0..3) and b the emphasis bits (0..7), each scaled to 0..1.
//   phase: position in the 24-step color cycle (see InColorPhase).
// Returns the high or low voltage of the pixel's square wave, multiplied by
// the attenuation factor while an active emphasis bit is in phase.
float NTSCsignal(vec3 pixel, float phase)
{
	// Voltage levels, relative to synch voltage
	static const float black=.518f, white=1.962f, attenuation=.746f,
		levels[8] = {.350f, .518f, .962f,1.550f,  // Signal low
				1.094f,1.506f,1.962f,1.962f}; // Signal high

	// Decode the NES color.
	int color = int(pixel.r*15);	// 0..15 "cccc"
	int level = int(pixel.g*3);	// 0..3  "ll"
	int emphasis = int(pixel.b*7+0.1);	// 0..7  "eee"
	if (color > 13) { level = 1; }	// For colors 14..15, level 1 is forced.

	// The square wave for this color alternates between these two voltages:
	float low = levels[0], high = levels[4];
	if (level == 1) { low = levels[1], high = levels[5]; }
	if (level == 2) { low = levels[2], high = levels[6]; }
	if (level == 3) { low = levels[3], high = levels[7]; }
	if(color == 0) { low = high; } // For color 0, only high level is emitted
	if(color > 12) { high = low; } // For colors 13..15, only low level is emitted


	// Generate the square wave
	// When de-emphasis bits are set, some parts of the signal are attenuated:
	// e.x is emphasis mod 2 (lowest bit); e.y is emphasis mod 4, so e.y-e.x
	// is non-zero when the second bit is set and emphasis-e.y when the third is.
	float2 e = mod(float2(emphasis), float2(2,4));
	float signal = InColorPhase(color,phase) ? high : low;

	// Each emphasis bit attenuates the signal during the phase of one
	// reference color (0, 4 and 8 respectively).
	if( ((e.x != 0) && InColorPhase(0,phase))
	||  ((e.y-e.x != 0) && InColorPhase(4,phase))
	||  ((emphasis-e.y != 0) && InColorPhase(8,phase)) )
		return signal * attenuation;
	else
		return signal;
}

#else

// Lookup texture holding precomputed signal values (bound by the preset).
uniform sampler2D nes_lut;
// Lookup-texture variant of NTSCsignal(): reads the signal value from the
// red channel of nes_lut instead of computing it.
//   pixel: raw palette value (r, g, b scaled to 0..1).
//   phase: position in the 24-step color cycle.
// The horizontal coordinate combines the three pixel components into one
// index over 512 columns (weights 15*8, 3*16*8 and 7, plus half a column to
// hit the texel centre); the vertical coordinate is the fraction of the
// 24-step cycle.
float NTSCsignal(vec3 pixel, float phase)
{
	return tex2D(nes_lut,float2(dot(pixel,float3(
		15.*(8.)/512.,
		3.*(16.*8.)/512.,
		7./512.)
		) + 0.5/(4.*16.*8.), fract(phase/24.))).r;
}

#endif

#endif

// Sine of a phase expressed in 24ths of a full turn (x = 24 is one period).
float sinn(float x)
{
	const float turn_fraction = (pi*2./24.);
	return sin(x*turn_fraction);
}

// Cosine of a phase expressed in 24ths of a full turn (x = 24 is one period).
float coss(float x)
{
	const float turn_fraction = (pi*2./24.);
	return cos(x*turn_fraction);
}

// Computes the color of the simulated PAL TV picture at one texture
// coordinate.
//   tex: source image (raw palette values with USE_RAW, RGB otherwise).
//   p:   texture coordinate of the output pixel.
//   IN:  per-pass sizes and frame counter.
// Returns the RGB color (sRGB-encoded when USE_COLORIMETRY is defined).
//
// The function walks Mwidth steps to the left of p, each one tenth of a
// source pixel wide, builds the composite signal at every step, and
// accumulates it into Y, U and V sums (U and V are demodulated with
// sinn/coss of the running phase alpha).  The sums are converted with
// YUV_to_RGB, then gamma, optional scanlines/subpixels and the optional
// colorimetric conversion are applied.
vec3 monitor(uniform sampler2D tex : TEXUNIT0, vec2 p, uniform input IN)
{
#ifdef PARAMETER_UNIFORM
	const float2 size = float2(SizeX,SizeY);
#endif
	// align vertical coord to center of texel
	vec2 uv = vec2(
#ifdef COMPENSATE_WIDTH
		p.x+p.x*(Ywidth/8.)/size.x,
#else
		p.x,
#endif
		(floor(p.y*IN.texture_size.y)+0.5)/IN.texture_size.y);
#ifdef USE_DELAY_LINE
	// offset to the sample used as the delayed line (one line up, shifted
	// horizontally by 14/10 of a pixel)
	vec2 sh = (IN.video_size/IN.texture_size/size)*vec2(14./10.,-1.0);
#endif
	// position in units of 1/10 pixel horizontally and lines vertically
	vec2 pc = uv*IN.texture_size/IN.video_size*size*vec2(10.,1.);
	// starting phase of the color carrier for this position
	float alpha = dot(floor(vec2(pc.x,pc.y)),vec2(2.,Phase_Y*2.));
	alpha += Phase_One*2.;
#ifdef ANIMATE_PHASE
	// frame_count is a whole number, so mod(frame_count,2) is exactly 0 or 1;
	// test with >= so that odd frames actually take the second phase.
	if (mod(IN.frame_count,2) >= 1.)
		alpha += (Phase_Two-Phase_One)*2.;
#endif

	// 1/size.x of screen in uv coords = IN.video_size.x/IN.texture_size.x/size.x;
	// then 1/10*size.x of screen:
	float ustep = IN.video_size.x/IN.texture_size.x/size.x/10.;

	// right edge of the visible picture in uv coords
	float border = IN.video_size.x/IN.texture_size.x;
	// phase step per iteration; its sign flips on alternate lines
	float ss = 2.0;
#ifdef SWAP_VSIGN
#define PAL_SWITCH(A) A < 1.
#else
#define PAL_SWITCH(A) A > 1.
#endif
	if (PAL_SWITCH(mod(uv.y*IN.texture_size.y/IN.video_size.y*size.y,2.0)))
	{
		// cos(pi-alpha) = -cos(alpha)
		// sin(pi-alpha) = sin(alpha)
		// pi - alpha
		// (12012 is 12 modulo 24, i.e. half of the 24-step cycle)
		alpha = -alpha+12012.0;
		ss = -2.0;
	}

	float ysum = 0., usum = 0., vsum = 0.;
	for (int i=0; i<Mwidth; ++i)
	{
		vec4 res = tex2D(tex, uv);
#ifdef USE_RAW
		float sig = NTSCsignal(res.xyz,HueShift*2.+alpha-res.g*ss*HueRotation)-Voltage_0;
		// outside of texture is 0,0,0 which is white instead of black
		if (uv.x <= 0.0 || uv.x >= border)
			sig = 0;
#ifdef USE_DELAY_LINE
		vec4 res1 = tex2D(tex, uv+sh);
		float sig1 = NTSCsignal(res1.xyz,HueShift*2.+12012.0-alpha+res.g*ss*HueRotation)-Voltage_0;
		if (uv.x + sh.x <= 0.0 || uv.x + sh.x >= border)
			sig1 = 0;
#endif

#else
		// RGB input: build the signal from luma plus chroma whose carrier is
		// reduced to its sign
		vec3 yuv = mul(RGB_to_YUV, res.xyz);
		const float a1 = alpha+(HueShift+2.5)*2.-yuv.x*ss*HueRotation;
		float sig = yuv.x+dot(yuv.yz,sign(vec2(sinn(a1),coss(a1))));
#ifdef USE_DELAY_LINE
		vec4 res1 = tex2D(tex, uv+sh);
		vec3 yuv1 = mul(RGB_to_YUV, res1.xyz);
		const float a2 = (HueShift+2.5)*2.+12012.0-alpha+yuv.x*ss*HueRotation;
		float sig1 = yuv1.x+dot(yuv1.yz,sign(vec2(sinn(a2),coss(a2))));
#endif

#endif
		// each channel only accumulates its own filter width
		if (i < Ywidth)
			ysum += sig;

#ifdef USE_DELAY_LINE
		if (i < Uwidth)
			usum += (sig+sig1)*sinn(alpha);
		if (i < Vwidth)
			vsum += (sig-sig1)*coss(alpha);
#else
		if (i < Uwidth)
			usum += sig*sinn(alpha);
		if (i < Vwidth)
			vsum += sig*coss(alpha);
#endif
		// step one tenth of a pixel to the left
		alpha -= ss;
		uv.x -= ustep;
	}

#ifdef PARAMETER_UNIFORM
	// dynamic settings: the constant matrix carries neutral factors, so
	// contrast, saturation and width normalisation are applied here
	ysum *= Contrast/Ywidth;
	usum *= Contrast*Saturation/Uwidth;
	vsum *= Contrast*Saturation/Vwidth;
#endif

	vec3 rgb = mul(vec3(ysum+Brightness*Ywidth_static,usum,vsum), YUV_to_RGB);
#if defined(USE_GAMMA) && !defined(USE_COLORIMETRY)
	vec3 rgb1 = saturate(rgb);
	rgb = pow(rgb1, Gamma/2.2);
#endif

#ifdef USE_COLORIMETRY
	vec3 rgb1 = saturate(rgb);
	rgb = pow(rgb1, Gamma);
#endif

#if (defined(USE_SUBPIXELS) || defined(USE_SCANLINES))
	vec2 q = (p*IN.texture_size/IN.video_size)*vec2(TV_Pixels*3,size.y*2);
#endif

#ifdef USE_SCANLINES
	// lines of one field covered by one output pixel; this is a vertical
	// quantity, so it must be measured against the output height
	float scanlines = size.y/IN.output_size.y;
	float top = mod(q.y-0.5*scanlines*2,2);
	float bottom = top+fract(scanlines)*2;
	// sw.x / sw.y: how much of the output pixel falls on each of the two
	// alternating scanlines
	vec2 sw = saturate(min(vec2(1.,2.),vec2(bottom))
		-max(vec2(0.,1.),vec2(top)))
		+saturate(min(vec2(3.,4.),vec2(bottom))
		-max(vec2(2.,3.),vec2(top)))
		+floor(scanlines);
#ifdef ANIMATE_SCANLINE
#define SCANLINE_MUL (mod(int(IN.frame_count),2.0)<1 \
		? sw.x*dark_scanline+sw.y \
		: sw.x+sw.y*dark_scanline)
#else
#define SCANLINE_MUL (sw.x*dark_scanline+sw.y)
#endif
	rgb = rgb*SCANLINE_MUL/(sw.x+sw.y);

/*
	//old stupid method
	float z =
#ifdef ANIMATE_SCANLINE
	mod(IN.frame_count,2.0)+
#endif
		0.5;

	if (abs(mod(q.y+0.5,2)-z)<0.5)
		rgb *= dark_scanline;
*/
#endif

	// size of pixel screen in texture coords:
	//float output_pixel_size = IN.video_size.x/(IN.output_size.x*IN.texture_size.x);

	// correctness check
	//if (mod(p.x*output_pixel_size,2.0) < 1.0)
	//	rgb = vec3(0);

#ifdef USE_SUBPIXELS
	// TV pixels covered by one output pixel horizontally
	float pixels = TV_Pixels/IN.output_size.x;
	float left = mod(q.x-0.5*pixels*3,3);
	float right = left+fract(pixels)*3.;
	// w: how much of the output pixel falls on the R, G and B subpixels
	vec3 w = saturate(min(vec3(1.,2.,3.),vec3(right))
		-max(vec3(0.,1.,2.),vec3(left)))
		+saturate(min(vec3(4.,5.,6.),vec3(right))
		-max(vec3(3.,4.,5.),vec3(left)))
		+floor(pixels);
	rgb = rgb*3.*w/(w.x+w.y+w.z);
#endif

#ifdef USE_COLORIMETRY
	vec3 xyz1 = mul(RGB_to_XYZ,rgb);
	vec3 srgb = saturate(mul(XYZ_to_sRGB,xyz1));
	// sRGB transfer curve: linear segment below 0.0031308, power segment above
	vec3 a1 = 12.92*srgb;
	vec3 a2 = 1.055*pow(srgb,1/2.4)-0.055;
	vec3 ssrgb = (srgb<vec3(0.0031308)?a1:a2);
	return ssrgb;
#else
	return rgb;
#endif
}

// Area-sampled variant of monitor(), used when USE_SAMPLED is defined.
//   tex:    source image.
//   p:      texture coordinate of the sample's corner.
//   sample: size of the sample in texture coordinates.
//   IN:     per-pass sizes and frame counter.
// Returns the blended color with alpha 1.
//
// The picture is evaluated on a grid of 4 cells per texel horizontally and
// 1 per texel vertically.  monitor() is evaluated at p and at its right,
// lower and lower-right neighbours on that grid, and the four results are
// blended by how much of the sample falls into the current cell (l) versus
// the next one (r) on each axis.
// Illustration by the original author: http://imgur.com/m8Z8trV
vec4 monitor_sample(uniform sampler2D tex : TEXUNIT0, vec2 p, vec2 sample, uniform input IN)
{
	// This local must not be called "size": with USE_CORE_SIZE (static
	// settings) "size" is a preprocessor macro, and a declaration using that
	// name would no longer compile.
	float2 tex_size = IN.texture_size;
	// distance to the neighbouring grid cell, in texture coordinates
	vec2 next = vec2(.25,1.)/tex_size;
	// position of p inside its grid cell, 0..1 on each axis
	vec2 f = fract(vec2(4.,1.)*tex_size*p);
	// sample size in grid cells
	sample *= vec2(4.,1.)*tex_size;
	vec2 l;
	vec2 r;
	if (f.x+sample.x < 1.)
	{
		// the sample stays inside the current cell horizontally
		l.x = f.x+sample.x;
		r.x = 0.;
	}
	else
	{
		// the sample spills into the next cell; its share is capped at one cell
		l.x = 1.-f.x;
		r.x = min(1.,f.x+sample.x-1.);
	}
	if (f.y+sample.y < 1.)
	{
		// the sample stays inside the current cell vertically
		l.y = f.y+sample.y;
		r.y = 0.;
	}
	else
	{
		l.y = 1.-f.y;
		r.y = min(1.,f.y+sample.y-1.);
	}
	// blend horizontally on both rows, then vertically between the rows
	vec3 top = mix(monitor(tex, p, IN), monitor(tex, p+vec2(next.x,0.), IN), r.x/(l.x+r.x));
	vec3 bottom = mix(monitor(tex, p+vec2(0.,next.y), IN), monitor(tex, p+next, IN), r.x/(l.x+r.x));
	return vec4(mix(top,bottom, r.y/(l.y+r.y)),1.0);
}

// Fragment entry point: returns the simulated TV color for this fragment
// with alpha 1.  With USE_SAMPLED the area-sampled path is used, with a
// sample size of one output pixel; otherwise monitor() is evaluated once.
float4 main_fragment(uniform sampler2D tex : TEXUNIT0, float2 coords : TEXCOORD0, uniform input IN) : COLOR
{
#ifdef USE_SAMPLED
	float4 color = monitor_sample(tex, coords, 1./IN.output_size, IN);
#else
	float4 color = vec4(monitor(tex, coords, IN), 1.);
#endif
	return color;
}

## pal-r57shell.cgp
# Stand-alone preset: a single pass running the PAL composite shader.
shaders = "1"
shader0 = "shaders/pal-r57shell.cg"
# Nearest-neighbour input sampling, no mipmaps.
filter_linear0 = "false"
wrap_mode0 = "clamp_to_border"
mipmap_input0 = "false"
alias0 = "ORIG_LINEARIZED"
float_framebuffer0 = "false"
srgb_framebuffer0 = "0"
# Output width follows the viewport; output height follows the source.
scale_type_x0 = "viewport"
scale_x0 = "1.000000"
scale_type_y0 = "source"
scale_y0 = "1.000000"
# Lookup texture sampled by the shader's USE_LUT code path (sampler nes_lut).
textures = "nes_lut"
nes_lut = "resources/nes_lut.png"
nes_lut_linear = "false"
nes_lut_wrap_mode = "repeat"
nes_lut_mipmap = "false"
	# IMPORTANT:
	# Shader passes need to know details about the image in the mask_texture LUT
	# files, so set the following constants in user-cgp-constants.h accordingly:
	# 1.) mask_triads_per_tile = (number of horizontal triads in mask texture LUT's)
	# 2.) mask_texture_small_size = (texture size of mask*texture_small LUT's)
	# 3.) mask_texture_large_size = (texture size of mask*texture_large LUT's)
	# 4.) mask_grille_avg_color = (avg. brightness of mask_grille_texture* LUT's, in [0, 1])
	# 5.) mask_slot_avg_color = (avg. brightness of mask_slot_texture* LUT's, in [0, 1])
	# 6.) mask_shadow_avg_color = (avg. brightness of mask_shadow_texture* LUT's, in [0, 1])
	# Shader passes also need to know certain scales set in this .cgp, but their
	# compilation model doesn't currently allow the .cgp file to tell them. Make
	# sure to set the following constants in user-cgp-constants.h accordingly too:
	# 1.) bloom_approx_scale_x = scale_x3
	# 2.) mask_resize_viewport_scale = float2(scale_x7, scale_y6)
	# Finally, shader passes need to know the value of geom_max_aspect_ratio used to
	# calculate scale_y6 (among other values):
	# 1.) geom_max_aspect_ratio = (geom_max_aspect_ratio used to calculate scale_y6)

	shaders = "13"

	shader0 = "../pal/shaders/pal-r57shell.cg"

	nes_lut = "../pal/resources/nes_lut.png"
	nes_lut_linear = "false"
	nes_lut_wrap_mode = "repeat"
	nes_lut_mipmap = "false"

	filter_linear0 = false
	scale_type_x0 = absolute
	scale_type_y0 = source
	scale_x0 = 1024
	scale_y0 = 1.0
	frame_count_mod0 = 2

	# Set an identifier, filename, and sampling traits for the phosphor mask texture.
	# Load an aperture grille, slot mask, and an EDP shadow mask, and load a small
	# non-mipmapped version and a large mipmapped version.
	# TODO: Test masks in other directories.
	textures = "nes_lut;mask_grille_texture_small;mask_grille_texture_large;mask_slot_texture_small;mask_slot_texture_large;mask_shadow_texture_small;mask_shadow_texture_large"
	mask_grille_texture_small = "shaders/crt-royale/TileableLinearApertureGrille15Wide8And5d5SpacingResizeTo64.png"
	mask_grille_texture_large = "shaders/crt-royale/TileableLinearApertureGrille15Wide8And5d5Spacing.png"
	mask_slot_texture_small = "shaders/crt-royale/TileableLinearSlotMaskTall15Wide9And4d5Horizontal9d14VerticalSpacingResizeTo64.png"
	mask_slot_texture_large = "shaders/crt-royale/TileableLinearSlotMaskTall15Wide9And4d5Horizontal9d14VerticalSpacing.png"
	mask_shadow_texture_small = "shaders/crt-royale/TileableLinearShadowMaskEDPResizeTo64.png"
	mask_shadow_texture_large = "shaders/crt-royale/TileableLinearShadowMaskEDP.png"
	mask_grille_texture_small_wrap_mode = "repeat"
	mask_grille_texture_large_wrap_mode = "repeat"
	mask_slot_texture_small_wrap_mode = "repeat"
	mask_slot_texture_large_wrap_mode = "repeat"
	mask_shadow_texture_small_wrap_mode = "repeat"
	mask_shadow_texture_large_wrap_mode = "repeat"
	mask_grille_texture_small_linear = "true"
	mask_grille_texture_large_linear = "true"
	mask_slot_texture_small_linear = "true"
	mask_slot_texture_large_linear = "true"
	mask_shadow_texture_small_linear = "true"
	mask_shadow_texture_large_linear = "true"
	mask_grille_texture_small_mipmap = "false" # Mipmapping causes artifacts with manually resized masks without tex2Dlod
	mask_grille_texture_large_mipmap = "true" # Essential for hardware-resized masks
	mask_slot_texture_small_mipmap = "false" # Mipmapping causes artifacts with manually resized masks without tex2Dlod
	mask_slot_texture_large_mipmap = "true" # Essential for hardware-resized masks
	mask_shadow_texture_small_mipmap = "false" # Mipmapping causes artifacts with manually resized masks without tex2Dlod
	mask_shadow_texture_large_mipmap = "true" # Essential for hardware-resized masks


	# Pass1: Linearize the input based on CRT gamma and bob interlaced fields.
	# (Bobbing ensures we can immediately blur without getting artifacts.)
	shader1 = "shaders/crt-royale/src/crt-royale-first-pass-linearize-crt-gamma-bob-fields.cg"
	alias1 = "ORIG_LINEARIZED"
	filter_linear1 = "false"
	scale_type1 = "source"
	scale1 = "1.0"
	srgb_framebuffer1 = "true"

	# Pass2: Resample interlaced (and misconverged) scanlines vertically.
	# Separating vertical/horizontal scanline sampling is faster: It lets us
	# consider more scanlines while calculating weights for fewer pixels, and
	# it reduces our samples from vertical*horizontal to vertical+horizontal.
	# This has to come right after ORIG_LINEARIZED, because there's no
	# "original_source" scale_type we can use later.
	shader2 = "shaders/crt-royale/src/crt-royale-scanlines-vertical-interlacing.cg"
	alias2 = "VERTICAL_SCANLINES"
	filter_linear2 = "true"
	scale_type_x2 = "source"
	scale_x2 = "1.0"
	scale_type_y2 = "viewport"
	scale_y2 = "1.0"
	#float_framebuffer2 = "true"
	srgb_framebuffer2 = "true"

	# Pass3: Do a small resize blur of ORIG_LINEARIZED at an absolute size, and
	# account for convergence offsets. We want to blur a predictable portion of the
	# screen to match the phosphor bloom, and absolute scale works best for
	# reliable results with a fixed-size bloom. Picking a scale is tricky:
	# a.) 400x300 is a good compromise for the "fake-bloom" version: It's low enough
	# to blur high-res/interlaced sources but high enough that resampling
	# doesn't smear low-res sources too much.
	# b.) 320x240 works well for the "real bloom" version: It's 1-1.5% faster, and
	# the only noticeable visual difference is a larger halation spread (which
	# may be a good thing for people who like to crank it up).
	# Note the 4:3 aspect ratio assumes the input has cropped geom_overscan (so it's
	# intended for an ~4:3 aspect ratio).
	shader3 = "shaders/crt-royale/src/crt-royale-bloom-approx.cg"
	alias3 = "BLOOM_APPROX"
	filter_linear3 = "true"
	scale_type3 = "absolute"
	scale_x3 = "320"
	scale_y3 = "240"
	srgb_framebuffer3 = "true"

	# Pass4: Vertically blur the input for halation and refractive diffusion.
	# Base this on BLOOM_APPROX: This blur should be small and fast, and blurring
	# a constant portion of the screen is probably physically correct if the
	# viewport resolution is proportional to the simulated CRT size.
	shader4 = "../blurs/blur9fast-vertical.cg"
	filter_linear4 = "true"
	scale_type4 = "source"
	scale4 = "1.0"
	srgb_framebuffer4 = "true"

	# Pass5: Horizontally blur the input for halation and refractive diffusion.
	# Note: Using a one-pass 9x9 blur is about 1% slower.
	shader5 = "../blurs/blur9fast-horizontal.cg"
	alias5 = "HALATION_BLUR"
	filter_linear5 = "true"
	scale_type5 = "source"
	scale5 = "1.0"
	srgb_framebuffer5 = "true"

	# Pass6: Lanczos-resize the phosphor mask vertically. Set the absolute
	# scale_x6 == mask_texture_small_size.x (see IMPORTANT above). Larger scales
	# will blur, and smaller scales could get nasty. The vertical size must be
	# based on the viewport size and calculated carefully to avoid artifacts later.
	# First calculate the minimum number of mask tiles we need to draw.
	# Since curvature is computed after the scanline masking pass:
	# num_resized_mask_tiles = 2.0;
	# If curvature were computed in the scanline masking pass (it's not):
	# max_mask_texel_border = ~3.0 * (1/3.0 + 4.0*sqrt(2.0) + 0.5 + 1.0);
	# max_mask_tile_border = max_mask_texel_border/
	# (min_resized_phosphor_triad_size * mask_triads_per_tile);
	# num_resized_mask_tiles = max(2.0, 1.0 + max_mask_tile_border * 2.0);
	# At typical values (triad_size >= 2.0, mask_triads_per_tile == 8):
	# num_resized_mask_tiles = ~3.8
	# Triad sizes are given in horizontal terms, so we need geom_max_aspect_ratio
	# to relate them to vertical resolution. The widest we expect is:
	# geom_max_aspect_ratio = 4.0/3.0 # Note: Shader passes need to know this!
	# The fewer triads we tile across the screen, the larger each triad will be as a
	# fraction of the viewport size, and the larger scale_y6 must be to draw a full
	# num_resized_mask_tiles. Therefore, we must decide the smallest number of
	# triads we'll guarantee can be displayed on screen. We'll set this according
	# to 3-pixel triads at 768p resolution (the lowest anyone's likely to use):
	# min_allowed_viewport_triads = 768.0*geom_max_aspect_ratio / 3.0 = 341.333333
	# Now calculate the viewport scale that ensures we can draw resized_mask_tiles:
	# min_scale_x = resized_mask_tiles * mask_triads_per_tile /
	# min_allowed_viewport_triads
	# scale_y6 = geom_max_aspect_ratio * min_scale_x
	# # Some code might depend on equal scales:
	# scale_x7 = scale_y6
	# Given our default geom_max_aspect_ratio and min_allowed_viewport_triads:
	# scale_y6 = 4.0/3.0 * 2.0/(341.33333 / 8.0) = 0.0625
	# IMPORTANT: The scales MUST be calculated in this way. If you wish to change
	# geom_max_aspect_ratio, update that constant in user-cgp-constants.h!
	shader6 = "shaders/crt-royale/src/crt-royale-mask-resize-vertical.cg"
	filter_linear6 = "true"
	scale_type_x6 = "absolute"
	scale_x6 = "64"
	scale_type_y6 = "viewport"
	scale_y6 = "0.0625" # Safe for >= 341.333 horizontal triads at viewport size
	#srgb_framebuffer6 = "false" # mask_texture is already assumed linear

	# Pass7: Lanczos-resize the phosphor mask horizontally. scale_x7 = scale_y6.
	# TODO: Check again if the shaders actually require equal scales.
	shader7 = "shaders/crt-royale/src/crt-royale-mask-resize-horizontal.cg"
	alias7 = "MASK_RESIZE"
	filter_linear7 = "false"
	scale_type_x7 = "viewport"
	scale_x7 = "0.0625"
	scale_type_y7 = "source"
	scale_y7 = "1.0"
	#srgb_framebuffer7 = "false" # mask_texture is already assumed linear

	# Pass8: Resample (misconverged) scanlines horizontally, apply halation, and
	# apply the phosphor mask.
	shader8 = "shaders/crt-royale/src/crt-royale-scanlines-horizontal-apply-mask.cg"
	alias8 = "MASKED_SCANLINES"
	filter_linear8 = "true" # This could just as easily be nearest neighbor.
	scale_type8 = "viewport"
	scale8 = "1.0"
	#float_framebuffer8 = "true"
	srgb_framebuffer8 = "true"

	# Pass 9: Compute a brightpass. This will require reading the final mask.
	shader9 = "shaders/crt-royale/src/crt-royale-brightpass.cg"
	alias9 = "BRIGHTPASS"
	filter_linear9 = "true" # This could just as easily be nearest neighbor.
	scale_type9 = "viewport"
	scale9 = "1.0"
	srgb_framebuffer9 = "true"

	# Pass 10: Blur the brightpass vertically
	shader10 = "shaders/crt-royale/src/crt-royale-bloom-vertical.cg"
	filter_linear10 = "true" # This could just as easily be nearest neighbor.
	scale_type10 = "source"
	scale10 = "1.0"
	srgb_framebuffer10 = "true"

	# Pass 11: Blur the brightpass horizontally and combine it with the dimpass:
	shader11 = "shaders/crt-royale/src/crt-royale-bloom-horizontal-reconstitute.cg"
	filter_linear11 = "true"
	scale_type11 = "source"
	scale11 = "1.0"
	srgb_framebuffer11 = "true"

	# Pass 12: Compute curvature/AA:
	shader12 = "shaders/crt-royale/src/crt-royale-geometry-aa-last-pass.cg"
	filter_linear12 = "true"
	scale_type12 = "viewport"
	mipmap_input12 = "true"
	texture_wrap_mode12 = "clamp_to_edge"
	// NES PAL composite signal simulation for RetroArch
	// shader by r57shell
	// thanks to feos & HardWareMan & NewRisingSun

	// also TV subpixels and scanlines

	// LICENSE: PUBLIC DOMAIN

	// NOTE: for nice TV subpixels and scanlines I recommend to
	// disable this features here and apply CRT-specialized shader.

	// Quality considerations

	// there are three main options:
	// USE_RAW (R), USE_DELAY_LINE (D), USE_COLORIMETRY (C)
	// here is table of quality in decreasing order:
	// RDC, RD, RC, DC, D, C

	// TWEAKS start

	// uncomment this to disable dynamic settings, and use static.
	// if you unable to compile shader with dynamic settings,
	// and you want to tune parameters in menu, then
	// try to reduce somewhere below Mwidth from 32 to lower,
	// or disable USE_DELAY_LINE or USE_RAW, or all at once.
	//#undef PARAMETER_UNIFORM

	// use delay line technique
	// without delay line technique, color would interleave
	// to avoid this, set HueRotation to zero.
	#define USE_DELAY_LINE

	// use this if you need to swap even/odd V sign.
	// sign of V changes each scanline
	// so if some scanline is positive, then next is negative
	// and if you want to match picture
	// to actual running PAL NES on TV
	// you may want to have this option, to change signs
	// if they don't match
	//#define SWAP_VSIGN

	// phase shift from frame to frame as NTSC NES does.
	// but PAL NES doesn't
	//#define ANIMATE_PHASE

	// rough simulation of scanlines
	// better if you use additional shader instead
	// if you still use it, make sure that SizeY
	// is at least twice lower than output height
	//#define USE_SCANLINES

	// this option changes active visible fields.
	// this is not how actual NES works
	// it does not alter fields.
	//#define ANIMATE_SCANLINE

	// simulate CRT TV subpixels
	// better if you use CRT-specialized shader instead
	//#define USE_SUBPIXELS

	// to change gamma of virtual TV from 2.2 to something else
	//#define USE_GAMMA

	// use core size. for NES use this, for other cores turn off
	// for other cores use "size" tweak.
	//#define USE_CORE_SIZE

	// use raw palette, turn it on if you
	// have nestopia and having using raw palette
	//#define USE_RAW

	// use lookup texture, faster but less accuracy
	// it's working only if USE_RAW enabled.
	//#define USE_LUT

	// compensate filter width
	// it will make width of picture shorter
	// to make picture right border visible
	#define COMPENSATE_WIDTH

	// use sampled version. it's much more slower version of shader.
	// because it is computing x4 more values. NOT RECOMMENDED.
	//#define USE_SAMPLED

	#ifndef PARAMETER_UNIFORM

	// NTSC standard gamma = 2.2
	// PAL standard gamma = 2.8
	// according to many sources, very unlikely gamma of TV is 2.8
	// most likely gamma of PAL TV is in range 2.4-2.5
	static const float Gamma_static = 2.5; // gamma of virtual TV

	static const float Brightness_static = 0.0;
	static const float Contrast_static = 1.0;
	static const float Saturation_static = 1.0;

	static const int
	Ywidth_static = 12,
	Uwidth_static = 23,
	Vwidth_static = 23;

	// correct one is -2.5
	// works only with USE_RAW
	static const float HueShift = -2.5;

	// rotation of hue due to luma level.
	static const float HueRotation = 2.;

	// touch this only if you know what you doing
	static const float Phase_Y = 2.; // mod(341*10,12)
	static const float Phase_One = 0.; // alternating phases.
	static const float Phase_Two = 8.;

	// screen size, scanlines = y*2; y one field, and y other field.
	static const int SizeX = 256;
	static const int SizeY = 240;

	// count of pixels of virtual TV.
	// value close to 1000 produce small artifacts
	static const int TV_Pixels = 400;

	static const float dark_scanline = 0.5; // half

	#endif

	// this is using following matrixes.
	// it provides more scientific approach
	// by conversion into linear XYZ space
	// and back to sRGB.
	// it's using Gamma setting too.
	// define USE_GAMMA is not required.
	#define USE_COLORIMETRY

	static const float3x3 RGB_to_XYZ =
	{
	0.4306190, 0.3415419, 0.1783091,
	0.2220379, 0.7066384, 0.0713236,
	0.0201853, 0.1295504, 0.9390944
	};

	static const float3x3 XYZ_to_sRGB =
	{
	3.2406, -1.5372, -0.4986,
	-0.9689, 1.8758, 0.0415,
	0.0557, -0.2040, 1.0570
	};

	// TWEAKS end

	#ifdef PARAMETER_UNIFORM

	#pragma parameter Gamma "PAL Gamma" 2.5 0.0 10.0 0.03125
	#pragma parameter Brightness "PAL Brightness" 0.0 -1.0 2.0 0.03125
	#pragma parameter Contrast "PAL Contrast" 1.0 -1.0 2.0 0.03125
	#pragma parameter Saturation "PAL Saturation" 1.0 -1.0 2.0 0.03125
	#pragma parameter HueShift "PAL Hue Shift" -2.5 -6.0 6.0 0.015625
	#pragma parameter HueRotation "PAL Hue Rotation" 2.0 -5.0 5.0 0.015625
	#pragma parameter Ywidth "PAL Y Width" 12.0 1.0 32.0 1.0
	#pragma parameter Uwidth "PAL U Width" 23.0 1.0 32.0 1.0
	#pragma parameter Vwidth "PAL V Width" 23.0 1.0 32.0 1.0
	#pragma parameter SizeX "Active Width" 256.0 1.0 4096.0 1.0
	#pragma parameter SizeY "Active Height" 240.0 1.0 4096.0 1.0
	#pragma parameter TV_Pixels "PAL TV Pixels" 200.0 1.0 2400.0 1.0
	#pragma parameter dark_scanline "PAL Scanline" 0.5 0.0 1.0 0.025
	#pragma parameter Phase_Y "PAL Phase Y" 2.0 0.0 12.0 0.025
	#pragma parameter Phase_One "PAL Phase One" 0.0 0.0 12.0 0.025
	#pragma parameter Phase_Two "PAL Phase Two" 8.0 0.0 12.0 0.025

	uniform float Gamma;
	uniform float Brightness;
	uniform float Contrast;
	uniform float Saturation;
	uniform float HueShift;
	uniform float HueRotation;
	uniform int Ywidth;
	uniform int Uwidth;
	uniform int Vwidth;
	uniform int TV_Pixels;
	uniform int SizeX;
	uniform int SizeY;
	uniform float dark_scanline;
	uniform float Phase_Y;
	uniform float Phase_One;
	uniform float Phase_Two;

	static const float Mwidth = 24;

	static const int Ywidth_static = 1;
	static const int Uwidth_static = 1;
	static const int Vwidth_static = 1;

	static const float Contrast_static = 1.;
	static const float Saturation_static = 1.;

	#else

	#define Brightness Brightness_static
	#define Gamma Gamma_static

	#define Ywidth Ywidth_static
	#define Uwidth Uwidth_static
	#define Vwidth Vwidth_static

	static const int Mwidth = max(float(Ywidth), max(float(Uwidth), float(Vwidth)));

	#ifdef USE_CORE_SIZE
	// just use core output size.
	#define size (IN.video_size.xy)
	#else
	static const float2 size = float2(SizeX,SizeY);
	#endif

	#endif

	static const float YUV_u = 0.492;
	static const float YUV_v = 0.877;

	static const float3x3 RGB_to_YUV =
	{
	float3( 0.299, 0.587, 0.114), //Y
	float3(-0.299,-0.587, 0.886)*YUV_u, //B-Y
	float3( 0.701,-0.587,-0.114)*YUV_v //R-Y
	};

	#ifdef USE_RAW
	#ifndef USE_LUT
	static const float Voltage_0 = 0.518;
	static const float Voltage_1 = 1.962;
	static const float DeltaV = (Voltage_1-Voltage_0);
	#else
	static const float Voltage_0 = 0.15103768593097774;
	static const float Voltage_1 = 1.;
	static const float DeltaV = (Voltage_1-Voltage_0);
	#endif

	#else
	static const float DeltaV = 1.;
	#endif

	#ifdef USE_DELAY_LINE
	static const float comb_line = 1.;
	#else
	static const float comb_line = 2.;
	#endif

	// Gains applied to the accumulated Y/U/V sums (rows of YUV_to_RGB below).
	// Each one divides by the static filter width and by DeltaV; the chroma
	// gains additionally scale by comb_line and undo the YUV_u / YUV_v factors.
	static const double RGB_y = Contrast_static/Ywidth_static/DeltaV;
	static const double RGB_u = comb_line*Contrast_static*Saturation_static/YUV_u/Uwidth_static/DeltaV;
	static const double RGB_v = comb_line*Contrast_static*Saturation_static/YUV_v/Vwidth_static/DeltaV;

	static const float3x3 YUV_to_RGB =
	{
	float3(1., 1., 1.)*RGB_y,
	float3(0., -0.114/0.587, 1.)*RGB_u,
	float3(1., -0.299/0.587, 0.)*RGB_v
	};

	static const float pi = 3.1415926535897932384626433832795;

	// Vertex entry point: forwards the texture coordinate unchanged and
	// transforms the vertex position by modelViewProj.
	void main_vertex
	(
		float4 position : POSITION,
		out float4 oPosition : POSITION,
		uniform float4x4 modelViewProj,

		float2 tex : TEXCOORD,
		out float2 oTex : TEXCOORD
	)
	{
		oTex = tex;
		oPosition = mul(modelViewProj, position);
	}

	// Per-pass inputs handed to the fragment functions as the uniform `IN`.
	struct input
	{
	float2 video_size; // divided by texture_size in monitor() to convert between uv and picture coordinates
	float2 texture_size; // used in monitor() to snap p.y to a texel centre
	float2 output_size; // main_fragment() passes 1/output_size as the sample size
	float frame_count; // tested modulo 2 by ANIMATE_PHASE / ANIMATE_SCANLINE
	float frame_direction; // not referenced by the code in this file section
	float frame_rotation; // not referenced by the code in this file section
	};

	#ifdef USE_RAW

	// True while (color*2 + phase), wrapped to a 24-step cycle, lies in the
	// first 12 steps of that cycle.
	bool InColorPhase(int color, float phase)
	{
		float pos = mod((color*2. + phase),24.);
		return pos < 12.;
	}

	#ifndef USE_LUT
	// from nesdev wiki page NTSC_video
	// Returns the signal voltage for a raw-palette pixel at the given phase.
	//   pixel - raw palette value: r carries the colour index (0..15),
	//           g the level (0..3), b the emphasis bits (0..7)
	//   phase - position in the 24-step cycle used by InColorPhase()
	float NTSCsignal(vec3 pixel, float phase)
	{
		// Voltage levels, relative to synch voltage
		static const float black=.518f, white=1.962f, attenuation=.746f,
		levels[8] = {.350f, .518f, .962f,1.550f, // Signal low
		1.094f,1.506f,1.962f,1.962f}; // Signal high

		// Decode the NES color.
		int color = int(pixel.r*15); // 0..15 "cccc"
		int level = int(pixel.g*3); // 0..3 "ll"
		int emphasis = int(pixel.b*7+0.1); // 0..7 "eee"
		if (color > 13) { level = 1; } // For colors 14..15, level 1 is forced.

		// The square wave for this color alternates between these two voltages:
		float low = levels[0], high = levels[4];
		if (level == 1) { low = levels[1]; high = levels[5]; }
		if (level == 2) { low = levels[2]; high = levels[6]; }
		if (level == 3) { low = levels[3]; high = levels[7]; }
		if(color == 0) { low = high; } // For color 0, only high level is emitted
		if(color > 12) { high = low; } // For colors 13..15, only low level is emitted


		// Generate the square wave
		// When de-emphasis bits are set, some parts of the signal are attenuated:
		// e.x = emphasis mod 2, e.y = emphasis mod 4, so the three tests below
		// pick out emphasis bits 0, 1 and 2 respectively.
		float2 e = mod(float2(emphasis), float2(2,4));
		float signal = InColorPhase(color,phase) ? high : low;

		if( ((e.x != 0) && InColorPhase(0,phase))
		|| ((e.y-e.x != 0) && InColorPhase(4,phase))
		|| ((emphasis-e.y != 0) && InColorPhase(8,phase)) )
			return signal * attenuation;
		else
			return signal;
	}

	#else

	uniform sampler2D nes_lut;
	// Lookup-texture variant: reads the signal level from nes_lut.
	// The horizontal coordinate packs the pixel's r/g/b fields into one
	// index over 512 columns (4*16*8) and adds half a column (0.5/512) to
	// hit the texel centre; the vertical coordinate is the phase as a
	// fraction of the 24-step cycle.
	float NTSCsignal(vec3 pixel, float phase)
	{
		return tex2D(nes_lut,float2(dot(pixel,float3(
			15.*(8.)/512.,
			3.*(16.*8.)/512.,
			7./512.)
			) + 0.5/(4.*16.*8.), fract(phase/24.))).r;
	}

	#endif

	#endif

	// Sine of x, where x is measured in 1/24ths of a full turn.
	float sinn(float x)
	{
		return sin(/*mod(x,24)*/x*(pi*2./24.));
	}

	// Cosine of x, where x is measured in 1/24ths of a full turn.
	float coss(float x)
	{
		return cos(/*mod(x,24)*/x*(pi*2./24.));
	}

	// Computes the colour of the virtual TV at texture coordinate p.
	// Starting at p and stepping left by ustep, it accumulates Mwidth taps of
	// the encoded signal into Y, U and V sums (U/V weighted by sinn/coss of
	// the running phase alpha), converts the sums to RGB with YUV_to_RGB and
	// then applies the optional gamma, scanline, subpixel and colorimetry
	// stages selected by the USE_* defines.
	vec3 monitor(uniform sampler2D tex : TEXUNIT0, vec2 p, uniform input IN)
	{
	#ifdef PARAMETER_UNIFORM
		const float2 size = float2(SizeX,SizeY);
	#endif
		// align vertical coord to center of texel
		vec2 uv = vec2(
	#ifdef COMPENSATE_WIDTH
			p.x+p.x*(Ywidth/8.)/size.x,
	#else
			p.x,
	#endif
			(floor(p.y*IN.texture_size.y)+0.5)/IN.texture_size.y);
	#ifdef USE_DELAY_LINE
		// offset to the second tap used by the delay line
		vec2 sh = (IN.video_size/IN.texture_size/size)*vec2(14./10.,-1.0);
	#endif
		// position in units of 1/10 pixel horizontally, scanlines vertically
		vec2 pc = uv*IN.texture_size/IN.video_size*size*vec2(10.,1.);
		float alpha = dot(floor(vec2(pc.x,pc.y)),vec2(2.,Phase_Y*2.));
		alpha += Phase_One*2.;
	#ifdef ANIMATE_PHASE
		// frame_count modulo 2 is 0 or 1, so test for the odd frame with >=
		// (a strict "> 1." comparison could never be true).
		if (mod(IN.frame_count,2) >= 1.)
			alpha += (Phase_Two-Phase_One)*2.;
	#endif

		// 1/size.x of screen in uv coords = IN.video_size.x/IN.texture_size.x/size.x;
		// then 1/10*size.x of screen:
		float ustep = IN.video_size.x/IN.texture_size.x/size.x/10.;

		float border = IN.video_size.x/IN.texture_size.x;
		float ss = 2.0;
	#ifdef SWAP_VSIGN
	#define PAL_SWITCH(A) A < 1.
	#else
	#define PAL_SWITCH(A) A > 1.
	#endif
		if (PAL_SWITCH(mod(uv.y*IN.texture_size.y/IN.video_size.y*size.y,2.0)))
		{
			// cos(pi-alpha) = -cos(alpha)
			// sin(pi-alpha) = sin(alpha)
			// pi - alpha
			// (12012 = 500*24 + 12, i.e. half a turn in 24-step phase units)
			alpha = -alpha+12012.0;
			ss = -2.0;
		}

		float ysum = 0., usum = 0., vsum = 0.;
		for (int i=0; i<Mwidth; ++i)
		{
			vec4 res = tex2D(tex, uv);
	#ifdef USE_RAW
			float sig = NTSCsignal(res.xyz,HueShift*2.+alpha-res.g*ss*HueRotation)-Voltage_0;
			// outside of texture is 0,0,0 which is white instead of black
			if (uv.x <= 0.0 || uv.x >= border)
				sig = 0;
	#ifdef USE_DELAY_LINE
			vec4 res1 = tex2D(tex, uv+sh);
			float sig1 = NTSCsignal(res1.xyz,HueShift*2.+12012.0-alpha+res.g*ss*HueRotation)-Voltage_0;
			if (uv.x + sh.x <= 0.0 || uv.x + sh.x >= border)
				sig1 = 0;
	#endif

	#else
			vec3 yuv = mul(RGB_to_YUV, res.xyz);
			const float a1 = alpha+(HueShift+2.5)*2.-yuv.x*ss*HueRotation;
			float sig = yuv.x+dot(yuv.yz,sign(vec2(sinn(a1),coss(a1))));
	#ifdef USE_DELAY_LINE
			vec4 res1 = tex2D(tex, uv+sh);
			vec3 yuv1 = mul(RGB_to_YUV, res1.xyz);
			const float a2 = (HueShift+2.5)*2.+12012.0-alpha+yuv.x*ss*HueRotation;
			float sig1 = yuv1.x+dot(yuv1.yz,sign(vec2(sinn(a2),coss(a2))));
	#endif

	#endif
			if (i < Ywidth)
				ysum += sig;

	#ifdef USE_DELAY_LINE
			if (i < Uwidth)
				usum += (sig+sig1)*sinn(alpha);
			if (i < Vwidth)
				vsum += (sig-sig1)*coss(alpha);
	#else
			if (i < Uwidth)
				usum += sig*sinn(alpha);
			if (i < Vwidth)
				vsum += sig*coss(alpha);
	#endif
			alpha -= ss;
			uv.x -= ustep;
		}

	#ifdef PARAMETER_UNIFORM
		// with runtime parameters the static gains are 1, so normalise here
		ysum *= Contrast/Ywidth;
		usum *= Contrast*Saturation/Uwidth;
		vsum *= Contrast*Saturation/Vwidth;
	#endif

		vec3 rgb = mul(vec3(ysum+Brightness*Ywidth_static,usum,vsum), YUV_to_RGB);
	#if defined(USE_GAMMA) && !defined(USE_COLORIMETRY)
		vec3 rgb1 = saturate(rgb);
		rgb = pow(rgb1, Gamma/2.2);
	#endif

	#ifdef USE_COLORIMETRY
		vec3 rgb1 = saturate(rgb);
		rgb = pow(rgb1, Gamma);
	#endif

	#if (defined(USE_SUBPIXELS) || defined(USE_SCANLINES))
		vec2 q = (p*IN.texture_size/IN.video_size)*vec2(TV_Pixels*3,size.y*2);
	#endif

	#ifdef USE_SCANLINES
		float scanlines = size.y/IN.output_size.x;
		float top = mod(q.y-0.5*scanlines*2,2);
		float bottom = top+fract(scanlines)*2;
		vec2 sw = saturate(min(vec2(1.,2.),vec2(bottom))
			-max(vec2(0.,1.),vec2(top)))
			+saturate(min(vec2(3.,4.),vec2(bottom))
			-max(vec2(2.,3.),vec2(top)))
			+floor(scanlines);
	#ifdef ANIMATE_SCANLINE
	#define SCANLINE_MUL (mod(int(IN.frame_count),2.0)<1 \
		? sw.x*dark_scanline+sw.y \
		: sw.x+sw.y*dark_scanline)
	#else
	#define SCANLINE_MUL (sw.x*dark_scanline+sw.y)
	#endif
		rgb = rgb*SCANLINE_MUL/(sw.x+sw.y);

	/*
		//old stupid method
		float z =
	#ifdef ANIMATE_SCANLINE
		mod(IN.frame_count,2.0)+
	#endif
			0.5;

		if (abs(mod(q.y+0.5,2)-z)<0.5)
			rgb *= dark_scanline;
	*/
	#endif

		// size of pixel screen in texture coords:
		//float output_pixel_size = IN.video_size.x/(IN.output_size.x*IN.texture_size.x);

		// correctness check
		//if (mod(p.x*output_pixel_size,2.0) < 1.0)
		//	rgb = vec3(0);

	#ifdef USE_SUBPIXELS
		float pixels = TV_Pixels/IN.output_size.x;
		float left = mod(q.x-0.5*pixels*3,3);
		float right = left+fract(pixels)*3.;
		vec3 w = saturate(min(vec3(1.,2.,3.),vec3(right))
			-max(vec3(0.,1.,2.),vec3(left)))
			+saturate(min(vec3(4.,5.,6.),vec3(right))
			-max(vec3(3.,4.,5.),vec3(left)))
			+floor(pixels);
		rgb = rgb*3.*w/(w.x+w.y+w.z);
	#endif

	#ifdef USE_COLORIMETRY
		// linear RGB -> XYZ -> linear sRGB, then the piecewise sRGB encoding
		vec3 xyz1 = mul(RGB_to_XYZ,rgb);
		vec3 srgb = saturate(mul(XYZ_to_sRGB,xyz1));
		vec3 a1 = 12.92*srgb;
		vec3 a2 = 1.055*pow(srgb,1/2.4)-0.055;
		vec3 ssrgb = (srgb<vec3(0.0031308)?a1:a2);
		return ssrgb;
	#else
		return rgb;
	#endif
	}

	// Sampled variant of monitor(): evaluates monitor() at four neighbouring
	// positions and blends them by how much of the sample falls on each one.
	//   p      - left corner of the sample
	//   sample - size of the sample
	vec4 monitor_sample(uniform sampler2D tex : TEXUNIT0, vec2 p, vec2 sample, uniform input IN)
	{
		// linear interpolation was...
		// now other thing.
		// http://imgur.com/m8Z8trV
		// AT LAST IT WORKS!!!!
		// going to check in retroarch...
		float2 size = IN.texture_size;
		// grid of four cells per texel horizontally, one vertically
		vec2 next = vec2(.25,1.)/size;
		vec2 f = fract(vec2(4.,1.)*size*p);
		sample *= vec2(4.,1.)*size;
		// l: part of the sample in the current cell, r: part in the next cell
		vec2 l;
		vec2 r;
		if (f.x+sample.x < 1.)
		{
			l.x = f.x+sample.x;
			r.x = 0.;
		}
		else
		{
			l.x = 1.-f.x;
			r.x = min(1.,f.x+sample.x-1.);
		}
		if (f.y+sample.y < 1.)
		{
			l.y = f.y+sample.y;
			r.y = 0.;
		}
		else
		{
			l.y = 1.-f.y;
			r.y = min(1.,f.y+sample.y-1.);
		}
		vec3 top = mix(monitor(tex, p, IN), monitor(tex, p+vec2(next.x,0.), IN), r.x/(l.x+r.x));
		vec3 bottom = mix(monitor(tex, p+vec2(0.,next.y), IN), monitor(tex, p+next, IN), r.x/(l.x+r.x));
		return vec4(mix(top,bottom, r.y/(l.y+r.y)),1.0);
	}

	// Fragment entry point. With USE_SAMPLED it goes through monitor_sample()
	// using a sample size of 1/output_size; otherwise it calls monitor() once
	// and outputs the result with alpha 1.
	float4 main_fragment(uniform sampler2D tex : TEXUNIT0, float2 coords : TEXCOORD0, uniform input IN) : COLOR
	{
		float4 color;
	#ifdef USE_SAMPLED
		color = monitor_sample(tex, coords, 1./IN.output_size, IN);
	#else
		color = vec4(monitor(tex, coords, IN), 1.);
	#endif
		return color;
	}
	# Stand-alone preset: a single pass running the PAL composite shader.
	shaders = "1"
	shader0 = "shaders/pal-r57shell.cg"
	# Nearest-neighbour input sampling, no mipmaps.
	filter_linear0 = "false"
	wrap_mode0 = "clamp_to_border"
	mipmap_input0 = "false"
	alias0 = "ORIG_LINEARIZED"
	float_framebuffer0 = "false"
	srgb_framebuffer0 = "0"
	# Output width follows the viewport; output height follows the source.
	scale_type_x0 = "viewport"
	scale_x0 = "1.000000"
	scale_type_y0 = "source"
	scale_y0 = "1.000000"
	# Lookup texture sampled by the shader's USE_LUT code path (sampler nes_lut).
	textures = "nes_lut"
	nes_lut = "resources/nes_lut.png"
	nes_lut_linear = "false"
	nes_lut_wrap_mode = "repeat"
	nes_lut_mipmap = "false"