Shader "WorldNormalFromDepthTexture" | |
{ | |
Properties { | |
[KeywordEnum(3 Tap, 4 Tap, Improved, Accurate)] _ReconstructionMethod ("Normal Reconstruction Method", Float) = 0 | |
} | |
SubShader | |
{ | |
Tags { "RenderType"="Transparent" "Queue"="Transparent" } | |
LOD 100 | |
Pass | |
{ | |
Cull Off | |
ZWrite Off | |
CGPROGRAM | |
#pragma vertex vert | |
#pragma fragment frag | |
#pragma shader_feature _RECONSTRUCTIONMETHOD_3_TAP _RECONSTRUCTIONMETHOD_4_TAP _RECONSTRUCTIONMETHOD_IMPROVED _RECONSTRUCTIONMETHOD_ACCURATE | |
#include "UnityCG.cginc" | |
struct appdata | |
{ | |
float4 vertex : POSITION; | |
}; | |
struct v2f | |
{ | |
float4 pos : SV_POSITION; | |
}; | |
v2f vert (appdata v) | |
{ | |
v2f o; | |
o.pos = UnityObjectToClipPos(v.vertex); | |
return o; | |
} | |
UNITY_DECLARE_DEPTH_TEXTURE(_CameraDepthTexture); | |
float4 _CameraDepthTexture_TexelSize; | |
float getRawDepth(float2 uv) { return SAMPLE_DEPTH_TEXTURE_LOD(_CameraDepthTexture, float4(uv, 0.0, 0.0)); } | |
// inspired by keijiro's depth inverse projection | |
// https://github.com/keijiro/DepthInverseProjection | |
// constructs view space ray at the far clip plane from the screen uv | |
// then multiplies that ray by the linear 01 depth | |
float3 viewSpacePosAtScreenUV(float2 uv) | |
{ | |
float3 viewSpaceRay = mul(unity_CameraInvProjection, float4(uv * 2.0 - 1.0, 1.0, 1.0) * _ProjectionParams.z); | |
float rawDepth = getRawDepth(uv); | |
return viewSpaceRay * Linear01Depth(rawDepth); | |
} | |
float3 viewSpacePosAtPixelPosition(float2 vpos) | |
{ | |
float2 uv = vpos * _CameraDepthTexture_TexelSize.xy; | |
return viewSpacePosAtScreenUV(uv); | |
} | |
#if defined(_RECONSTRUCTIONMETHOD_3_TAP) | |
// naive 3 tap normal reconstruction | |
// accurate mid triangle normals, slightly diagonally offset on edges | |
// artifacts on depth disparities | |
// unity's compiled fragment shader stats: 41 math, 3 tex | |
half3 viewNormalAtPixelPosition(float2 vpos) | |
{ | |
// get current pixel's view space position | |
half3 viewSpacePos_c = viewSpacePosAtPixelPosition(vpos + float2( 0.0, 0.0)); | |
// get view space position at 1 pixel offsets in each major direction | |
half3 viewSpacePos_r = viewSpacePosAtPixelPosition(vpos + float2( 1.0, 0.0)); | |
half3 viewSpacePos_u = viewSpacePosAtPixelPosition(vpos + float2( 0.0, 1.0)); | |
// get the difference between the current and each offset position | |
half3 hDeriv = viewSpacePos_r - viewSpacePos_c; | |
half3 vDeriv = viewSpacePos_u - viewSpacePos_c; | |
// get view space normal from the cross product of the diffs | |
half3 viewNormal = normalize(cross(hDeriv, vDeriv)); | |
return viewNormal; | |
} | |
#elif defined(_RECONSTRUCTIONMETHOD_4_TAP) | |
// naive 4 tap normal reconstruction | |
// accurate mid triangle normals compared to 3 tap | |
// no diagonal offset on edges, but sharp details are softened | |
// worse artifacts on depth disparities than 3 tap | |
// probably little reason to use this over the 3 tap approach | |
// unity's compiled fragment shader stats: 50 math, 4 tex | |
half3 viewNormalAtPixelPosition(float2 vpos) | |
{ | |
// get view space position at 1 pixel offsets in each major direction | |
half3 viewSpacePos_l = viewSpacePosAtPixelPosition(vpos + float2(-1.0, 0.0)); | |
half3 viewSpacePos_r = viewSpacePosAtPixelPosition(vpos + float2( 1.0, 0.0)); | |
half3 viewSpacePos_d = viewSpacePosAtPixelPosition(vpos + float2( 0.0,-1.0)); | |
half3 viewSpacePos_u = viewSpacePosAtPixelPosition(vpos + float2( 0.0, 1.0)); | |
// get the difference between the current and each offset position | |
half3 hDeriv = viewSpacePos_r - viewSpacePos_l; | |
half3 vDeriv = viewSpacePos_u - viewSpacePos_d; | |
// get view space normal from the cross product of the diffs | |
half3 viewNormal = normalize(cross(hDeriv, vDeriv)); | |
return viewNormal; | |
} | |
#elif defined(_RECONSTRUCTIONMETHOD_IMPROVED) | |
// base on János Turánszki's Improved Normal Reconstruction | |
// https://wickedengine.net/2019/09/22/improved-normal-reconstruction-from-depth/ | |
// this is a minor optimization over the original, using only 2 comparisons instead of 8 | |
// at the cost of two additional vector subtractions | |
// sharpness of 3 tap with better handling of depth disparities | |
// worse artifacts on convex edges than either 3 tap or 4 tap | |
// unity's compiled fragment shader stats: 62 math, 5 tex | |
half3 viewNormalAtPixelPosition(float2 vpos) | |
{ | |
// get current pixel's view space position | |
half3 viewSpacePos_c = viewSpacePosAtPixelPosition(vpos + float2( 0.0, 0.0)); | |
// get view space position at 1 pixel offsets in each major direction | |
half3 viewSpacePos_l = viewSpacePosAtPixelPosition(vpos + float2(-1.0, 0.0)); | |
half3 viewSpacePos_r = viewSpacePosAtPixelPosition(vpos + float2( 1.0, 0.0)); | |
half3 viewSpacePos_d = viewSpacePosAtPixelPosition(vpos + float2( 0.0,-1.0)); | |
half3 viewSpacePos_u = viewSpacePosAtPixelPosition(vpos + float2( 0.0, 1.0)); | |
// get the difference between the current and each offset position | |
half3 l = viewSpacePos_c - viewSpacePos_l; | |
half3 r = viewSpacePos_r - viewSpacePos_c; | |
half3 d = viewSpacePos_c - viewSpacePos_d; | |
half3 u = viewSpacePos_u - viewSpacePos_c; | |
// pick horizontal and vertical diff with the smallest z difference | |
half3 hDeriv = abs(l.z) < abs(r.z) ? l : r; | |
half3 vDeriv = abs(d.z) < abs(u.z) ? d : u; | |
// get view space normal from the cross product of the two smallest offsets | |
half3 viewNormal = normalize(cross(hDeriv, vDeriv)); | |
return viewNormal; | |
} | |
#elif defined(_RECONSTRUCTIONMETHOD_ACCURATE) | |
// based on Yuwen Wu's Accurate Normal Reconstruction | |
// https://atyuwen.github.io/posts/normal-reconstruction/ | |
// basically as accurate as you can get! | |
// no artifacts on depth disparities | |
// no artifacts on edges | |
// artifacts on triangles that are <3 pixels across | |
// unity's compiled fragment shader stats: 66 math, 9 tex | |
half3 viewNormalAtPixelPosition(float2 vpos) | |
{ | |
// screen uv from vpos | |
float2 uv = vpos * _CameraDepthTexture_TexelSize.xy; | |
// current pixel's depth | |
float c = getRawDepth(uv); | |
// get current pixel's view space position | |
half3 viewSpacePos_c = viewSpacePosAtScreenUV(uv); | |
// get view space position at 1 pixel offsets in each major direction | |
half3 viewSpacePos_l = viewSpacePosAtScreenUV(uv + float2(-1.0, 0.0) * _CameraDepthTexture_TexelSize.xy); | |
half3 viewSpacePos_r = viewSpacePosAtScreenUV(uv + float2( 1.0, 0.0) * _CameraDepthTexture_TexelSize.xy); | |
half3 viewSpacePos_d = viewSpacePosAtScreenUV(uv + float2( 0.0,-1.0) * _CameraDepthTexture_TexelSize.xy); | |
half3 viewSpacePos_u = viewSpacePosAtScreenUV(uv + float2( 0.0, 1.0) * _CameraDepthTexture_TexelSize.xy); | |
// get the difference between the current and each offset position | |
half3 l = viewSpacePos_c - viewSpacePos_l; | |
half3 r = viewSpacePos_r - viewSpacePos_c; | |
half3 d = viewSpacePos_c - viewSpacePos_d; | |
half3 u = viewSpacePos_u - viewSpacePos_c; | |
// get depth values at 1 & 2 pixels offsets from current along the horizontal axis | |
half4 H = half4( | |
getRawDepth(uv + float2(-1.0, 0.0) * _CameraDepthTexture_TexelSize.xy), | |
getRawDepth(uv + float2( 1.0, 0.0) * _CameraDepthTexture_TexelSize.xy), | |
getRawDepth(uv + float2(-2.0, 0.0) * _CameraDepthTexture_TexelSize.xy), | |
getRawDepth(uv + float2( 2.0, 0.0) * _CameraDepthTexture_TexelSize.xy) | |
); | |
// get depth values at 1 & 2 pixels offsets from current along the vertical axis | |
half4 V = half4( | |
getRawDepth(uv + float2(0.0,-1.0) * _CameraDepthTexture_TexelSize.xy), | |
getRawDepth(uv + float2(0.0, 1.0) * _CameraDepthTexture_TexelSize.xy), | |
getRawDepth(uv + float2(0.0,-2.0) * _CameraDepthTexture_TexelSize.xy), | |
getRawDepth(uv + float2(0.0, 2.0) * _CameraDepthTexture_TexelSize.xy) | |
); | |
// current pixel's depth difference from slope of offset depth samples | |
// differs from original article because we're using non-linear depth values | |
// see article's comments | |
half2 he = abs((2 * H.xy - H.zw) - c); | |
half2 ve = abs((2 * V.xy - V.zw) - c); | |
// pick horizontal and vertical diff with the smallest depth difference from slopes | |
half3 hDeriv = he.x < he.y ? l : r; | |
half3 vDeriv = ve.x < ve.y ? d : u; | |
// get view space normal from the cross product of the best derivatives | |
half3 viewNormal = normalize(cross(hDeriv, vDeriv)); | |
return viewNormal; | |
} | |
#endif | |
half4 frag (v2f i) : SV_Target | |
{ | |
// get view space normal at the current pixel position | |
half3 viewNormal = viewNormalAtPixelPosition(i.pos.xy); | |
// transform normal from view space to world space | |
half3 WorldNormal = mul((float3x3)unity_MatrixInvV, viewNormal); | |
// alternative that should work when using this for post processing | |
// we have to invert the view normal z because Unity's view space z is flipped | |
// thus the above code using unity_MatrixInvV is doing this flip, but the | |
// unity_CameraToWorld does not flip the z, so we have to do it manually | |
// half3 WorldNormal = mul((float3x3)unity_CameraToWorld, viewNormal * half3(1.0, 1.0, -1.0)); | |
// visualize normal (assumes you're using linear space rendering) | |
return half4(GammaToLinearSpace(WorldNormal.xyz * 0.5 + 0.5), 1.0); | |
} | |
ENDCG | |
} | |
} | |
} |
The camera GameObject Transform is indeed +Z forward. But Unity's View Space matrix is -Z forward. The reason View Space has a -Z forward likely has to do with legacy implementation examples for OpenGL having a -Z forward View Space, and thus a lot of OpenGL implementations using that same View Space setup. It's important to remember that Unity was originally an OpenGL-only engine, specifically a macOS-only, and then macOS and iPhone only, OpenGL engine. When they eventually added Windows and Direct3D support, they put work into keeping as much as possible the same between OpenGL and Direct3D, which includes keeping the -Z forward View Space among other things.
Why wouldn't Unity have changed the handedness of View Space to be left handed instead of right handed when the rest of the engine is left handed, especially since the handedness and even the forward axis for View Space are entirely arbitrary? Probably because the projection transform calculations also depend on the orientation of View Space, and it was easier to flip the handedness of the view matrix than to figure out how to calculate the projection matrix from a left handed View Space when all of the code examples available in OpenGL books and SDKs would likely have assumed a right handed View Space.
So here we are, 15 years later, still having to deal with that.
But why do I say we only need to flip the Z when using the alternative code? Because Unity's "View" matrices and "Camera" matrices are different! That's what the rest of that comment mentions. The UNITY_MATRIX_V and UNITY_MATRIX_I_V matrices are -Z forward right handed matrices for View Space. The unity_WorldToCamera and unity_CameraToWorld matrices are not; they're +Z forward left handed matrices.

Similar to how the UNITY_MATRIX_V and unity_WorldToCamera matrices are similar but not quite the same, the UNITY_MATRIX_P and unity_CameraProjection matrices also do not match. (And you'll notice I'm using unity_CameraInvProjection in the code, as there's no built-in UNITY_MATRIX_I_P matrix.) So you might assume the unity_CameraProjection would match up with the unity_WorldToCamera matrix and work with the left handed View matrix ... but you'd be wrong. The unity_CameraProjection is the original, unmodified OpenGL projection matrix that is on the Camera component, whereas UNITY_MATRIX_P is the projection matrix that's been transformed to work with the current graphics API, as OpenGL needs a slightly different projection matrix from all other graphics APIs (which otherwise all use the same projection matrix). Basically, that means the unity_CameraProjection matrix (and its inverse unity_CameraInvProjection) still work with a -Z forward right handed View Space.
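The practical upshot, pulled from the fragment shader above: the same view space normal can be brought to world space through either matrix family, as long as you account for the z flip yourself when using the Camera matrices.

// -Z forward right handed view matrices: no manual flip needed
half3 worldNormalA = mul((float3x3)unity_MatrixInvV, viewNormal);
// +Z forward left handed camera matrices: flip the normal's z first,
// because unity_CameraToWorld does not do that flip for us
half3 worldNormalB = mul((float3x3)unity_CameraToWorld, viewNormal * half3(1.0, 1.0, -1.0));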
Thank you for your elaboration!
I made a quick test in a Unity SRP (DX11) project and yes, UNITY_MATRIX_V is indeed a -Z forward right handed matrix. unity_WorldToCamera is the actual camera transformation matrix, using a +Z forward left handed coordinate system.
UNITY_MATRIX_P in the DX11 graphics API is this (hope I made this correctly):
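(The matrix image from the original comment isn't reproduced here. Below is a plausible reconstruction, assuming a symmetric frustum with near plane n, far plane f, aspect ratio a, and t = tan(fovY / 2), consistent with the description that follows.)

$$
\mathrm{UNITY\_MATRIX\_P}_{\mathrm{D3D11}} =
\begin{bmatrix}
\frac{1}{a t} & 0 & 0 & 0 \\
0 & -\frac{1}{t} & 0 & 0 \\
0 & 0 & \frac{n}{f - n} & \frac{n f}{f - n} \\
0 & 0 & -1 & 0
\end{bmatrix}
$$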
which projects the near clip plane to 1, the far clip plane to 0, and screen space uv starts from the top left corner. Unity uses -1 in UNITY_MATRIX_P instead of 1 to handle the -Z forward right handed view matrix, which is slightly different from common DirectX implementations.
unity_CameraProjection is this:
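(Again, the matrix image isn't reproduced; a plausible reconstruction under the same assumptions, this time the standard OpenGL-convention projection:)

$$
\mathrm{unity\_CameraProjection} =
\begin{bmatrix}
\frac{1}{a t} & 0 & 0 & 0 \\
0 & \frac{1}{t} & 0 & 0 \\
0 & 0 & -\frac{f + n}{f - n} & -\frac{2 f n}{f - n} \\
0 & 0 & -1 & 0
\end{bmatrix}
$$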
which projects the near clip plane to -1, the far clip plane to 1, and screen space uv starts from the bottom left corner.
Also, I found that the handedness inconsistency is mentioned in Unity's GitHub repository, Unity-Technologies/Graphics.
> unity_WorldToCamera is the actual camera transformation matrix, using a +Z forward left handed coordinate system.

Not exactly. The camera's GameObject Transform can have scale, or even shear in cases of complex parent scaling. But unity_WorldToCamera will always have a uniform scale of 1. It'll be equivalent to:

Matrix4x4.TRS(camera.transform.position, camera.transform.rotation, Vector3.one)
> UNITY_MATRIX_P in the DX11 graphics API is this (hope I made this correctly):

Unity may also flip the projection matrix's Y axis in an attempt to match OpenGL's behavior, which is extra fun because they try to match the fact that OpenGL has inconsistent behaviour when rendering to MSAA vs non-MSAA targets.
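As an aside, the usual way shaders detect that flip is Unity's built-in _ProjectionParams, whose x component is -1.0 when the current projection matrix has been y-flipped. A minimal sketch, assuming a manually computed screen uv (the clipPos name here is illustrative, not from this thread):

// _ProjectionParams.x is +1.0 normally, and -1.0 when Unity has y-flipped the
// projection matrix (e.g. rendering into a render texture on D3D-like APIs)
float2 uv = clipPos.xy / clipPos.w * 0.5 + 0.5;
if (_ProjectionParams.x < 0.0)
    uv.y = 1.0 - uv.y;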
Hey bgolus,
Thank you for the examples!
I modified a previous version to work with single pass stereo rendering and "post processing". I'm using cmd.DrawMesh from this example: https://docs.unity3d.com/Packages/com.unity.render-pipelines.universal@12.1/manual/renderer-features/how-to-fullscreen-blit-in-xr-spi.html

The script below seems to work well. I mainly just set up the proper stereo macros and corrected the vert function, since it's already rendering in clip space.

I'm wondering if you know of any examples that work with this approach to reconstruct world position. You mentioned in a post that it's possible to do it without computing the inverse matrix in C#. I haven't been able to find an example that works in this context, though. The documentation around single pass instanced rendering is sparse.
Shader "ColorBlit"
{
SubShader
{
Tags { "RenderType"="Transparent" "Queue"="Transparent" }
LOD 100
Pass
{
Cull Off
ZWrite Off
CGPROGRAM
#pragma vertex vert
#pragma fragment frag
#include "UnityCG.cginc"
struct appdata
{
float4 vertex : POSITION;
UNITY_VERTEX_INPUT_INSTANCE_ID
};
struct v2f
{
float4 pos : SV_POSITION;
float2 uv : TEXCOORD0;
UNITY_VERTEX_OUTPUT_STEREO
};
v2f vert (appdata v)
{
v2f o;
UNITY_SETUP_INSTANCE_ID(v);
UNITY_INITIALIZE_VERTEX_OUTPUT_STEREO(o);
// // Note: The pass is setup with a mesh already in clip
// // space, that's why, it's enough to just output vertex
// // positions
o.pos = float4(v.vertex.xyz, 1.0);
#if UNITY_UV_STARTS_AT_TOP
o.pos.y *= -1;
#endif
return o;
}
UNITY_DECLARE_DEPTH_TEXTURE(_CameraDepthTexture);
float4 _CameraDepthTexture_TexelSize;
// inspired by keijiro's depth inverse projection
// https://github.com/keijiro/DepthInverseProjection
// constructs view space ray at the far clip plane from the vpos
// then multiplies that ray by the linear 01 depth
float3 viewSpacePosAtPixelPosition(float2 vpos)
{
float2 uv = vpos * _CameraDepthTexture_TexelSize.xy;
float3 viewSpaceRay = mul(unity_CameraInvProjection, float4(uv * 2.0 - 1.0, 1.0, 1.0) * _ProjectionParams.z);
float rawDepth = SAMPLE_DEPTH_TEXTURE_LOD(_CameraDepthTexture, float4(uv, 0.0, 0.0));
return viewSpaceRay * Linear01Depth(rawDepth);
}
// inspired by János Turánszki's improved normal reconstruction technique
// https://wickedengine.net/2019/09/22/improved-normal-reconstruction-from-depth/
// this is a minor optimization over the original, using only 2 comparisons instead of 8
// at the cost of two additional vector subtractions
half4 frag (v2f i) : SV_Target
{
UNITY_SETUP_STEREO_EYE_INDEX_POST_VERTEX(i);
// get current pixel's view space position
half3 viewSpacePos_c = viewSpacePosAtPixelPosition(i.pos.xy + float2( 0.0, 0.0));
// if depth is at the far plane, then assume skybox
// if (abs(viewSpacePos_c.z) >= _ProjectionParams.z)
// return 0;
// get view space position at 1 pixel offsets in each major direction
half3 viewSpacePos_l = viewSpacePosAtPixelPosition(i.pos.xy + float2(-1.0, 0.0));
half3 viewSpacePos_r = viewSpacePosAtPixelPosition(i.pos.xy + float2( 1.0, 0.0));
half3 viewSpacePos_d = viewSpacePosAtPixelPosition(i.pos.xy + float2( 0.0,-1.0));
half3 viewSpacePos_u = viewSpacePosAtPixelPosition(i.pos.xy + float2( 0.0, 1.0));
// get the difference between the current and each offset position
half3 l = viewSpacePos_c - viewSpacePos_l;
half3 r = viewSpacePos_r - viewSpacePos_c;
half3 d = viewSpacePos_c - viewSpacePos_d;
half3 u = viewSpacePos_u - viewSpacePos_c;
// pick horizontal and vertical diff with the smallest z difference
half3 h = abs(l.z) < abs(r.z) ? l : r;
half3 v = abs(d.z) < abs(u.z) ? d : u;
// get view space normal from the cross product of the two smallest offsets
half3 viewNormal = normalize(cross(h, v));
// transform normal from view space to world space
half3 WorldNormal = mul((float3x3)unity_MatrixInvV, viewNormal);
// visualize normal (assumes you're using linear space rendering)
return half4(GammaToLinearSpace(WorldNormal.xyz * 0.5 + 0.5), 1.0);
}
ENDCG
}
}
}
I think I'm close. This looks correct when the camera is at 0,0,0. It would make sense that I need to somehow translate this by the camera's world position.
Shader "WorldNormalFromDepthTexture"
{
Properties {
[KeywordEnum(3 Tap, 4 Tap, Improved, Accurate)] _ReconstructionMethod ("Normal Reconstruction Method", Float) = 0
}
SubShader
{
Tags { "RenderType"="Transparent" "Queue"="Transparent" }
LOD 100
Pass
{
Cull Off
ZWrite Off
CGPROGRAM
#pragma vertex vert
#pragma fragment frag
#pragma shader_feature _RECONSTRUCTIONMETHOD_3_TAP _RECONSTRUCTIONMETHOD_4_TAP _RECONSTRUCTIONMETHOD_IMPROVED _RECONSTRUCTIONMETHOD_ACCURATE
#include "UnityCG.cginc"
struct appdata
{
float4 vertex : POSITION;
UNITY_VERTEX_INPUT_INSTANCE_ID
};
struct v2f
{
float4 pos : SV_POSITION;
UNITY_VERTEX_OUTPUT_STEREO
};
v2f vert (appdata v)
{
v2f o;
UNITY_SETUP_INSTANCE_ID(v);
UNITY_INITIALIZE_VERTEX_OUTPUT_STEREO(o);
//o.pos = UnityObjectToClipPos(v.vertex);
o.pos = float4(v.vertex.xyz, 1.0);
return o;
}
UNITY_DECLARE_DEPTH_TEXTURE(_CameraDepthTexture);
float4 _CameraDepthTexture_TexelSize;
float getRawDepth(float2 uv) { return SAMPLE_DEPTH_TEXTURE_LOD(_CameraDepthTexture, float4(uv, 0.0, 0.0)); }
// inspired by keijiro's depth inverse projection
// https://github.com/keijiro/DepthInverseProjection
// constructs view space ray at the far clip plane from the screen uv
// then multiplies that ray by the linear 01 depth
float3 viewSpacePosAtScreenUV(float2 uv)
{
float3 viewSpaceRay = mul(unity_CameraInvProjection, float4(uv * 2.0 - 1.0, 1.0, 1.0) * _ProjectionParams.z);
float rawDepth = getRawDepth(uv);
return viewSpaceRay * Linear01Depth(rawDepth);
}
float3 viewSpacePosAtPixelPosition(float2 vpos)
{
float2 uv = vpos * _CameraDepthTexture_TexelSize.xy;
return viewSpacePosAtScreenUV(uv);
}
#if defined(_RECONSTRUCTIONMETHOD_3_TAP)
// naive 3 tap normal reconstruction
// accurate mid triangle normals, slightly diagonally offset on edges
// artifacts on depth disparities
// unity's compiled fragment shader stats: 41 math, 3 tex
half3 viewNormalAtPixelPosition(float2 vpos)
{
// get current pixel's view space position
half3 viewSpacePos_c = viewSpacePosAtPixelPosition(vpos + float2( 0.0, 0.0));
// get view space position at 1 pixel offsets in each major direction
half3 viewSpacePos_r = viewSpacePosAtPixelPosition(vpos + float2( 1.0, 0.0));
half3 viewSpacePos_u = viewSpacePosAtPixelPosition(vpos + float2( 0.0, 1.0));
// get the difference between the current and each offset position
half3 hDeriv = viewSpacePos_r - viewSpacePos_c;
half3 vDeriv = viewSpacePos_u - viewSpacePos_c;
// get view space normal from the cross product of the diffs
half3 viewNormal = normalize(cross(hDeriv, vDeriv));
return viewNormal;
}
#elif defined(_RECONSTRUCTIONMETHOD_4_TAP)
// naive 4 tap normal reconstruction
// accurate mid triangle normals compared to 3 tap
// no diagonal offset on edges, but sharp details are softened
// worse artifacts on depth disparities than 3 tap
// probably little reason to use this over the 3 tap approach
// unity's compiled fragment shader stats: 50 math, 4 tex
half3 viewNormalAtPixelPosition(float2 vpos)
{
// get view space position at 1 pixel offsets in each major direction
half3 viewSpacePos_l = viewSpacePosAtPixelPosition(vpos + float2(-1.0, 0.0));
half3 viewSpacePos_r = viewSpacePosAtPixelPosition(vpos + float2( 1.0, 0.0));
half3 viewSpacePos_d = viewSpacePosAtPixelPosition(vpos + float2( 0.0,-1.0));
half3 viewSpacePos_u = viewSpacePosAtPixelPosition(vpos + float2( 0.0, 1.0));
// get the difference between the current and each offset position
half3 hDeriv = viewSpacePos_r - viewSpacePos_l;
half3 vDeriv = viewSpacePos_u - viewSpacePos_d;
// get view space normal from the cross product of the diffs
half3 viewNormal = normalize(cross(hDeriv, vDeriv));
return viewNormal;
}
#elif defined(_RECONSTRUCTIONMETHOD_IMPROVED)
// base on János Turánszki's Improved Normal Reconstruction
// https://wickedengine.net/2019/09/22/improved-normal-reconstruction-from-depth/
// this is a minor optimization over the original, using only 2 comparisons instead of 8
// at the cost of two additional vector subtractions
// sharpness of 3 tap with better handling of depth disparities
// worse artifacts on convex edges than either 3 tap or 4 tap
// unity's compiled fragment shader stats: 62 math, 5 tex
half3 viewNormalAtPixelPosition(float2 vpos)
{
// get current pixel's view space position
half3 viewSpacePos_c = viewSpacePosAtPixelPosition(vpos + float2( 0.0, 0.0));
// get view space position at 1 pixel offsets in each major direction
half3 viewSpacePos_l = viewSpacePosAtPixelPosition(vpos + float2(-1.0, 0.0));
half3 viewSpacePos_r = viewSpacePosAtPixelPosition(vpos + float2( 1.0, 0.0));
half3 viewSpacePos_d = viewSpacePosAtPixelPosition(vpos + float2( 0.0,-1.0));
half3 viewSpacePos_u = viewSpacePosAtPixelPosition(vpos + float2( 0.0, 1.0));
// get the difference between the current and each offset position
half3 l = viewSpacePos_c - viewSpacePos_l;
half3 r = viewSpacePos_r - viewSpacePos_c;
half3 d = viewSpacePos_c - viewSpacePos_d;
half3 u = viewSpacePos_u - viewSpacePos_c;
// pick horizontal and vertical diff with the smallest z difference
half3 hDeriv = abs(l.z) < abs(r.z) ? l : r;
half3 vDeriv = abs(d.z) < abs(u.z) ? d : u;
// get view space normal from the cross product of the two smallest offsets
half3 viewNormal = normalize(cross(hDeriv, vDeriv));
return viewNormal;
}
#elif defined(_RECONSTRUCTIONMETHOD_ACCURATE)
// based on Yuwen Wu's Accurate Normal Reconstruction
// https://atyuwen.github.io/posts/normal-reconstruction/
// basically as accurate as you can get!
// no artifacts on depth disparities
// no artifacts on edges
// artifacts on triangles that are <3 pixels across
// unity's compiled fragment shader stats: 66 math, 9 tex
half3 viewNormalAtPixelPosition(float2 vpos)
{
// screen uv from vpos
float2 uv = vpos * _CameraDepthTexture_TexelSize.xy;
// current pixel's depth
float c = getRawDepth(uv);
// get current pixel's view space position
half3 viewSpacePos_c = viewSpacePosAtScreenUV(uv);
// get view space position at 1 pixel offsets in each major direction
half3 viewSpacePos_l = viewSpacePosAtScreenUV(uv + float2(-1.0, 0.0) * _CameraDepthTexture_TexelSize.xy);
half3 viewSpacePos_r = viewSpacePosAtScreenUV(uv + float2( 1.0, 0.0) * _CameraDepthTexture_TexelSize.xy);
half3 viewSpacePos_d = viewSpacePosAtScreenUV(uv + float2( 0.0,-1.0) * _CameraDepthTexture_TexelSize.xy);
half3 viewSpacePos_u = viewSpacePosAtScreenUV(uv + float2( 0.0, 1.0) * _CameraDepthTexture_TexelSize.xy);
// get the difference between the current and each offset position
half3 l = viewSpacePos_c - viewSpacePos_l;
half3 r = viewSpacePos_r - viewSpacePos_c;
half3 d = viewSpacePos_c - viewSpacePos_d;
half3 u = viewSpacePos_u - viewSpacePos_c;
// get depth values at 1 & 2 pixels offsets from current along the horizontal axis
half4 H = half4(
getRawDepth(uv + float2(-1.0, 0.0) * _CameraDepthTexture_TexelSize.xy),
getRawDepth(uv + float2( 1.0, 0.0) * _CameraDepthTexture_TexelSize.xy),
getRawDepth(uv + float2(-2.0, 0.0) * _CameraDepthTexture_TexelSize.xy),
getRawDepth(uv + float2( 2.0, 0.0) * _CameraDepthTexture_TexelSize.xy)
);
// get depth values at 1 & 2 pixels offsets from current along the vertical axis
half4 V = half4(
getRawDepth(uv + float2(0.0,-1.0) * _CameraDepthTexture_TexelSize.xy),
getRawDepth(uv + float2(0.0, 1.0) * _CameraDepthTexture_TexelSize.xy),
getRawDepth(uv + float2(0.0,-2.0) * _CameraDepthTexture_TexelSize.xy),
getRawDepth(uv + float2(0.0, 2.0) * _CameraDepthTexture_TexelSize.xy)
);
// current pixel's depth difference from slope of offset depth samples
// differs from original article because we're using non-linear depth values
// see article's comments
half2 he = abs((2 * H.xy - H.zw) - c);
half2 ve = abs((2 * V.xy - V.zw) - c);
// pick horizontal and vertical diff with the smallest depth difference from slopes
half3 hDeriv = he.x < he.y ? l : r;
half3 vDeriv = ve.x < ve.y ? d : u;
// get view space normal from the cross product of the best derivatives
half3 viewNormal = normalize(cross(hDeriv, vDeriv));
return viewNormal;
}
#endif
half4 frag (v2f i) : SV_Target
{
UNITY_SETUP_STEREO_EYE_INDEX_POST_VERTEX(i);
// get view space normal at the current pixel position
half3 viewNormal = viewNormalAtPixelPosition(i.pos.xy);
// transform normal from view space to world space
half3 WorldNormal = mul((float3x3)unity_MatrixInvV, viewNormal);
float2 vpos = i.pos.xy;
float2 uv = vpos * _CameraDepthTexture_TexelSize.xy;
float3 viewSpacePos = viewSpacePosAtScreenUV(uv);
half3 worldPos = mul((float3x3)unity_MatrixInvV, viewSpacePos);
return half4(worldPos,1);
//float3 viewSpaceRay = mul(unity_CameraInvProjection, float4(uv * 2.0 - 1.0, 1.0, 1.0) * _ProjectionParams.z);
// alternative that should work when using this for post processing
// we have to invert the view normal z because Unity's view space z is flipped
// thus the above code using unity_MatrixInvV is doing this flip, but the
// unity_CameraToWorld does not flip the z, so we have to do it manually
//half3 WorldNormal = mul((float3x3)unity_CameraToWorld, viewNormal * half3(1.0, 1.0, -1.0));
// visualize normal (assumes you're using linear space rendering)
return half4(GammaToLinearSpace(WorldNormal.xyz * 0.5 + 0.5), 1.0);
}
ENDCG
}
}
}
Sorry for the onslaught, but I think I got it!
half3 worldPos = mul((float3x3)unity_MatrixInvV, viewSpacePos) + _WorldSpaceCameraPos;
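That works because the (float3x3) cast drops unity_MatrixInvV's translation column, so adding _WorldSpaceCameraPos reinstates the camera's world position. A sketch of the equivalent full 4x4 form, assuming the same viewSpacePos as in the shader above:

// the inverse view matrix's translation column already holds the camera's
// world position, so no manual offset is needed
float3 worldPos = mul(unity_MatrixInvV, float4(viewSpacePos, 1.0)).xyz;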
@bgolus Thanks for this writeup!
Do you happen to know how things change for projection matrices that are off center? In ARKit land the camera's projection matrix has a slightly offset left/right and top/bottom frustum, and it causes the normals to change slightly as the device rotates.
Thinking it has something to do with this line:

float3 viewSpaceRay = mul(unity_CameraInvProjection, float4(uv * 2.0 - 1.0, 1.0, 1.0) * _ProjectionParams.z);
Thoughts?
PosWS = ComputeWorldSpacePosition(screen_space_uv, depth, UNITY_MATRIX_I_VP);
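For reference, a minimal sketch of how that helper is typically wired up in a URP full screen pass. ComputeWorldSpacePosition lives in SRP Core's Common.hlsl and runs the full inverse view-projection, so off center projection matrices are handled as well; SampleSceneDepth and the includes are URP's, while the wrapper function is illustrative.

#include "Packages/com.unity.render-pipelines.universal/ShaderLibrary/Core.hlsl"
#include "Packages/com.unity.render-pipelines.universal/ShaderLibrary/DeclareDepthTexture.hlsl"

float3 WorldPosFromDepth(float2 uv)
{
    // raw device depth from the camera depth texture
    float deviceDepth = SampleSceneDepth(uv);
    // builds the NDC position from uv + depth, then applies the inverse view-projection
    return ComputeWorldSpacePosition(uv, deviceDepth, UNITY_MATRIX_I_VP);
}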
@bgolus I have some questions.

In line 230 you said "we have to invert the view normal z because Unity's view space z is flipped"; can you explain this in more detail? In my previous understanding, the camera in Unity is positioned at view space (0, 0, 0), looking towards the +Z axis, and using a left handed coordinate system.

If that is the case, the view space normal should be equal to the cross product of vDeriv and hDeriv, therefore we should have half3 viewNormal = normalize(cross(vDeriv, hDeriv));. I made a diagram for this (sorry for my poor diagram):

Also, even though DirectX flips uv.y in its projection matrix, I have noticed that when doing a post-processing blit, the screen space bottom left corner's uv value is (0, 0). So I suppose UNITY_UV_STARTS_AT_TOP is not the issue here?