@bgolus
Last active March 28, 2024 22:54
Different methods for getting World Normal from Depth Texture, without any external script dependencies.
Shader "WorldNormalFromDepthTexture"
{
Properties {
[KeywordEnum(3 Tap, 4 Tap, Improved, Accurate)] _ReconstructionMethod ("Normal Reconstruction Method", Float) = 0
}
SubShader
{
Tags { "RenderType"="Transparent" "Queue"="Transparent" }
LOD 100
Pass
{
Cull Off
ZWrite Off
CGPROGRAM
#pragma vertex vert
#pragma fragment frag
#pragma shader_feature _RECONSTRUCTIONMETHOD_3_TAP _RECONSTRUCTIONMETHOD_4_TAP _RECONSTRUCTIONMETHOD_IMPROVED _RECONSTRUCTIONMETHOD_ACCURATE
#include "UnityCG.cginc"
struct appdata
{
float4 vertex : POSITION;
};
struct v2f
{
float4 pos : SV_POSITION;
};
v2f vert (appdata v)
{
v2f o;
o.pos = UnityObjectToClipPos(v.vertex);
return o;
}
UNITY_DECLARE_DEPTH_TEXTURE(_CameraDepthTexture);
float4 _CameraDepthTexture_TexelSize;
float getRawDepth(float2 uv) { return SAMPLE_DEPTH_TEXTURE_LOD(_CameraDepthTexture, float4(uv, 0.0, 0.0)); }
// inspired by keijiro's depth inverse projection
// https://github.com/keijiro/DepthInverseProjection
// constructs view space ray at the far clip plane from the screen uv
// then multiplies that ray by the linear 01 depth
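            // this works because view space positions along a ray from the camera
            // scale linearly with view depth, and Linear01Depth() remaps the raw
            // depth to view z as a 0 to 1 fraction of the far plane distance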
            float3 viewSpacePosAtScreenUV(float2 uv)
            {
                float3 viewSpaceRay = mul(unity_CameraInvProjection, float4(uv * 2.0 - 1.0, 1.0, 1.0) * _ProjectionParams.z);
                float rawDepth = getRawDepth(uv);
                return viewSpaceRay * Linear01Depth(rawDepth);
            }
            float3 viewSpacePosAtPixelPosition(float2 vpos)
            {
                float2 uv = vpos * _CameraDepthTexture_TexelSize.xy;
                return viewSpacePosAtScreenUV(uv);
            }
        #if defined(_RECONSTRUCTIONMETHOD_3_TAP)

            // naive 3 tap normal reconstruction
            // accurate mid triangle normals, slightly diagonally offset on edges
            // artifacts on depth disparities

            // unity's compiled fragment shader stats: 41 math, 3 tex
            half3 viewNormalAtPixelPosition(float2 vpos)
            {
                // get current pixel's view space position
                half3 viewSpacePos_c = viewSpacePosAtPixelPosition(vpos + float2( 0.0, 0.0));

                // get view space position at 1 pixel offsets in each major direction
                half3 viewSpacePos_r = viewSpacePosAtPixelPosition(vpos + float2( 1.0, 0.0));
                half3 viewSpacePos_u = viewSpacePosAtPixelPosition(vpos + float2( 0.0, 1.0));

                // get the difference between the current and each offset position
                half3 hDeriv = viewSpacePos_r - viewSpacePos_c;
                half3 vDeriv = viewSpacePos_u - viewSpacePos_c;

                // get view space normal from the cross product of the diffs
                half3 viewNormal = normalize(cross(hDeriv, vDeriv));

                return viewNormal;
            }

        #elif defined(_RECONSTRUCTIONMETHOD_4_TAP)

            // naive 4 tap normal reconstruction
            // accurate mid triangle normals compared to 3 tap
            // no diagonal offset on edges, but sharp details are softened
            // worse artifacts on depth disparities than 3 tap
            // probably little reason to use this over the 3 tap approach

            // unity's compiled fragment shader stats: 50 math, 4 tex
            half3 viewNormalAtPixelPosition(float2 vpos)
            {
                // get view space position at 1 pixel offsets in each major direction
                half3 viewSpacePos_l = viewSpacePosAtPixelPosition(vpos + float2(-1.0, 0.0));
                half3 viewSpacePos_r = viewSpacePosAtPixelPosition(vpos + float2( 1.0, 0.0));
                half3 viewSpacePos_d = viewSpacePosAtPixelPosition(vpos + float2( 0.0,-1.0));
                half3 viewSpacePos_u = viewSpacePosAtPixelPosition(vpos + float2( 0.0, 1.0));

                // get the difference between the current and each offset position
                half3 hDeriv = viewSpacePos_r - viewSpacePos_l;
                half3 vDeriv = viewSpacePos_u - viewSpacePos_d;

                // get view space normal from the cross product of the diffs
                half3 viewNormal = normalize(cross(hDeriv, vDeriv));

                return viewNormal;
            }
        #elif defined(_RECONSTRUCTIONMETHOD_IMPROVED)

            // based on János Turánszki's Improved Normal Reconstruction
            // https://wickedengine.net/2019/09/22/improved-normal-reconstruction-from-depth/
            // this is a minor optimization over the original, using only 2 comparisons instead of 8
            // at the cost of two additional vector subtractions
            // sharpness of 3 tap with better handling of depth disparities
            // worse artifacts on convex edges than either 3 tap or 4 tap

            // unity's compiled fragment shader stats: 62 math, 5 tex
            half3 viewNormalAtPixelPosition(float2 vpos)
            {
                // get current pixel's view space position
                half3 viewSpacePos_c = viewSpacePosAtPixelPosition(vpos + float2( 0.0, 0.0));

                // get view space position at 1 pixel offsets in each major direction
                half3 viewSpacePos_l = viewSpacePosAtPixelPosition(vpos + float2(-1.0, 0.0));
                half3 viewSpacePos_r = viewSpacePosAtPixelPosition(vpos + float2( 1.0, 0.0));
                half3 viewSpacePos_d = viewSpacePosAtPixelPosition(vpos + float2( 0.0,-1.0));
                half3 viewSpacePos_u = viewSpacePosAtPixelPosition(vpos + float2( 0.0, 1.0));

                // get the difference between the current and each offset position
                half3 l = viewSpacePos_c - viewSpacePos_l;
                half3 r = viewSpacePos_r - viewSpacePos_c;
                half3 d = viewSpacePos_c - viewSpacePos_d;
                half3 u = viewSpacePos_u - viewSpacePos_c;

                // pick horizontal and vertical diff with the smallest z difference
                half3 hDeriv = abs(l.z) < abs(r.z) ? l : r;
                half3 vDeriv = abs(d.z) < abs(u.z) ? d : u;

                // get view space normal from the cross product of the two smallest offsets
                half3 viewNormal = normalize(cross(hDeriv, vDeriv));

                return viewNormal;
            }
        #elif defined(_RECONSTRUCTIONMETHOD_ACCURATE)

            // based on Yuwen Wu's Accurate Normal Reconstruction
            // https://atyuwen.github.io/posts/normal-reconstruction/
            // basically as accurate as you can get!
            // no artifacts on depth disparities
            // no artifacts on edges
            // artifacts on triangles that are <3 pixels across

            // unity's compiled fragment shader stats: 66 math, 9 tex
            half3 viewNormalAtPixelPosition(float2 vpos)
            {
                // screen uv from vpos
                float2 uv = vpos * _CameraDepthTexture_TexelSize.xy;

                // current pixel's depth
                float c = getRawDepth(uv);

                // get current pixel's view space position
                half3 viewSpacePos_c = viewSpacePosAtScreenUV(uv);

                // get view space position at 1 pixel offsets in each major direction
                half3 viewSpacePos_l = viewSpacePosAtScreenUV(uv + float2(-1.0, 0.0) * _CameraDepthTexture_TexelSize.xy);
                half3 viewSpacePos_r = viewSpacePosAtScreenUV(uv + float2( 1.0, 0.0) * _CameraDepthTexture_TexelSize.xy);
                half3 viewSpacePos_d = viewSpacePosAtScreenUV(uv + float2( 0.0,-1.0) * _CameraDepthTexture_TexelSize.xy);
                half3 viewSpacePos_u = viewSpacePosAtScreenUV(uv + float2( 0.0, 1.0) * _CameraDepthTexture_TexelSize.xy);

                // get the difference between the current and each offset position
                half3 l = viewSpacePos_c - viewSpacePos_l;
                half3 r = viewSpacePos_r - viewSpacePos_c;
                half3 d = viewSpacePos_c - viewSpacePos_d;
                half3 u = viewSpacePos_u - viewSpacePos_c;

                // get depth values at 1 & 2 pixels offsets from current along the horizontal axis
                half4 H = half4(
                    getRawDepth(uv + float2(-1.0, 0.0) * _CameraDepthTexture_TexelSize.xy),
                    getRawDepth(uv + float2( 1.0, 0.0) * _CameraDepthTexture_TexelSize.xy),
                    getRawDepth(uv + float2(-2.0, 0.0) * _CameraDepthTexture_TexelSize.xy),
                    getRawDepth(uv + float2( 2.0, 0.0) * _CameraDepthTexture_TexelSize.xy)
                );

                // get depth values at 1 & 2 pixels offsets from current along the vertical axis
                half4 V = half4(
                    getRawDepth(uv + float2(0.0,-1.0) * _CameraDepthTexture_TexelSize.xy),
                    getRawDepth(uv + float2(0.0, 1.0) * _CameraDepthTexture_TexelSize.xy),
                    getRawDepth(uv + float2(0.0,-2.0) * _CameraDepthTexture_TexelSize.xy),
                    getRawDepth(uv + float2(0.0, 2.0) * _CameraDepthTexture_TexelSize.xy)
                );

                // current pixel's depth difference from slope of offset depth samples
                // differs from original article because we're using non-linear depth values
                // see article's comments
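                // 2 * d1px - d2px linearly extrapolates the depths sampled 1 and
                // 2 pixels away back to the center pixel; if the prediction matches
                // the actual center depth, those samples lie on the same plane as
                // the center, so the smaller error below picks the side without a
                // depth discontinuity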
                half2 he = abs((2 * H.xy - H.zw) - c);
                half2 ve = abs((2 * V.xy - V.zw) - c);

                // pick horizontal and vertical diff with the smallest depth difference from slopes
                half3 hDeriv = he.x < he.y ? l : r;
                half3 vDeriv = ve.x < ve.y ? d : u;

                // get view space normal from the cross product of the best derivatives
                half3 viewNormal = normalize(cross(hDeriv, vDeriv));

                return viewNormal;
            }

        #endif
            half4 frag (v2f i) : SV_Target
            {
                // get view space normal at the current pixel position
                half3 viewNormal = viewNormalAtPixelPosition(i.pos.xy);

                // transform normal from view space to world space
                half3 WorldNormal = mul((float3x3)unity_MatrixInvV, viewNormal);

                // alternative that should work when using this for post processing
                // we have to invert the view normal z because Unity's view space z is flipped
                // thus the above code using unity_MatrixInvV is doing this flip, but the
                // unity_CameraToWorld does not flip the z, so we have to do it manually
                // half3 WorldNormal = mul((float3x3)unity_CameraToWorld, viewNormal * half3(1.0, 1.0, -1.0));

                // visualize normal (assumes you're using linear space rendering)
                return half4(GammaToLinearSpace(WorldNormal.xyz * 0.5 + 0.5), 1.0);
            }
            ENDCG
        }
    }
}
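
Usage note: the shader samples _CameraDepthTexture, so the camera must actually be rendering a depth texture. In the built-in render pipeline that isn't always enabled; here's a minimal sketch of forcing it via the standard Camera.depthTextureMode API (the component name is made up for illustration):

using UnityEngine;

// hypothetical helper component: forces the camera to render the
// _CameraDepthTexture that the shader above samples
[RequireComponent(typeof(Camera))]
public class EnableDepthTexture : MonoBehaviour
{
    void OnEnable()
    {
        GetComponent<Camera>().depthTextureMode |= DepthTextureMode.Depth;
    }
}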
@zznewclear13

@bgolus I have some questions.

In line 230 you said "we have to invert the view normal z because Unity's view space z is flipped", can you explain this in more detail? In my previous understanding, the camera in unity is positioned at view space (0, 0, 0), looking towards +Z axis, and using a left handed coordinate system.

If that is the case, the view space normal should be equal to the cross product of vDeriv and hDeriv, therefore we should have half3 viewNormal = normalize(cross(vDeriv, hDeriv));. I made a diagram for this (sorry for my poor diagram):

[diagram: viewNormal]

Also even though DirectX flips uv.y in its projection matrix, I have noticed that when doing post-processing blit, the screen space bottom left corner's uv value is (0, 0). So I suppose UNITY_UV_STARTS_AT_TOP is not the issue here?

@bgolus (Author) commented Jan 24, 2022

The camera GameObject Transform is indeed +Z forward. But Unity's View Space matrix is -Z forward. The reason View Space is -Z forward likely has to do with legacy OpenGL implementation examples using a -Z forward View Space, and thus a lot of OpenGL implementations use that same View Space setup. It's important to remember that Unity was originally an OpenGL only engine, specifically a macOS only, and then macOS and iPhone only, OpenGL engine. When they eventually added Windows and Direct3D support they put work into keeping as much as possible the same between OpenGL and Direct3D, which includes keeping the -Z forward View Space among other things.

Why wouldn't Unity have changed the handedness of View Space to be left handed instead of right handed when the rest of the engine is left handed, especially since the handedness and even the forward axis of View Space are entirely arbitrary? Probably because the projection transform calculations also depend on the orientation of View Space, and it was easier to flip the handedness of the view matrix than to figure out how to calculate the projection matrix from a left handed View Space when all of the code examples available in OpenGL books and SDKs would likely have also assumed a right handed View Space.

So here we are, 15 years later, still having to deal with that.

But why do I say we only need to flip the Z when using the alternative code? Because Unity's "View" matrices and "Camera" matrices are different! That's what the rest of that comment mentions. The UNITY_MATRIX_V and UNITY_MATRIX_I_V matrices use a -Z forward, right handed View Space. The unity_WorldToCamera and unity_CameraToWorld matrices do not; they're +Z forward, left handed matrices.
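
To make that concrete, both of the following should produce the same world space normal; this is the same pair of transforms as in the gist's frag function, just side by side:

// -Z forward, right handed View matrix
half3 worldNormalA = mul((float3x3)unity_MatrixInvV, viewNormal);
// +Z forward, left handed Camera matrix, with a manual z flip on the
// view normal to account for the handedness difference
half3 worldNormalB = mul((float3x3)unity_CameraToWorld, viewNormal * half3(1.0, 1.0, -1.0));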

Similar to how the UNITY_MATRIX_V and unity_WorldToCamera matrices are similar but not quite the same, the UNITY_MATRIX_P and unity_CameraProjection matrices also do not match. (And you'll notice I'm using unity_CameraInvProjection in the code, as there's no built in UNITY_MATRIX_I_P matrix.) So you might assume the unity_CameraProjection would match up with the unity_WorldToCamera matrix and work with the +Z forward left handed camera matrices ... but you'd be wrong. The unity_CameraProjection is the original, unmodified OpenGL projection matrix from the Camera component, whereas UNITY_MATRIX_P is the projection matrix that's been transformed to work with the current graphics API, as OpenGL needs a slightly different projection matrix from all other graphics APIs (which otherwise all use the same projection matrix). Basically that means the unity_CameraProjection matrix (and its inverse, unity_CameraInvProjection) still assume a -Z forward, right handed View Space.

@zznewclear13

Thank you for your elaboration!

I made a quick test in a Unity SRP (DX11) project and yes, UNITY_MATRIX_V is indeed a -Z forward right handed matrix. unity_WorldToCamera is the actual camera transformation matrix, using a +Z forward left handed coordinate system.

UNITY_MATRIX_P in DX11 graphics API is this (hope I made this correctly):
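
[The matrix image didn't survive; from the description below it's presumably the reversed-Z D3D11 projection, reconstructed here with vertical fov $\theta$, aspect $a$, near plane $n$, far plane $f$:]

$$\mathrm{UNITY\_MATRIX\_P} = \begin{pmatrix} \frac{1}{a \tan(\theta/2)} & 0 & 0 & 0 \\ 0 & -\frac{1}{\tan(\theta/2)} & 0 & 0 \\ 0 & 0 & \frac{n}{f-n} & \frac{f n}{f-n} \\ 0 & 0 & -1 & 0 \end{pmatrix}$$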

which projects the near clip plane to 1 and the far clip plane to 0, with screen space uv starting from the top left corner. Unity uses -1 in UNITY_MATRIX_P instead of 1 to handle the -Z forward right handed view matrix, which is slightly different from common DirectX implementations.

unity_CameraProjection is this:
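
[Matrix image missing here as well; presumably the standard OpenGL-style projection from the Camera component, same symbols as above:]

$$\mathrm{unity\_CameraProjection} = \begin{pmatrix} \frac{1}{a \tan(\theta/2)} & 0 & 0 & 0 \\ 0 & \frac{1}{\tan(\theta/2)} & 0 & 0 \\ 0 & 0 & -\frac{f+n}{f-n} & -\frac{2 f n}{f-n} \\ 0 & 0 & -1 & 0 \end{pmatrix}$$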

which projects the near clip plane to -1 and the far clip plane to 1, with screen space uv starting from the bottom left corner.

Also I found that the handedness inconsistency is mentioned in Unity's GitHub repository, Unity-Technologies/Graphics.

@bgolus (Author) commented Jan 25, 2022

unity_WorldToCamera is the actual camera transformation matrix, using a +Z forward left handed coordinate system.

Not exactly. The camera's GameObject Transform can have scale, or even shear in cases of complex parental scaling. But unity_WorldToCamera will always have a uniform scale of 1. It'll be equivalent to:
Matrix4x4.TRS(camera.transform.position, camera.transform.rotation, Vector3.one)

UNITY_MATRIX_P in DX11 graphics API is this (hope I made this correctly):

Unity may also flip the projection matrix's Y axis in an attempt to match OpenGL's behavior, which is extra fun because they try to match the fact that OpenGL has inconsistent behaviour when rendering to MSAA vs non-MSAA targets.
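
(Side note for anyone handling that flip in shader code: a minimal sketch using the documented _ProjectionParams.x convention; screenUV here is a hypothetical screen uv you've computed elsewhere:)

// _ProjectionParams.x is +1.0 normally, and -1.0 when Unity is rendering
// with a flipped projection matrix (e.g. into a render texture on D3D)
#if UNITY_UV_STARTS_AT_TOP
if (_ProjectionParams.x < 0.0)
    screenUV.y = 1.0 - screenUV.y;
#endif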

@pajama commented Mar 24, 2022

Hey bgolus,
Thank you for the examples!
I modified a previous version to work with single pass stereo rendering and 'post processing'. I'm using cmd.DrawMesh from this example: https://docs.unity3d.com/Packages/com.unity.render-pipelines.universal@12.1/manual/renderer-features/how-to-fullscreen-blit-in-xr-spi.html

The script below seems to work well. I mainly just set up the proper stereo macros and corrected the vert function, since it's already rendering in clip space.

I'm wondering if you know of any examples that work with this approach to reconstruct world position. You mentioned in a post that it's possible to do it without computing the inverse matrix in C#. I haven't been able to find an example that works in this context though. The documentation around single pass instanced rendering is sparse.

Shader "ColorBlit"
{
    SubShader
    {
        Tags { "RenderType"="Transparent" "Queue"="Transparent" }
        LOD 100
 
        Pass
        {
            Cull Off
            ZWrite Off
 
            CGPROGRAM
            #pragma vertex vert
            #pragma fragment frag
 
            #include "UnityCG.cginc"
 
            struct appdata
            {
                float4 vertex : POSITION;
                UNITY_VERTEX_INPUT_INSTANCE_ID
            };
 
            struct v2f
            {
                float4 pos : SV_POSITION;
                float2 uv : TEXCOORD0;
                UNITY_VERTEX_OUTPUT_STEREO
            };
 
            v2f vert (appdata v)
            {
                v2f o;
                UNITY_SETUP_INSTANCE_ID(v);
                UNITY_INITIALIZE_VERTEX_OUTPUT_STEREO(o);

                // Note: the pass is set up with a mesh already in clip
                // space, so it's enough to just output the vertex positions
                o.pos = float4(v.vertex.xyz, 1.0);

                #if UNITY_UV_STARTS_AT_TOP
                o.pos.y *= -1;
                #endif

                return o;
            }
 
            UNITY_DECLARE_DEPTH_TEXTURE(_CameraDepthTexture);
            float4 _CameraDepthTexture_TexelSize;
 
            // inspired by keijiro's depth inverse projection
            // https://github.com/keijiro/DepthInverseProjection
            // constructs view space ray at the far clip plane from the vpos
            // then multiplies that ray by the linear 01 depth
            float3 viewSpacePosAtPixelPosition(float2 vpos)
            {
                float2 uv = vpos * _CameraDepthTexture_TexelSize.xy;
                float3 viewSpaceRay = mul(unity_CameraInvProjection, float4(uv * 2.0 - 1.0, 1.0, 1.0) * _ProjectionParams.z);
                float rawDepth = SAMPLE_DEPTH_TEXTURE_LOD(_CameraDepthTexture, float4(uv, 0.0, 0.0));
                return viewSpaceRay * Linear01Depth(rawDepth);
            }
 
            // inspired by János Turánszki's improved normal reconstruction technique
            // https://wickedengine.net/2019/09/22/improved-normal-reconstruction-from-depth/
            // this is a minor optimization over the original, using only 2 comparisons instead of 8
            // at the cost of two additional vector subtractions
            half4 frag (v2f i) : SV_Target
            {
                UNITY_SETUP_STEREO_EYE_INDEX_POST_VERTEX(i);
                // get current pixel's view space position
                half3 viewSpacePos_c = viewSpacePosAtPixelPosition(i.pos.xy + float2( 0.0, 0.0));
 
                // if depth is at the far plane, then assume skybox
                // if (abs(viewSpacePos_c.z) >= _ProjectionParams.z)
                    // return 0;
 
                // get view space position at 1 pixel offsets in each major direction
                half3 viewSpacePos_l = viewSpacePosAtPixelPosition(i.pos.xy + float2(-1.0, 0.0));
                half3 viewSpacePos_r = viewSpacePosAtPixelPosition(i.pos.xy + float2( 1.0, 0.0));
                half3 viewSpacePos_d = viewSpacePosAtPixelPosition(i.pos.xy + float2( 0.0,-1.0));
                half3 viewSpacePos_u = viewSpacePosAtPixelPosition(i.pos.xy + float2( 0.0, 1.0));
 
                // get the difference between the current and each offset position
                half3 l = viewSpacePos_c - viewSpacePos_l;
                half3 r = viewSpacePos_r - viewSpacePos_c;
                half3 d = viewSpacePos_c - viewSpacePos_d;
                half3 u = viewSpacePos_u - viewSpacePos_c;
 
                // pick horizontal and vertical diff with the smallest z difference
                half3 h = abs(l.z) < abs(r.z) ? l : r;
                half3 v = abs(d.z) < abs(u.z) ? d : u;
 
                // get view space normal from the cross product of the two smallest offsets
                half3 viewNormal = normalize(cross(h, v));
 
                // transform normal from view space to world space
                half3 WorldNormal = mul((float3x3)unity_MatrixInvV, viewNormal);
 
                // visualize normal (assumes you're using linear space rendering)
                return half4(GammaToLinearSpace(WorldNormal.xyz * 0.5 + 0.5), 1.0);
            }
            ENDCG
        }
    }
}

@pajama commented Mar 24, 2022

I think I'm close. This looks correct when the camera is at 0,0,0. It would make sense that I need to somehow translate this by the camera's world position.

[screenshot]

Shader "WorldNormalFromDepthTexture"
{
    Properties {
        [KeywordEnum(3 Tap, 4 Tap, Improved, Accurate)] _ReconstructionMethod ("Normal Reconstruction Method", Float) = 0
    }

    SubShader
    {
        Tags { "RenderType"="Transparent" "Queue"="Transparent" }
        LOD 100

        Pass
        {
            Cull Off
            ZWrite Off

            CGPROGRAM
            #pragma vertex vert
            #pragma fragment frag

            #pragma shader_feature _RECONSTRUCTIONMETHOD_3_TAP _RECONSTRUCTIONMETHOD_4_TAP _RECONSTRUCTIONMETHOD_IMPROVED _RECONSTRUCTIONMETHOD_ACCURATE

            #include "UnityCG.cginc"

            struct appdata
            {
                float4 vertex : POSITION;
                UNITY_VERTEX_INPUT_INSTANCE_ID
            };

            struct v2f
            {
                float4 pos : SV_POSITION;
                UNITY_VERTEX_OUTPUT_STEREO
            };

            v2f vert (appdata v)
            {
                v2f o;
                UNITY_SETUP_INSTANCE_ID(v);
                UNITY_INITIALIZE_VERTEX_OUTPUT_STEREO(o);
                //o.pos = UnityObjectToClipPos(v.vertex);
                o.pos = float4(v.vertex.xyz, 1.0);
                return o;
            }

            UNITY_DECLARE_DEPTH_TEXTURE(_CameraDepthTexture);
            float4 _CameraDepthTexture_TexelSize;

            float getRawDepth(float2 uv) { return SAMPLE_DEPTH_TEXTURE_LOD(_CameraDepthTexture, float4(uv, 0.0, 0.0)); }

            // inspired by keijiro's depth inverse projection
            // https://github.com/keijiro/DepthInverseProjection
            // constructs view space ray at the far clip plane from the screen uv
            // then multiplies that ray by the linear 01 depth
            float3 viewSpacePosAtScreenUV(float2 uv)
            {
                float3 viewSpaceRay = mul(unity_CameraInvProjection, float4(uv * 2.0 - 1.0, 1.0, 1.0) * _ProjectionParams.z);
                float rawDepth = getRawDepth(uv);
                return viewSpaceRay * Linear01Depth(rawDepth);
            }
            float3 viewSpacePosAtPixelPosition(float2 vpos)
            {
                float2 uv = vpos * _CameraDepthTexture_TexelSize.xy;
                return viewSpacePosAtScreenUV(uv);
            }

        #if defined(_RECONSTRUCTIONMETHOD_3_TAP)

            // naive 3 tap normal reconstruction
            // accurate mid triangle normals, slightly diagonally offset on edges
            // artifacts on depth disparities

            // unity's compiled fragment shader stats: 41 math, 3 tex
            half3 viewNormalAtPixelPosition(float2 vpos)
            {
                // get current pixel's view space position
                half3 viewSpacePos_c = viewSpacePosAtPixelPosition(vpos + float2( 0.0, 0.0));

                // get view space position at 1 pixel offsets in each major direction
                half3 viewSpacePos_r = viewSpacePosAtPixelPosition(vpos + float2( 1.0, 0.0));
                half3 viewSpacePos_u = viewSpacePosAtPixelPosition(vpos + float2( 0.0, 1.0));

                // get the difference between the current and each offset position
                half3 hDeriv = viewSpacePos_r - viewSpacePos_c;
                half3 vDeriv = viewSpacePos_u - viewSpacePos_c;

                // get view space normal from the cross product of the diffs
                half3 viewNormal = normalize(cross(hDeriv, vDeriv));

                return viewNormal;
            }

        #elif defined(_RECONSTRUCTIONMETHOD_4_TAP)

            // naive 4 tap normal reconstruction
            // accurate mid triangle normals compared to 3 tap
            // no diagonal offset on edges, but sharp details are softened
            // worse artifacts on depth disparities than 3 tap
            // probably little reason to use this over the 3 tap approach

            // unity's compiled fragment shader stats: 50 math, 4 tex
            half3 viewNormalAtPixelPosition(float2 vpos)
            {
                // get view space position at 1 pixel offsets in each major direction
                half3 viewSpacePos_l = viewSpacePosAtPixelPosition(vpos + float2(-1.0, 0.0));
                half3 viewSpacePos_r = viewSpacePosAtPixelPosition(vpos + float2( 1.0, 0.0));
                half3 viewSpacePos_d = viewSpacePosAtPixelPosition(vpos + float2( 0.0,-1.0));
                half3 viewSpacePos_u = viewSpacePosAtPixelPosition(vpos + float2( 0.0, 1.0));

                // get the difference between the current and each offset position
                half3 hDeriv = viewSpacePos_r - viewSpacePos_l;
                half3 vDeriv = viewSpacePos_u - viewSpacePos_d;

                // get view space normal from the cross product of the diffs
                half3 viewNormal = normalize(cross(hDeriv, vDeriv));

                return viewNormal;
            }

        #elif defined(_RECONSTRUCTIONMETHOD_IMPROVED)

            // based on János Turánszki's Improved Normal Reconstruction
            // https://wickedengine.net/2019/09/22/improved-normal-reconstruction-from-depth/
            // this is a minor optimization over the original, using only 2 comparisons instead of 8
            // at the cost of two additional vector subtractions
            // sharpness of 3 tap with better handling of depth disparities
            // worse artifacts on convex edges than either 3 tap or 4 tap

            // unity's compiled fragment shader stats: 62 math, 5 tex
            half3 viewNormalAtPixelPosition(float2 vpos)
            {
                // get current pixel's view space position
                half3 viewSpacePos_c = viewSpacePosAtPixelPosition(vpos + float2( 0.0, 0.0));

                // get view space position at 1 pixel offsets in each major direction
                half3 viewSpacePos_l = viewSpacePosAtPixelPosition(vpos + float2(-1.0, 0.0));
                half3 viewSpacePos_r = viewSpacePosAtPixelPosition(vpos + float2( 1.0, 0.0));
                half3 viewSpacePos_d = viewSpacePosAtPixelPosition(vpos + float2( 0.0,-1.0));
                half3 viewSpacePos_u = viewSpacePosAtPixelPosition(vpos + float2( 0.0, 1.0));

                // get the difference between the current and each offset position
                half3 l = viewSpacePos_c - viewSpacePos_l;
                half3 r = viewSpacePos_r - viewSpacePos_c;
                half3 d = viewSpacePos_c - viewSpacePos_d;
                half3 u = viewSpacePos_u - viewSpacePos_c;

                // pick horizontal and vertical diff with the smallest z difference
                half3 hDeriv = abs(l.z) < abs(r.z) ? l : r;
                half3 vDeriv = abs(d.z) < abs(u.z) ? d : u;

                // get view space normal from the cross product of the two smallest offsets
                half3 viewNormal = normalize(cross(hDeriv, vDeriv));

                return viewNormal;
            }

        #elif defined(_RECONSTRUCTIONMETHOD_ACCURATE)

            // based on Yuwen Wu's Accurate Normal Reconstruction 
            // https://atyuwen.github.io/posts/normal-reconstruction/
            // basically as accurate as you can get!
            // no artifacts on depth disparities
            // no artifacts on edges
            // artifacts on triangles that are <3 pixels across

            // unity's compiled fragment shader stats: 66 math, 9 tex
            half3 viewNormalAtPixelPosition(float2 vpos)
            {
                // screen uv from vpos
                float2 uv = vpos * _CameraDepthTexture_TexelSize.xy;

                // current pixel's depth
                float c = getRawDepth(uv);

                // get current pixel's view space position
                half3 viewSpacePos_c = viewSpacePosAtScreenUV(uv);

                // get view space position at 1 pixel offsets in each major direction
                half3 viewSpacePos_l = viewSpacePosAtScreenUV(uv + float2(-1.0, 0.0) * _CameraDepthTexture_TexelSize.xy);
                half3 viewSpacePos_r = viewSpacePosAtScreenUV(uv + float2( 1.0, 0.0) * _CameraDepthTexture_TexelSize.xy);
                half3 viewSpacePos_d = viewSpacePosAtScreenUV(uv + float2( 0.0,-1.0) * _CameraDepthTexture_TexelSize.xy);
                half3 viewSpacePos_u = viewSpacePosAtScreenUV(uv + float2( 0.0, 1.0) * _CameraDepthTexture_TexelSize.xy);

                // get the difference between the current and each offset position
                half3 l = viewSpacePos_c - viewSpacePos_l;
                half3 r = viewSpacePos_r - viewSpacePos_c;
                half3 d = viewSpacePos_c - viewSpacePos_d;
                half3 u = viewSpacePos_u - viewSpacePos_c;

                // get depth values at 1 & 2 pixels offsets from current along the horizontal axis
                half4 H = half4(
                    getRawDepth(uv + float2(-1.0, 0.0) * _CameraDepthTexture_TexelSize.xy),
                    getRawDepth(uv + float2( 1.0, 0.0) * _CameraDepthTexture_TexelSize.xy),
                    getRawDepth(uv + float2(-2.0, 0.0) * _CameraDepthTexture_TexelSize.xy),
                    getRawDepth(uv + float2( 2.0, 0.0) * _CameraDepthTexture_TexelSize.xy)
                );

                // get depth values at 1 & 2 pixels offsets from current along the vertical axis
                half4 V = half4(
                    getRawDepth(uv + float2(0.0,-1.0) * _CameraDepthTexture_TexelSize.xy),
                    getRawDepth(uv + float2(0.0, 1.0) * _CameraDepthTexture_TexelSize.xy),
                    getRawDepth(uv + float2(0.0,-2.0) * _CameraDepthTexture_TexelSize.xy),
                    getRawDepth(uv + float2(0.0, 2.0) * _CameraDepthTexture_TexelSize.xy)
                );

                // current pixel's depth difference from slope of offset depth samples
                // differs from original article because we're using non-linear depth values
                // see article's comments
                half2 he = abs((2 * H.xy - H.zw) - c);
                half2 ve = abs((2 * V.xy - V.zw) - c);

                // pick horizontal and vertical diff with the smallest depth difference from slopes
                half3 hDeriv = he.x < he.y ? l : r;
                half3 vDeriv = ve.x < ve.y ? d : u;

                // get view space normal from the cross product of the best derivatives
                half3 viewNormal = normalize(cross(hDeriv, vDeriv));

                return viewNormal;
            }

        #endif


            half4 frag (v2f i) : SV_Target
            {
                UNITY_SETUP_STEREO_EYE_INDEX_POST_VERTEX(i);
                // get view space normal at the current pixel position
                half3 viewNormal = viewNormalAtPixelPosition(i.pos.xy);

                // transform normal from view space to world space
                half3 WorldNormal = mul((float3x3)unity_MatrixInvV, viewNormal);

                // visualize world position: note this only rotates into world
                // orientation, it still needs the camera's world position added
                // (see the follow-up comment below this one)
                float2 vpos = i.pos.xy;
                float2 uv = vpos * _CameraDepthTexture_TexelSize.xy;
                float3 viewSpacePos = viewSpacePosAtScreenUV(uv);
                half3 worldPos = mul((float3x3)unity_MatrixInvV, viewSpacePos);
                // early return for debugging; everything below is skipped
                return half4(worldPos, 1);

                //float3 viewSpaceRay = mul(unity_CameraInvProjection, float4(uv * 2.0 - 1.0, 1.0, 1.0) * _ProjectionParams.z);
                
                // alternative that should work when using this for post processing
                // we have to invert the view normal z because Unity's view space z is flipped
                // thus the above code using unity_MatrixInvV is doing this flip, but the 
                // unity_CameraToWorld does not flip the z, so we have to do it manually
                 //half3 WorldNormal = mul((float3x3)unity_CameraToWorld, viewNormal * half3(1.0, 1.0, -1.0));

                // visualize normal (assumes you're using linear space rendering)
                return half4(GammaToLinearSpace(WorldNormal.xyz * 0.5 + 0.5), 1.0);
            }
            ENDCG
        }
    }
}

@pajama commented Mar 24, 2022

Sorry for the onslaught, but I think I got it!
half3 worldPos = mul((float3x3)unity_MatrixInvV, viewSpacePos) + _WorldSpaceCameraPos;
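
Putting the thread's pieces together, a minimal sketch of the full world position reconstruction (reusing the gist's helpers; the final _WorldSpaceCameraPos add is the fix from this comment):

// world space position from the depth texture at a given pixel position
float3 worldSpacePosAtPixelPosition(float2 vpos)
{
    // view space position at this pixel (helper from the gist above)
    float3 viewSpacePos = viewSpacePosAtPixelPosition(vpos);

    // rotate from view to world orientation, then translate by the camera's
    // world position; the explicit add is needed because casting the matrix
    // to float3x3 drops its translation column
    return mul((float3x3)unity_MatrixInvV, viewSpacePos) + _WorldSpaceCameraPos;
}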

@pushmatrix

@bgolus Thanks for this writeup!

Do you happen to know how things change for projection matrices that are off center? In ARKit land the camera's projection matrix has a slightly offset left/right and top/bottom frustum, and it causes the normals to change slightly as the device rotates.

Thinking it has something to do with
float3 viewSpaceRay = mul(unity_CameraInvProjection, float4(uv * 2.0 - 1.0, 1.0, 1.0) * _ProjectionParams.z);

Thoughts?
