Hurleyworks/gist:6fb983964304496d3b9eee800e639fd4

## gistfile1.txt
// tracing algorithms. Yocto/Shading is implemented in `yocto_shading.h`.
//

//
// LICENSE:
//
// Copyright (c) 2016 -- 2020 Fabio Pellacini
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
//
#pragma once

 static constexpr float Pi = 3.14159265358979323846f;

// yocto converted to cuda

CUDA_DEVICE_FUNCTION float sum (const float3& a) { return a.x + a.y + a.z; }
CUDA_DEVICE_FUNCTION float mean (const float3& a) { return sum (a) / 3; }

template <typename T>
CUDA_DEVICE_FUNCTION T lerp (const T& v0, const T& v1, float t)
{
    return (1 - t) * v0 + t * v1;
}


CUDA_DEVICE_FUNCTION float max (const float3& a)
{
    return max (max (a.x, a.y), a.z);
}

CUDA_DEVICE_FUNCTION float3 log (const float3& a)
{
    return {log (a.x), log (a.y), log (a.z)};
}
CUDA_DEVICE_FUNCTION float clamp (float a, float min_, float max_)
{
    return min (max (a, min_), max_);
}

CUDA_DEVICE_FUNCTION float3 clamp (const float3& x, float min, float max)
{
    return make_float3 (clamp (x.x, min, max), clamp (x.y, min, max), clamp (x.z, min, max));
}

CUDA_DEVICE_FUNCTION float3 operator- (float a, const float3& b)
{
    return make_float3 (a - b.x, a - b.y, a - b.z);
}
CUDA_DEVICE_FUNCTION float3 operator- (const float3& a, float b)
{
    return make_float3 (a.x - b, a.y - b, a.z - b);
}

CUDA_DEVICE_FUNCTION float3 Sqrt (const float3& a)
{
    return make_float3 (std::sqrt (a.x), sqrt (a.y), sqrt (a.z));
}

CUDA_DEVICE_FUNCTION float3 operator+ (const float3& a, float b)
{
    return make_float3 (a.x + b, a.y + b, a.z + b);
}

CUDA_DEVICE_FUNCTION float3 operator+ (float a, const float3& b)
{
    return make_float3 (a + b.x, a + b.y, a + b.z);
}

CUDA_DEVICE_FUNCTION float3 operator/ (float a, const float3& b)
{
    return {a / b.x, a / b.y, a / b.z};
}

CUDA_DEVICE_FUNCTION float3 lerp (const float3& a, const float3& b, const float3& u)
{
    return a * (1 - u) + b * u;
}

//CUDA_DEVICE_FUNCTION bool operator== (const float3& v0, const float3& v1)
//{
//    return v0.x == v1.x && v0.y == v1.y && v0.z == v1.z;
//}

// Convert eta to reflectivity
CUDA_DEVICE_FUNCTION float3 eta_to_reflectivity (const float3& eta)
{
    return ((eta - 1) * (eta - 1)) / ((eta + 1) * (eta + 1));
}

// Reflected and refracted vector.
CUDA_DEVICE_FUNCTION float3 reflect (const float3& w, const float3& n)
{
    return -w + 2 * dot (n, w) * n;
}

// Schlick approximation of the Fresnel term
CUDA_DEVICE_FUNCTION float3 fresnel_schlick (
    const float3& specular, const float3& normal, const float3& outgoing)
{
    if (specular == float3{0, 0, 0}) return {0, 0, 0};
    auto cosine = dot (normal, outgoing);
    return specular +
           (1 - specular) * pow (clamp (1 - abs (cosine), 0.0f, 1.0f), 5.0f);
}

// Evaluate microfacet distribution
CUDA_DEVICE_FUNCTION float microfacet_distribution (
    float roughness, const float3& normal, const float3& halfway, bool ggx)
{
    // https://google.github.io/filament/Filament.html#materialsystem/specularbrdf
    // http://graphicrants.blogspot.com/2013/08/specular-brdf-reference.html
    auto cosine = dot (normal, halfway);
    if (cosine <= 0) return 0;
    auto roughness2 = roughness * roughness;
    auto cosine2 = cosine * cosine;
    if (ggx)
    {
        return roughness2 / (Pi * (cosine2 * roughness2 + 1 - cosine2) *
                             (cosine2 * roughness2 + 1 - cosine2));
    }
    else
    {
        return exp ((cosine2 - 1) / (roughness2 * cosine2)) /
               (Pi * roughness2 * cosine2 * cosine2);
    }
}

// Evaluate the microfacet shadowing1
CUDA_DEVICE_FUNCTION float microfacet_shadowing1 (float roughness, const float3& normal,
                                                  const float3& halfway, const float3& direction, bool ggx)
{
    // https://google.github.io/filament/Filament.html#materialsystem/specularbrdf
    // http://graphicrants.blogspot.com/2013/08/specular-brdf-reference.html
    // https://github.com/KhronosGroup/glTF/tree/master/specification/2.0#appendix-b-brdf-implementation
    auto cosine = dot (normal, direction);
    auto cosineh = dot (halfway, direction);
    if (cosine * cosineh <= 0) return 0;
    auto roughness2 = roughness * roughness;
    auto cosine2 = cosine * cosine;
    if (ggx)
    {
        return 2 * abs (cosine) /
               (abs (cosine) + sqrt (cosine2 - roughness2 * cosine2 + roughness2));
    }
    else
    {
        auto ci = abs (cosine) / (roughness * sqrt (1 - cosine2));
        return ci < 1.6f ? (3.535f * ci + 2.181f * ci * ci) /
                               (1.0f + 2.276f * ci + 2.577f * ci * ci)
                         : 1.0f;
    }
}

// Evaluate microfacet shadowing
CUDA_DEVICE_FUNCTION float microfacet_shadowing (float roughness, const float3& normal,
                                                 const float3& halfway, const float3& outgoing, const float3& incoming,
                                                 bool ggx)
{
    return microfacet_shadowing1 (roughness, normal, halfway, outgoing, ggx) *
           microfacet_shadowing1 (roughness, normal, halfway, incoming, ggx);
}

CUDA_DEVICE_FUNCTION float3 transform_vector (const Matrix3x3& a, const float3& b)
{
    return a * b;
}

CUDA_DEVICE_FUNCTION float3 transform_direction (const Matrix3x3& a, const float3& b)
{
    return normalize (transform_vector (a, b));
}

// Check if on the same side of the hemisphere
CUDA_DEVICE_FUNCTION bool same_hemisphere (
    const float3& normal, const float3& outgoing, const float3& incoming)
{
    return dot (normal, outgoing) * dot (normal, incoming) >= 0;
}

// Constructs a basis from a direction
CUDA_DEVICE_FUNCTION Matrix3x3 basis_fromz (const float3& v)
{
    // https://graphics.pixar.com/library/OrthonormalB/paper.pdf
    auto z = normalize (v);
    auto sign = copysignf (1.0f, z.z);
    auto a = -1.0f / (sign + z.z);
    auto b = z.x * z.y * a;
    auto x = float3{1.0f + sign * z.x * z.x * a, sign * b, -sign * z.x};
    auto y = float3{b, sign + z.y * z.y * a, -z.y};
    return Matrix3x3 (x, y, z);
};

// Sample an hemispherical direction with cosine distribution.
CUDA_DEVICE_FUNCTION float3 sample_hemisphere_cos (const float3& normal, const float2& ruv)
{
    auto z = sqrt (ruv.y);
    auto r = sqrt (1 - z * z);
    auto phi = 2 * Pi * ruv.x;
    auto local_direction = float3{r * cos (phi), r * sin (phi), z};
    return transform_direction (basis_fromz (normal), local_direction);
}

// Sample a microfacet ditribution.
CUDA_DEVICE_FUNCTION float3 sample_microfacet (
    float roughness, const float3& normal, const float2& rn, bool ggx = true)
{
    auto phi = 2 * Pi * rn.x;
    auto theta = 0.0f;
    if (ggx)
    {
        theta = atan (roughness * sqrt (rn.y / (1 - rn.y)));
    }
    else
    {
        auto roughness2 = roughness * roughness;
        theta = atan (sqrt (-roughness2 * log (1 - rn.y)));
    }
    auto local_half_vector = float3{
        cos (phi) * sin (theta), sin (phi) * sin (theta), cos (theta)};
    return transform_direction (basis_fromz (normal), local_half_vector);
}

CUDA_DEVICE_FUNCTION float3 sample_gltfpbr (const float3& color, float ior, float roughness,
                                            float metallic, const float3& normal, const float3& outgoing, float rnl,
                                            const float2& rn)
{
    auto up_normal = dot (normal, outgoing) <= 0 ? -normal : normal;

    auto reflectivity = lerp (
        eta_to_reflectivity (float3{ior, ior, ior}), color, metallic);
    if (rnl < mean (fresnel_schlick (reflectivity, up_normal, outgoing)))
    {
        auto halfway = sample_microfacet (roughness, up_normal, rn);
        auto incoming = reflect (outgoing, halfway);
        if (!same_hemisphere (up_normal, outgoing, incoming)) return {0, 0, 0};
        return incoming;
    }
    else
    {
        return sample_hemisphere_cos (up_normal, rn);
    }
}

CUDA_DEVICE_FUNCTION float3 eval_gltfpbr (const float3& color, float ior, float roughness,
                                          float metallic, const float3& normal, const float3& outgoing,
                                          const float3& incoming)
{
    if (dot (normal, incoming) * dot (normal, outgoing) <= 0) return {0, 0, 0};
    auto reflectivity = lerp (
        eta_to_reflectivity (float3{ior, ior, ior}), color, metallic);

    auto up_normal = dot (normal, outgoing) <= 0 ? -normal : normal;
    auto F1 = fresnel_schlick (reflectivity, up_normal, outgoing);
    auto halfway = normalize (incoming + outgoing);
    auto F = fresnel_schlick (reflectivity, halfway, incoming);
    auto D = microfacet_distribution (roughness, up_normal, halfway, true);
    auto G = microfacet_shadowing (
        roughness, up_normal, halfway, outgoing, incoming, true);

    return color * (1 - metallic) * (1 - F1) / Pi *
               abs (dot (up_normal, incoming)) +
           F * D * G / (4 * dot (up_normal, outgoing) * dot (up_normal, incoming)) *
               abs (dot (up_normal, incoming));
}

CUDA_DEVICE_FUNCTION float sample_hemisphere_cos_pdf (
    const float3& normal, const float3& direction)
{
    auto cosw = dot (normal, direction);
    return (cosw <= 0) ? 0 : cosw / Pi;
}

// Pdf for microfacet distribution sampling.
CUDA_DEVICE_FUNCTION float sample_microfacet_pdf (
    float roughness, const float3& normal, const float3& halfway, bool ggx)
{
    auto cosine = dot (normal, halfway);
    if (cosine < 0) return 0;
    return microfacet_distribution (roughness, normal, halfway, ggx) * cosine;
}

// Pdf for specular BRDF lobe sampling.
CUDA_DEVICE_FUNCTION float sample_gltfpbr_pdf (const float3& color, float ior, float roughness,
                                               float metallic, const float3& normal, const float3& outgoing,
                                               const float3& incoming)
{
    if (dot (normal, incoming) * dot (normal, outgoing) <= 0) return 0;
    auto up_normal = dot (normal, outgoing) <= 0 ? -normal : normal;
    auto halfway = normalize (outgoing + incoming);
    auto reflectivity = lerp (
        eta_to_reflectivity (float3{ior, ior, ior}), color, metallic);
    auto F = mean (fresnel_schlick (reflectivity, up_normal, outgoing));
    return F * sample_microfacet_pdf (roughness, up_normal, halfway, true) /
               (4 * abs (dot (outgoing, halfway))) +
           (1 - F) * sample_hemisphere_cos_pdf (up_normal, incoming);
}

// ( 0, 0,  1) <=> phi:      0
// (-1, 0,  0) <=> phi: 1/2 pi
// ( 0, 0, -1) <=> phi:   1 pi
// ( 1, 0,  0) <=> phi: 3/2 pi
CUDA_DEVICE_FUNCTION float3 fromPolarYUp (float phi, float theta)
{
    float sinPhi, cosPhi;
    float sinTheta, cosTheta;
    sincosf (phi, &sinPhi, &cosPhi);
    sincosf (theta, &sinTheta, &cosTheta);
    return make_float3 (-sinPhi * sinTheta, cosTheta, cosPhi * sinTheta);
}
CUDA_DEVICE_FUNCTION void toPolarYUp (const float3& v, float* phi, float* theta)
{
    *theta = std::acos (min (max (v.y, -1.0f), 1.0f));
    *phi = std::fmod (std::atan2 (-v.x, v.z) + 2 * Pi,
                      2 * Pi);
}
	// tracing algorithms. Yocto/Shading is implemented in `yocto_shading.h`.
	//

	//
	// LICENSE:
	//
	// Copyright (c) 2016 -- 2020 Fabio Pellacini
	//
	// Permission is hereby granted, free of charge, to any person obtaining a copy
	// of this software and associated documentation files (the "Software"), to deal
	// in the Software without restriction, including without limitation the rights
	// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
	// copies of the Software, and to permit persons to whom the Software is
	// furnished to do so, subject to the following conditions:
	//
	// The above copyright notice and this permission notice shall be included in
	// all copies or substantial portions of the Software.
	//
	// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
	// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
	// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
	// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
	// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
	// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
	// SOFTWARE.
	//
	#pragma once

	static constexpr float Pi = 3.14159265358979323846f;

	// yocto converted to cuda

	CUDA_DEVICE_FUNCTION float sum (const float3& a) { return a.x + a.y + a.z; }
	CUDA_DEVICE_FUNCTION float mean (const float3& a) { return sum (a) / 3; }

	template <typename T>
	CUDA_DEVICE_FUNCTION T lerp (const T& v0, const T& v1, float t)
	{
	return (1 - t) * v0 + t * v1;
	}


	CUDA_DEVICE_FUNCTION float max (const float3& a)
	{
	return max (max (a.x, a.y), a.z);
	}

	CUDA_DEVICE_FUNCTION float3 log (const float3& a)
	{
	return {log (a.x), log (a.y), log (a.z)};
	}
	CUDA_DEVICE_FUNCTION float clamp (float a, float min_, float max_)
	{
	return min (max (a, min_), max_);
	}

	CUDA_DEVICE_FUNCTION float3 clamp (const float3& x, float min, float max)
	{
	return make_float3 (clamp (x.x, min, max), clamp (x.y, min, max), clamp (x.z, min, max));
	}

	CUDA_DEVICE_FUNCTION float3 operator- (float a, const float3& b)
	{
	return make_float3 (a - b.x, a - b.y, a - b.z);
	}
	CUDA_DEVICE_FUNCTION float3 operator- (const float3& a, float b)
	{
	return make_float3 (a.x - b, a.y - b, a.z - b);
	}

	CUDA_DEVICE_FUNCTION float3 Sqrt (const float3& a)
	{
	return make_float3 (std::sqrt (a.x), sqrt (a.y), sqrt (a.z));
	}

	CUDA_DEVICE_FUNCTION float3 operator+ (const float3& a, float b)
	{
	return make_float3 (a.x + b, a.y + b, a.z + b);
	}

	CUDA_DEVICE_FUNCTION float3 operator+ (float a, const float3& b)
	{
	return make_float3 (a + b.x, a + b.y, a + b.z);
	}

	CUDA_DEVICE_FUNCTION float3 operator/ (float a, const float3& b)
	{
	return {a / b.x, a / b.y, a / b.z};
	}

	CUDA_DEVICE_FUNCTION float3 lerp (const float3& a, const float3& b, const float3& u)
	{
	return a * (1 - u) + b * u;
	}

	//CUDA_DEVICE_FUNCTION bool operator== (const float3& v0, const float3& v1)
	//{
	// return v0.x == v1.x && v0.y == v1.y && v0.z == v1.z;
	//}

	// Convert eta to reflectivity
	CUDA_DEVICE_FUNCTION float3 eta_to_reflectivity (const float3& eta)
	{
	return ((eta - 1) * (eta - 1)) / ((eta + 1) * (eta + 1));
	}

	// Reflected and refracted vector.
	CUDA_DEVICE_FUNCTION float3 reflect (const float3& w, const float3& n)
	{
	return -w + 2 * dot (n, w) * n;
	}

	// Schlick approximation of the Fresnel term
	CUDA_DEVICE_FUNCTION float3 fresnel_schlick (
	const float3& specular, const float3& normal, const float3& outgoing)
	{
	if (specular == float3{0, 0, 0}) return {0, 0, 0};
	auto cosine = dot (normal, outgoing);
	return specular +
	(1 - specular) * pow (clamp (1 - abs (cosine), 0.0f, 1.0f), 5.0f);
	}

	// Evaluate microfacet distribution
	CUDA_DEVICE_FUNCTION float microfacet_distribution (
	float roughness, const float3& normal, const float3& halfway, bool ggx)
	{
	// https://google.github.io/filament/Filament.html#materialsystem/specularbrdf
	// http://graphicrants.blogspot.com/2013/08/specular-brdf-reference.html
	auto cosine = dot (normal, halfway);
	if (cosine <= 0) return 0;
	auto roughness2 = roughness * roughness;
	auto cosine2 = cosine * cosine;
	if (ggx)
	{
	return roughness2 / (Pi * (cosine2 * roughness2 + 1 - cosine2) *
	(cosine2 * roughness2 + 1 - cosine2));
	}
	else
	{
	return exp ((cosine2 - 1) / (roughness2 * cosine2)) /
	(Pi * roughness2 * cosine2 * cosine2);
	}
	}

	// Evaluate the microfacet shadowing1
	CUDA_DEVICE_FUNCTION float microfacet_shadowing1 (float roughness, const float3& normal,
	const float3& halfway, const float3& direction, bool ggx)
	{
	// https://google.github.io/filament/Filament.html#materialsystem/specularbrdf
	// http://graphicrants.blogspot.com/2013/08/specular-brdf-reference.html
	// https://github.com/KhronosGroup/glTF/tree/master/specification/2.0#appendix-b-brdf-implementation
	auto cosine = dot (normal, direction);
	auto cosineh = dot (halfway, direction);
	if (cosine * cosineh <= 0) return 0;
	auto roughness2 = roughness * roughness;
	auto cosine2 = cosine * cosine;
	if (ggx)
	{
	return 2 * abs (cosine) /
	(abs (cosine) + sqrt (cosine2 - roughness2 * cosine2 + roughness2));
	}
	else
	{
	auto ci = abs (cosine) / (roughness * sqrt (1 - cosine2));
	return ci < 1.6f ? (3.535f * ci + 2.181f * ci * ci) /
	(1.0f + 2.276f * ci + 2.577f * ci * ci)
	: 1.0f;
	}
	}

	// Evaluate microfacet shadowing
	CUDA_DEVICE_FUNCTION float microfacet_shadowing (float roughness, const float3& normal,
	const float3& halfway, const float3& outgoing, const float3& incoming,
	bool ggx)
	{
	return microfacet_shadowing1 (roughness, normal, halfway, outgoing, ggx) *
	microfacet_shadowing1 (roughness, normal, halfway, incoming, ggx);
	}

	CUDA_DEVICE_FUNCTION float3 transform_vector (const Matrix3x3& a, const float3& b)
	{
	return a * b;
	}

	CUDA_DEVICE_FUNCTION float3 transform_direction (const Matrix3x3& a, const float3& b)
	{
	return normalize (transform_vector (a, b));
	}

	// Check if on the same side of the hemisphere
	CUDA_DEVICE_FUNCTION bool same_hemisphere (
	const float3& normal, const float3& outgoing, const float3& incoming)
	{
	return dot (normal, outgoing) * dot (normal, incoming) >= 0;
	}

	// Constructs a basis from a direction
	CUDA_DEVICE_FUNCTION Matrix3x3 basis_fromz (const float3& v)
	{
	// https://graphics.pixar.com/library/OrthonormalB/paper.pdf
	auto z = normalize (v);
	auto sign = copysignf (1.0f, z.z);
	auto a = -1.0f / (sign + z.z);
	auto b = z.x * z.y * a;
	auto x = float3{1.0f + sign * z.x * z.x * a, sign * b, -sign * z.x};
	auto y = float3{b, sign + z.y * z.y * a, -z.y};
	return Matrix3x3 (x, y, z);
	};

	// Sample an hemispherical direction with cosine distribution.
	CUDA_DEVICE_FUNCTION float3 sample_hemisphere_cos (const float3& normal, const float2& ruv)
	{
	auto z = sqrt (ruv.y);
	auto r = sqrt (1 - z * z);
	auto phi = 2 * Pi * ruv.x;
	auto local_direction = float3{r * cos (phi), r * sin (phi), z};
	return transform_direction (basis_fromz (normal), local_direction);
	}

	// Sample a microfacet ditribution.
	CUDA_DEVICE_FUNCTION float3 sample_microfacet (
	float roughness, const float3& normal, const float2& rn, bool ggx = true)
	{
	auto phi = 2 * Pi * rn.x;
	auto theta = 0.0f;
	if (ggx)
	{
	theta = atan (roughness * sqrt (rn.y / (1 - rn.y)));
	}
	else
	{
	auto roughness2 = roughness * roughness;
	theta = atan (sqrt (-roughness2 * log (1 - rn.y)));
	}
	auto local_half_vector = float3{
	cos (phi) * sin (theta), sin (phi) * sin (theta), cos (theta)};
	return transform_direction (basis_fromz (normal), local_half_vector);
	}

	CUDA_DEVICE_FUNCTION float3 sample_gltfpbr (const float3& color, float ior, float roughness,
	float metallic, const float3& normal, const float3& outgoing, float rnl,
	const float2& rn)
	{
	auto up_normal = dot (normal, outgoing) <= 0 ? -normal : normal;

	auto reflectivity = lerp (
	eta_to_reflectivity (float3{ior, ior, ior}), color, metallic);
	if (rnl < mean (fresnel_schlick (reflectivity, up_normal, outgoing)))
	{
	auto halfway = sample_microfacet (roughness, up_normal, rn);
	auto incoming = reflect (outgoing, halfway);
	if (!same_hemisphere (up_normal, outgoing, incoming)) return {0, 0, 0};
	return incoming;
	}
	else
	{
	return sample_hemisphere_cos (up_normal, rn);
	}
	}

	CUDA_DEVICE_FUNCTION float3 eval_gltfpbr (const float3& color, float ior, float roughness,
	float metallic, const float3& normal, const float3& outgoing,
	const float3& incoming)
	{
	if (dot (normal, incoming) * dot (normal, outgoing) <= 0) return {0, 0, 0};
	auto reflectivity = lerp (
	eta_to_reflectivity (float3{ior, ior, ior}), color, metallic);

	auto up_normal = dot (normal, outgoing) <= 0 ? -normal : normal;
	auto F1 = fresnel_schlick (reflectivity, up_normal, outgoing);
	auto halfway = normalize (incoming + outgoing);
	auto F = fresnel_schlick (reflectivity, halfway, incoming);
	auto D = microfacet_distribution (roughness, up_normal, halfway, true);
	auto G = microfacet_shadowing (
	roughness, up_normal, halfway, outgoing, incoming, true);

	return color * (1 - metallic) * (1 - F1) / Pi *
	abs (dot (up_normal, incoming)) +
	F * D * G / (4 * dot (up_normal, outgoing) * dot (up_normal, incoming)) *
	abs (dot (up_normal, incoming));
	}

	CUDA_DEVICE_FUNCTION float sample_hemisphere_cos_pdf (
	const float3& normal, const float3& direction)
	{
	auto cosw = dot (normal, direction);
	return (cosw <= 0) ? 0 : cosw / Pi;
	}

	// Pdf for microfacet distribution sampling.
	CUDA_DEVICE_FUNCTION float sample_microfacet_pdf (
	float roughness, const float3& normal, const float3& halfway, bool ggx)
	{
	auto cosine = dot (normal, halfway);
	if (cosine < 0) return 0;
	return microfacet_distribution (roughness, normal, halfway, ggx) * cosine;
	}

	// Pdf for specular BRDF lobe sampling.
	CUDA_DEVICE_FUNCTION float sample_gltfpbr_pdf (const float3& color, float ior, float roughness,
	float metallic, const float3& normal, const float3& outgoing,
	const float3& incoming)
	{
	if (dot (normal, incoming) * dot (normal, outgoing) <= 0) return 0;
	auto up_normal = dot (normal, outgoing) <= 0 ? -normal : normal;
	auto halfway = normalize (outgoing + incoming);
	auto reflectivity = lerp (
	eta_to_reflectivity (float3{ior, ior, ior}), color, metallic);
	auto F = mean (fresnel_schlick (reflectivity, up_normal, outgoing));
	return F * sample_microfacet_pdf (roughness, up_normal, halfway, true) /
	(4 * abs (dot (outgoing, halfway))) +
	(1 - F) * sample_hemisphere_cos_pdf (up_normal, incoming);
	}

	// ( 0, 0, 1) <=> phi: 0
	// (-1, 0, 0) <=> phi: 1/2 pi
	// ( 0, 0, -1) <=> phi: 1 pi
	// ( 1, 0, 0) <=> phi: 3/2 pi
	CUDA_DEVICE_FUNCTION float3 fromPolarYUp (float phi, float theta)
	{
	float sinPhi, cosPhi;
	float sinTheta, cosTheta;
	sincosf (phi, &sinPhi, &cosPhi);
	sincosf (theta, &sinTheta, &cosTheta);
	return make_float3 (-sinPhi * sinTheta, cosTheta, cosPhi * sinTheta);
	}
	CUDA_DEVICE_FUNCTION void toPolarYUp (const float3& v, float* phi, float* theta)
	{
	*theta = std::acos (min (max (v.y, -1.0f), 1.0f));
	phi = std::fmod (std::atan2 (-v.x, v.z) + 2 Pi,
	2 * Pi);
	}