Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
SIMD quad bvh
#pragma once
#include <iostream>
#include <vector>
#include "common\reflectcuts.h"
#include "common\accel.h"
// require linearbvhnode
#include "accels\linearbvh.h"
#include "math\aabb.h"
#include <immintrin.h>
class alignas(128) QbvhNode4
{
public:
__m128 p[3][2]; // bbox
int child[4];
int fill[4];
inline __m128 intersectP(const Ray & r) const
{
__m128 t0 = _mm_set_ps1(static_cast<float>(r.tmin)), t1 = _mm_set_ps1(static_cast<float>(r.tmax));
__m128 invD[3] = {
_mm_set1_ps(1.0f / static_cast<float>(r.direction[0])),
_mm_set1_ps(1.0f / static_cast<float>(r.direction[1])),
_mm_set1_ps(1.0f / static_cast<float>(r.direction[2]))
};
__m128 origin[3] = {
_mm_set1_ps(static_cast<float>(r.origin[0])),
_mm_set1_ps(static_cast<float>(r.origin[1])),
_mm_set1_ps(static_cast<float>(r.origin[2]))
};
__m128 isInvalid = _mm_set1_ps(0);
for (size_t dim = 0;dim < 3;dim++)
{
// check if dir[dim] == 0 or not
if (r.direction[dim] != 0)
{
__m128 tNear = _mm_mul_ps(_mm_sub_ps(p[dim][0], origin[dim]), invD[dim]);
__m128 tFar = _mm_mul_ps(_mm_sub_ps(p[dim][1], origin[dim]), invD[dim]);
t0 = _mm_max_ps(_mm_min_ps(tNear, tFar), t0);
t1 = _mm_min_ps(_mm_max_ps(tNear, tFar), t1);
}
else
{
__m128 isLess = _mm_cmp_ps(origin[dim], p[dim][0], 1); // compare less than
__m128 isMore = _mm_cmp_ps(origin[dim], p[dim][1], 14); // compare greater than
__m128 isNotIntersect = _mm_or_ps(isLess, isMore); // outside aabb
isInvalid = _mm_or_ps(isInvalid, isNotIntersect);
}
}
__m128 result = _mm_cmp_ps(t0, t1, 2);
result = _mm_andnot_ps(isInvalid, result); // ~invalid && result = valid && result
return result;
}
};
class Qbvh4 : public Accel
{
public:
Qbvh4() {};
bool intersect(Intersection * isectPtr, const Ray & r) const override;
bool intersectP(const Ray & r) const override;
void build(const std::vector<shared_ptr<Shape>> & shapePtrs) override;
inline Aabb computeBbox() const override { return this->bound; }
private:
bool intersectBranchSimple(Intersection * isectPtr, const Ray & r, const size_t index) const;
bool intersectBranchP(const Ray & r, const size_t index) const;
int buildBranch(const std::vector<LinearBvhNode> & linearBvhNodes, int currentNodeIndex);
private:
Aabb bound;
std::vector<shared_ptr<Shape>> _mShapePtrs;
std::vector<QbvhNode4> _mQbvhNodes;
};
class QbvhNode8
{
public:
__m256 p[3][2];
int child[8];
inline __m256 intersect(const Ray & r) const
{
__m256 t0 = _mm256_set1_ps(static_cast<float>(r.tmin)), t1 = _mm256_set1_ps(static_cast<float>(r.tmax));
__m256 invD[3] = {
_mm256_set1_ps(1.0f / static_cast<float>(r.direction[0])),
_mm256_set1_ps(1.0f / static_cast<float>(r.direction[1])),
_mm256_set1_ps(1.0f / static_cast<float>(r.direction[2]))
};
__m256 origin[3] = {
_mm256_set1_ps(static_cast<float>(r.origin[0])),
_mm256_set1_ps(static_cast<float>(r.origin[1])),
_mm256_set1_ps(static_cast<float>(r.origin[2]))
};
for (size_t dim = 0;dim < 3;dim++)
{
__m256 ta = _mm256_mul_ps(_mm256_sub_ps(p[dim][0], origin[dim]), invD[dim]);
__m256 tb = _mm256_mul_ps(_mm256_sub_ps(p[dim][1], origin[dim]), invD[dim]);
t0 = _mm256_max_ps(_mm256_min_ps(ta, tb), t0);
t1 = _mm256_min_ps(_mm256_max_ps(ta, tb), t0);
}
return _mm256_cmp_ps(t0, t1, 2); // immediate mode = 2 cmp le
}
};
#include "accels\sbvh.h"
#include "accels\linearbvh.h"
#include "math\math.h"
#include "common\intersection.h"
bool Qbvh4::intersect(Intersection * isectPtr, const Ray & r) const
{
isectPtr->t = r.tmax;
if (!bound.intersectP(r))
{
return false;
}
return this->intersectBranchSimple(isectPtr, r, 0);
}
bool Qbvh4::intersectBranchSimple(Intersection * isectPtr, const Ray & r, const size_t index) const
{
const QbvhNode4 & currentNode = this->_mQbvhNodes[index];
__m128 result = currentNode.intersectP(r);
bool isIntersect = false;
for (size_t i = 0;i < 4;i++)
{
if (result.m128_f32[i] != 0) // if hit
{
if (currentNode.child[i] < 0) // if leaf
{
int32_t hitableIndex = -currentNode.child[i] - 1;
bool isHit = this->_mShapePtrs[hitableIndex]->intersect(isectPtr, Ray(r.origin, r.direction, r.tmin, isectPtr->t));
isIntersect = isHit || isIntersect;
}
else if (currentNode.child[i] > 0) // if node
{
bool isHit = intersectBranchSimple(isectPtr, Ray(r.origin, r.direction, r.tmin, isectPtr->t), currentNode.child[i]);
isIntersect = isHit || isIntersect;
}
}
}
return isIntersect;
}
bool Qbvh4::intersectP(const Ray & r) const
{
if (!bound.intersectP(r))
{
return false;
}
return this->intersectBranchP(r, 0);
}
bool Qbvh4::intersectBranchP(const Ray & r, const size_t index) const
{
const QbvhNode4 & currentNode = this->_mQbvhNodes[index];
__m128 result = currentNode.intersectP(r);
bool isIntersect = false;
for (size_t i = 0;i < 4;i++)
{
if (result.m128_f32[i] != 0) // if hit
{
if (currentNode.child[i] < 0) // if leaf
{
int32_t hitableIndex = -currentNode.child[i] - 1;
if (this->_mShapePtrs[hitableIndex]->intersectP(r))
{
return true;
}
}
else if (currentNode.child[i] > 0)
{
if (this->intersectBranchP(r, currentNode.child[i]))
{
return true;
}
}
}
}
return false;
}
void Qbvh4::build(const std::vector<shared_ptr<Shape>>& shapePtrs)
{
SplitBvh linearAccel;
linearAccel.build(shapePtrs);
this->bound = linearAccel.bound;
this->_mShapePtrs = linearAccel._mShapePtrs;
this->buildBranch(linearAccel._mBvhNodes, 0);
}
int Qbvh4::buildBranch(const std::vector<LinearBvhNode>& linearBvhNodes, int currentLinearNodeIndex)
{
const LinearBvhNode & currentNode = linearBvhNodes[currentLinearNodeIndex];
QbvhNode4 result;
// gather all info
int linearChildIndices[4];
Aabb linearChildAabbs[4];
size_t child4Count = 0;
for (size_t i = 0;i < 2;i++)
{
int c1Index = currentNode.child[i];
if (c1Index > 0) // is leaf
{
for (size_t j = 0;j < 2;j++)
{
linearChildIndices[child4Count] = linearBvhNodes[c1Index].child[j];
linearChildAabbs[child4Count] = linearBvhNodes[c1Index].bbox[j];
child4Count++;
}
}
else
{
linearChildIndices[child4Count] = c1Index;
linearChildAabbs[child4Count] = currentNode.bbox[i];
child4Count++;
}
}
// restructure bbox for __m128
for (size_t i = 0;i < child4Count;i++)
{
for (size_t dim = 0;dim < 3;dim++)
{
result.p[dim][0].m128_f32[i] = static_cast<float>(linearChildAabbs[i].pMin[dim]);
result.p[dim][1].m128_f32[i] = static_cast<float>(linearChildAabbs[i].pMax[dim]);
}
}
// init empty node in result
for (size_t i = child4Count;i < 4;i++)
{
result.child[i] = 0;
}
// build 4 nodes
size_t currentNodeIndex = this->_mQbvhNodes.size();
this->_mQbvhNodes.push_back(result);
int32_t resultIndex = static_cast<int32_t>(currentNodeIndex + 1);
for (size_t i = 0;i < child4Count;i++)
{
if (linearChildIndices[i] <= 0) // is leaf
{
this->_mQbvhNodes[currentNodeIndex].child[i] = linearChildIndices[i];
}
else
{
this->_mQbvhNodes[currentNodeIndex].child[i] = resultIndex;
resultIndex = this->buildBranch(linearBvhNodes, linearChildIndices[i]);
}
}
return resultIndex;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment