Created
June 20, 2021 20:28
-
-
Save jamornsriwasansak/0c4d09787da0d3c97bd8a1272f5d8ab7 to your computer and use it in GitHub Desktop.
SIMD quad bvh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#pragma once | |
#include <iostream> | |
#include <vector> | |
#include "common\reflectcuts.h" | |
#include "common\accel.h" | |
// require linearbvhnode | |
#include "accels\linearbvh.h" | |
#include "math\aabb.h" | |
#include <immintrin.h> | |
class alignas(128) QbvhNode4 | |
{ | |
public: | |
__m128 p[3][2]; // bbox | |
int child[4]; | |
int fill[4]; | |
inline __m128 intersectP(const Ray & r) const | |
{ | |
__m128 t0 = _mm_set_ps1(static_cast<float>(r.tmin)), t1 = _mm_set_ps1(static_cast<float>(r.tmax)); | |
__m128 invD[3] = { | |
_mm_set1_ps(1.0f / static_cast<float>(r.direction[0])), | |
_mm_set1_ps(1.0f / static_cast<float>(r.direction[1])), | |
_mm_set1_ps(1.0f / static_cast<float>(r.direction[2])) | |
}; | |
__m128 origin[3] = { | |
_mm_set1_ps(static_cast<float>(r.origin[0])), | |
_mm_set1_ps(static_cast<float>(r.origin[1])), | |
_mm_set1_ps(static_cast<float>(r.origin[2])) | |
}; | |
__m128 isInvalid = _mm_set1_ps(0); | |
for (size_t dim = 0;dim < 3;dim++) | |
{ | |
// check if dir[dim] == 0 or not | |
if (r.direction[dim] != 0) | |
{ | |
__m128 tNear = _mm_mul_ps(_mm_sub_ps(p[dim][0], origin[dim]), invD[dim]); | |
__m128 tFar = _mm_mul_ps(_mm_sub_ps(p[dim][1], origin[dim]), invD[dim]); | |
t0 = _mm_max_ps(_mm_min_ps(tNear, tFar), t0); | |
t1 = _mm_min_ps(_mm_max_ps(tNear, tFar), t1); | |
} | |
else | |
{ | |
__m128 isLess = _mm_cmp_ps(origin[dim], p[dim][0], 1); // compare less than | |
__m128 isMore = _mm_cmp_ps(origin[dim], p[dim][1], 14); // compare greater than | |
__m128 isNotIntersect = _mm_or_ps(isLess, isMore); // outside aabb | |
isInvalid = _mm_or_ps(isInvalid, isNotIntersect); | |
} | |
} | |
__m128 result = _mm_cmp_ps(t0, t1, 2); | |
result = _mm_andnot_ps(isInvalid, result); // ~invalid && result = valid && result | |
return result; | |
} | |
}; | |
class Qbvh4 : public Accel | |
{ | |
public: | |
Qbvh4() {}; | |
bool intersect(Intersection * isectPtr, const Ray & r) const override; | |
bool intersectP(const Ray & r) const override; | |
void build(const std::vector<shared_ptr<Shape>> & shapePtrs) override; | |
inline Aabb computeBbox() const override { return this->bound; } | |
private: | |
bool intersectBranchSimple(Intersection * isectPtr, const Ray & r, const size_t index) const; | |
bool intersectBranchP(const Ray & r, const size_t index) const; | |
int buildBranch(const std::vector<LinearBvhNode> & linearBvhNodes, int currentNodeIndex); | |
private: | |
Aabb bound; | |
std::vector<shared_ptr<Shape>> _mShapePtrs; | |
std::vector<QbvhNode4> _mQbvhNodes; | |
}; | |
class QbvhNode8 | |
{ | |
public: | |
__m256 p[3][2]; | |
int child[8]; | |
inline __m256 intersect(const Ray & r) const | |
{ | |
__m256 t0 = _mm256_set1_ps(static_cast<float>(r.tmin)), t1 = _mm256_set1_ps(static_cast<float>(r.tmax)); | |
__m256 invD[3] = { | |
_mm256_set1_ps(1.0f / static_cast<float>(r.direction[0])), | |
_mm256_set1_ps(1.0f / static_cast<float>(r.direction[1])), | |
_mm256_set1_ps(1.0f / static_cast<float>(r.direction[2])) | |
}; | |
__m256 origin[3] = { | |
_mm256_set1_ps(static_cast<float>(r.origin[0])), | |
_mm256_set1_ps(static_cast<float>(r.origin[1])), | |
_mm256_set1_ps(static_cast<float>(r.origin[2])) | |
}; | |
for (size_t dim = 0;dim < 3;dim++) | |
{ | |
__m256 ta = _mm256_mul_ps(_mm256_sub_ps(p[dim][0], origin[dim]), invD[dim]); | |
__m256 tb = _mm256_mul_ps(_mm256_sub_ps(p[dim][1], origin[dim]), invD[dim]); | |
t0 = _mm256_max_ps(_mm256_min_ps(ta, tb), t0); | |
t1 = _mm256_min_ps(_mm256_max_ps(ta, tb), t0); | |
} | |
return _mm256_cmp_ps(t0, t1, 2); // immediate mode = 2 cmp le | |
} | |
}; | |
#include "accels\sbvh.h" | |
#include "accels\linearbvh.h" | |
#include "math\math.h" | |
#include "common\intersection.h" | |
bool Qbvh4::intersect(Intersection * isectPtr, const Ray & r) const | |
{ | |
isectPtr->t = r.tmax; | |
if (!bound.intersectP(r)) | |
{ | |
return false; | |
} | |
return this->intersectBranchSimple(isectPtr, r, 0); | |
} | |
bool Qbvh4::intersectBranchSimple(Intersection * isectPtr, const Ray & r, const size_t index) const | |
{ | |
const QbvhNode4 & currentNode = this->_mQbvhNodes[index]; | |
__m128 result = currentNode.intersectP(r); | |
bool isIntersect = false; | |
for (size_t i = 0;i < 4;i++) | |
{ | |
if (result.m128_f32[i] != 0) // if hit | |
{ | |
if (currentNode.child[i] < 0) // if leaf | |
{ | |
int32_t hitableIndex = -currentNode.child[i] - 1; | |
bool isHit = this->_mShapePtrs[hitableIndex]->intersect(isectPtr, Ray(r.origin, r.direction, r.tmin, isectPtr->t)); | |
isIntersect = isHit || isIntersect; | |
} | |
else if (currentNode.child[i] > 0) // if node | |
{ | |
bool isHit = intersectBranchSimple(isectPtr, Ray(r.origin, r.direction, r.tmin, isectPtr->t), currentNode.child[i]); | |
isIntersect = isHit || isIntersect; | |
} | |
} | |
} | |
return isIntersect; | |
} | |
bool Qbvh4::intersectP(const Ray & r) const | |
{ | |
if (!bound.intersectP(r)) | |
{ | |
return false; | |
} | |
return this->intersectBranchP(r, 0); | |
} | |
bool Qbvh4::intersectBranchP(const Ray & r, const size_t index) const | |
{ | |
const QbvhNode4 & currentNode = this->_mQbvhNodes[index]; | |
__m128 result = currentNode.intersectP(r); | |
bool isIntersect = false; | |
for (size_t i = 0;i < 4;i++) | |
{ | |
if (result.m128_f32[i] != 0) // if hit | |
{ | |
if (currentNode.child[i] < 0) // if leaf | |
{ | |
int32_t hitableIndex = -currentNode.child[i] - 1; | |
if (this->_mShapePtrs[hitableIndex]->intersectP(r)) | |
{ | |
return true; | |
} | |
} | |
else if (currentNode.child[i] > 0) | |
{ | |
if (this->intersectBranchP(r, currentNode.child[i])) | |
{ | |
return true; | |
} | |
} | |
} | |
} | |
return false; | |
} | |
void Qbvh4::build(const std::vector<shared_ptr<Shape>>& shapePtrs) | |
{ | |
SplitBvh linearAccel; | |
linearAccel.build(shapePtrs); | |
this->bound = linearAccel.bound; | |
this->_mShapePtrs = linearAccel._mShapePtrs; | |
this->buildBranch(linearAccel._mBvhNodes, 0); | |
} | |
int Qbvh4::buildBranch(const std::vector<LinearBvhNode>& linearBvhNodes, int currentLinearNodeIndex) | |
{ | |
const LinearBvhNode & currentNode = linearBvhNodes[currentLinearNodeIndex]; | |
QbvhNode4 result; | |
// gather all info | |
int linearChildIndices[4]; | |
Aabb linearChildAabbs[4]; | |
size_t child4Count = 0; | |
for (size_t i = 0;i < 2;i++) | |
{ | |
int c1Index = currentNode.child[i]; | |
if (c1Index > 0) // is leaf | |
{ | |
for (size_t j = 0;j < 2;j++) | |
{ | |
linearChildIndices[child4Count] = linearBvhNodes[c1Index].child[j]; | |
linearChildAabbs[child4Count] = linearBvhNodes[c1Index].bbox[j]; | |
child4Count++; | |
} | |
} | |
else | |
{ | |
linearChildIndices[child4Count] = c1Index; | |
linearChildAabbs[child4Count] = currentNode.bbox[i]; | |
child4Count++; | |
} | |
} | |
// restructure bbox for __m128 | |
for (size_t i = 0;i < child4Count;i++) | |
{ | |
for (size_t dim = 0;dim < 3;dim++) | |
{ | |
result.p[dim][0].m128_f32[i] = static_cast<float>(linearChildAabbs[i].pMin[dim]); | |
result.p[dim][1].m128_f32[i] = static_cast<float>(linearChildAabbs[i].pMax[dim]); | |
} | |
} | |
// init empty node in result | |
for (size_t i = child4Count;i < 4;i++) | |
{ | |
result.child[i] = 0; | |
} | |
// build 4 nodes | |
size_t currentNodeIndex = this->_mQbvhNodes.size(); | |
this->_mQbvhNodes.push_back(result); | |
int32_t resultIndex = static_cast<int32_t>(currentNodeIndex + 1); | |
for (size_t i = 0;i < child4Count;i++) | |
{ | |
if (linearChildIndices[i] <= 0) // is leaf | |
{ | |
this->_mQbvhNodes[currentNodeIndex].child[i] = linearChildIndices[i]; | |
} | |
else | |
{ | |
this->_mQbvhNodes[currentNodeIndex].child[i] = resultIndex; | |
resultIndex = this->buildBranch(linearBvhNodes, linearChildIndices[i]); | |
} | |
} | |
return resultIndex; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment