-
-
Save sputnick1124/b40ca05d9cc7b5a5575804bc914e26bb to your computer and use it in GitHub Desktop.
#include <vector> | |
#include <iostream> | |
#include <algorithm> | |
class ThreeDLookup { | |
public: | |
ThreeDLookup(std::vector<double> &x, | |
std::vector<double> &y, | |
std::vector<double> &z, | |
std::vector< std::vector< std::vector<double> > > &a_dataTable); | |
~ThreeDLookup(); | |
double Interp(double xq, double yq, double zq); | |
private: | |
std::vector<double> xvec; | |
std::vector<double> yvec; | |
std::vector<double> zvec; | |
std::vector< std::vector< std::vector<double> > > dataTable; | |
double minx, miny, minz; | |
double maxx, maxy, maxz; | |
}; | |
ThreeDLookup::ThreeDLookup(std::vector<double> &x, | |
std::vector<double> &y, | |
std::vector<double> &z, | |
std::vector< std::vector< std::vector<double> > > &a_dataTable) { | |
xvec.insert(xvec.end(), x.begin(), x.end()); | |
yvec.insert(yvec.end(), y.begin(), y.end()); | |
zvec.insert(zvec.end(), z.begin(), z.end()); | |
dataTable = a_dataTable; | |
auto xbounds = std::minmax_element(x.begin(), x.end()); | |
minx = *xbounds.first; maxx = *xbounds.second; | |
auto ybounds = std::minmax_element(y.begin(), y.end()); | |
miny = *ybounds.first; maxy = *ybounds.second; | |
auto zbounds = std::minmax_element(z.begin(), z.end()); | |
minz = *zbounds.first; maxz = *zbounds.second; | |
} | |
ThreeDLookup::~ThreeDLookup() {}; | |
double ThreeDLookup::Interp(double xq, double yq, double zq) | |
{ | |
/* | |
* Assumes that all abscissa are monotonically increasing values | |
*/ | |
xq = std::max(minx, std::min(xq, maxx)); | |
yq = std::max(miny, std::min(yq, maxy)); | |
zq = std::max(minz, std::min(zq, maxz)); | |
auto xupper = std::upper_bound(xvec.cbegin(), xvec.cend(), xq); | |
int x1 = (xupper == xvec.cend()) ? xupper - xvec.cbegin() - 1 : xupper - xvec.cbegin(); | |
int x0 = x1 - 1; | |
auto yupper = std::upper_bound(yvec.cbegin(), yvec.cend(), yq); | |
int y1 = (yupper == yvec.cend()) ? yupper - yvec.cbegin() - 1 : yupper - yvec.cbegin(); | |
auto y0 = y1 - 1; | |
auto zupper = std::upper_bound(zvec.cbegin(), zvec.cend(), zq); | |
int z1 = (zupper == zvec.cend()) ? zupper - zvec.cbegin() - 1 : zupper - zvec.cbegin(); | |
auto z0 = z1 - 1; | |
double xd = (xq - xvec[x0])/(xvec[x1] - xvec[x0]); | |
double yd = (yq - yvec[y0])/(yvec[y1] - yvec[y0]); | |
double zd = (zq - zvec[z0])/(zvec[z1] - zvec[z0]); | |
double c000 = dataTable[x0][y0][z0]; | |
double c010 = dataTable[x0][y1][z0]; | |
double c100 = dataTable[x1][y0][z0]; | |
double c110 = dataTable[x1][y1][z0]; | |
double c001 = dataTable[x0][y0][z1]; | |
double c011 = dataTable[x0][y1][z1]; | |
double c101 = dataTable[x1][y0][z1]; | |
double c111 = dataTable[x1][y1][z1]; | |
double c00 = c000*(1 - xd) + c100*xd; | |
double c01 = c001*(1 - xd) + c101*xd; | |
double c10 = c010*(1 - xd) + c110*xd; | |
double c11 = c011*(1 - xd) + c111*xd; | |
double c0 = c00*(1 - yd) + c10*yd; | |
double c1 = c01*(1 - yd) + c11*yd; | |
double c = c0*(1 - zd) + c1*zd; | |
return c; | |
} | |
int main(int argc, char * argv[]) { | |
std::vector< std::vector< std::vector<double> > > dataTable; | |
for (auto i = 0; i < 10; ++i) { | |
std::vector< std::vector<double> > tmpi; | |
for (auto j = 0; j < 10; ++j) { | |
std::vector<double> tmpj; | |
for (auto k = 0; k < 10; ++k) { | |
tmpj.push_back( (double)i*j*k ); | |
} | |
tmpi.push_back(tmpj); | |
} | |
dataTable.push_back(tmpi); | |
} | |
std::vector<double> x, y, z; | |
for (double i = 0.0; i < 10.0; i += 1.0) { | |
x.push_back(i); | |
y.push_back(i); | |
z.push_back(i); | |
} | |
for (auto i = 0; i < 10; ++i) { | |
std::cout << x[i] << " " << y[i] << " " << z[i] << std::endl; | |
} | |
ThreeDLookup lut3d(x, y, z, dataTable); | |
double val = lut3d.Interp(1.2, 5.6, 8.3); | |
//double val = lut3d.Interp(0.0, 0.0, 0.0); | |
//double val = lut3d.Interp(9.0, 9.0, 9.0); | |
//double val = lut3d.Interp(10.0, 10.0, 10.0); | |
//double val = lut3d.Interp(-1.0, -1.0, -1.0); | |
std::cout << val << std::endl; | |
return 0; | |
} |
Is there a faster way with CUDA for this algorithm?
I'm sure there are a great many ways to improve or optimize this implementation of 3D lookup, even without having to resort to CUDA (though I'm also sure that a CUDA rewrite would see significant speed ups). This was written for a specific application within a larger C++ simulation framework and needed to be correct, simple, and self-contained, but not necessarily fast. Looking at it now, there are a few things I would implement differently these days, but if all you need is something that works, this fits the bill just fine.
Cheers.
Just to confirm, this is the interpolation for the LUT file used for video/image color grading right? It is usually a .cube file containing a 33x33x33 elements cube with RGB value in every element which makes it total of 33x33x33x3 elements. That is what I am looking for.
Oh, nope. Sorry for the miscommunication. This is just for vanilla linear interpolation with 3 input dimensions. You could write a parser to ingest a .cube
file and hook it up to this, but you're probably better off finding something already tailored to your needs if you care about performance.
There are a number of repos that have libraries/code for what you are referring to, though.
- https://github.com/AcademySoftwareFoundation/OpenColorIO
- https://github.com/Ppkturner/3DLutInterpolation
- https://github.com/haasn/libplacebo/blob/master/src%2Fshaders%2Flut.c
Good luck!
Thanks! I will try them out. I did read a bit on OpenColorIO before but I believe they do not offer GPU based processing.
Likely committing some sins here, but it works well. Dependent vectors must be sorted. There is probably a better way to store the table as well, but I don't really know what I'm doing.
Values greater or less than the dependent variables are clamped to the appropriate range.