#include "stdafx.h" | |
const float EDGE_CONSTRAINTS_WEIGHT = 5.0f; | |
const float COVERED_PIXELS_WEIGHT = 1.0f; | |
const float NONCOVERED_PIXELS_WEIGHT = 0.1f; | |
const float TOLERANCE = 0.01f; | |
#define USE_ISPC 0 | |
#define min(x,y) ((x) < (y) ? (x) : (y)) | |
#define max(x,y) ((x) > (y) ? (x) : (y)) | |
#define min3(x,y,z) min((x), min((y),(z))) | |
#define max3(x,y,z) max((x), max((y),(z))) | |
// Convenience for accessing 2D regular data (e.g. images) | |
template<typename T> | |
struct array2d | |
{ | |
std::unique_ptr<T[]> data; | |
int width, height; | |
array2d(int width, int height, T zero = T()) : data(new T[width*height]), width(width), height(height) | |
{ | |
for (int i = 0; i < width*height; ++i) | |
{ | |
data[i] = zero; | |
} | |
} | |
array2d(std::unique_ptr<T[]> data, int width, int height) : data(data.release()), width(width), height(height) {} | |
T& operator()(int column, int row) | |
{ | |
return data[row*width + column]; | |
} | |
T operator()(int column, int row) const | |
{ | |
return data[row*width + column]; | |
} | |
}; | |
// Dense vector of arbitrary length. | |
struct VectorX | |
{ | |
std::unique_ptr<float[]> vec; | |
size_t size; | |
VectorX(const VectorX& other) : vec(new float[other.size]), size(other.size) | |
{ | |
memcpy(vec.get(), other.vec.get(), size*sizeof(float)); | |
} | |
VectorX(size_t size) : vec(new float[size]), size(size) | |
{ | |
memset(vec.get(), 0, sizeof(float)*size); | |
} | |
VectorX& operator=(const VectorX& other) | |
{ | |
if (this != &other) | |
{ | |
if (size != other.size) | |
{ | |
vec.reset(new float[other.size]); | |
size = other.size; | |
} | |
memcpy(vec.get(), other.vec.get(), size*sizeof(float)); | |
} | |
return *this; | |
} | |
__forceinline float operator[](size_t ix) const { return vec[ix]; } | |
__forceinline float& operator[](size_t ix) { return vec[ix]; } | |
static void Sub(VectorX& out, const VectorX& x, const VectorX& y) | |
{ | |
assert(out.size == x.size); | |
assert(x.size == y.size); | |
for (size_t i = 0; i < x.size; ++i) | |
out.vec[i] = x.vec[i] - y.vec[i]; | |
} | |
static void Add(VectorX& out, const VectorX& x, const VectorX& y) | |
{ | |
assert(out.size == x.size); | |
assert(x.size == y.size); | |
for (size_t i = 0; i < x.size; ++i) | |
out.vec[i] = x.vec[i] + y.vec[i]; | |
} | |
static void Mul(VectorX& out, const VectorX& x, float s) | |
{ | |
assert(out.size == x.size); | |
for (size_t i = 0; i < x.size; ++i) | |
out.vec[i] = x.vec[i]*s; | |
} | |
static float Dot(const VectorX& a, const VectorX& b) | |
{ | |
assert(a.size == b.size); | |
size_t n = a.size; | |
if (n > 4000) | |
{ | |
const int NumChunks = 4; | |
int chunkSize = (int)n / NumChunks; | |
float dotProducts[NumChunks]; | |
concurrency::parallel_for(0, NumChunks, [&](auto i) { | |
int count = i != NumChunks-1 ? chunkSize : (int)n - chunkSize * (NumChunks - 1); // Treat last chunk separately | |
size_t start = i*chunkSize; | |
float* aArray = a.vec.get() + start; | |
float* bArray = b.vec.get() + start; | |
#if USE_ISPC | |
dotProducts[i] = ispc::dot(aArray, bArray, count); | |
#else | |
float sum = 0; | |
for (int j = 0; j < count; ++j) | |
{ | |
sum += aArray[j] * bArray[j]; | |
} | |
dotProducts[i] = sum; | |
#endif | |
}); | |
float finalSum = 0.0f; | |
for (int i = 0; i < NumChunks; ++i) | |
finalSum += dotProducts[i]; | |
return finalSum; | |
} | |
else | |
{ | |
float sum = 0.0f; | |
for (size_t i = 0; i < n; ++i) | |
{ | |
sum += a[i] * b[i]; | |
} | |
return sum; | |
} | |
} | |
// Returns v*a + b | |
static void MulAdd(VectorX& out, const VectorX& v, float a, const VectorX& b) | |
{ | |
assert(out.size == v.size); | |
#if USE_ISPC | |
ispc::vmadd(v.vec.get(), a, b.vec.get(), out.vec.get(), (int)v.size); | |
#else | |
for (size_t i = 0; i < v.size; ++i) | |
{ | |
out[i] = v[i] * a + b[i]; | |
} | |
#endif | |
} | |
}; | |
// Very basic sparse matrix. | |
struct SparseMat | |
{ | |
SparseMat(int numRows, int numCols) : rows(new Row[numRows]), numRows(numRows), numCols(numCols) {} | |
float& operator()(size_t row, size_t column) | |
{ | |
assert(row < numRows); assert(row >= 0); | |
assert(column < numCols); assert(column >= 0); | |
return rows[(int)row][(int)column]; | |
} | |
static void Mul(VectorX& out, const SparseMat& A, const VectorX& x) | |
{ | |
assert(out.size == A.numRows); | |
assert(x.size == A.numCols); | |
concurrency::parallel_for<size_t>(0, A.numRows, [&](auto r) { | |
out[r] = Dot(x, A.rows[r]); | |
}, concurrency::simple_partitioner(100)); | |
} | |
private: | |
struct Row | |
{ | |
template<typename T> | |
struct AlignedDeleter | |
{ | |
void operator()(T* ptr) { _aligned_free(ptr); } | |
}; | |
size_t n = 0; | |
int capacity = 0; | |
std::unique_ptr<float[], AlignedDeleter<float>> coeffs; | |
std::unique_ptr<int[], AlignedDeleter<int>> indices; | |
float& operator[](int column) | |
{ | |
// Find the element | |
size_t index = findClosestIndex(column); | |
if (n == 0 || indices[index] != column) // Add new element | |
{ | |
if (n == capacity) | |
{ | |
grow(); | |
} | |
// Put the new element in the right place, and shift existing elements down by one. | |
float prevCoeff = 0; | |
int prevIndex = column; | |
++n; | |
for (size_t i = index; i < n; ++i) | |
{ | |
float tmpCoeff = coeffs[i]; | |
int tmpIndex = indices[i]; | |
coeffs[i] = prevCoeff; | |
indices[i] = prevIndex; | |
prevCoeff = tmpCoeff; | |
prevIndex = tmpIndex; | |
} | |
} | |
return coeffs[index]; | |
} | |
void grow() | |
{ | |
capacity = capacity == 0 ? 16 : capacity + capacity/2; | |
float* newCoeffs = (float*)_aligned_malloc(sizeof(float)*capacity, 32); | |
int* newIndices = (int*)_aligned_malloc(sizeof(int)*capacity, 32); | |
// Copy existing data over | |
memcpy(newCoeffs, coeffs.get(), n*sizeof(float)); | |
memcpy(newIndices, indices.get(), n*sizeof(int)); | |
coeffs.reset(newCoeffs); | |
indices.reset(newIndices); | |
} | |
size_t findClosestIndex(int columnIndex) | |
{ | |
for (int i = 0; i < n; ++i) | |
{ | |
if (indices[i] >= columnIndex) | |
return i; | |
} | |
return n; | |
} | |
}; | |
std::unique_ptr<Row[]> rows; | |
int numRows, numCols; | |
static float Dot(const VectorX& x, const Row& row) | |
{ | |
float sum = 0.0f; | |
for (size_t i = 0; i < row.n; ++i) | |
{ | |
sum += x[row.indices[i]]*row.coeffs[i]; | |
} | |
return sum; | |
} | |
}; | |
struct Vec3 | |
{ | |
float x, y, z; | |
bool operator==(const Vec3& other) const | |
{ | |
return other.x == x && other.y == y && other.z == z; | |
} | |
bool operator!=(const Vec3& other) const | |
{ | |
return !(*this == other); | |
} | |
Vec3 operator-(const Vec3& other) const | |
{ | |
return Vec3{ x - other.x, y - other.y, z - other.z }; | |
} | |
Vec3 operator+(const Vec3& other) const | |
{ | |
return Vec3{ x + other.x, y + other.y, z + other.z }; | |
} | |
void operator+=(const Vec3& other) | |
{ | |
*this = *this + other; | |
} | |
Vec3 operator*(const float& s) const | |
{ | |
return Vec3{ x*s, y*s, z*s }; | |
} | |
Vec3 operator/(const float& s) const | |
{ | |
return *this * (1.0f / s); | |
} | |
}; | |
struct Vec2 | |
{ | |
float u, v; | |
bool operator==(const Vec2& other) const | |
{ | |
return other.u == u && other.v == v; | |
} | |
bool operator!=(const Vec2& other) const | |
{ | |
return !(*this == other); | |
} | |
Vec2 operator-(const Vec2& other) const | |
{ | |
return Vec2{ u - other.u, v - other.v }; | |
} | |
Vec2 operator+(const Vec2& other) const | |
{ | |
return Vec2{ u + other.u, v + other.v }; | |
} | |
void operator+=(const Vec2& other) | |
{ | |
u += other.u; v += other.v; | |
} | |
Vec2 operator*(const float& s) const | |
{ | |
return Vec2{u*s, v*s}; | |
} | |
Vec2 operator/(const float& s) const | |
{ | |
return *this * (1.0f / s); | |
} | |
float Length() const | |
{ | |
return sqrtf(u*u + v*v); | |
} | |
}; | |
Vec2 operator*(float s, const Vec2& x) | |
{ | |
return x*s; | |
} | |
struct Face | |
{ | |
int v0, v1, v2, uv0, uv1, uv2; | |
}; | |
struct HalfEdge | |
{ | |
Vec2 a, b; | |
}; | |
Vec2 UVToScreen(Vec2 in, int W, int H) | |
{ | |
in.v = 1.0f - in.v; | |
in.u *= W; in.v *= H; | |
return in - Vec2{ 0.5f, 0.5f }; | |
} | |
// Do a modulo operation with the assumption that it's usually a no-op | |
// Note: this guarantees positive return values | |
int WrapCoordinate(int x, int size) | |
{ | |
// Branch predictor will hopefully skip these two loops most of the time | |
while (x < 0) | |
{ | |
x += size; | |
} | |
while (x >= size) | |
{ | |
x -= size; | |
} | |
return x; | |
} | |
struct SeamEdge | |
{ | |
// The two half edges representing this edge (each half edge is in a different UV chart) | |
HalfEdge edges[2]; | |
int numSamples(int W, int H) const | |
{ | |
Vec2 e0 = UVToScreen(edges[0].b, W, H) - UVToScreen(edges[0].a, W, H); | |
Vec2 e1 = UVToScreen(edges[1].b, W, H) - UVToScreen(edges[1].a, W, H); | |
float len = max3(2, e0.Length(), e1.Length()); | |
return (int)(len * 3); | |
} | |
}; | |
struct Mesh | |
{ | |
std::vector<Face> faces; | |
std::vector<Vec3> verts; | |
std::vector<Vec2> uvs; | |
}; | |
// TODO: Write mesh loader that isn't awful. | |
bool LoadMesh(const char* fname, Mesh& mesh) | |
{ | |
FILE* f; | |
if (fopen_s(&f, fname, "r")) | |
{ | |
return false; | |
} | |
char buf[4096]; | |
while (!feof(f)) | |
{ | |
float x, y, z; | |
int tri0, uv0, tri1, uv1, tri2, uv2; | |
if (nullptr == fgets(buf, _countof(buf), f)) | |
{ | |
break; | |
} | |
if (sscanf_s(buf, "v %f %f %f", &x, &y, &z) == 3) | |
{ | |
mesh.verts.push_back(Vec3{ x,y,z }); | |
} | |
else if (sscanf_s(buf, "vt %f %f", &x, &y) == 2) | |
{ | |
mesh.uvs.push_back(Vec2{ x,y }); | |
} | |
else if (sscanf_s(buf, "f %d/%d %d/%d %d/%d", &tri0,&uv0, &tri1,&uv1, &tri2,&uv2) == 6) | |
{ | |
// TODO: Pretty sure OBJ has some weird "index from the back" thing too with negative indices. | |
assert(tri0 > 0); assert(uv0 > 0); | |
assert(tri1 > 0); assert(uv1 > 0); | |
assert(tri2 > 0); assert(uv2 > 0); | |
mesh.faces.push_back(Face{ tri0-1, tri1-1, tri2-1, uv0-1, uv1-1, uv2-1 }); | |
} | |
else | |
{ | |
//printf("INFO: Ignoring line: %s", buf); | |
} | |
} | |
fclose(f); | |
return true; | |
} | |
void FindSeamEdges(const Mesh& mesh, std::vector<SeamEdge>& seamEdges, int W, int H) | |
{ | |
using namespace std; | |
struct tupleHasher // Hash pairs of vectors | |
{ | |
hash<float> h; | |
size_t operator ()(const tuple<Vec3, Vec3> x) const | |
{ | |
Vec3 a = std::get<0>(x), b = std::get<1>(x); | |
return h(a.x) + 1733 * h(a.y) + 43 * h(a.z) + 3257 * h(b.x) + 1091 * h(b.y) + 557 * h(b.z); | |
} | |
}; | |
unordered_map<tuple<Vec3, Vec3>, tuple<Vec2, Vec2>, tupleHasher> edgeMap; | |
for (const auto& f : mesh.faces) | |
{ | |
// Need to loop through the edges of this face, so make a list of all the edges, including their UVs | |
tuple<int, int, int, int> edges[] = { | |
make_tuple(f.v0, f.v1, f.uv0, f.uv1), | |
make_tuple(f.v1, f.v2, f.uv1, f.uv2), | |
make_tuple(f.v2, f.v0, f.uv2, f.uv0) | |
}; | |
for (const auto& e : edges) | |
{ | |
// Grab the actual verts and UVs. The mesh may not have been "welded" properly, so we have to check | |
// the actual coordinates rather than just indices. | |
Vec3 v0 = mesh.verts[get<0>(e)], v1 = mesh.verts[get<1>(e)]; | |
Vec2 uv0 = mesh.uvs[get<2>(e)], uv1 = mesh.uvs[get<3>(e)]; | |
// Try to find the "opposite" edge (note: reversing the order of verts here) | |
auto otherEdge = edgeMap.find(make_tuple(v1, v0)); | |
if (otherEdge == edgeMap.end()) | |
{ | |
// First time we're seeing this edge, so add it to the map | |
edgeMap[make_tuple(v0, v1)] = make_tuple(uv0, uv1); | |
} | |
else | |
{ | |
// This edge has already been added once, so we have enough information to see if it's a normal edge, or a "seam edge". | |
Vec2 otheruv0 = get<0>(otherEdge->second), otheruv1 = get<1>(otherEdge->second); | |
if (otheruv0 != uv1 || otheruv1 != uv0) | |
{ | |
// UV don't match, so we have a seam | |
SeamEdge s = SeamEdge{ HalfEdge{ uv0, uv1 }, HalfEdge{ otheruv1, otheruv0 } }; | |
seamEdges.push_back(s); | |
} | |
edgeMap.erase(otherEdge); // No longer need this edge, remove it to keep storage low | |
} | |
} | |
} | |
} | |
struct RGB | |
{ | |
unsigned char R, G, B; | |
unsigned char& operator[](size_t ix) | |
{ | |
return ((unsigned char*)this)[ix]; | |
} | |
}; | |
float clamp(float x, float lo, float hi) | |
{ | |
return min(hi, max(lo, x)); | |
} | |
__forceinline Vec3 RGBToYCoCg(RGB color) | |
{ | |
return Vec3{ | |
color.R * 0.25f + color.G*0.5f + color.B*0.25f, | |
-color.R * 0.25f + color.G*0.5f - color.B*0.25f, | |
color.R * 0.5f - color.B*0.5f | |
}; | |
} | |
__forceinline RGB YCoCgToRGB(Vec3 color) | |
{ | |
float r = clamp(color.x - color.y + color.z , 0, 255); | |
float g = clamp(color.x + color.y , 0, 255); | |
float b = clamp(color.x - color.y - color.z , 0, 255); | |
return RGB{ (unsigned char)roundf(r),(unsigned char)roundf(g),(unsigned char)roundf(b) }; | |
} | |
bool isInside(int x, int y, Vec2 ea, Vec2 eb) | |
{ | |
return (float)x*(eb.v - ea.v) - (float)y*(eb.u - ea.u) - ea.u*eb.v + ea.v*eb.u >= 0; | |
} | |
void RasterizeFace(Vec2 uv0, Vec2 uv1, Vec2 uv2, array2d<uint8_t>& coverageBuf) | |
{ | |
uv0 = UVToScreen(uv0, coverageBuf.width, coverageBuf.height); | |
uv1 = UVToScreen(uv1, coverageBuf.width, coverageBuf.height); | |
uv2 = UVToScreen(uv2, coverageBuf.width, coverageBuf.height); | |
// Axis aligned bounds of the triangle | |
int minx = (int)min3(uv0.u, uv1.u, uv2.u); | |
int maxx = (int)max3(uv0.u, uv1.u, uv2.u) + 1; | |
int miny = (int)min3(uv0.v, uv1.v, uv2.v); | |
int maxy = (int)max3(uv0.v, uv1.v, uv2.v) + 1; | |
// The three edges we will test | |
Vec2 e0a = uv0, e0b = uv1; | |
Vec2 e1a = uv1, e1b = uv2; | |
Vec2 e2a = uv2, e2b = uv0; | |
// Now just loop over a screen aligned bounding box around the triangle, and test each pixel against all three edges | |
for (int y = miny; y <= maxy; ++y) | |
{ | |
for (int x = minx; x <= maxx; ++x) | |
{ | |
if (isInside(x, y, e0a, e0b) & isInside(x, y, e1a, e1b) & isInside(x, y, e2a, e2b)) | |
{ | |
coverageBuf(WrapCoordinate(x, coverageBuf.width), WrapCoordinate(y, coverageBuf.height)) = 1; | |
} | |
} | |
} | |
} | |
RGB DilatePixel(int centerx, int centery, const array2d<RGB>& image, const array2d<uint8_t>& coverageBuf) | |
{ | |
int numPixels = 0; | |
Vec3 sum = Vec3{ 0,0,0 }; | |
for (int yix = centery - 1; yix <= centery + 1; ++yix) | |
{ | |
for (int xix = centerx - 1; xix <= centerx + 1; ++xix) | |
{ | |
int x = WrapCoordinate(xix, image.width); | |
int y = WrapCoordinate(yix, image.height); | |
if (coverageBuf(x,y)) | |
{ | |
++numPixels; | |
RGB c = image(x,y); | |
sum += Vec3{ (float)c.R, (float)c.G, (float)c.B }; | |
} | |
} | |
} | |
if (numPixels > 0) | |
{ | |
sum = sum / (float)numPixels; | |
sum.x = min(255.0f, roundf(sum.x)); | |
sum.y = min(255.0f, roundf(sum.y)); | |
sum.z = min(255.0f, roundf(sum.z)); | |
return RGB{ (unsigned char)sum.x, (unsigned char)sum.y, (unsigned char)sum.z }; | |
} | |
else | |
{ | |
return RGB{ 0,0,0 }; | |
} | |
} | |
// Given a fractional sample location, compute the four integer sample locations and their weights | |
void CalculateSamplesAndWeights(const array2d<int>& pixelMap, Vec2 &sample, int *__restrict outIxs, float *__restrict outWeights) | |
{ | |
int truncu = (int)sample.u; | |
int truncv = (int)sample.v; | |
const int xs[] = { truncu, truncu + 1, truncu + 1, truncu }; | |
const int ys[] = { truncv, truncv, truncv + 1, truncv + 1 }; | |
for (int i = 0; i < 4; ++i) | |
{ | |
int x = WrapCoordinate(xs[i], pixelMap.width); | |
int y = WrapCoordinate(ys[i], pixelMap.height); | |
outIxs[i] = pixelMap(x,y); | |
} | |
float fracX = sample.u - truncu; | |
float fracY = sample.v - truncv; | |
outWeights[0] = (1.0f - fracX)*(1.0f - fracY); | |
outWeights[1] = fracX*(1.0f - fracY); | |
outWeights[2] = fracX*fracY; | |
outWeights[3] = (1.0f - fracX)*fracY; | |
for (int i = 0; i < 4; ++i) | |
{ | |
outWeights[i] *= EDGE_CONSTRAINTS_WEIGHT; | |
} | |
} | |
__forceinline float rcp(float x) | |
{ | |
__m128 tmp = _mm_load_ss(&x); | |
tmp = _mm_rcp_ss(tmp); | |
_mm_store_ss(&x, tmp); | |
return x; | |
} | |
void ConjugateGradientOptimize(VectorX& out, SparseMat& A, const VectorX& guess, const VectorX& b, int numIterations, float tolerance) | |
{ | |
size_t n = guess.size; | |
VectorX p(n), r(n), Ap(n), tmp(n); | |
VectorX& x = out; | |
// r = b - A*x; | |
SparseMat::Mul(tmp, A, x); | |
VectorX::Sub(r, b, tmp); | |
p = r; | |
float rsq = VectorX::Dot(r,r); | |
for (int i = 0; i < numIterations; ++i) | |
{ | |
SparseMat::Mul(Ap, A, p); | |
float alpha = rsq / VectorX::Dot(p,Ap); | |
VectorX::MulAdd(x, p, alpha, x); // x = x + alpha*p | |
VectorX::MulAdd(r, Ap ,-alpha, r); // r = r - alpha*Ap | |
float rsqnew = VectorX::Dot(r,r); | |
if (fabs(rsqnew-rsq) < tolerance*n) | |
break; | |
float beta = rsqnew / rsq; | |
VectorX::MulAdd(p, p, beta, r);// p = r + beta*p | |
rsq = rsqnew; | |
} | |
} | |
struct PixelInfo | |
{ | |
int x, y; | |
bool isCovered; | |
Vec3 colorYCoCg; | |
}; | |
void ComputePixelInfo(const std::vector<SeamEdge>& seamEdges, const array2d<uint8_t>& coverageBuf, const array2d<RGB>& image, std::vector<PixelInfo>& pixelInfo, array2d<int>& pixelToPixelInfoMap) | |
{ | |
// Find the pixels we will optimize for. Use a 2D map so that we can find a unique set of | |
// pixels that we need to optimize for, and a quick way to find the index of it given an (x,y) position. | |
int W = coverageBuf.width; | |
int H = coverageBuf.height; | |
for (const auto&s : seamEdges) | |
{ | |
// TODO: this is overkill.. Could do conservative rasterization instead of brute force sampling | |
// 3x per pixel and take the union of the 2x2 sampling neighborhoods. | |
int numSamples = s.numSamples(W, H); | |
for (const auto& e : s.edges) | |
{ | |
Vec2 e0 = UVToScreen(e.a, W, H); | |
Vec2 e1 = UVToScreen(e.b, W, H); | |
Vec2 dt = (e1 - e0) / (float)(numSamples-1); | |
Vec2 samplePoint = e0; | |
for (int i = 0; i < numSamples; ++i, samplePoint += dt) | |
{ | |
// Go through the four bilinear sample taps | |
int xs[] = { (int)samplePoint.u, xs[0] + 1, xs[0] + 1, xs[0] }; | |
int ys[] = { (int)samplePoint.v, ys[0] , ys[0] + 1, ys[0] + 1 }; | |
for (int tap = 0; tap < 4; ++tap) | |
{ | |
int x = WrapCoordinate(xs[tap], W); | |
int y = WrapCoordinate(ys[tap], H); | |
// If we haven't already seen this pixel, make sure we take this pixel into account when optimizing | |
if (pixelToPixelInfoMap(x,y) == -1) | |
{ | |
bool isCovered = !!coverageBuf(x,y); | |
Vec3 colorYCoCg; | |
if (isCovered) | |
{ | |
colorYCoCg = RGBToYCoCg(image(x,y)); | |
} | |
else | |
{ | |
// Do dilation... | |
colorYCoCg = RGBToYCoCg(DilatePixel(x, y, image, coverageBuf)); | |
} | |
pixelInfo.push_back(PixelInfo{ x,y, isCovered, colorYCoCg }); | |
pixelToPixelInfoMap(x,y) = (int)pixelInfo.size() - 1; | |
} | |
} | |
} | |
} | |
} | |
} | |
void SetupLeastSquares(std::vector<SeamEdge> &seamEdges, const array2d<int>& pixelToPixelInfoMap, const std::vector<PixelInfo> &pixelInfo, SparseMat& AtA, VectorX &AtbR, VectorX &AtbG, VectorX &AtbB, VectorX& initialGuessR, VectorX& initialGuessG, VectorX& initialGuessB) | |
{ | |
int W = pixelToPixelInfoMap.width; | |
int H = pixelToPixelInfoMap.height; | |
for (size_t seamIndex = 0; seamIndex < seamEdges.size(); ++seamIndex) | |
{ | |
SeamEdge s = seamEdges[seamIndex]; | |
// Step through the samples of this edge, and compute sample locations for each side of the seam | |
int numSamples = s.numSamples(W, H); | |
Vec2 firstHalfEdgeStart = UVToScreen(s.edges[0].a, W, H); | |
Vec2 firstHalfEdgeEnd = UVToScreen(s.edges[0].b, W, H); | |
Vec2 secondHalfEdgeStart = UVToScreen(s.edges[1].a, W, H); | |
Vec2 secondHalfEdgeEnd = UVToScreen(s.edges[1].b, W, H); | |
Vec2 firstHalfEdgeStep = (firstHalfEdgeEnd - firstHalfEdgeStart) / (float)(numSamples-1); | |
Vec2 secondHalfEdgeStep = (secondHalfEdgeEnd - secondHalfEdgeStart) / (float)(numSamples-1); | |
Vec2 firstHalfEdgeSample = firstHalfEdgeStart; | |
Vec2 secondHalfEdgeSample = secondHalfEdgeStart; | |
for (int sampleIx = 0; sampleIx < numSamples; ++sampleIx, firstHalfEdgeSample += firstHalfEdgeStep, secondHalfEdgeSample += secondHalfEdgeStep) | |
{ | |
// Sample locations for the two corresponding sets of sample points | |
int firstHalfEdge[4], secondHalfEdge[4]; | |
float firstHalfEdgeWeights[4], secondHalfEdgeWeights[4]; | |
CalculateSamplesAndWeights(pixelToPixelInfoMap, firstHalfEdgeSample, firstHalfEdge, firstHalfEdgeWeights); | |
CalculateSamplesAndWeights(pixelToPixelInfoMap, secondHalfEdgeSample, secondHalfEdge, secondHalfEdgeWeights); | |
/* | |
Now, compute the covariance for the difference of these two vectors. | |
If a is the first vector (first half edge) and b is the second, then we compute the covariance, without | |
intermediate storage, like so: | |
(a-b)*(a-b)^t = a*a^t + b*b^t - a*b^t-b*a^t | |
*/ | |
for (int i = 0; i < 4; ++i) | |
{ | |
for (int j = 0; j < 4; ++j) | |
{ | |
// + a*a^t | |
AtA(firstHalfEdge[i], firstHalfEdge[j]) += firstHalfEdgeWeights[i] * firstHalfEdgeWeights[j]; | |
// + b*b^t | |
AtA(secondHalfEdge[i], secondHalfEdge[j]) += secondHalfEdgeWeights[i] * secondHalfEdgeWeights[j]; | |
// - a*b^t | |
AtA(firstHalfEdge[i], secondHalfEdge[j]) -= firstHalfEdgeWeights[i] * secondHalfEdgeWeights[j]; | |
// - b*a^t | |
AtA(secondHalfEdge[i], firstHalfEdge[j]) -= secondHalfEdgeWeights[i] * firstHalfEdgeWeights[j]; | |
} | |
} | |
} | |
} | |
for (size_t i = 0; i < pixelInfo.size(); ++i) | |
{ | |
PixelInfo pi = pixelInfo[i]; | |
// Set up equality cost, trying to keep the pixel at its original value | |
// Note: for non-covered pixels the weight is much lower, since those are the pixels | |
// we primarily want to modify (we'll want to keep it >0 though, to reduce the risk | |
// of extreme values that can't fit in 8 bit color channels) | |
float weight = pi.isCovered ? COVERED_PIXELS_WEIGHT : NONCOVERED_PIXELS_WEIGHT; | |
AtA(i, i) += weight; | |
// Set up the three right hand sides (one for R, G, and B). | |
// Note AtRHS represents the transpose of the system matrix A multiplied by the RHS | |
AtbR[i] += pi.colorYCoCg.x * weight; | |
AtbG[i] += pi.colorYCoCg.y * weight; | |
AtbB[i] += pi.colorYCoCg.z * weight; | |
// Set up the initial guess for the solution. | |
initialGuessR[i] = pi.colorYCoCg.x; | |
initialGuessG[i] = pi.colorYCoCg.y; | |
initialGuessB[i] = pi.colorYCoCg.z; | |
} | |
} | |
int main(int argc, char** argv) | |
{ | |
auto t0 = std::chrono::high_resolution_clock::now(); | |
if (argc != 4) | |
{ | |
printf("Usage: TextureBorderGenerator.exe InputMesh.Obj InputTexture.png OutputTexture.png\n"); | |
return -1; | |
} | |
const char* meshFileName = argv[1]; | |
const char* texFileName = argv[2]; | |
const char* texOutFileName = argv[3]; | |
Mesh mesh; | |
if (!LoadMesh(meshFileName, mesh)) | |
{ | |
printf("Error loading OBJ file!\n"); | |
return 1; | |
} | |
int W, H, comp; | |
RGB* rawImg = (RGB*)stbi_load(texFileName, &W, &H, &comp, 3); | |
if (rawImg == nullptr) | |
{ | |
printf("Failed to load input texture!\n"); | |
return -1; | |
} | |
array2d<RGB> image(std::unique_ptr<RGB[]>(rawImg), W, H); | |
// Find all edges that have different UVs on the two sides | |
std::vector<SeamEdge> seamEdges; | |
FindSeamEdges(mesh, seamEdges, W, H); | |
// Produce a mask for all valid pixels | |
array2d<uint8_t> coverageBuf(W, H); | |
for (const auto& f : mesh.faces) | |
{ | |
Vec2 uv0 = mesh.uvs[f.uv0], uv1 = mesh.uvs[f.uv1], uv2 = mesh.uvs[f.uv2]; | |
RasterizeFace(uv0, uv1, uv2, coverageBuf); | |
} | |
#if 0 | |
// Set pixels that aren't covered to an obvious color, so we can see errors | |
for (int j = 0; j < H; ++j) | |
{ | |
for (int i = 0; i < W; ++i) | |
{ | |
if (!coverageBuf[j*W + i]) | |
{ | |
outImage[j*W + i] = RGB{ 255,0,255 }; | |
} | |
} | |
} | |
#endif | |
array2d<int> pixelToPixelInfoMap(W, H, -1); | |
std::vector<PixelInfo> pixelInfo; | |
ComputePixelInfo(seamEdges, coverageBuf, image, pixelInfo, pixelToPixelInfoMap); | |
int numPixelsToOptimize = (int)pixelInfo.size(); | |
// Build up all the matrices and vectors we need to solve the problem | |
SparseMat AtA(numPixelsToOptimize, numPixelsToOptimize); | |
VectorX AtbR(numPixelsToOptimize), AtbG(numPixelsToOptimize), AtbB(numPixelsToOptimize); | |
VectorX initialGuessR(numPixelsToOptimize), initialGuessG(numPixelsToOptimize), initialGuessB(numPixelsToOptimize); | |
SetupLeastSquares(seamEdges, pixelToPixelInfoMap, pixelInfo, AtA, AtbR, AtbG, AtbB, initialGuessR, initialGuessG, initialGuessB); | |
// Run conjugate gradient optimization one color channel at a time | |
// (this is just so I don't have to implement sparse Matrix/Matrix multiplication efficiently :-)) | |
VectorX solutionR(numPixelsToOptimize), solutionG(numPixelsToOptimize), solutionB(numPixelsToOptimize); | |
concurrency::parallel_invoke( | |
[&] {ConjugateGradientOptimize(solutionR, AtA, initialGuessR, AtbR, 10000, TOLERANCE); }, | |
[&] {ConjugateGradientOptimize(solutionG, AtA, initialGuessG, AtbG, 10000, TOLERANCE); }, | |
[&] {ConjugateGradientOptimize(solutionB, AtA, initialGuessB, AtbB, 10000, TOLERANCE); } | |
); | |
// Store the resuling optimized pixels and save out | |
for (int i = 0; i < (int)numPixelsToOptimize; ++i) | |
{ | |
PixelInfo pi = pixelInfo[i]; | |
Vec3 colorYCoCg = Vec3{ solutionR[i], solutionG[i], solutionB[i] }; | |
image(pi.x, pi.y) = YCoCgToRGB(colorYCoCg); | |
} | |
if (!stbi_write_png(texOutFileName, W, H, 3, image.data.get(), sizeof(RGB)*W)) | |
{ | |
printf("Failed to write output file!\n"); | |
return -1; | |
} | |
// Print out benchmarking times | |
auto t1 = std::chrono::high_resolution_clock::now(); | |
std::chrono::duration<float> elapsedSeconds = t1 - t0; | |
printf("Time: %.4f\n", elapsedSeconds.count()); | |
return 0; | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment