Skip to content

Instantly share code, notes, and snippets.

@PWhiddy
Created May 31, 2019 22:54
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save PWhiddy/791d868929536f852fe2376bfd40d684 to your computer and use it in GitHub Desktop.
Save PWhiddy/791d868929536f852fe2376bfd40d684 to your computer and use it in GitHub Desktop.
options for speeding up the stamp generation
// original function, generates single trajectory
RawImage KBMOSearch::stackedStamps(trajectory t, int radius, std::vector<RawImage*> imgs)
{
if (radius<0) throw std::runtime_error("stamp radius must be at least 0");
int dim = radius*2+1;
RawImage stamp(dim, dim);
std::vector<float> times = stack.getTimes();
for (int i=0; i<imgs.size(); ++i)
{
for (int x=0; x<dim; ++x)
{
for (int y=0; y<dim; ++y)
{
float pixVal = imgs[i]->getPixelInterp(
t.x + times[i] * t.xVel + static_cast<float>(x-radius),
t.y + times[i] * t.yVel + static_cast<float>(y-radius));
if (pixVal == NO_DATA) pixVal = 0.0;
stamp.addToPixel(static_cast<int>(x),static_cast<int>(y), pixVal);
}
}
}
return stamp;
}
// batch (this is just the general idea jotted down, imagine it as untested psuedo-code. Not tested at all!)
std::vector<RawImage> KBMOSearch::stackedStampsBatch(std::vector<trajectory> t, int radius, std::vector<RawImage*> imgs)
{
if (radius<0) throw std::runtime_error("stamp radius must be at least 0");
int dim = radius*2+1;
std::vector<RawImage> stamps;
stamps.reserve(t.size());
std::vector<float> times = stack.getTimes();
#pragma omp parallel for
for (int s=0; s<t.size(); s++) {
stamps[i] = RawImage(dim,dim);
for (int i=0; i<imgs.size(); ++i)
{
for (int x=0; x<dim; ++x)
{
for (int y=0; y<dim; ++y)
{
float pixVal = imgs[i]->getPixelInterp(
t.x + times[i] * t.xVel + static_cast<float>(x-radius),
t.y + times[i] * t.yVel + static_cast<float>(y-radius));
if (pixVal == NO_DATA) pixVal = 0.0;
stamps[i].addToPixel(static_cast<int>(x),static_cast<int>(y), pixVal);
}
}
}
}
return stamps;
}
// gpu batch (also treat this only as a rough guideline)
std::vector<RawImage> KBMOSearch::stackedStampsBatchGPU(std::vector<trajectory> t, int radius, std::vector<RawImage*> imgs)
{
if (radius<0) throw std::runtime_error("stamp radius must be at least 0");
int dim = radius*2+1;
std::vector<RawImage> stamps;
stamps.reserve(t.size());
std::vector<float> times = stack.getTimes();
// 1. allocate GPU memory for trajctories, images, and stamps (cudaMalloc)
// 2. Then copy images and trajectories into that gpu memory (cudaMemcpy)
// 3. create thread blocks ( if the stamps are larger than 8*8,
// making each block have the dimensions of the stamp would probably work nice)
// 4. make a 1D grid that just has one threadblock for each stamp
// 5. launch a kernel that stacks together a single pixel (a block of these together makes one stamp),
// and writes the result into the gpu stamp memory allocated earlier
// stackStampKernel<<<grid,threads>>>(gpuTrajs, gpuImages, gpuStamps, radius, imageDims... imageCount...)
// 6. copy the stamps back to cpu
// 7. free gpu memory
// 8. return results
// the memory copying could be simpler using an api that handles it automatically, but I haven't used those features before
return stamps;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment