Skip to content

Instantly share code, notes, and snippets.

@9prady9
Last active June 26, 2017 10:39
Show Gist options
  • Save 9prady9/69edd5b53ad82e021dccfb1b31e22df3 to your computer and use it in GitHub Desktop.
Save 9prady9/69edd5b53ad82e021dccfb1b31e22df3 to your computer and use it in GitHub Desktop.
An illustrative example that uses ArrayFire to split work across multiple GPUs: one thread is launched per GPU. Thread safety is available starting with v3.5.0.
/*******************************************************
* Copyright (c) 2017, ArrayFire
* All rights reserved.
*
* This file is distributed under 3-clause BSD license.
* The complete license agreement can be obtained at:
* http://arrayfire.com/licenses/BSD-3-Clause
********************************************************/
#include <chrono>
#include <exception>
#include <iostream>
#include <string>
#include <thread>
#include <vector>

#include <arrayfire.h>
using namespace af;
using std::vector;
using std::string;
void morphImage(const array input, const array mask, const bool isDilation, int targetDevice)
{
auto start = std::chrono::high_resolution_clock::now();
af::setDevice(targetDevice);
af::array out = isDilation ? dilate(input, mask) : erode(input, mask);
auto end = std::chrono::high_resolution_clock::now();
std::chrono::duration<double> diff = end - start;
std::cout << "Thread(" << std::this_thread::get_id()
<< "): time taken is "
<< diff.count() << " s\n";
}
int main(void)
{
vector<bool> isDilationFlags;
vector<bool> isColorFlags;
vector<string> files;
files.push_back( string("<path to image file>") );
isDilationFlags.push_back(true);
isColorFlags.push_back(false);
files.push_back( string("<path to image file>") );
isDilationFlags.push_back(false);
isColorFlags.push_back(true);
vector<std::thread> tests;
unsigned totalTestCount = 0;
auto start = std::chrono::high_resolution_clock::now();
for(size_t pos = 0; pos<files.size(); ++pos)
{
const bool isDilation = isDilationFlags[pos];
const bool isColor = isColorFlags[pos];
const dim4 maskdims(3,3,1,1);
// Need to set the device before data is created or
// loaded onto the memory of the GPU to which
// work has to be submitted
int trgtDeviceId = totalTestCount % af::getDeviceCount();
af::setDevice(trgtDeviceId);
const array mask = constant(1.0, maskdims);
// af::loadImage is blocking call since it loads
// data from disk to GPU memory
array input = loadImage(files[pos].c_str(), isColor);
// launch the thread to erode/dilate the image
// pass the device to which the this thread needs to submit
// work as argument to the function that is executed by
// this thread
tests.emplace_back(morphImage, input, mask, isDilation, trgtDeviceId);
std::cout<<"morph test launched with the following params on device ("
<<trgtDeviceId<<"):"<<std::endl;
std::cout<<"\t Input image dims: "<<input.dims()<<std::endl;
std::cout<<"\t Mask dims: "<<mask.dims()<<std::endl;
std::cout<<"\t IsDilation : "<< (isDilation ? "True" : "False") <<std::endl;
totalTestCount++;
}
std::cout<< std::endl << "Waiting for results ..." << std::endl << std::endl;
for (size_t testId=0; testId<tests.size(); ++testId)
{
if (tests[testId].joinable()) {
std::cout<<"Attempting join for test ..."<<testId<<std::endl;
tests[testId].join();
std::cout<<"test "<< testId <<" completed." << std::endl;
}
std::cout<<std::endl;
}
auto end = std::chrono::high_resolution_clock::now();
std::chrono::duration<double> diff = end - start;
std::cout << "Total time taken for test : " << diff.count() << " s\n";
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment