Skip to content

Instantly share code, notes, and snippets.

@panmari
Last active October 14, 2015 17:34
Show Gist options
  • Save panmari/b9654d2f3e47ca7af825 to your computer and use it in GitHub Desktop.
Save panmari/b9654d2f3e47ca7af825 to your computer and use it in GitHub Desktop.
Testing the performance of different ways to divide a 3-channel image by a 1-channel weight image, channel-wise. See also http://stackoverflow.com/questions/33098797/divide-every-channel-of-image-by-weight-image-in-opencv
OpenCV version: 3.0.0-dev
OpenCV VCS version: 3.0.0-556-g33f5ac5
Build type: release
Parallel framework: pthreads
CPU features: mmx sse sse2 sse3
OpenCL Platforms:
NVIDIA CUDA
dGPU: GeForce GTX TITAN (OpenCL 1.2 CUDA)
Current OpenCL device:
Type = dGPU
Name = GeForce GTX TITAN
Version = OpenCL 1.2 CUDA
Compute units = 14
Max work group size = 1024
Local memory size = 48 kB
Max memory allocation size = 1 GB 511 MB 848 kB
Double support = Yes
Host unified memory = No
Has AMD Blas = No
Has AMD Fft = No
Preferred vector width char = 1
Preferred vector width short = 1
Preferred vector width int = 1
Preferred vector width long = 1
Preferred vector width float = 1
Preferred vector width double = 1
Note: Google Test filter = performance_test*
[==========] Running 1 test from 1 test case.
[----------] Global test environment set-up.
[----------] 1 test from performance_test
[ RUN ] performance_test.performance_test_division_of_3d_by_1d
Size Method 1 Method 2 Method 3 Method 4
[2 x 2] 0.051497 0.005057 0.001207 0.034242
[10 x 10] 0.01181 0.002343 0.00148 0.006504
[100 x 100] 0.173996 0.176055 0.071535 0.159307
[1000 x 1000] 12.8465 17.618 7.50787 10.1962
[2000 x 2000] 46.1933 70.5931 29.2439 28.4596
[ OK ] performance_test.performance_test_division_of_3d_by_1d (477 ms)
[----------] 1 test from performance_test (477 ms total)
[----------] Global test environment tear-down
[==========] 1 test from 1 test case ran. (477 ms total)
[ PASSED ] 1 test.
OpenCV version: 3.0.0-dev
OpenCV VCS version: 3.0.0-580-g0f1fdd8
Build type: release
Parallel framework: ms-concurrency
CPU features: popcnt mmx sse sse2 sse3 ssse3 sse4.1 sse4.2
OpenCL Platforms:
NVIDIA CUDA
dGPU: GeForce GTX 960 (OpenCL 1.2 CUDA)
Current OpenCL device:
Type = dGPU
Name = GeForce GTX 960
Version = OpenCL 1.2 CUDA
Compute units = 8
Max work group size = 1024
Local memory size = 48 kB
Max memory allocation size = 512 MB
Double support = Yes
Host unified memory = No
Has AMD Blas = No
Has AMD Fft = No
Preferred vector width char = 1
Preferred vector width short = 1
Preferred vector width int = 1
Preferred vector width long = 1
Preferred vector width float = 1
Preferred vector width double = 1
[----------] 1 test from performance_test
[ RUN ] performance_test.performance_test_division_of_3d_by_1d
Size Method 1 Method 2 Method 3 Method 4
[2 x 2] 0.0185935 0.00364579 0.00109374 0.00765615
[10 x 10] 0.0109374 0.00218747 0.00145831 0.00546868
[100 x 100] 0.172446 0.143644 0.0765615 0.10828
[1000 x 1000] 17.9836 15.5434 9.18447 13.7391
[2000 x 2000] 72.7801 69.2262 34.0422 56.9764
[ OK ] performance_test.performance_test_division_of_3d_by_1d (686 ms)
[----------] 1 test from performance_test (687 ms total)
#include "opencv2/ts.hpp"
#include "opencv2/imgproc/imgproc.hpp"
using namespace std;
using namespace cv;
// Benchmarks four ways of dividing every channel of a 3-channel float image
// (CV_32FC3) by a single-channel float weight image (CV_32FC1) of the same
// size. For each tested size it prints the elapsed time of every method in
// milliseconds and then checks that methods 2-4 agree with method 1.
TEST(performance_test, performance_test_division_of_3d_by_1d) {
    const vector<Size> sizes{Size(2, 2), Size(10, 10), Size(100, 100), Size(1000, 1000), Size(2000, 2000)};

    // Milliseconds elapsed since `start_ticks` (a getTickCount() reading).
    const auto elapsed_ms = [](double start_ticks) {
        return (double(getTickCount()) - start_ticks) * 1000. / getTickFrequency();
    };

    cout << "Size \t\tMethod 1 \tMethod 2 \tMethod 3" << "\tMethod 4" << endl;

    // Range-for avoids the signed/unsigned comparison of an int index
    // against sizes.size().
    for (const Size& sz : sizes) {
        // Random numerator image and strictly positive weights (>= 1.0), so
        // no division by zero can occur.
        Mat weighted_sum(sz, CV_32FC3);
        randu(weighted_sum, 0.0, 200.0);
        Mat weights(sz, CV_32FC1);
        randu(weights, 1.0, 10.0);

        // Each method gets its own deep copy because methods 2 and 3 write
        // their result into their input buffer.
        Mat ws1 = weighted_sum.clone();
        Mat ws2 = weighted_sum.clone();
        Mat ws3 = weighted_sum.clone();
        Mat ws4 = weighted_sum.clone();

        // Method 1 @panmari: split into planes, divide each plane, merge back.
        const double tic1 = double(getTickCount());
        Mat rec1;
        vector<Mat> channels(3);
        split(ws1, channels);
        // Bind by reference: no Mat header copy per iteration.
        for (Mat& chan : channels) {
            divide(chan, weights, chan);
        }
        merge(channels, rec1);
        const double toc1 = elapsed_ms(tic1);

        // Method 2 @Miki: view both images as a single row and divide
        // element by element through at<>() / ptr<>() accessors.
        const double tic2 = double(getTickCount());
        Mat rec2 = ws2.reshape(3, 1);
        Mat ww = weights.reshape(1, 1);
        for (int i = 0; i < rec2.cols; ++i) {
            float w = ww.at<float>(0, i);
            Vec3f *v = rec2.ptr<Vec3f>(0, i);
            v->val[0] /= w;
            v->val[1] /= w;
            v->val[2] /= w;
        }
        rec2 = rec2.reshape(3, ws2.rows);
        const double toc2 = elapsed_ms(tic2);

        // Method 3 @Miki (+ @Micka): same single-row view, but walk raw
        // pointers and multiply by the reciprocal of the weight.
        const double tic3 = double(getTickCount());
        Mat3f rec3 = ws3.reshape(3, 1);
        //Mat3f rec3 = ws3.reshape(3, 1).clone(); // To not override original image
        Mat1f ww3 = weights.reshape(1, 1);
        Vec3f* prec3 = rec3.ptr<Vec3f>(0);
        float* pww = ww3.ptr<float>(0);
        for (int i = 0; i < rec3.cols; ++i) {
            float scale = 1. / (*pww);
            (*prec3)[0] *= scale;
            (*prec3)[1] *= scale;
            (*prec3)[2] *= scale;
            ++prec3;
            ++pww;
        }
        rec3 = rec3.reshape(3, ws3.rows);
        const double toc3 = elapsed_ms(tic3);

        // Method 4 @Micka: expand the weights to three channels, then do a
        // single element-wise divide.
        const double tic4 = double(getTickCount());
        Mat3f rec4;
        Mat3f w3ch;
        cvtColor(weights, w3ch, COLOR_GRAY2BGR);
        divide(ws4, w3ch, rec4);
        const double toc4 = elapsed_ms(tic4);

        cout << sz << " \t" << toc1 << " \t" << toc2 << " \t" << toc3 << " \t" << toc4 << endl;

        // Check for equality of methods, using method 1 as the reference.
        Mat diff;
        absdiff(rec1, rec2, diff);
        EXPECT_EQ(0, countNonZero(diff.reshape(1)));

        // Method 3 multiplies by a reciprocal instead of dividing, so it is
        // compared with a 1e-4 absolute tolerance rather than exactly.
        absdiff(rec1, rec3, diff);
        threshold(diff, diff, 1e-4, 1, THRESH_BINARY);
        EXPECT_EQ(0, countNonZero(diff.reshape(1)));

        absdiff(rec1, rec4, diff);
        EXPECT_EQ(0, countNonZero(diff.reshape(1)));
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment