Skip to content

Instantly share code, notes, and snippets.

@sh1r0
Created January 3, 2016 16:55
Show Gist options
  • Save sh1r0/fd6c029d3f9252a113f4 to your computer and use it in GitHub Desktop.
Save sh1r0/fd6c029d3f9252a113f4 to your computer and use it in GitHub Desktop.
time.cpp:112 Average time per layer:
time.cpp:115 conv1 forward: 378.389 ms.
time.cpp:118 conv1 backward: 389.994 ms.
time.cpp:115 relu1 forward: 27.806 ms.
time.cpp:118 relu1 backward: 0.0049 ms.
time.cpp:115 pool1 forward: 74.4676 ms.
time.cpp:118 pool1 backward: 0.0078 ms.
time.cpp:115 norm1 forward: 17.0962 ms.
time.cpp:118 norm1 backward: 26.6126 ms.
time.cpp:115 conv2 forward: 742.92 ms.
time.cpp:118 conv2 backward: 667.167 ms.
time.cpp:115 relu2 forward: 18.7317 ms.
time.cpp:118 relu2 backward: 0.0052 ms.
time.cpp:115 pool2 forward: 48.7595 ms.
time.cpp:118 pool2 backward: 0.0073 ms.
time.cpp:115 norm2 forward: 13.1558 ms.
time.cpp:118 norm2 backward: 19.7697 ms.
time.cpp:115 conv3 forward: 470.005 ms.
time.cpp:118 conv3 backward: 327.414 ms.
time.cpp:115 relu3 forward: 7.4486 ms.
time.cpp:118 relu3 backward: 0.0053 ms.
time.cpp:115 conv4 forward: 417.783 ms.
time.cpp:118 conv4 backward: 278.358 ms.
time.cpp:115 relu4 forward: 7.1758 ms.
time.cpp:118 relu4 backward: 0.0046 ms.
time.cpp:115 conv5 forward: 312.303 ms.
time.cpp:118 conv5 backward: 245.255 ms.
time.cpp:115 relu5 forward: 3.3518 ms.
time.cpp:118 relu5 backward: 0.0095 ms.
time.cpp:115 pool5 forward: 11.5272 ms.
time.cpp:118 pool5 backward: 0.0123 ms.
time.cpp:115 fc6 forward: 115.744 ms.
time.cpp:118 fc6 backward: 134.553 ms.
time.cpp:115 relu6 forward: 0.5094 ms.
time.cpp:118 relu6 backward: 0.0056 ms.
time.cpp:115 drop6 forward: 1.8005 ms.
time.cpp:118 drop6 backward: 0.0065 ms.
time.cpp:115 fc7 forward: 52.1866 ms.
time.cpp:118 fc7 backward: 68.1151 ms.
time.cpp:115 relu7 forward: 0.736 ms.
time.cpp:118 relu7 backward: 0.0065 ms.
time.cpp:115 drop7 forward: 2.3541 ms.
time.cpp:118 drop7 backward: 0.0063 ms.
time.cpp:115 fc8 forward: 17.4058 ms.
time.cpp:118 fc8 backward: 20.1731 ms.
time.cpp:115 prob forward: 0.8205 ms.
time.cpp:118 prob backward: 0.1849 ms.
time.cpp:123 Average Forward pass: 2742.74 ms.
time.cpp:125 Average Backward pass: 2177.88 ms.
time.cpp:127 Average Forward-Backward: 4921.5 ms.
time.cpp:129 Total Time: 49215 ms.
time.cpp:112 Average time per layer:
time.cpp:115 conv1 forward: 22486 ms.
time.cpp:118 conv1 backward: 24739.1 ms.
time.cpp:115 relu1 forward: 9.318 ms.
time.cpp:118 relu1 backward: 0.105 ms.
time.cpp:115 pool1 forward: 9.233 ms.
time.cpp:118 pool1 backward: 79.234 ms.
time.cpp:115 norm1 forward: 42.36 ms.
time.cpp:118 norm1 backward: 89.164 ms.
time.cpp:115 conv2 forward: 48035.6 ms.
time.cpp:118 conv2 backward: 48634.8 ms.
time.cpp:115 relu2 forward: 7.504 ms.
time.cpp:118 relu2 backward: 0.108 ms.
time.cpp:115 pool2 forward: 6.188 ms.
time.cpp:118 pool2 backward: 63.386 ms.
time.cpp:115 norm2 forward: 26.897 ms.
time.cpp:118 norm2 backward: 58.851 ms.
time.cpp:115 conv3 forward: 36382 ms.
time.cpp:118 conv3 backward: 32037.7 ms.
time.cpp:115 relu3 forward: 3.745 ms.
time.cpp:118 relu3 backward: 0.122 ms.
time.cpp:115 conv4 forward: 27728.1 ms.
time.cpp:118 conv4 backward: 24098.5 ms.
time.cpp:115 relu4 forward: 4.056 ms.
time.cpp:118 relu4 backward: 0.122 ms.
time.cpp:115 conv5 forward: 18275 ms.
time.cpp:118 conv5 backward: 16116.1 ms.
time.cpp:115 relu5 forward: 2.681 ms.
time.cpp:118 relu5 backward: 0.09 ms.
time.cpp:115 pool5 forward: 2.193 ms.
time.cpp:118 pool5 backward: 23.486 ms.
time.cpp:115 fc6 forward: 25063.1 ms.
time.cpp:118 fc6 backward: 9766.36 ms.
time.cpp:115 relu6 forward: 0.417 ms.
time.cpp:118 relu6 backward: 0.083 ms.
time.cpp:115 drop6 forward: 2.831 ms.
time.cpp:118 drop6 backward: 0.097 ms.
time.cpp:115 fc7 forward: 11134.9 ms.
time.cpp:118 fc7 backward: 4474.58 ms.
time.cpp:115 relu7 forward: 1.022 ms.
time.cpp:118 relu7 backward: 0.202 ms.
time.cpp:115 drop7 forward: 9.621 ms.
time.cpp:118 drop7 backward: 0.089 ms.
time.cpp:115 fc8 forward: 2818.98 ms.
time.cpp:118 fc8 backward: 3637.62 ms.
time.cpp:115 prob forward: 5.272 ms.
time.cpp:118 prob backward: 5.464 ms.
time.cpp:123 Average Forward pass: 192058 ms.
time.cpp:125 Average Backward pass: 163826 ms.
time.cpp:127 Average Forward-Backward: 355897 ms.
time.cpp:129 Total Time: 355897 ms.
time.cpp:112 Average time per layer:
time.cpp:115 conv1 forward: 22677.5 ms.
time.cpp:118 conv1 backward: 24798.7 ms.
time.cpp:115 relu1 forward: 10.753 ms.
time.cpp:118 relu1 backward: 0.023 ms.
time.cpp:115 pool1 forward: 9.713 ms.
time.cpp:118 pool1 backward: 73.002 ms.
time.cpp:115 norm1 forward: 41.152 ms.
time.cpp:118 norm1 backward: 87.996 ms.
time.cpp:115 conv2 forward: 48373.4 ms.
time.cpp:118 conv2 backward: 48922.8 ms.
time.cpp:115 relu2 forward: 7.785 ms.
time.cpp:118 relu2 backward: 0.13 ms.
time.cpp:115 pool2 forward: 6.245 ms.
time.cpp:118 pool2 backward: 63.812 ms.
time.cpp:115 norm2 forward: 26.684 ms.
time.cpp:118 norm2 backward: 60.717 ms.
time.cpp:115 conv3 forward: 36588.6 ms.
time.cpp:118 conv3 backward: 32159.5 ms.
time.cpp:115 relu3 forward: 3.532 ms.
time.cpp:118 relu3 backward: 0.123 ms.
time.cpp:115 conv4 forward: 27888 ms.
time.cpp:118 conv4 backward: 24207.5 ms.
time.cpp:115 relu4 forward: 3.644 ms.
time.cpp:118 relu4 backward: 0.121 ms.
time.cpp:115 conv5 forward: 18339.3 ms.
time.cpp:118 conv5 backward: 16162.1 ms.
time.cpp:115 relu5 forward: 2.612 ms.
time.cpp:118 relu5 backward: 0.109 ms.
time.cpp:115 pool5 forward: 2.229 ms.
time.cpp:118 pool5 backward: 23.015 ms.
time.cpp:115 fc6 forward: 25083.8 ms.
time.cpp:118 fc6 backward: 9890.16 ms.
time.cpp:115 relu6 forward: 1.058 ms.
time.cpp:118 relu6 backward: 0.079 ms.
time.cpp:115 drop6 forward: 4.834 ms.
time.cpp:118 drop6 backward: 0.092 ms.
time.cpp:115 fc7 forward: 11158.4 ms.
time.cpp:118 fc7 backward: 4413.41 ms.
time.cpp:115 relu7 forward: 1.281 ms.
time.cpp:118 relu7 backward: 0.039 ms.
time.cpp:115 drop7 forward: 9.432 ms.
time.cpp:118 drop7 backward: 0.046 ms.
time.cpp:115 fc8 forward: 2825.31 ms.
time.cpp:118 fc8 backward: 3614.89 ms.
time.cpp:115 prob forward: 5.616 ms.
time.cpp:118 prob backward: 6.987 ms.
time.cpp:123 Average Forward pass: 193072 ms.
time.cpp:125 Average Backward pass: 164486 ms.
time.cpp:127 Average Forward-Backward: 357579 ms.
time.cpp:129 Total Time: 357579 ms.
#include <cstring>
#include <iomanip>
#include <iostream>
#include <map>
#include <sstream>
#include <string>
#include <vector>

#include <glog/logging.h>

#include "boost/algorithm/string.hpp"
#include "caffe/caffe.hpp"
#include "caffe/device.hpp"
using caffe::Blob;
using caffe::Caffe;
using caffe::Net;
using caffe::Layer;
using caffe::Solver;
using caffe::shared_ptr;
using caffe::string;
using caffe::Timer;
using caffe::vector;
using caffe::device;
using std::ostringstream;
// Device requested on the command line (optional argv[3], parsed in main()).
// Stays at -1 when the argument is omitted; time() only takes its GPU branch
// for values >= 0.
int device_id = -1;
// Model definition handed to the Net constructor in time(); set from argv[1].
string FLAGS_model = "";
// Number of timed forward-backward iterations run by time(); set from argv[2].
int FLAGS_iterations = 1;
// Appends the indices 0 .. N-1 to *gpus, where N is the device count reported
// by Caffe::EnumerateDevices(true). In CPU_ONLY builds N is 0 and the NO_GPU
// macro is expanded instead.
//
// NOTE(review): time() calls this unconditionally, so in a CPU_ONLY build
// NO_GPU is reached on every run -- if that macro is fatal, the CPU fallback
// in time() can never be reached. Confirm against the macro's definition.
static void get_gpus(vector<int>* gpus) {
#ifndef CPU_ONLY
  const int device_count = Caffe::EnumerateDevices(true);
#else
  const int device_count = 0;
  NO_GPU;
#endif
  int next_id = 0;
  while (next_id < device_count) {
    gpus->push_back(next_id);
    ++next_id;
  }
}
// Time: benchmark the execution time of a model.
int time() {
CHECK_GT(FLAGS_model.size(), 0) << "Need a model definition to time.";
// Set device id and mode
vector<int> gpus;
get_gpus(&gpus);
if (device_id >= 0 && gpus.size() != 0) {
#ifndef CPU_ONLY
LOG(INFO) << "Use GPU with device ID " << gpus[0];
Caffe::SetDevices(gpus);
Caffe::set_mode(Caffe::GPU);
Caffe::SetDevice(gpus[0]);
#endif // !CPU_ONLY
} else {
LOG(INFO) << "Use CPU.";
Caffe::set_mode(Caffe::CPU);
}
// Instantiate the caffe net.
Net<float> caffe_net(FLAGS_model, caffe::TRAIN, Caffe::GetDevice(device_id, true));
// Do a clean forward and backward pass, so that memory allocation are done
// and future iterations will be more stable.
LOG(INFO) << "Performing Forward";
// Note that for the speed benchmark, we will assume that the network does
// not take any input blobs.
float initial_loss;
caffe_net.Forward(vector<Blob<float>*>(), &initial_loss);
LOG(INFO) << "Initial loss: " << initial_loss;
LOG(INFO) << "Performing Backward";
caffe_net.Backward();
const vector<shared_ptr<Layer<float> > >& layers = caffe_net.layers();
const vector<vector<Blob<float>*> >& bottom_vecs = caffe_net.bottom_vecs();
const vector<vector<Blob<float>*> >& top_vecs = caffe_net.top_vecs();
const vector<vector<bool> >& bottom_need_backward =
caffe_net.bottom_need_backward();
LOG(INFO) << "*** Benchmark begins ***";
LOG(INFO) << "Testing for " << FLAGS_iterations << " iterations.";
Timer total_timer;
total_timer.Start();
Timer forward_timer;
Timer backward_timer;
Timer timer;
std::vector<double> forward_time_per_layer(layers.size(), 0.0);
std::vector<double> backward_time_per_layer(layers.size(), 0.0);
double forward_time = 0.0;
double backward_time = 0.0;
for (int_tp j = 0; j < FLAGS_iterations; ++j) {
Timer iter_timer;
iter_timer.Start();
forward_timer.Start();
for (int_tp i = 0; i < layers.size(); ++i) {
timer.Start();
layers[i]->Forward(bottom_vecs[i], top_vecs[i]);
Caffe::Synchronize(Caffe::GetDefaultDevice()->id());
forward_time_per_layer[i] += timer.MicroSeconds();
}
forward_time += forward_timer.MicroSeconds();
backward_timer.Start();
for (int_tp i = layers.size() - 1; i >= 0; --i) {
timer.Start();
layers[i]->Backward(top_vecs[i], bottom_need_backward[i],
bottom_vecs[i]);
Caffe::Synchronize(Caffe::GetDefaultDevice()->id());
backward_time_per_layer[i] += timer.MicroSeconds();
}
backward_time += backward_timer.MicroSeconds();
LOG(INFO) << "Iteration: " << j + 1 << " forward-backward time: "
<< iter_timer.MilliSeconds() << " ms.";
}
LOG(INFO) << "Average time per layer: ";
for (int_tp i = 0; i < layers.size(); ++i) {
const caffe::string& layername = layers[i]->layer_param().name();
LOG(INFO) << std::setfill(' ') << std::setw(10) << layername <<
"\tforward: " << forward_time_per_layer[i] / 1000 /
FLAGS_iterations << " ms.";
LOG(INFO) << std::setfill(' ') << std::setw(10) << layername <<
"\tbackward: " << backward_time_per_layer[i] / 1000 /
FLAGS_iterations << " ms.";
}
total_timer.Stop();
LOG(INFO) << "Average Forward pass: " << forward_time / 1000 /
FLAGS_iterations << " ms.";
LOG(INFO) << "Average Backward pass: " << backward_time / 1000 /
FLAGS_iterations << " ms.";
LOG(INFO) << "Average Forward-Backward: " << total_timer.MilliSeconds() /
FLAGS_iterations << " ms.";
LOG(INFO) << "Total Time: " << total_timer.MilliSeconds() << " ms.";
LOG(INFO) << "*** Benchmark ends ***";
return 0;
}
// Writes the expected command line to stderr.
static void print_usage(const char* program) {
  std::cerr << "Usage: " << program
            << " <prototxt> <#iterations> [device_id]" << std::endl;
}

// Parses `text` as an int into *value. Returns true only when an integer was
// read and nothing but whitespace follows it, so inputs such as "10abc" are
// rejected instead of being silently truncated to 10.
static bool parse_int_arg(const char* text, int* value) {
  std::istringstream stream(text);
  char trailing;
  return (stream >> *value) && !(stream >> trailing);
}

// Entry point: <prototxt> <#iterations> [device_id].
//
// Stores the arguments in FLAGS_model, FLAGS_iterations and device_id, then
// runs time(). Returns 1 after printing the usage text when an argument is
// missing, is not a well-formed integer, or the iteration count is not
// positive; otherwise returns whatever time() returns.
int main(int argc, char** argv) {
  if (argc < 3) {
    print_usage(argv[0]);
    return 1;
  }
  FLAGS_model = argv[1];
  // The iteration count is used as a divisor for every reported average, so
  // zero or negative values are refused up front.
  if (!parse_int_arg(argv[2], &FLAGS_iterations) || FLAGS_iterations <= 0) {
    print_usage(argv[0]);
    return 1;
  }
  // device_id is optional; when absent it keeps its default value.
  if (argc > 3 && !parse_int_arg(argv[3], &device_id)) {
    print_usage(argv[0]);
    return 1;
  }
  return time();
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment