-
-
Save sh1r0/fd6c029d3f9252a113f4 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
time.cpp:112 Average time per layer: | |
time.cpp:115 conv1 forward: 378.389 ms. | |
time.cpp:118 conv1 backward: 389.994 ms. | |
time.cpp:115 relu1 forward: 27.806 ms. | |
time.cpp:118 relu1 backward: 0.0049 ms. | |
time.cpp:115 pool1 forward: 74.4676 ms. | |
time.cpp:118 pool1 backward: 0.0078 ms. | |
time.cpp:115 norm1 forward: 17.0962 ms. | |
time.cpp:118 norm1 backward: 26.6126 ms. | |
time.cpp:115 conv2 forward: 742.92 ms. | |
time.cpp:118 conv2 backward: 667.167 ms. | |
time.cpp:115 relu2 forward: 18.7317 ms. | |
time.cpp:118 relu2 backward: 0.0052 ms. | |
time.cpp:115 pool2 forward: 48.7595 ms. | |
time.cpp:118 pool2 backward: 0.0073 ms. | |
time.cpp:115 norm2 forward: 13.1558 ms. | |
time.cpp:118 norm2 backward: 19.7697 ms. | |
time.cpp:115 conv3 forward: 470.005 ms. | |
time.cpp:118 conv3 backward: 327.414 ms. | |
time.cpp:115 relu3 forward: 7.4486 ms. | |
time.cpp:118 relu3 backward: 0.0053 ms. | |
time.cpp:115 conv4 forward: 417.783 ms. | |
time.cpp:118 conv4 backward: 278.358 ms. | |
time.cpp:115 relu4 forward: 7.1758 ms. | |
time.cpp:118 relu4 backward: 0.0046 ms. | |
time.cpp:115 conv5 forward: 312.303 ms. | |
time.cpp:118 conv5 backward: 245.255 ms. | |
time.cpp:115 relu5 forward: 3.3518 ms. | |
time.cpp:118 relu5 backward: 0.0095 ms. | |
time.cpp:115 pool5 forward: 11.5272 ms. | |
time.cpp:118 pool5 backward: 0.0123 ms. | |
time.cpp:115 fc6 forward: 115.744 ms. | |
time.cpp:118 fc6 backward: 134.553 ms. | |
time.cpp:115 relu6 forward: 0.5094 ms. | |
time.cpp:118 relu6 backward: 0.0056 ms. | |
time.cpp:115 drop6 forward: 1.8005 ms. | |
time.cpp:118 drop6 backward: 0.0065 ms. | |
time.cpp:115 fc7 forward: 52.1866 ms. | |
time.cpp:118 fc7 backward: 68.1151 ms. | |
time.cpp:115 relu7 forward: 0.736 ms. | |
time.cpp:118 relu7 backward: 0.0065 ms. | |
time.cpp:115 drop7 forward: 2.3541 ms. | |
time.cpp:118 drop7 backward: 0.0063 ms. | |
time.cpp:115 fc8 forward: 17.4058 ms. | |
time.cpp:118 fc8 backward: 20.1731 ms. | |
time.cpp:115 prob forward: 0.8205 ms. | |
time.cpp:118 prob backward: 0.1849 ms. | |
time.cpp:123 Average Forward pass: 2742.74 ms. | |
time.cpp:125 Average Backward pass: 2177.88 ms. | |
time.cpp:127 Average Forward-Backward: 4921.5 ms. | |
time.cpp:129 Total Time: 49215 ms. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
time.cpp:112 Average time per layer: | |
time.cpp:115 conv1 forward: 22486 ms. | |
time.cpp:118 conv1 backward: 24739.1 ms. | |
time.cpp:115 relu1 forward: 9.318 ms. | |
time.cpp:118 relu1 backward: 0.105 ms. | |
time.cpp:115 pool1 forward: 9.233 ms. | |
time.cpp:118 pool1 backward: 79.234 ms. | |
time.cpp:115 norm1 forward: 42.36 ms. | |
time.cpp:118 norm1 backward: 89.164 ms. | |
time.cpp:115 conv2 forward: 48035.6 ms. | |
time.cpp:118 conv2 backward: 48634.8 ms. | |
time.cpp:115 relu2 forward: 7.504 ms. | |
time.cpp:118 relu2 backward: 0.108 ms. | |
time.cpp:115 pool2 forward: 6.188 ms. | |
time.cpp:118 pool2 backward: 63.386 ms. | |
time.cpp:115 norm2 forward: 26.897 ms. | |
time.cpp:118 norm2 backward: 58.851 ms. | |
time.cpp:115 conv3 forward: 36382 ms. | |
time.cpp:118 conv3 backward: 32037.7 ms. | |
time.cpp:115 relu3 forward: 3.745 ms. | |
time.cpp:118 relu3 backward: 0.122 ms. | |
time.cpp:115 conv4 forward: 27728.1 ms. | |
time.cpp:118 conv4 backward: 24098.5 ms. | |
time.cpp:115 relu4 forward: 4.056 ms. | |
time.cpp:118 relu4 backward: 0.122 ms. | |
time.cpp:115 conv5 forward: 18275 ms. | |
time.cpp:118 conv5 backward: 16116.1 ms. | |
time.cpp:115 relu5 forward: 2.681 ms. | |
time.cpp:118 relu5 backward: 0.09 ms. | |
time.cpp:115 pool5 forward: 2.193 ms. | |
time.cpp:118 pool5 backward: 23.486 ms. | |
time.cpp:115 fc6 forward: 25063.1 ms. | |
time.cpp:118 fc6 backward: 9766.36 ms. | |
time.cpp:115 relu6 forward: 0.417 ms. | |
time.cpp:118 relu6 backward: 0.083 ms. | |
time.cpp:115 drop6 forward: 2.831 ms. | |
time.cpp:118 drop6 backward: 0.097 ms. | |
time.cpp:115 fc7 forward: 11134.9 ms. | |
time.cpp:118 fc7 backward: 4474.58 ms. | |
time.cpp:115 relu7 forward: 1.022 ms. | |
time.cpp:118 relu7 backward: 0.202 ms. | |
time.cpp:115 drop7 forward: 9.621 ms. | |
time.cpp:118 drop7 backward: 0.089 ms. | |
time.cpp:115 fc8 forward: 2818.98 ms. | |
time.cpp:118 fc8 backward: 3637.62 ms. | |
time.cpp:115 prob forward: 5.272 ms. | |
time.cpp:118 prob backward: 5.464 ms. | |
time.cpp:123 Average Forward pass: 192058 ms. | |
time.cpp:125 Average Backward pass: 163826 ms. | |
time.cpp:127 Average Forward-Backward: 355897 ms. | |
time.cpp:129 Total Time: 355897 ms. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
time.cpp:112 Average time per layer: | |
time.cpp:115 conv1 forward: 22677.5 ms. | |
time.cpp:118 conv1 backward: 24798.7 ms. | |
time.cpp:115 relu1 forward: 10.753 ms. | |
time.cpp:118 relu1 backward: 0.023 ms. | |
time.cpp:115 pool1 forward: 9.713 ms. | |
time.cpp:118 pool1 backward: 73.002 ms. | |
time.cpp:115 norm1 forward: 41.152 ms. | |
time.cpp:118 norm1 backward: 87.996 ms. | |
time.cpp:115 conv2 forward: 48373.4 ms. | |
time.cpp:118 conv2 backward: 48922.8 ms. | |
time.cpp:115 relu2 forward: 7.785 ms. | |
time.cpp:118 relu2 backward: 0.13 ms. | |
time.cpp:115 pool2 forward: 6.245 ms. | |
time.cpp:118 pool2 backward: 63.812 ms. | |
time.cpp:115 norm2 forward: 26.684 ms. | |
time.cpp:118 norm2 backward: 60.717 ms. | |
time.cpp:115 conv3 forward: 36588.6 ms. | |
time.cpp:118 conv3 backward: 32159.5 ms. | |
time.cpp:115 relu3 forward: 3.532 ms. | |
time.cpp:118 relu3 backward: 0.123 ms. | |
time.cpp:115 conv4 forward: 27888 ms. | |
time.cpp:118 conv4 backward: 24207.5 ms. | |
time.cpp:115 relu4 forward: 3.644 ms. | |
time.cpp:118 relu4 backward: 0.121 ms. | |
time.cpp:115 conv5 forward: 18339.3 ms. | |
time.cpp:118 conv5 backward: 16162.1 ms. | |
time.cpp:115 relu5 forward: 2.612 ms. | |
time.cpp:118 relu5 backward: 0.109 ms. | |
time.cpp:115 pool5 forward: 2.229 ms. | |
time.cpp:118 pool5 backward: 23.015 ms. | |
time.cpp:115 fc6 forward: 25083.8 ms. | |
time.cpp:118 fc6 backward: 9890.16 ms. | |
time.cpp:115 relu6 forward: 1.058 ms. | |
time.cpp:118 relu6 backward: 0.079 ms. | |
time.cpp:115 drop6 forward: 4.834 ms. | |
time.cpp:118 drop6 backward: 0.092 ms. | |
time.cpp:115 fc7 forward: 11158.4 ms. | |
time.cpp:118 fc7 backward: 4413.41 ms. | |
time.cpp:115 relu7 forward: 1.281 ms. | |
time.cpp:118 relu7 backward: 0.039 ms. | |
time.cpp:115 drop7 forward: 9.432 ms. | |
time.cpp:118 drop7 backward: 0.046 ms. | |
time.cpp:115 fc8 forward: 2825.31 ms. | |
time.cpp:118 fc8 backward: 3614.89 ms. | |
time.cpp:115 prob forward: 5.616 ms. | |
time.cpp:118 prob backward: 6.987 ms. | |
time.cpp:123 Average Forward pass: 193072 ms. | |
time.cpp:125 Average Backward pass: 164486 ms. | |
time.cpp:127 Average Forward-Backward: 357579 ms. | |
time.cpp:129 Total Time: 357579 ms. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <glog/logging.h>
#include <cstring>
#include <iomanip>
#include <iostream>
#include <map>
#include <sstream>
#include <string>
#include <vector>
#include "boost/algorithm/string.hpp"
#include "caffe/caffe.hpp"
#include "caffe/device.hpp"
using caffe::Blob;
using caffe::Caffe;
using caffe::Net;
using caffe::Layer;
using caffe::Solver;
using caffe::shared_ptr;
using caffe::string;
using caffe::Timer;
using caffe::vector;
using caffe::device;
using std::ostringstream;
int device_id = -1; | |
string FLAGS_model = ""; | |
int FLAGS_iterations = 1; | |
// Appends the indices 0..N-1 to *gpus, where N is the device count reported
// by Caffe::EnumerateDevices(true). Existing entries of *gpus are kept.
// In CPU_ONLY builds no index is appended and the NO_GPU macro is invoked.
// NOTE(review): NO_GPU is defined outside this file; if it aborts, CPU_ONLY
// builds cannot get past this function even when the CPU is requested --
// confirm against the Caffe headers.
static void get_gpus(vector<int>* gpus) {
#ifndef CPU_ONLY
  const int device_count = Caffe::EnumerateDevices(true);
#else
  const int device_count = 0;
  NO_GPU;
#endif
  int next_index = 0;
  while (next_index < device_count) {
    gpus->push_back(next_index);
    ++next_index;
  }
}
// Time: benchmark the execution time of a model. | |
int time() { | |
CHECK_GT(FLAGS_model.size(), 0) << "Need a model definition to time."; | |
// Set device id and mode | |
vector<int> gpus; | |
get_gpus(&gpus); | |
if (device_id >= 0 && gpus.size() != 0) { | |
#ifndef CPU_ONLY | |
LOG(INFO) << "Use GPU with device ID " << gpus[0]; | |
Caffe::SetDevices(gpus); | |
Caffe::set_mode(Caffe::GPU); | |
Caffe::SetDevice(gpus[0]); | |
#endif // !CPU_ONLY | |
} else { | |
LOG(INFO) << "Use CPU."; | |
Caffe::set_mode(Caffe::CPU); | |
} | |
// Instantiate the caffe net. | |
Net<float> caffe_net(FLAGS_model, caffe::TRAIN, Caffe::GetDevice(device_id, true)); | |
// Do a clean forward and backward pass, so that memory allocation are done | |
// and future iterations will be more stable. | |
LOG(INFO) << "Performing Forward"; | |
// Note that for the speed benchmark, we will assume that the network does | |
// not take any input blobs. | |
float initial_loss; | |
caffe_net.Forward(vector<Blob<float>*>(), &initial_loss); | |
LOG(INFO) << "Initial loss: " << initial_loss; | |
LOG(INFO) << "Performing Backward"; | |
caffe_net.Backward(); | |
const vector<shared_ptr<Layer<float> > >& layers = caffe_net.layers(); | |
const vector<vector<Blob<float>*> >& bottom_vecs = caffe_net.bottom_vecs(); | |
const vector<vector<Blob<float>*> >& top_vecs = caffe_net.top_vecs(); | |
const vector<vector<bool> >& bottom_need_backward = | |
caffe_net.bottom_need_backward(); | |
LOG(INFO) << "*** Benchmark begins ***"; | |
LOG(INFO) << "Testing for " << FLAGS_iterations << " iterations."; | |
Timer total_timer; | |
total_timer.Start(); | |
Timer forward_timer; | |
Timer backward_timer; | |
Timer timer; | |
std::vector<double> forward_time_per_layer(layers.size(), 0.0); | |
std::vector<double> backward_time_per_layer(layers.size(), 0.0); | |
double forward_time = 0.0; | |
double backward_time = 0.0; | |
for (int_tp j = 0; j < FLAGS_iterations; ++j) { | |
Timer iter_timer; | |
iter_timer.Start(); | |
forward_timer.Start(); | |
for (int_tp i = 0; i < layers.size(); ++i) { | |
timer.Start(); | |
layers[i]->Forward(bottom_vecs[i], top_vecs[i]); | |
Caffe::Synchronize(Caffe::GetDefaultDevice()->id()); | |
forward_time_per_layer[i] += timer.MicroSeconds(); | |
} | |
forward_time += forward_timer.MicroSeconds(); | |
backward_timer.Start(); | |
for (int_tp i = layers.size() - 1; i >= 0; --i) { | |
timer.Start(); | |
layers[i]->Backward(top_vecs[i], bottom_need_backward[i], | |
bottom_vecs[i]); | |
Caffe::Synchronize(Caffe::GetDefaultDevice()->id()); | |
backward_time_per_layer[i] += timer.MicroSeconds(); | |
} | |
backward_time += backward_timer.MicroSeconds(); | |
LOG(INFO) << "Iteration: " << j + 1 << " forward-backward time: " | |
<< iter_timer.MilliSeconds() << " ms."; | |
} | |
LOG(INFO) << "Average time per layer: "; | |
for (int_tp i = 0; i < layers.size(); ++i) { | |
const caffe::string& layername = layers[i]->layer_param().name(); | |
LOG(INFO) << std::setfill(' ') << std::setw(10) << layername << | |
"\tforward: " << forward_time_per_layer[i] / 1000 / | |
FLAGS_iterations << " ms."; | |
LOG(INFO) << std::setfill(' ') << std::setw(10) << layername << | |
"\tbackward: " << backward_time_per_layer[i] / 1000 / | |
FLAGS_iterations << " ms."; | |
} | |
total_timer.Stop(); | |
LOG(INFO) << "Average Forward pass: " << forward_time / 1000 / | |
FLAGS_iterations << " ms."; | |
LOG(INFO) << "Average Backward pass: " << backward_time / 1000 / | |
FLAGS_iterations << " ms."; | |
LOG(INFO) << "Average Forward-Backward: " << total_timer.MilliSeconds() / | |
FLAGS_iterations << " ms."; | |
LOG(INFO) << "Total Time: " << total_timer.MilliSeconds() << " ms."; | |
LOG(INFO) << "*** Benchmark ends ***"; | |
return 0; | |
} | |
int main(int argc, char** argv) { | |
if (argc < 3) { | |
std::cerr << "Usage: " << argv[0] | |
<< " <prototxt> <#iterations> [device_id]" << std::endl; | |
return 1; | |
} | |
FLAGS_model = argv[1]; | |
std::istringstream iss(argv[2]); | |
if (!(iss >> FLAGS_iterations)) { | |
std::cerr << "Usage: " << argv[0] | |
<< " <prototxt> <#iterations> [device_id]" << std::endl; | |
return 1; | |
} | |
if (argc > 3) { | |
std::istringstream iss(argv[3]); | |
if (!(iss >> device_id)) { | |
std::cerr << "Usage: " << argv[0] | |
<< " <prototxt> <#iterations> [device_id]" << std::endl; | |
return 1; | |
} | |
} | |
return time(); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment