Skip to content

Instantly share code, notes, and snippets.

@erogol
Created July 13, 2015 12:54
Show Gist options
  • Star 18 You must be signed in to star a gist
  • Fork 14 You must be signed in to fork a gist
  • Save erogol/67e02e87f94ce9dc0c63 to your computer and use it in GitHub Desktop.
Save erogol/67e02e87f94ce9dc0c63 to your computer and use it in GitHub Desktop.
Caffe c++ batch based prediction
#include "caffeclassifier.h"

#include <algorithm>
/* Construct a classifier.
 *
 * model_file   - network definition handed to Net<float> (TEST phase).
 * trained_file - trained weights copied into the network.
 * mean_file    - binaryproto mean file, forwarded to SetMean().
 * label_file   - text file with one label per line.
 * use_GPU      - selects Caffe::GPU when true, Caffe::CPU otherwise.
 * batch_size   - stored in batch_size_; used when the input blob is reshaped.
 *
 * Aborts through CHECK if the network does not have exactly one input and
 * one output, if the input has neither 1 nor 3 channels, if the label file
 * cannot be opened, or if the label count differs from the output channels. */
CaffeClassifier::CaffeClassifier(const string& model_file,
                                 const string& trained_file,
                                 const string& mean_file,
                                 const string& label_file,
                                 const bool use_GPU,
                                 const int batch_size) {
  Caffe::set_mode(use_GPU ? Caffe::GPU : Caffe::CPU);

  /* Remember the requested batch size. */
  batch_size_ = batch_size;

  /* Build the network and fill it with the trained weights. */
  net_.reset(new Net<float>(model_file, TEST));
  net_->CopyTrainedLayersFrom(trained_file);

  CHECK_EQ(net_->num_inputs(), 1) << "Network should have exactly one input.";
  CHECK_EQ(net_->num_outputs(), 1) << "Network should have exactly one output.";

  /* Cache the channel count and spatial size of the input blob. */
  Blob<float>* in_blob = net_->input_blobs()[0];
  num_channels_ = in_blob->channels();
  CHECK(num_channels_ == 3 || num_channels_ == 1)
      << "Input layer should have 1 or 3 channels.";
  input_geometry_ = cv::Size(in_blob->width(), in_blob->height());

  /* The mean image depends on num_channels_ and input_geometry_ set above. */
  SetMean(mean_file);

  /* Read the labels, one per line. */
  std::ifstream label_stream(label_file.c_str());
  CHECK(label_stream) << "Unable to open labels file " << label_file;
  for (string entry; std::getline(label_stream, entry);)
    labels_.push_back(entry);

  Blob<float>* out_blob = net_->output_blobs()[0];
  CHECK_EQ(labels_.size(), out_blob->channels())
      << "Number of labels is different from the output layer dimension.";
}
/* Strict-weak ordering that places the pair with the larger score first. */
static bool PairCompare(const std::pair<float, int>& lhs,
                        const std::pair<float, int>& rhs) {
  return lhs.first > rhs.first;
}

/* Return the indices of the top N values of vector v, highest value first.
 *
 * N is clamped to the range [0, v.size()], so asking for more entries than
 * exist returns every index and a negative N returns an empty vector.
 * Without the clamp, pairs.begin() + N would point past the end of the
 * range handed to std::partial_sort. */
static std::vector<int> Argmax(const std::vector<float>& v, int N) {
  const int available = static_cast<int>(v.size());
  const int top = std::max(0, std::min(N, available));

  /* Pair every score with its original position so sorting keeps the index. */
  std::vector<std::pair<float, int> > pairs;
  pairs.reserve(v.size());
  for (size_t i = 0; i < v.size(); ++i)
    pairs.push_back(std::make_pair(v[i], static_cast<int>(i)));

  /* Only the first `top` entries need to end up in sorted order. */
  std::partial_sort(pairs.begin(), pairs.begin() + top, pairs.end(), PairCompare);

  std::vector<int> result;
  result.reserve(top);
  for (int i = 0; i < top; ++i)
    result.push_back(pairs[i].second);
  return result;
}
/* Classify every image in imgs with a single forward pass.
 *
 * imgs        - the images of one batch (handed to PredictBatch unchanged).
 * num_classes - how many top-scoring predictions to return per image; it is
 *               clamped to [0, labels_.size()].
 *
 * Returns one vector per input image, in input order, each holding
 * (label, score) pairs sorted by descending score.
 *
 * The flat score vector returned by PredictBatch is cut into one slice per
 * image using the number of labels as the stride. Using num_classes as the
 * stride would pick the wrong scores for every image after the first
 * whenever the caller asks for fewer predictions than there are classes. */
std::vector< vector<Prediction> > CaffeClassifier::ClassifyBatch(const vector< cv::Mat > imgs, int num_classes){
  std::vector<float> output_batch = PredictBatch(imgs);

  const size_t scores_per_image = labels_.size();
  CHECK_EQ(output_batch.size(), imgs.size() * scores_per_image)
      << "Unexpected number of scores returned for the batch.";

  const int top_n =
      std::max(0, std::min(num_classes, static_cast<int>(scores_per_image)));

  std::vector< std::vector<Prediction> > predictions;
  predictions.reserve(imgs.size());
  for (size_t j = 0; j < imgs.size(); ++j) {
    /* Scores belonging to image j. */
    const std::vector<float>::const_iterator first =
        output_batch.begin() + j * scores_per_image;
    std::vector<float> output(first, first + scores_per_image);

    std::vector<int> maxN = Argmax(output, top_n);
    std::vector<Prediction> prediction_single;
    prediction_single.reserve(maxN.size());
    for (size_t i = 0; i < maxN.size(); ++i) {
      const int idx = maxN[i];
      prediction_single.push_back(std::make_pair(labels_[idx], output[idx]));
    }
    predictions.push_back(prediction_single);
  }
  return predictions;
}
/* Load the mean file in binaryproto format. */
void CaffeClassifier::SetMean(const string& mean_file) {
BlobProto blob_proto;
ReadProtoFromBinaryFileOrDie(mean_file.c_str(), &blob_proto);
/* Convert from BlobProto to Blob<float> */
Blob<float> mean_blob;
mean_blob.FromProto(blob_proto);
CHECK_EQ(mean_blob.channels(), num_channels_)
<< "Number of channels of mean file doesn't match input layer.";
/* The format of the mean file is planar 32-bit float BGR or grayscale. */
std::vector<cv::Mat> channels;
float* data = mean_blob.mutable_cpu_data();
for (int i = 0; i < num_channels_; ++i) {
/* Extract an individual channel. */
cv::Mat channel(mean_blob.height(), mean_blob.width(), CV_32FC1, data);
channels.push_back(channel);
data += mean_blob.height() * mean_blob.width();
}
/* Merge the separate channels into a single image. */
cv::Mat mean;
cv::merge(channels, mean);
/* Compute the global mean pixel value and create a mean image
* filled with this value. */
cv::Scalar channel_mean = cv::mean(mean);
mean_ = cv::Mat(input_geometry_, mean.type(), channel_mean);
}
std::vector< float > CaffeClassifier::PredictBatch(const vector< cv::Mat > imgs) {
Blob<float>* input_layer = net_->input_blobs()[0];
input_layer->Reshape(batch_size_, num_channels_,
input_geometry_.height,
input_geometry_.width);
/* Forward dimension change to all layers. */
net_->Reshape();
std::vector< std::vector<cv::Mat> > input_batch;
WrapBatchInputLayer(&input_batch);
PreprocessBatch(imgs, &input_batch);
net_->ForwardPrefilled();
/* Copy the output layer to a std::vector */
Blob<float>* output_layer = net_->output_blobs()[0];
const float* begin = output_layer->cpu_data();
const float* end = begin + output_layer->channels()*imgs.size();
return std::vector<float>(begin, end);
}
void CaffeClassifier::WrapBatchInputLayer(std::vector<std::vector<cv::Mat> > *input_batch){
Blob<float>* input_layer = net_->input_blobs()[0];
int width = input_layer->width();
int height = input_layer->height();
int num = input_layer->num();
float* input_data = input_layer->mutable_cpu_data();
for ( int j = 0; j < num; j++){
vector<cv::Mat> input_channels;
for (int i = 0; i < input_layer->channels(); ++i){
cv::Mat channel(height, width, CV_32FC1, input_data);
input_channels.push_back(channel);
input_data += width * height;
}
input_batch -> push_back(vector<cv::Mat>(input_channels));
}
cv::imshow("bla", input_batch->at(1).at(0));
cv::waitKey(1);
}
void CaffeClassifier::PreprocessBatch(const vector<cv::Mat> imgs,
std::vector< std::vector<cv::Mat> >* input_batch){
for (int i = 0 ; i < imgs.size(); i++){
cv::Mat img = imgs[i];
std::vector<cv::Mat> *input_channels = &(input_batch->at(i));
/* Convert the input image to the input image format of the network. */
cv::Mat sample;
if (img.channels() == 3 && num_channels_ == 1)
cv::cvtColor(img, sample, CV_BGR2GRAY);
else if (img.channels() == 4 && num_channels_ == 1)
cv::cvtColor(img, sample, CV_BGRA2GRAY);
else if (img.channels() == 4 && num_channels_ == 3)
cv::cvtColor(img, sample, CV_BGRA2BGR);
else if (img.channels() == 1 && num_channels_ == 3)
cv::cvtColor(img, sample, CV_GRAY2BGR);
else
sample = img;
cv::Mat sample_resized;
if (sample.size() != input_geometry_)
cv::resize(sample, sample_resized, input_geometry_);
else
sample_resized = sample;
cv::Mat sample_float;
if (num_channels_ == 3)
sample_resized.convertTo(sample_float, CV_32FC3);
else
sample_resized.convertTo(sample_float, CV_32FC1);
cv::Mat sample_normalized;
cv::subtract(sample_float, mean_, sample_normalized);
/* This operation will write the separate BGR planes directly to the
* input layer of the network because it is wrapped by the cv::Mat
* objects in input_channels. */
cv::split(sample_normalized, *input_channels);
// CHECK(reinterpret_cast<float*>(input_channels->at(0).data)
// == net_->input_blobs()[0]->cpu_data())
// << "Input channels are not wrapping the input layer of the network.";
}
}
/* Smoke test: classify the example image and print its top predictions.
 *
 * Builds a GPU classifier with batch size 1 from the CAFFE_* paths, loads
 * CAFFE_EXP_IMG, asks for the top 2 predictions and prints each as
 * "score - "label"".
 *
 * Returns 0 on completion. The function is declared int, so flowing off the
 * end without a return statement was undefined behaviour.
 *
 * NOTE(review): Classify() is not defined in the visible part of this file;
 * presumably it is provided elsewhere -- confirm. */
int CaffeClassifier::testClassifier() {
  string model_file = CAFFE_MODEL_FILE;
  string trained_file = CAFFE_MODEL_BIN;
  string mean_file = CAFFE_MEAN_FILE;
  string label_file = CAFFE_LABEL_FILE;
  CaffeClassifier classifier(model_file, trained_file, mean_file, label_file, true, 1 );

  /* -1: load the image as stored in the file. */
  cv::Mat img = cv::imread(CAFFE_EXP_IMG, -1);
  std::cout << "---------- Prediction for "
            << CAFFE_EXP_IMG << " ----------" << std::endl;
  CHECK(!img.empty()) << "Unable to decode image " << CAFFE_EXP_IMG;

  std::vector<Prediction> predictions = classifier.Classify(img, 2);
  std::cout << predictions.size() << std::endl;

  /* Print the top N predictions. */
  for (size_t i = 0; i < predictions.size(); ++i) {
    Prediction p = predictions[i];
    std::cout << std::fixed << std::setprecision(4) << p.second << " - \""
              << p.first << "\"" << std::endl;
  }
  return 0;
}
@lunzueta
Copy link

Hi @erogol. I'm a bit confused with this code. I understand that its purpose is to classify several images using batch processing. So... initially the batch size is defined, depending on the installed GPU's capabilities to process data in parallel. What I don't understand well is what happens in the function PreprocessBatch(). The input data are all the images that we want to process, which are normally more than the defined batch size. Inside that function there's a for loop that goes from i=0 to i = imgs.size() - 1, which takes each image one by one, but then it also does the following:

std::vector<cv::Mat> *input_channels = &(input_batch->at(i));

And as far as I understand, batch_size_ is smaller than imgs.size(), so it's not possible to access the memory at some point (i >= batch_size_) and it crashes. Am I wrong with this analysis? Thanks

@troore
Copy link

troore commented Jul 29, 2016

Hi @lunzueta, I think batch_size_ is equal to imgs.size(). imgs contains ALL images for one batch processing.

@lunzueta
Copy link

Ok @troore. Thanks, now I understand the idea.

@MohsenZandi
Copy link

How much performance does batch processing gain compared with processing images one at a time (on a GPU)?

@MohsenZandi
Copy link

Which sections of this code run on the GPU? All of them, or only the prediction or the preprocessing?

@ifadams
Copy link

ifadams commented Aug 15, 2017

@MohsenZandi a bit of a late response, but I believe that only the classification (prediction) itself runs on the GPU when GPU mode is enabled. The preprocessing and transforms are all CPU-based.

@vaibhav0195
Copy link

Hi, did you get any performance gain using this?

@masonwang513
Copy link

I tested this batch classification implementation on MNIST in GPU mode. With a batch size of 32 it is about 3x faster than the non-batch version; with a batch size of 128 the speedup is about 4x, and with a batch size of 512 about 5x.

@icyhearts
Copy link

Hi, where is the header file caffeclassifier.h?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment