Skip to content

Instantly share code, notes, and snippets.

@kyamagu
Last active December 6, 2016 17:26
Show Gist options
  • Save kyamagu/31a4b6f782670a28098b to your computer and use it in GitHub Desktop.
Save kyamagu/31a4b6f782670a28098b to your computer and use it in GitHub Desktop.
Caffe Datum proto converter
// Caffe proto converter.
//
// Build.
//
// mex -I/path/to/matlab-lmdb/include ...
// -I/path/to/caffe/build/src/caffe/proto ...
// caffe_proto_.cc ...
// /path/to/caffe/build/src/caffe/proto/caffe.pb.o ...
// -lprotobuf CXXFLAGS="$CXXFLAGS -std=c++11"
//
// Usage.
//
// fid = fopen('cat.jpg', 'r');
// jpg_image = fread(fid, inf, 'uint8=>uint8');
// fclose(fid);
// label = 1;
// datum = caffe_proto_('toEncodedDatum', jpg_image, label);
// [jpg_image, label] = caffe_proto_('fromDatum', datum);
//
// image = imread('cat.jpg');
// label = 1;
// datum = caffe_proto_('toDatum', image, label);
// [image, label] = caffe_proto_('fromDatum', datum);
//
// Importing into LMDB database.
//
// addpath('/path/to/matlab-lmdb');
// image_files = {
// '/path/to/image-1.jpg', ...
// '/path/to/image-2.jpg', ...
// ...
// };
// database = lmdb.DB('/path/to/lmdb');
// for i = 1:numel(image_files)
// label = 0;
// fid = fopen(image_files{i}, 'r');
// jpg_image = fread(fid, inf, 'uint8=>uint8');
// fclose(fid);
// datum = caffe_proto_('toEncodedDatum', jpg_image, label);
// database.put(image_files{i}, datum);
// end
// clear database;
//
#include "caffe.pb.h"
#include "mexplus.h"
using namespace std;
using namespace mexplus;
// Raises a MATLAB error with id "caffe_proto:error" when `condition` is false.
// The remaining arguments are forwarded to mexErrMsgIdAndTxt as the message
// format string and its arguments.
// The do/while(0) wrapper makes the macro behave as a single statement, so it
// is safe inside an unbraced if/else (no dangling-else capture).
#define ASSERT(condition, ...) \
  do { \
    if (!(condition)) mexErrMsgIdAndTxt("caffe_proto:error", __VA_ARGS__); \
  } while (0)
// Wraps an already-encoded image buffer (e.g. the raw bytes of a JPEG file)
// and a label into a caffe::Datum flagged as encoded.
//
// Inputs:  prhs[0] byte buffer read as uint8, prhs[1] integer label.
// Output:  plhs[0] the serialized Datum.
MEX_DEFINE(toEncodedDatum) (int nlhs, mxArray* plhs[],
                            int nrhs, const mxArray* prhs[]) {
  InputArguments input(nrhs, prhs, 2);
  OutputArguments output(nlhs, plhs, 1);

  MxArray encoded_bytes(input.get(0));
  caffe::Datum message;
  // The buffer is stored verbatim; no image decoding happens here.
  message.set_data(encoded_bytes.getData<uint8_t>(), encoded_bytes.size());
  const int label = input.get<int>(1);
  message.set_label(label);
  message.set_encoded(true);

  const string serialized = message.SerializeAsString();
  output.set(0, serialized);
}
// Converts a decoded image array and a label into a serialized caffe::Datum.
//
// Inputs:  prhs[0] image array (dimension 0 = height, dimension 1 = width,
//          remaining dimensions multiplied together as channels),
//          prhs[1] integer label.
// Output:  plhs[0] the serialized Datum.
//
// uint8 input is written to the Datum's byte `data` field; any other input is
// read element-wise as float and written to `float_data`. Elements are emitted
// channel by channel (last channel first), then row by row, then column by
// column.
MEX_DEFINE(toDatum) (int nlhs, mxArray* plhs[],
                     int nrhs, const mxArray* prhs[]) {
  InputArguments input(nrhs, prhs, 2);
  OutputArguments output(nlhs, plhs, 1);
  caffe::Datum datum;
  MxArray image(input.get(0));
  datum.set_label(input.get<int>(1));

  const vector<mwSize> shape = image.dimensions();
  const int rows = shape[0];
  const int cols = shape[1];
  // Every dimension past the first two is folded into the channel count.
  int planes = 1;
  for (size_t d = 2; d < shape.size(); ++d)
    planes *= shape[d];
  datum.set_channels(planes);
  datum.set_width(cols);
  datum.set_height(rows);

  const bool is_bytes = image.isUint8();
  if (is_bytes)
    datum.mutable_data()->reserve(image.size());
  else
    datum.mutable_float_data()->Reserve(image.size());

  vector<mwIndex> position(3);
  for (int plane = planes - 1; plane >= 0; --plane) {  // RGB to BGR order.
    position[2] = plane;
    for (int row = 0; row < rows; ++row) {
      position[0] = row;
      for (int col = 0; col < cols; ++col) {
        position[1] = col;
        if (is_bytes)
          datum.mutable_data()->push_back(image.at<uint8_t>(position));
        else
          datum.add_float_data(image.at<float>(position));
      }
    }
  }
  output.set(0, datum.SerializeAsString());
}
// Parses a serialized caffe::Datum and returns its payload and label.
//
// Input:   prhs[0] the serialized Datum.
// Outputs: plhs[0] the payload, plhs[1] the label (0 when the Datum has none).
//
// When the Datum is flagged as encoded, the payload is the stored `data`
// bytes, returned without decoding. Otherwise the payload is a
// height x width x channels array (uint8 when `data` is present, single when
// only `float_data` is present), with the channel order reversed relative to
// the stored order.
//
// Raises "caffe_proto:error" when the message cannot be parsed, when a
// dimension is negative, or when the stored payload holds fewer elements than
// height * width * channels (which would otherwise be read out of bounds).
MEX_DEFINE(fromDatum) (int nlhs, mxArray* plhs[],
                       int nrhs, const mxArray* prhs[]) {
  InputArguments input(nrhs, prhs, 1);
  OutputArguments output(nlhs, plhs, 2);
  caffe::Datum datum;
  ASSERT(datum.ParseFromString(input.get<string>(0)),
         "Failed to parse datum.");
  if (datum.has_encoded() && datum.encoded()) {
    output.set(0, datum.data());
  }
  else {
    const int height = (datum.has_height()) ? datum.height() : 0;
    const int width = (datum.has_width()) ? datum.width() : 0;
    const int channels = (datum.has_channels()) ? datum.channels() : 0;
    ASSERT(height >= 0 && width >= 0 && channels >= 0,
           "Invalid datum dimensions.");
    // height * width of two non-negative ints always fits in 64 bits; the
    // channel factor is checked by division below so that the full product
    // is never formed and cannot overflow.
    const unsigned long long plane =
        static_cast<unsigned long long>(height) *
        static_cast<unsigned long long>(width);

    vector<mwIndex> dimensions(3);
    dimensions[0] = height;
    dimensions[1] = width;
    dimensions[2] = channels;
    MxArray array;
    vector<mwIndex> subscripts(3);
    if (datum.has_data()) {
      const string& data = datum.data();
      const unsigned long long available = data.size();
      ASSERT(plane == 0 ||
                 static_cast<unsigned long long>(channels) <= available / plane,
             "Datum data is smaller than height x width x channels.");
      array.reset(mxCreateNumericArray(dimensions.size(),
                                       &dimensions[0],
                                       mxUINT8_CLASS,
                                       mxREAL));
      size_t index = 0;
      for (int k = channels - 1; k >= 0; --k) {  // BGR to RGB order.
        subscripts[2] = k;
        for (int i = 0; i < height; ++i) {
          subscripts[0] = i;
          for (int j = 0; j < width; ++j) {
            subscripts[1] = j;
            array.set(subscripts, data[index++]);
          }
        }
      }
    }
    else if (datum.float_data_size() > 0) {
      const unsigned long long available =
          static_cast<unsigned long long>(datum.float_data_size());
      ASSERT(plane == 0 ||
                 static_cast<unsigned long long>(channels) <= available / plane,
             "Datum float_data is smaller than height x width x channels.");
      array.reset(mxCreateNumericArray(dimensions.size(),
                                       &dimensions[0],
                                       mxSINGLE_CLASS,
                                       mxREAL));
      // The check above bounds the element count by float_data_size(), so an
      // int index cannot overflow here.
      int index = 0;
      for (int k = channels - 1; k >= 0; --k) {  // BGR to RGB order.
        subscripts[2] = k;
        for (int i = 0; i < height; ++i) {
          subscripts[0] = i;
          for (int j = 0; j < width; ++j) {
            subscripts[1] = j;
            array.set(subscripts, datum.float_data(index++));
          }
        }
      }
    }
    output.set(0, array.release());
  }
  output.set(1, (datum.has_label()) ? datum.label() : 0);
}
// NOTE(review): presumably expands to the MEX entry point that routes the
// first call argument (e.g. 'toDatum', 'fromDatum') to the matching
// MEX_DEFINE operation in this file — confirm against mexplus.
MEX_DISPATCH
@kyamagu
Copy link
Author

kyamagu commented Mar 28, 2016

Hi folks, GitHub Gist doesn't notify me of new comments, and I didn't know so many discussions were happening here!

@peerajak You can encode and decode to check if they are identical.

@201power I believe your encoded datum is not in the expected binary format. Just check whether you can fwrite the binary to a file and are able to open it. My implementation does not do encoding/decoding but only conversion, because Matlab can use imread/imwrite for decoding/encoding. That's why there is an fread in the example.

@amir-abdi You have to compile caffe first to link. Make sure your relative path is correctly pointing to the caffe.pb.o object.

@cedricseah You must correctly specify /path/to/caffe. Check the caffe directory.

@CeSul
Copy link

CeSul commented Dec 2, 2016

Hi, Thank you for posting this.

Could you provide example usage for using 'fromDatum' to read from an existing database? I tried

datum = read_db.get(filename);
[im, label] = caffe_proto_('fromDatum',datum)

based on your example and received this error:

Error using caffe_proto_
Failed to parse datum.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment