Created
July 25, 2014 04:26
-
-
Save yjxiong/093884ab1fcdccf0da70 to your computer and use it in GitHub Desktop.
Caffe-Batch merge guide
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Guide for merging accelerated convolution to Caffe. | |
==== | |
*Yuanjun Xiong* | |
--- | |
[TOC] | |
Adding this feature will accelerate minibatch-based convolution in Caffe. Typically it brings about a 1.33x speedup. | |
# 0. The Full Change List | |
All changes are wrapped in [this commit](https://github.com/yjxiong/caffe/commit/9e8e069f11a3382bc34a595fe5bd2814f4f9993d) on github. Please refer to the page for a full list of modified files. | |
# 1. Manual merging | |
The zip file attached to the email only contains the modified files. | |
Here is a list of changed content: | |
* In file `include/caffe/util/im2col.hpp`: | |
add function declaration | |
``` | |
template <typename Dtype> | |
void bu_im2col_gpu(const Dtype* data_im, const int channels, | |
const int height, const int width, const int ksize, const int pad, | |
const int stride, Dtype* data_col, const int batch_size); | |
template <typename Dtype> | |
void bu_im2col_gpu_rot(const Dtype* data_im, const int channels, | |
const int height, const int width, const int ksize, const int pad, | |
const int stride, Dtype* data_col, const int batch_size); | |
template <typename Dtype> | |
void bu_col2im_gpu(const Dtype* data_col, const int channels, | |
const int height, const int width, const int psize, const int pad, | |
const int stride, Dtype* data_im, | |
const int batch_size); | |
template <typename Dtype> | |
void bu_col2im_gpu_rot(const Dtype* data_col, const int channels, | |
const int height, const int width, const int ksize, const int pad, | |
const int stride, Dtype* data_im, | |
const int batch_size); | |
template <typename Dtype> | |
void cu_im2mat_gpu(const Dtype* data_im, const int channels, | |
const int height, const int width, | |
Dtype* data_mat, | |
const int batch_size); | |
``` | |
* In file `include/caffe/util/math_functions.hpp` | |
add function declaration | |
``` | |
template <typename Dtype> | |
void caffe_gpu_geam(const CBLAS_TRANSPOSE TransA, | |
const CBLAS_TRANSPOSE TransB, const int M, const int N, | |
const Dtype alpha, const Dtype* A, const Dtype* B, const Dtype beta, | |
Dtype* C); | |
``` | |
* In file `include/caffe/vision_layers.hpp`, in the declaration of class `ConvolutionLayer` add following properties: | |
``` | |
int mem_group_size; | |
Blob<Dtype> bias_buffer_; | |
Blob<Dtype> trans_buffer_; | |
``` | |
* In file `src/caffe/layers/conv_layer.cpp`, change the implementation of method | |
``` | |
void ConvolutionLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom, | |
vector<Blob<Dtype>*>* top) | |
``` | |
* In file `src/caffe/layers/conv_layer.cu`, rewrite implementations of two methods | |
``` | |
template <typename Dtype> | |
Dtype ConvolutionLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom, | |
vector<Blob<Dtype>*>* top) | |
``` | |
and | |
``` | |
template <typename Dtype> | |
void ConvolutionLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top, | |
const bool propagate_down, vector<Blob<Dtype>*>* bottom) | |
``` | |
* In file `src/caffe/util/im2col.cu` add function | |
``` | |
template <typename Dtype> | |
__global__ void bu_im2col_gpu_kernel( | |
const int n, const Dtype* data_im, | |
const int height, const int width, const int ksize, const int pad, | |
const int stride, const int height_col, const int width_col, | |
Dtype* data_col, | |
const int data_im_size, | |
const int data_col_size, | |
const int batch_size) | |
template <typename Dtype> | |
__global__ void bu_im2col_gpu_kernel_rot( | |
const int n, const Dtype* data_im, | |
const int height, const int width, const int ksize, const int pad, | |
const int stride, const int height_col, const int width_col, | |
Dtype* data_col, | |
const int data_im_size, | |
const int data_col_size, | |
const int batch_size) | |
template <typename Dtype> | |
__global__ void bu_col2im_gpu_kernel(const int n, const Dtype* data_col, | |
const int height, const int width, const int channels, const int ksize, | |
const int pad, const int stride, const int height_col, const int width_col, | |
Dtype* data_im, | |
const int batch_size) | |
template <typename Dtype> | |
__global__ void bu_col2im_gpu_rot_kernel(const int n, const Dtype* data_col, | |
const int height, const int width, const int channels, const int ksize, | |
const int pad, const int stride, const int height_col, const int width_col, | |
Dtype* data_im, | |
const int batch_size) { | |
CUDA_KERNEL_LOOP(index, n) | |
template <typename Dtype> | |
__global__ void cu_im2mat_gpu_kernel(const int n, const Dtype* data_im, | |
const int height, const int width, const int channels, | |
Dtype* data_mat, | |
const int batch_size) | |
``` | |
implement functions | |
``` | |
template <typename Dtype> | |
void bu_im2col_gpu(const Dtype* data_im, const int channels, | |
const int height, const int width, const int ksize, const int pad, | |
const int stride, Dtype* data_col, const int batch_size) | |
template <typename Dtype> | |
void bu_im2col_gpu_rot(const Dtype* data_im, const int channels, | |
const int height, const int width, const int ksize, const int pad, | |
const int stride, Dtype* data_col, const int batch_size); | |
template <typename Dtype> | |
void bu_col2im_gpu(const Dtype* data_col, const int channels, | |
const int height, const int width, const int psize, const int pad, | |
const int stride, Dtype* data_im, | |
const int batch_size); | |
template <typename Dtype> | |
void bu_col2im_gpu_rot(const Dtype* data_col, const int channels, | |
const int height, const int width, const int ksize, const int pad, | |
const int stride, Dtype* data_im, | |
const int batch_size); | |
template <typename Dtype> | |
void cu_im2mat_gpu(const Dtype* data_im, const int channels, | |
const int height, const int width, | |
Dtype* data_mat, | |
const int batch_size); | |
``` | |
and explicitly instantiate them | |
``` | |
// Explicit instantiation | |
template void bu_im2col_gpu<float>( | |
const float* data_im, const int channels, | |
const int height, const int width, const int ksize, const int pad, | |
const int stride, float* data_col, | |
const int batch_size); | |
template void bu_im2col_gpu<double>( | |
const double* data_im, const int channels, | |
const int height, const int width, const int ksize, const int pad, | |
const int stride, double* data_col, | |
const int batch_size); | |
``` | |
(repeat for others). | |
* In file `src/caffe/util/math_functions.cpp` instantiate and implement two functions | |
``` | |
void caffe_gpu_geam<float>(const CBLAS_TRANSPOSE TransA, | |
const CBLAS_TRANSPOSE TransB, const int M, const int N, | |
const Dtype alpha, const Dtype* A, const Dtype* B, const Dtype beta, | |
Dtype* C); | |
``` | |
``` | |
void caffe_gpu_geam<double>(const CBLAS_TRANSPOSE TransA, | |
const CBLAS_TRANSPOSE TransB, const int M, const int N, | |
const Dtype alpha, const Dtype* A, const Dtype* B, const Dtype beta, | |
Dtype* C); | |
``` | |
* Finally, change the protobuffer definition `src/caffe/proto/caffe.proto`. | |
In message `ConvolutionParameter ` add one parameter | |
``` | |
optional uint32 mem_group_size = 9 [default = 1]; | |
``` | |
If the parameter id (9) is already occupied, just assign an unused number to it. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Guide for merging accelerated convolution to Caffe. | |
==== | |
*Yuanjun Xiong* | |
--- | |
[TOC] | |
# 0. The Full Change List | |
All changes are wrapped in [this commit](https://github.com/yjxiong/caffe/commit/9e8e069f11a3382bc34a595fe5bd2814f4f9993d) on github. Please refer to the page for a full list of modified files. | |
# 1. Manual merging | |
The zip file attached to the email only contains the modified files. | |
Here is a list of changed content: | |
* In file `include/caffe/util/im2col.hpp`: | |
add function declaration | |
``` | |
template <typename Dtype> | |
void bu_im2col_gpu(const Dtype* data_im, const int channels, | |
const int height, const int width, const int ksize, const int pad, | |
const int stride, Dtype* data_col, const int batch_size); | |
``` | |
* In file `include/caffe/util/math_functions.hpp` | |
add function declaration | |
``` | |
template <typename Dtype> | |
void caffe_gpu_geam(const CBLAS_TRANSPOSE TransA, | |
const CBLAS_TRANSPOSE TransB, const int M, const int N, | |
const Dtype alpha, const Dtype* A, const Dtype* B, const Dtype beta, | |
Dtype* C); | |
``` | |
* In file `include/caffe/vision_layers.hpp`, in the declaration of class `ConvolutionLayer` add following properties: | |
``` | |
int mem_group_size; | |
Blob<Dtype> bias_buffer_; | |
Blob<Dtype> trans_buffer_; | |
``` | |
* In file `src/caffe/layers/conv_layer.cpp`, change the implementation of method | |
``` | |
void ConvolutionLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom, | |
vector<Blob<Dtype>*>* top) | |
``` | |
* In file `src/caffe/layers/conv_layer.cu`, rewrite implementations of two methods | |
``` | |
template <typename Dtype> | |
Dtype ConvolutionLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom, | |
vector<Blob<Dtype>*>* top) | |
``` | |
and | |
``` | |
template <typename Dtype> | |
void ConvolutionLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top, | |
const bool propagate_down, vector<Blob<Dtype>*>* bottom) | |
``` | |
* In file `src/caffe/util/im2col.cu` add function | |
``` | |
template <typename Dtype> | |
__global__ void bu_im2col_gpu_kernel( | |
const int n, const Dtype* data_im, | |
const int height, const int width, const int ksize, const int pad, | |
const int stride, const int height_col, const int width_col, | |
Dtype* data_col, | |
const int data_im_size, | |
const int data_col_size, | |
const int batch_size) | |
``` | |
implement function | |
``` | |
template <typename Dtype> | |
void bu_im2col_gpu(const Dtype* data_im, const int channels, | |
const int height, const int width, const int ksize, const int pad, | |
const int stride, Dtype* data_col, const int batch_size) | |
``` | |
and explicitly instantiate it | |
``` | |
// Explicit instantiation | |
template void bu_im2col_gpu<float>( | |
const float* data_im, const int channels, | |
const int height, const int width, const int ksize, const int pad, | |
const int stride, float* data_col, | |
const int batch_size); | |
template void bu_im2col_gpu<double>( | |
const double* data_im, const int channels, | |
const int height, const int width, const int ksize, const int pad, | |
const int stride, double* data_col, | |
const int batch_size); | |
``` | |
* In file `src/caffe/util/math_functions.cpp` instantiate and implement two functions | |
``` | |
void caffe_gpu_geam<float>(const CBLAS_TRANSPOSE TransA, | |
const CBLAS_TRANSPOSE TransB, const int M, const int N, | |
const Dtype alpha, const Dtype* A, const Dtype* B, const Dtype beta, | |
Dtype* C); | |
``` | |
``` | |
void caffe_gpu_geam<double>(const CBLAS_TRANSPOSE TransA, | |
const CBLAS_TRANSPOSE TransB, const int M, const int N, | |
const Dtype alpha, const Dtype* A, const Dtype* B, const Dtype beta, | |
Dtype* C); | |
``` | |
* Finally, change the protobuffer definition `src/caffe/proto/caffe.proto`. | |
In message `ConvolutionParameter ` add one parameter | |
``` | |
optional uint32 mem_group_size = 9 [default = 1]; | |
``` | |
If the parameter id (9) is already occupied, just assign an unused number to it. | |
# 2. Auto patching with Git | |
If your code is tracked by Git, you can download the Git patch I generated -> [changes.patch](https://dl.dropboxusercontent.com/u/101055055/changes.patch). | |
Assume you are in `master` branch. In your repo, run | |
``` | |
git commit -a -m "before patching" | |
git checkout -b patching_batch | |
``` | |
to build a temporary branch for patching. Then, run | |
``` | |
git am -3 < changes.patch | |
``` | |
to patch the code. The 3-way merge will preserve the changes you have made while adding the new feature from the patch. | |
If no conflicts are detected, run | |
``` | |
git checkout master | |
git merge patching_batch | |
``` | |
to merge the changes into the master branch. |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment