Skip to content

Instantly share code, notes, and snippets.

@knsong
Last active November 14, 2016 12:17
Show Gist options
  • Save knsong/f116f40134c4e246ea95879efdd9bba9 to your computer and use it in GitHub Desktop.
Save knsong/f116f40134c4e246ea95879efdd9bba9 to your computer and use it in GitHub Desktop.
#include <algorithm>
#include <vector>
#include "caffe/layers/relu_layer.hpp"
namespace caffe {
// Elementwise thresholded (leaky) ReLU forward kernel.
//
//   out[i] = in[i] - threshold                       if in[i] > threshold
//   out[i] = (in[i] - threshold) * negative_slope    otherwise
//
// n              number of elements to process
// in / out       input and output arrays; each index is read before it is
//                written, and no other index is touched for that element
// negative_slope multiplier applied on the "not above threshold" branch
// threshold      value the input is compared against and shifted by
//
// The range of `index` handled by each thread is determined by
// CUDA_KERNEL_LOOP, which is defined outside this file.
template <typename Dtype>
__global__ void ReLUForward(const int n, const Dtype* in, Dtype* out,
    Dtype negative_slope, Dtype threshold) {
  CUDA_KERNEL_LOOP(index, n) {
    // Load once; both branches work on the same shifted value.
    const Dtype value = in[index];
    const Dtype shifted = value - threshold;
    if (value > threshold) {
      out[index] = shifted;
    } else {
      out[index] = shifted * negative_slope;
    }
  }
}
// GPU forward pass: applies ReLUForward to every element of bottom[0] and
// stores the result in top[0].
//
// bottom  input blobs; only bottom[0] is read
// top     output blobs; only top[0] is written
//
// negative_slope and threshold are taken from
// this->layer_param_.relu_param().
template <typename Dtype>
void ReLULayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  // Hyper-parameters, converted to the layer's numeric type.
  const Dtype slope = this->layer_param_.relu_param().negative_slope();
  const Dtype cutoff = this->layer_param_.relu_param().threshold();

  // Device pointers: input first, then the writable output.
  const Dtype* src = bottom[0]->gpu_data();
  Dtype* dst = top[0]->mutable_gpu_data();
  const int num_elements = bottom[0]->count();

  // NOLINT_NEXT_LINE(whitespace/operators)
  ReLUForward<Dtype><<<CAFFE_GET_BLOCKS(num_elements), CAFFE_CUDA_NUM_THREADS>>>(
      num_elements, src, dst, slope, cutoff);
  // Check the status of the launch above.
  CUDA_POST_KERNEL_CHECK;
}
// Elementwise backward kernel for the thresholded (leaky) ReLU.
//
//   out_diff[i] = in_diff[i] * 1                if in_data[i] >  threshold
//   out_diff[i] = in_diff[i] * negative_slope   if in_data[i] <= threshold
//
// n              number of elements to process
// in_diff        incoming gradient
// in_data        values compared against `threshold` to pick the branch
// out_diff       gradient written by this kernel
// negative_slope gradient factor for the "<= threshold" branch
// threshold      comparison point for in_data
//
// The two comparisons are evaluated separately and combined arithmetically,
// so a value for which neither holds (NaN) yields a zero factor.
// The range of `index` handled by each thread is determined by
// CUDA_KERNEL_LOOP, which is defined outside this file.
template <typename Dtype>
__global__ void ReLUBackward(const int n, const Dtype* in_diff,
    const Dtype* in_data, Dtype* out_diff, Dtype negative_slope,
    Dtype threshold) {
  CUDA_KERNEL_LOOP(index, n) {
    const Dtype x = in_data[index];
    // 1 when the element is above the threshold, 0 otherwise.
    const Dtype above = static_cast<Dtype>(x > threshold);
    // 1 when the element is at or below the threshold, 0 otherwise.
    const Dtype not_above = static_cast<Dtype>(x <= threshold);
    const Dtype local_grad = above + not_above * negative_slope;
    out_diff[index] = in_diff[index] * local_grad;
  }
}
// GPU backward pass: computes the gradient with respect to bottom[0] from the
// gradient stored in top[0], using ReLUBackward.
//
// top             output blobs; top[0]'s diff is the incoming gradient
// propagate_down  the gradient is computed only when propagate_down[0] is true
// bottom          input blobs; bottom[0]'s data selects the branch and
//                 bottom[0]'s diff receives the result
//
// In-place handling: when top[0] and bottom[0] are the same blob, the data
// seen here is the forward *output*, which ReLUForward already shifted by the
// threshold. For a non-negative negative_slope that output is positive exactly
// when the original input exceeded the threshold, so the branch must be chosen
// by comparing against zero rather than against the threshold again.
template <typename Dtype>
void ReLULayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  if (!propagate_down[0]) {
    return;
  }
  const Dtype* bottom_data = bottom[0]->gpu_data();
  const Dtype* top_diff = top[0]->gpu_diff();
  Dtype* bottom_diff = bottom[0]->mutable_gpu_diff();
  const int count = bottom[0]->count();
  const Dtype negative_slope =
      this->layer_param_.relu_param().negative_slope();
  const Dtype configured_threshold =
      this->layer_param_.relu_param().threshold();
  // Same blob on both sides means the layer ran in place and bottom_data has
  // been overwritten with the threshold-shifted forward output.
  const bool in_place = (top[0] == bottom[0]);
  const Dtype threshold = in_place ? Dtype(0) : configured_threshold;
  // NOLINT_NEXT_LINE(whitespace/operators)
  ReLUBackward<Dtype><<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
      count, top_diff, bottom_data, bottom_diff, negative_slope, threshold);
  // Check the status of the launch above.
  CUDA_POST_KERNEL_CHECK;
}
INSTANTIATE_LAYER_GPU_FUNCS(ReLULayer);
} // namespace caffe
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment