Created
May 21, 2018 06:58
-
-
Save lijiansong/a9572f518c156789f8fa87058a4a2e51 to your computer and use it in GitHub Desktop.
roi-pooling-cpu.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// ------------------------------------------------------------------
// Fast R-CNN
// Copyright (c) 2015 Microsoft
// Licensed under The MIT License [see fast-rcnn/LICENSE for details]
// Written by Ross Girshick
// ------------------------------------------------------------------
#include <cfloat> | |
#include "caffe/fast_rcnn_layers.hpp" | |
using std::max; | |
using std::min; | |
using std::floor; | |
using std::ceil; | |
namespace caffe { | |
template <typename Dtype> | |
void ROIPoolingLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom, | |
const vector<Blob<Dtype>*>& top) { | |
ROIPoolingParameter roi_pool_param = this->layer_param_.roi_pooling_param(); | |
CHECK_GT(roi_pool_param.pooled_h(), 0) | |
<< "pooled_h must be > 0"; | |
CHECK_GT(roi_pool_param.pooled_w(), 0) | |
<< "pooled_w must be > 0"; | |
pooled_height_ = roi_pool_param.pooled_h(); //定义网络的大小 | |
pooled_width_ = roi_pool_param.pooled_w(); | |
spatial_scale_ = roi_pool_param.spatial_scale(); | |
LOG(INFO) << "Spatial scale: " << spatial_scale_; | |
} | |
template <typename Dtype> | |
void ROIPoolingLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom, | |
const vector<Blob<Dtype>*>& top) { | |
channels_ = bottom[0]->channels(); | |
height_ = bottom[0]->height(); | |
width_ = bottom[0]->width(); | |
top[0]->Reshape(bottom[1]->num(), channels_, pooled_height_, | |
pooled_width_); | |
max_idx_.Reshape(bottom[1]->num(), channels_, pooled_height_, | |
pooled_width_); | |
} | |
template <typename Dtype> | |
void ROIPoolingLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom, | |
const vector<Blob<Dtype>*>& top) { | |
const Dtype* bottom_data = bottom[0]->cpu_data(); | |
const Dtype* bottom_rois = bottom[1]->cpu_data();//获取roidb信息(n,x1,y1,x2,y2) | |
// Number of ROIs | |
int num_rois = bottom[1]->num();//候选目标的个数 | |
int batch_size = bottom[0]->num();//特征图的维度,vgg16的conv5之后为512 | |
int top_count = top[0]->count();//需要输出的值个数 | |
Dtype* top_data = top[0]->mutable_cpu_data(); | |
caffe_set(top_count, Dtype(-FLT_MAX), top_data); | |
int* argmax_data = max_idx_.mutable_cpu_data(); | |
caffe_set(top_count, -1, argmax_data); | |
// For each ROI R = [batch_index x1 y1 x2 y2]: max pool over R | |
for (int n = 0; n < num_rois; ++n) { | |
int roi_batch_ind = bottom_rois[0]; | |
int roi_start_w = round(bottom_rois[1] * spatial_scale_);//缩小16倍,将候选区域在原始坐标中的位置,映射到conv_5特征图上 | |
int roi_start_h = round(bottom_rois[2] * spatial_scale_); | |
int roi_end_w = round(bottom_rois[3] * spatial_scale_); | |
int roi_end_h = round(bottom_rois[4] * spatial_scale_); | |
CHECK_GE(roi_batch_ind, 0); | |
CHECK_LT(roi_batch_ind, batch_size); | |
int roi_height = max(roi_end_h - roi_start_h + 1, 1);//得到候选区域在特征图上的大小 | |
int roi_width = max(roi_end_w - roi_start_w + 1, 1); | |
const Dtype bin_size_h = static_cast<Dtype>(roi_height) | |
/ static_cast<Dtype>(pooled_height_);//计算如果需要划分成(pooled_height_,pooled_weight_)这么多块,那么每一个块的大小(bin_size_w,bin_size_h); | |
const Dtype bin_size_w = static_cast<Dtype>(roi_width) | |
/ static_cast<Dtype>(pooled_width_); | |
const Dtype* batch_data = bottom_data + bottom[0]->offset(roi_batch_ind);//获取当前维度的特征图数据,比如一共有(n,x1,x2,x3,x4)的数据,拿到第一块特征图的数据 | |
for (int c = 0; c < channels_; ++c) { | |
for (int ph = 0; ph < pooled_height_; ++ph) { | |
for (int pw = 0; pw < pooled_width_; ++pw) { | |
// Compute pooling region for this output unit: | |
// start (included) = floor(ph * roi_height / pooled_height_) | |
// end (excluded) = ceil((ph + 1) * roi_height / pooled_height_) | |
int hstart = static_cast<int>(floor(static_cast<Dtype>(ph) | |
* bin_size_h)); //计算每一块的位置 | |
int wstart = static_cast<int>(floor(static_cast<Dtype>(pw) | |
* bin_size_w)); | |
int hend = static_cast<int>(ceil(static_cast<Dtype>(ph + 1) | |
* bin_size_h)); | |
int wend = static_cast<int>(ceil(static_cast<Dtype>(pw + 1) | |
* bin_size_w)); | |
hstart = min(max(hstart + roi_start_h, 0), height_); | |
hend = min(max(hend + roi_start_h, 0), height_); | |
wstart = min(max(wstart + roi_start_w, 0), width_); | |
wend = min(max(wend + roi_start_w, 0), width_); | |
bool is_empty = (hend <= hstart) || (wend <= wstart); | |
const int pool_index = ph * pooled_width_ + pw; | |
if (is_empty) { | |
top_data[pool_index] = 0; | |
argmax_data[pool_index] = -1; | |
} | |
for (int h = hstart; h < hend; ++h) { | |
for (int w = wstart; w < wend; ++w) { | |
const int index = h * width_ + w; | |
if (batch_data[index] > top_data[pool_index]) { | |
top_data[pool_index] = batch_data[index]; //在取每一块中的最大值,就是max_pooling操作. | |
argmax_data[pool_index] = index; | |
} | |
} | |
} | |
} | |
} | |
// Increment all data pointers by one channel | |
batch_data += bottom[0]->offset(0, 1); | |
top_data += top[0]->offset(0, 1); | |
argmax_data += max_idx_.offset(0, 1); | |
} | |
// Increment ROI data pointer | |
bottom_rois += bottom[1]->offset(1); | |
} | |
} | |
template <typename Dtype> | |
void ROIPoolingLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top, | |
const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) { | |
NOT_IMPLEMENTED; | |
} | |
// NOTE(review): STUB_GPU is defined outside this file; presumably it supplies
// placeholder GPU entry points for this layer in CPU-only builds — confirm.
#ifdef CPU_ONLY | |
STUB_GPU(ROIPoolingLayer); | |
#endif | |
// NOTE(review): presumably emits the explicit template instantiations of the
// layer class — confirm against the macro definition.
INSTANTIATE_CLASS(ROIPoolingLayer); | |
// NOTE(review): presumably registers the layer with the framework under the
// type name "ROIPooling" — confirm against the macro definition.
REGISTER_LAYER_CLASS(ROIPooling); | |
} // namespace caffe |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment