Skip to content

Instantly share code, notes, and snippets.

@makefile
Last active December 3, 2018 07:53
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save makefile/6731ca0e311b6401681c15635bb97330 to your computer and use it in GitHub Desktop.
Save makefile/6731ca0e311b6401681c15635bb97330 to your computer and use it in GitHub Desktop.
Cascade R-CNN demo&test script
import os
import sys
import argparse
import numpy as np
from PIL import Image, ImageDraw
import cv2
import time
import json
# Make sure that caffe is on the python path:
caffe_root = '../../..'
sys.path.insert(0, os.path.join(caffe_root, 'python'))
import caffe
class CaffeDetection:
    """Run a Caffe R-CNN / Cascade R-CNN deploy net on single images.

    The constructor loads the network and selects which blobs are read as
    detection outputs; ``detect`` runs one image through the net and returns
    per-output detections after per-class (soft-)NMS.
    """

    def __init__(self, gpu_id, model_def, model_weights,
                 cascade=0, FPN=0, use_soft_nms=0):
        """Load the network and configure the outputs to read.

        Args:
            gpu_id: GPU device index; a negative value selects CPU mode.
            model_def: path of the deploy prototxt.
            model_weights: path of the trained caffemodel.
            cascade: > 0 reads the five cascade outputs, otherwise only '1st'.
            FPN: > 0 selects the FPN blob names and the larger input size.
            use_soft_nms: > 0 uses ``cpu_soft_nms`` instead of hard NMS.
        """
        if gpu_id < 0:
            caffe.set_mode_cpu()
        else:
            caffe.set_device(gpu_id)
            caffe.set_mode_gpu()

        # Test phase: inference only.
        self.net = caffe.Net(model_def, model_weights, caffe.TEST)

        self.use_soft_nms = use_soft_nms > 0
        self.cascade = cascade > 0
        self.FPN = FPN > 0
        print("%s %s" % (cascade, FPN))

        proposal_prefix = 'proposals_to_all' if self.FPN else 'proposals'
        if not self.cascade:
            # baseline model: a single detection output
            self.proposal_blob_names = [proposal_prefix]
            self.bbox_blob_names = ['output_bbox_1st']
            self.cls_prob_blob_names = ['cls_prob_1st']
            self.output_names = ['1st']
        else:
            # cascade-rcnn model: the last two outputs reuse the 2nd/3rd
            # stage boxes with the '*_avg' class probabilities.
            self.proposal_blob_names = [
                proposal_prefix,
                proposal_prefix + '_2nd', proposal_prefix + '_3rd',
                proposal_prefix + '_2nd', proposal_prefix + '_3rd',
            ]
            self.bbox_blob_names = [
                'output_bbox_1st', 'output_bbox_2nd', 'output_bbox_3rd',
                'output_bbox_2nd', 'output_bbox_3rd',
            ]
            self.cls_prob_blob_names = [
                'cls_prob_1st', 'cls_prob_2nd', 'cls_prob_3rd',
                'cls_prob_2nd_avg', 'cls_prob_3rd_avg',
            ]
            self.output_names = ['1st', '2nd', '3rd', '2nd_avg', '3rd_avg']

        self.num_outputs = len(self.proposal_blob_names)
        assert self.num_outputs == len(self.bbox_blob_names)
        assert self.num_outputs == len(self.cls_prob_blob_names)
        assert self.num_outputs == len(self.output_names)

        # detection configuration
        self.det_thr = 0.3       # score threshold for demo (0.001 was used for testing)
        self.max_per_img = 100   # max number of detections kept per output
        self.nms_thresh = 0.5    # IoU threshold of the hard NMS
        # Use the normalized flag so this agrees with the blob-name choice.
        if self.FPN:
            self.shortSize = 800
            self.longSize = 1312
        else:
            self.shortSize = 600
            self.longSize = 1000
        self.PIXEL_MEANS = np.array([104, 117, 123], dtype=np.uint8)
        self.num_cls = 80

    def detect(self, image_file):
        """Run detection on one image file.

        Returns:
            A list with one entry per configured output (``output_names``
            order). Each entry is a list of rows
            ``[cls_id, x, y, w, h, score]`` in original-image pixels, with
            ``cls_id`` starting at 1 (0 is background).

        Raises:
            IOError: if the image cannot be read.
        """
        net_input, org_h, org_w, hw_ratios = self._prepare_input(image_file)

        # batch size of 1, spatial size of this particular image
        data_blob = self.net.blobs['data']
        data_blob.reshape(1, 3, net_input.shape[1], net_input.shape[2])
        data_blob.data[...] = net_input

        start = time.time()
        self.net.forward()
        cost_millis = int((time.time() - start) * 1000)
        print("detection cost ms:  %d" % cost_millis)

        detect_final_boxes = []
        for nn in range(self.num_outputs):
            output_bboxs, cls_prob = self._read_output(nn, org_h, org_w, hw_ratios)
            detect_boxes = self._nms_per_class(output_bboxs, cls_prob)
            print("detected box num:  %d" % len(detect_boxes))
            detect_boxes = self._keep_top_scoring(np.asarray(detect_boxes))
            detect_final_boxes.append(detect_boxes.tolist())
        return detect_final_boxes

    def _prepare_input(self, image_file):
        """Load, resize and mean-subtract an image; return (CHW float32, H, W, [h_ratio, w_ratio])."""
        image = cv2.imread(image_file)  # BGR, 3 channels (cv2.IMREAD_COLOR default)
        if image is None:
            raise IOError("cannot read image file: %r" % (image_file,))
        org_h, org_w = image.shape[:2]

        # Scale the short side to shortSize, cap at longSize, then snap both
        # sides to a multiple of 32.
        rz_ratio = self.shortSize / float(min(org_h, org_w))
        resized_h = int(round(min(rz_ratio * org_h, self.longSize) / 32.0) * 32)
        resized_w = int(round(min(rz_ratio * org_w, self.longSize) / 32.0) * 32)
        hw_ratios = [resized_h / float(org_h), resized_w / float(org_w)]

        image = cv2.resize(image, (resized_w, resized_h),
                           interpolation=cv2.INTER_LINEAR)
        image = image.astype('float32') - self.PIXEL_MEANS.astype('float32')
        return np.transpose(image, (2, 0, 1)), org_h, org_w, hw_ratios  # C H W

    def _read_output(self, nn, org_h, org_w, hw_ratios):
        """Read output ``nn`` from the net; return (boxes as x,y,w,h in image pixels, class probs)."""
        bbox_name = self.bbox_blob_names[nn]
        boxes = self.net.blobs[bbox_name].data.copy()  # copy: modified below
        print("%s %s" % (bbox_name, boxes.shape))
        # Columns 1..4 are used as x1, y1, x2, y2; column 0 is dropped
        # (presumably the batch index -- confirm against the prototxt).
        boxes = boxes[:, :, 0, 0]

        # back to original-image coordinates
        boxes[:, 1] /= hw_ratios[1]
        boxes[:, 3] /= hw_ratios[1]
        boxes[:, 2] /= hw_ratios[0]
        boxes[:, 4] /= hw_ratios[0]
        # clip to the image borders
        boxes[:, 1] = np.maximum(0, boxes[:, 1])
        boxes[:, 2] = np.maximum(0, boxes[:, 2])
        boxes[:, 3] = np.minimum(org_w, boxes[:, 3])
        boxes[:, 4] = np.minimum(org_h, boxes[:, 4])
        # corners -> width / height
        boxes[:, 3] = boxes[:, 3] - boxes[:, 1] + 1
        boxes[:, 4] = boxes[:, 4] - boxes[:, 2] + 1
        output_bboxs = boxes[:, 1:]

        cls_prob = self.net.blobs[self.cls_prob_blob_names[nn]].data
        cls_prob = cls_prob.reshape((-1, self.num_cls + 1))

        # Only rows whose proposal has positive width and height are kept.
        rois = self.net.blobs[self.proposal_blob_names[nn]].data
        roi_w = rois[:, 3] - rois[:, 1] + 1
        roi_h = rois[:, 4] - rois[:, 2] + 1
        keep_id = np.where((roi_w > 0) & (roi_h > 0))[0]
        return output_bboxs[keep_id, :], cls_prob[keep_id, :]

    def _nms_per_class(self, output_bboxs, cls_prob):
        """Threshold and NMS every foreground class; return rows [cls_id, x, y, w, h, score]."""
        detect_boxes = []
        for cls_id in range(1, self.num_cls + 1):  # 0 is background
            prob = cls_prob[:, cls_id][:, np.newaxis]
            bbset = np.hstack([output_bboxs, prob])
            if self.det_thr > 0:
                bbset = bbset[prob[:, 0] >= self.det_thr]
            if self.use_soft_nms:
                keep = self.cpu_soft_nms(bbset, sigma=0.5, Nt=0.30,
                                         threshold=0.01, method=1)
            else:
                keep = self.cpu_nms_single_cls(bbset, self.nms_thresh)
            if len(keep) == 0:
                continue
            bbset = bbset[keep, :]
            cls_ids = np.full((len(bbset), 1), float(cls_id))
            detect_boxes.extend(np.hstack([cls_ids, bbset]).tolist())
        return detect_boxes

    def _keep_top_scoring(self, detect_boxes):
        """Keep at most ``max_per_img`` rows with the highest score (column 5), in original order."""
        if self.max_per_img <= 0 or len(detect_boxes) <= self.max_per_img:
            return detect_boxes
        # Stable descending sort so ties resolve deterministically.
        order = np.argsort(-detect_boxes[:, 5], kind='mergesort')
        keep_id = np.sort(order[:self.max_per_img])
        return detect_boxes[keep_id, :]

    def cpu_nms_single_cls(self, dets, thresh):
        """Pure Python NMS baseline.

        Args:
            dets: array of rows ``[x, y, w, h, score]``.
            thresh: boxes whose IoU with a kept box exceeds this are dropped.

        Returns:
            List of kept row indices into ``dets``, highest score first.
        """
        x1 = dets[:, 0]
        y1 = dets[:, 1]
        widths = dets[:, 2]
        heights = dets[:, 3]
        scores = dets[:, 4]
        x2 = x1 + widths - 1
        y2 = y1 + heights - 1
        areas = widths * heights

        order = scores.argsort()[::-1]
        keep = []
        while order.size > 0:
            best, rest = order[0], order[1:]
            keep.append(int(best))
            inter_w = np.maximum(0.0, np.minimum(x2[best], x2[rest])
                                 - np.maximum(x1[best], x1[rest]) + 1)
            inter_h = np.maximum(0.0, np.minimum(y2[best], y2[rest])
                                 - np.maximum(y1[best], y1[rest]) + 1)
            inter = inter_w * inter_h
            ovr = inter / (areas[best] + areas[rest] - inter)
            order = rest[ovr <= thresh]
        return keep

    def cpu_soft_nms(self, boxes, sigma=0.5, Nt=0.3, threshold=0.001, method=0):
        """Soft-NMS on rows ``[x, y, w, h, score]``; modifies ``boxes`` in place.

        Rows are reordered so the surviving boxes occupy the first rows,
        highest score first, with their scores decayed according to
        ``method``; the box geometry is left in x, y, w, h form.

        Args:
            boxes: 2-D array, modified in place.
            sigma: Gaussian decay parameter (``method == 2``).
            Nt: IoU threshold used by the linear and hard variants.
            threshold: boxes whose decayed score falls below this are dropped.
            method: 1 = linear decay, 2 = Gaussian decay, else hard NMS.

        Returns:
            ``[0, 1, ..., n_kept - 1]``: indices of the surviving rows of
            the reordered ``boxes``.
        """
        n_kept = boxes.shape[0]
        i = 0
        while i < n_kept:
            # Move the highest-scoring remaining box to row i (first max wins).
            maxpos = i + int(np.argmax(boxes[i:n_kept, 4]))
            if maxpos != i:
                boxes[[i, maxpos]] = boxes[[maxpos, i]]

            tx1, ty1, tw, th = boxes[i, 0], boxes[i, 1], boxes[i, 2], boxes[i, 3]
            tx2 = tx1 + tw - 1
            ty2 = ty1 + th - 1
            t_area = tw * th

            pos = i + 1
            # n_kept shrinks whenever a box falls below the score threshold.
            while pos < n_kept:
                x1, y1, w, h = boxes[pos, 0], boxes[pos, 1], boxes[pos, 2], boxes[pos, 3]
                x2 = x1 + w - 1
                y2 = y1 + h - 1
                iw = min(tx2, x2) - max(tx1, x1) + 1
                ih = min(ty2, y2) - max(ty1, y1) + 1
                if iw > 0 and ih > 0:
                    ua = float(t_area + w * h - iw * ih)
                    ov = iw * ih / ua  # IoU between max box and this box
                    if method == 1:    # linear
                        weight = 1 - ov if ov > Nt else 1
                    elif method == 2:  # gaussian
                        weight = np.exp(-(ov * ov) / sigma)
                    else:              # original NMS
                        weight = 0 if ov > Nt else 1
                    boxes[pos, 4] = weight * boxes[pos, 4]
                    # Discard a box by overwriting it with the last live
                    # one, then re-examine the same position.
                    if boxes[pos, 4] < threshold:
                        boxes[pos] = boxes[n_kept - 1]
                        n_kept -= 1
                        pos -= 1
                pos += 1
            i += 1
        return list(range(n_kept))
def bbox2cocoVec(image_id, results, catIds):
    """Convert the last detection output of one image into COCO result rows.

    Args:
        image_id: COCO image id written into every row.
        results: list of per-output detection lists; only the last entry is
            used. Each detection is ``[cls_id, x, y, w, h, score]`` with
            ``cls_id`` starting at 1 (0 is background).
        catIds: sequence mapping the 0-based class index to the COCO
            category id.

    Returns:
        List of rows ``[image_id, xmin, ymin, w, h, score, cat_id]``.
    """
    bbox_list = []
    for item in results[-1]:  # the last configured output
        cls_id = int(item[0])
        # cls_id is 1-based while catIds is a 0-based sequence.
        cat_id = catIds[cls_id - 1]
        bbox_list.append([image_id, round(item[1]), round(item[2]),
                          item[3], item[4], item[5], cat_id])
    return bbox_list
def demo(args):
    """Detect objects in ``args.image_file`` and save an annotated copy.

    Draws the detections of the last configured output as red rectangles
    labelled with the class id, prints one line per detection, and writes
    the picture to ``detect_result.jpg`` in the working directory.

    Args:
        args: parsed command-line namespace; reads ``gpu_id``,
            ``model_def``, ``model_weights``, ``cascade``, ``FPN`` and
            ``image_file``.
    """
    detection = CaffeDetection(args.gpu_id,
                               args.model_def, args.model_weights,
                               cascade=args.cascade, FPN=args.FPN)
    results = detection.detect(args.image_file)

    # Convert to RGB so the RGB colour tuples below and the JPEG save work
    # for palette, greyscale and alpha-channel inputs as well.
    img = Image.open(args.image_file).convert('RGB')
    draw = ImageDraw.Draw(img)
    for item in results[-1]:  # the last configured output
        cls_id = int(item[0])
        xmin = int(round(item[1]))
        ymin = int(round(item[2]))
        # rows carry width/height, so recover the inclusive far corner
        xmax = int(round(item[1] + item[3] - 1))
        ymax = int(round(item[2] + item[4] - 1))
        draw.rectangle([xmin, ymin, xmax, ymax], outline=(255, 0, 0))
        draw.text([xmin, ymin], str(cls_id), fill=(0, 0, 255))
        print([cls_id, xmin, ymin, xmax, ymax, round(item[-1] * 1000) / 1000.0])
    img.save('detect_result.jpg')
def test_coco(args):
    """Run the detector over COCO val2017 and dump the results as JSON.

    Reads annotations from ``coco/annotations/instances_val2017.json`` and
    images from ``coco/images/val2017`` (relative to the working directory),
    and writes the detections of every image to ``args.out_file``.

    Args:
        args: parsed command-line namespace; reads ``gpu_id``,
            ``model_def``, ``model_weights``, ``cascade``, ``FPN`` and
            ``out_file``.
    """
    # local import
    from pycocotools.coco import COCO

    # COCO category id for each of the 80 class indices, in order.
    coco_catIds = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
                   23, 24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
                   46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64,
                   65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90]
    dataDir = 'coco'
    dataType = 'val2017'
    annFile = '{}/annotations/instances_{}.json'.format(dataDir, dataType)
    image_base_path = '{}/images/{}'.format(dataDir, dataType)

    # initialize COCO api for instance annotations
    cocoGT = COCO(annFile)
    imgIds = cocoGT.getImgIds()

    # model define
    detection = CaffeDetection(args.gpu_id,
                               args.model_def, args.model_weights,
                               cascade=args.cascade, FPN=args.FPN)

    res_list = []
    for count, imgId in enumerate(sorted(imgIds), 1):
        img = cocoGT.loadImgs(imgId)[0]
        img_path = image_base_path + '/' + img['file_name']
        # inference
        results = detection.detect(img_path)
        # extend in place: rebuilding the list per image is quadratic
        res_list.extend(bbox2cocoVec(imgId, results, coco_catIds))
        if count % 100 == 0:
            print('--------------- ' + str(count) + ' ---------------')

    # Force an Nx7 array so an empty result set still has 7 columns.
    res_array = np.asarray(res_list, dtype=np.float64).reshape(-1, 7)
    with open(args.out_file, 'w') as f:
        json.dump(cocoGT.loadNumpyAnnotations(res_array), f)
def parse_args():
    """Build the command-line parser and return the parsed namespace.

    Options: --gpu_id, --model_def, --cascade, --FPN, --model_weights,
    --image_file (empty by default) and --out_file.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument('--gpu_id', help='gpu id', default=0, type=int)
    cli.add_argument('--model_def', default='models/deploy.prototxt')
    cli.add_argument('--cascade', type=int, default=0)
    cli.add_argument('--FPN', type=int, default=0)
    cli.add_argument('--model_weights',
                     default='models/models_iter_120000.caffemodel')
    cli.add_argument('--image_file', default='')
    cli.add_argument('--out_file', default='cascadercnn_coco_result.json')
    parsed = cli.parse_args()
    return parsed
if __name__ == '__main__':
    # Single-image demo when --image_file is given, otherwise COCO evaluation.
    args = parse_args()
    entry_point = test_coco if args.image_file == '' else demo
    entry_point(args)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment