@aallan · Created April 26, 2019
Benchmarking script for TensorFlow + TensorRT inferencing on the NVIDIA Jetson Nano
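For reference, a typical invocation looks something like this (the script name and file paths are illustrative; --model should point at a TensorFlow Object Detection API model directory containing pipeline.config and model.ckpt* files, and the labels file has one "id name" pair per line):

  python3 benchmark_tf_trt.py --model ssd_mobilenet_v2_coco --label coco_labels.txt --input test_image.jpg --output out.jpg --runs 10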
#!/usr/bin/env python3

import platform

PLATFORM = platform.system().lower()

GOOGLE = 'edge_tpu'
INTEL = 'ncs2'
NVIDIA = 'jetson_nano'
PI = 'raspberry_pi'

IS_LINUX = (PLATFORM == 'linux')

if IS_LINUX:
    # NOTE: platform.linux_distribution() was removed in Python 3.8;
    # this script predates that and targets the Python 3.6 of the era.
    PLATFORM = platform.linux_distribution()[0].lower()
    if PLATFORM == 'debian':
        try:
            with open('/proc/cpuinfo') as f:
                for line in f:
                    line = line.strip()
                    if line.startswith('Hardware') and (line.endswith('BCM2708') or line.endswith('BCM2835')):
                        PLATFORM = PI
                        print("Running on a Raspberry Pi.")
                        break
        except OSError:
            print("Unknown platform based on Debian.")
    elif PLATFORM == 'mendel':
        PLATFORM = GOOGLE
        print("Running on a Coral Dev Board.")

# Work out which accelerator is attached from the libraries that are importable.
try:
    from edgetpu.detection.engine import DetectionEngine
    print("DetectionEngine support present.")
except ImportError:
    try:
        from openvino.inference_engine import IENetwork, IEPlugin
        print("OpenVINO present.")
        print("Assuming Movidius hardware.")
        PLATFORM = INTEL
    except ImportError:
        try:
            import tensorflow as tf
            if tf.test.is_built_with_cuda():
                print("TensorFlow with GPU support present.")
                print("Assuming Jetson Nano.")
                try:
                    import tensorflow.contrib.tensorrt as trt
                    print("TensorRT support present.")
                    PLATFORM = NVIDIA
                    from object_detection.protos import pipeline_pb2
                    from object_detection.protos import image_resizer_pb2
                    from object_detection import exporter
                    from google.protobuf import text_format
                except ImportError:
                    print("No TensorRT support found.")
                    print("Unknown TensorFlow platform.")
                    PLATFORM = 'unknown'
            else:
                print("No GPU support in TensorFlow.")
        except ImportError:
            print("No TensorFlow support found.")

# This particular benchmark only supports the Jetson Nano.
LEGAL_PLATFORMS = (NVIDIA,)
assert PLATFORM in LEGAL_PLATFORMS, "Don't understand platform %s." % PLATFORM
INPUT_NAME = 'image_tensor'
BOXES_NAME = 'detection_boxes'
CLASSES_NAME = 'detection_classes'
SCORES_NAME = 'detection_scores'
MASKS_NAME = 'detection_masks'
NUM_DETECTIONS_NAME = 'num_detections'
FROZEN_GRAPH_NAME = 'frozen_inference_graph.pb'
PIPELINE_CONFIG_NAME = 'pipeline.config'
CHECKPOINT_PREFIX = 'model.ckpt'

import sys
import os
import logging as log
import argparse
import subprocess
from timeit import default_timer as timer

import cv2
from PIL import Image
from PIL import ImageFont, ImageDraw
def make_const6(const6_name='const6'):
    graph = tf.Graph()
    with graph.as_default():
        tf_6 = tf.constant(dtype=tf.float32, value=6.0, name=const6_name)
    return graph.as_graph_def()
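
# The helpers below rewrite Relu6 using the identity
# relu6(x) = relu(x) - relu(x - 6), presumably because the TF-TRT converter
# used here handles plain Relu/Sub ops more readily than Relu6.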
def make_relu6(output_name, input_name, const6_name='const6'):
    graph = tf.Graph()
    with graph.as_default():
        tf_x = tf.placeholder(tf.float32, [10, 10], name=input_name)
        tf_6 = tf.constant(dtype=tf.float32, value=6.0, name=const6_name)
        with tf.name_scope(output_name):
            tf_y1 = tf.nn.relu(tf_x, name='relu1')
            tf_y2 = tf.nn.relu(tf.subtract(tf_x, tf_6, name='sub1'), name='relu2')
            # relu6(x) = relu(x) - relu(x - 6)
            tf_y = tf.subtract(tf_y1, tf_y2, name=output_name)
    graph_def = graph.as_graph_def()
    graph_def.node[-1].name = output_name
    # remove the placeholder and constant nodes; the caller grafts this
    # subgraph onto existing tensors with the same names
    for node in graph_def.node:
        if node.name == input_name:
            graph_def.node.remove(node)
    for node in graph_def.node:
        if node.name == const6_name:
            graph_def.node.remove(node)
    for node in graph_def.node:
        if node.op == '_Neg':
            node.op = 'Neg'
    return graph_def
def convert_relu6(graph_def, const6_name='const6'):
    # add the constant 6 if it isn't already in the graph
    has_const6 = False
    for node in graph_def.node:
        if node.name == const6_name:
            has_const6 = True
    if not has_const6:
        const6_graph_def = make_const6(const6_name=const6_name)
        graph_def.node.extend(const6_graph_def.node)
    # swap each Relu6 node for the equivalent relu/subtract subgraph
    for node in graph_def.node:
        if node.op == 'Relu6':
            input_name = node.input[0]
            output_name = node.name
            relu6_graph_def = make_relu6(output_name, input_name, const6_name=const6_name)
            graph_def.node.remove(node)
            graph_def.node.extend(relu6_graph_def.node)
    return graph_def
def remove_node(graph_def, node):
    for n in graph_def.node:
        if node.name in n.input:
            n.input.remove(node.name)
        ctrl_name = '^' + node.name
        if ctrl_name in n.input:
            n.input.remove(ctrl_name)
    graph_def.node.remove(node)

def remove_op(graph_def, op_name):
    matches = [node for node in graph_def.node if node.op == op_name]
    for match in matches:
        remove_node(graph_def, match)
def f_force_nms_cpu(frozen_graph):
    # keep NonMaxSuppression on the CPU (TF-TRT doesn't convert it)
    for node in frozen_graph.node:
        if 'NonMaxSuppression' in node.name:
            node.device = '/device:CPU:0'
    return frozen_graph

def f_replace_relu6(frozen_graph):
    return convert_relu6(frozen_graph)

def f_remove_assert(frozen_graph):
    remove_op(frozen_graph, 'Assert')
    return frozen_graph
def build_detection_graph(config, checkpoint,
                          batch_size=1,
                          score_threshold=None,
                          force_nms_cpu=True,
                          replace_relu6=True,
                          remove_assert=True,
                          input_shape=None,
                          output_dir='.generated_model'):
    """Builds a frozen graph for a pre-trained object detection model."""
    config_path = config
    checkpoint_path = checkpoint

    # parse config from file
    config = pipeline_pb2.TrainEvalPipelineConfig()
    with open(config_path, 'r') as f:
        text_format.Merge(f.read(), config, allow_unknown_extension=True)

    # override some config parameters
    if config.model.HasField('ssd'):
        config.model.ssd.feature_extractor.override_base_feature_extractor_hyperparams = True
        if score_threshold is not None:
            config.model.ssd.post_processing.batch_non_max_suppression.score_threshold = score_threshold
        if input_shape is not None:
            config.model.ssd.image_resizer.fixed_shape_resizer.height = input_shape[0]
            config.model.ssd.image_resizer.fixed_shape_resizer.width = input_shape[1]
    elif config.model.HasField('faster_rcnn'):
        if score_threshold is not None:
            config.model.faster_rcnn.second_stage_post_processing.score_threshold = score_threshold
        if input_shape is not None:
            config.model.faster_rcnn.image_resizer.fixed_shape_resizer.height = input_shape[0]
            config.model.faster_rcnn.image_resizer.fixed_shape_resizer.width = input_shape[1]

    if os.path.isdir(output_dir):
        subprocess.call(['rm', '-rf', output_dir])

    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True

    # export inference graph to file (initial)
    with tf.Session(config=tf_config) as tf_sess:
        with tf.Graph().as_default() as tf_graph:
            exporter.export_inference_graph(
                'image_tensor',
                config,
                checkpoint_path,
                output_dir,
                input_shape=[batch_size, None, None, 3]
            )

    # read frozen graph from file
    frozen_graph = tf.GraphDef()
    with open(os.path.join(output_dir, FROZEN_GRAPH_NAME), 'rb') as f:
        frozen_graph.ParseFromString(f.read())

    # apply graph modifications
    if force_nms_cpu:
        frozen_graph = f_force_nms_cpu(frozen_graph)
    if replace_relu6:
        frozen_graph = f_replace_relu6(frozen_graph)
    if remove_assert:
        frozen_graph = f_remove_assert(frozen_graph)

    # get input names
    # TODO: handle mask_rcnn
    input_names = [INPUT_NAME]
    output_names = [BOXES_NAME, CLASSES_NAME, SCORES_NAME, NUM_DETECTIONS_NAME]

    # remove temporary directory
    subprocess.call(['rm', '-rf', output_dir])

    return frozen_graph, input_names, output_names
# Function to draw a rectangle with width > 1
def draw_rectangle(draw, coordinates, color, width=1):
    for i in range(width):
        rect_start = (coordinates[0] - i, coordinates[1] - i)
        rect_end = (coordinates[2] + i, coordinates[3] + i)
        draw.rectangle((rect_start, rect_end), outline=color, fill=color)

# Function to read labels from text files.
def ReadLabelFile(file_path):
    with open(file_path, 'r') as f:
        lines = f.readlines()
    ret = {}
    for line in lines:
        pair = line.strip().split(maxsplit=1)
        ret[int(pair[0])] = pair[1].strip()
    return ret
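
# End-to-end benchmark: rebuild the frozen detection graph, convert it with
# TF-TRT, then time sess.run() over the requested number of passes.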
def inference_tf(runs, image, model, output, label=None):
    if label:
        labels = ReadLabelFile(label)
    else:
        labels = None

    tf_config = tf.ConfigProto()
    tf_config.gpu_options.allow_growth = True

    # regenerate the frozen graph from the model's pipeline.config and checkpoint
    config_path = os.path.join(model, PIPELINE_CONFIG_NAME)
    checkpoint_path = os.path.join(model, CHECKPOINT_PREFIX)
    frozen_graph, input_names, output_names = build_detection_graph(
        config=config_path, checkpoint=checkpoint_path, score_threshold=0.3, batch_size=1)
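
    # Hand the frozen graph to TF-TRT for optimisation. The settings below are
    # the script's originals: a 32 MB TensorRT workspace (1 << 25 bytes), FP16
    # precision, and only subgraphs of at least 50 nodes converted to TensorRT
    # engines (smaller segments stay as native TensorFlow ops).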
    trt_graph = trt.create_inference_graph(
        input_graph_def=frozen_graph, outputs=output_names, max_batch_size=1,
        max_workspace_size_bytes=1 << 25, precision_mode='FP16', minimum_segment_size=50)

    with tf.Session(config=tf_config) as sess:
        sess.graph.as_default()
        tf.import_graph_def(trt_graph, name='')

        img = Image.open(image)
        draw = ImageDraw.Draw(img, 'RGBA')
        helvetica = ImageFont.truetype("./Helvetica.ttf", size=72)

        picture = cv2.imread(image)
        initial_h, initial_w, channels = picture.shape
        frame = cv2.resize(picture, (300, 300))
        frame = frame[:, :, [2, 1, 0]]  # BGR2RGB
        frame = frame.reshape(1, frame.shape[0], frame.shape[1], 3)

        # Run inference.
        print("Running inference", runs, "times.")
        if runs == 1:
            start = timer()
            out = sess.run([sess.graph.get_tensor_by_name('num_detections:0'),
                            sess.graph.get_tensor_by_name('detection_scores:0'),
                            sess.graph.get_tensor_by_name('detection_boxes:0'),
                            sess.graph.get_tensor_by_name('detection_classes:0')],
                           feed_dict={'image_tensor:0': frame})
            end = timer()
            print('Elapsed time is', ((end - start) / runs) * 1000, 'ms')
        else:
            # the first pass triggers TensorRT engine construction, so time it
            # separately and discard it from the average
            start = timer()
            print('Initial run, discarding.')
            out = sess.run([sess.graph.get_tensor_by_name('num_detections:0'),
                            sess.graph.get_tensor_by_name('detection_scores:0'),
                            sess.graph.get_tensor_by_name('detection_boxes:0'),
                            sess.graph.get_tensor_by_name('detection_classes:0')],
                           feed_dict={'image_tensor:0': frame})
            end = timer()
            print('First run time is', (end - start) * 1000, 'ms')
            start = timer()
            for i in range(runs):
                out = sess.run([sess.graph.get_tensor_by_name('num_detections:0'),
                                sess.graph.get_tensor_by_name('detection_scores:0'),
                                sess.graph.get_tensor_by_name('detection_boxes:0'),
                                sess.graph.get_tensor_by_name('detection_classes:0')],
                               feed_dict={'image_tensor:0': frame})
            end = timer()
            print('Elapsed time is', ((end - start) / runs) * 1000, 'ms')

        # Visualize detected bounding boxes.
        num_detections = int(out[0][0])
        for i in range(num_detections):
            classId = int(out[3][0][i])
            score = float(out[1][0][i])
            bbox = [float(v) for v in out[2][0][i]]
            if score > 0.5:
                # boxes come back as normalised [ymin, xmin, ymax, xmax]
                xmin = bbox[1] * initial_w
                ymin = bbox[0] * initial_h
                xmax = bbox[3] * initial_w
                ymax = bbox[2] * initial_h
                if labels:
                    print(labels[classId], 'score =', score)
                else:
                    print('score =', score)
                box = [xmin, ymin, xmax, ymax]
                print('box =', box)
                draw_rectangle(draw, box, (0, 128, 128, 20), width=5)
                if labels:
                    draw.text((box[0] + 20, box[1] + 20), labels[classId], fill=(255, 255, 255, 20), font=helvetica)
        img.save(output)
        print('Saved to', output)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='Path of the detection model directory.', required=True)
    parser.add_argument('--label', help='Path of the labels file.')
    parser.add_argument('--input', help='File path of the input image.', required=True)
    parser.add_argument('--output', help='File path of the output image.')
    parser.add_argument('--runs', help='Number of times to run the inference.', type=int, default=1)
    args = parser.parse_args()

    if args.output:
        output_file = args.output
    else:
        output_file = 'out.jpg'

    if args.label:
        label_file = args.label
    else:
        label_file = None

    inference_tf(args.runs, args.input, args.model, output_file, label_file)

if __name__ == '__main__':
    main()