Skip to content

Instantly share code, notes, and snippets.

@csvance
Created April 8, 2019 04:27
Show Gist options
  • Save csvance/e5db387f464463783101093d00393f59 to your computer and use it in GitHub Desktop.
Save csvance/e5db387f464463783101093d00393f59 to your computer and use it in GitHub Desktop.
import keras
from keras.models import Model
from keras import backend as K
from keras.layers import Dense, Activation
import tensorflow as tf
from tensorflow.contrib import tensorrt as tftrt
import copy
import numpy as np
import sys
import time
class FrozenGraph(object):
    """Freeze a Keras model into a constants-only TensorFlow GraphDef.

    Attributes:
        x_name: one-element list holding the input placeholder's node name.
        y_name: one-element list holding the output node's name.
        frozen: GraphDef returned by ``convert_variables_to_constants``
            followed by ``remove_training_nodes``.
    """

    def __init__(self, model, shape):
        """Apply *model* to a new float32 placeholder and freeze the result.

        Args:
            model: callable Keras model; it is called on the placeholder to
                obtain the output tensor.
            shape: per-sample input shape without the batch axis, e.g.
                ``(224, 224, 3)``. A ``None`` batch dimension is prepended.
        """
        # Prepend the batch axis without assuming a fixed number of dimensions.
        shape = (None,) + tuple(shape)
        # NOTE(review): using the session as a context manager is expected to
        # close the Keras backend session when the block exits -- confirm that
        # nothing needs the live Keras model afterwards.
        with K.get_session() as sess:
            x_tensor = tf.placeholder(tf.float32, shape, 'image_tensor_x')
            # Inference mode must be set before the model is applied.
            K.set_learning_phase(0)
            y_tensor = model(x_tensor)
            # Take node names from the producing ops rather than slicing the
            # tensor name: slicing two characters only works for a one-digit
            # output index, and the requested placeholder name is not
            # guaranteed to be the name actually assigned in the graph.
            x_name = x_tensor.op.name
            y_name = y_tensor.op.name
            graph = sess.graph.as_graph_def()
            graph0 = tf.graph_util.convert_variables_to_constants(
                sess, graph, [y_name])
            graph1 = tf.graph_util.remove_training_nodes(graph0)

        self.x_name = [x_name]
        self.y_name = [y_name]
        self.frozen = graph1
class TfEngine(object):
    """Run inference on a frozen graph inside its own TensorFlow session.

    The engine owns ``self.sess``; call ``close()`` (or use the engine as a
    context manager) to release it when inference is finished.
    """

    def __init__(self, graph):
        """Import *graph* into a new ``tf.Graph`` and open a session on it.

        Args:
            graph: object exposing ``frozen`` (the GraphDef to import) plus
                ``x_name`` and ``y_name`` (lists with the input and output
                node names).
        """
        g = tf.Graph()
        with g.as_default():
            x_op, y_op = tf.import_graph_def(
                graph_def=graph.frozen,
                return_elements=graph.x_name + graph.y_name)
            self.x_tensor = x_op.outputs[0]
            self.y_tensor = y_op.outputs[0]

        config = tf.ConfigProto(gpu_options=tf.GPUOptions(
            per_process_gpu_memory_fraction=0.5,
            allow_growth=True))
        self.sess = tf.Session(graph=g, config=config)

    def infer(self, x):
        """Feed *x* to the input tensor and return the output tensor's value."""
        return self.sess.run(self.y_tensor,
                             feed_dict={self.x_tensor: x})

    def close(self):
        """Close the underlying session."""
        self.sess.close()

    def __enter__(self):
        """Return the engine itself so it can be used in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the session on leaving the ``with`` block."""
        self.close()
class TftrtEngine(TfEngine):
    """TfEngine that runs a TF-TRT converted copy of the frozen graph."""

    def __init__(self, graph, batch_size, precision):
        """Convert ``graph.frozen`` with TF-TRT and open a session on it.

        Args:
            graph: object exposing ``frozen``, ``x_name`` and ``y_name``.
            batch_size: passed to the converter as ``max_batch_size`` and
                used by ``infer`` as the chunk size.
            precision: passed to the converter as ``precision_mode``
                (the script in this file uses ``'FP32'`` and ``'FP16'``).
        """
        tftrt_graph = tftrt.create_inference_graph(
            graph.frozen,
            outputs=graph.y_name,
            max_batch_size=batch_size,
            max_workspace_size_bytes=1 << 30,
            precision_mode=precision,
            minimum_segment_size=2)
        self.tftrt_graph = tftrt_graph
        # A shallow copy is enough: only ``frozen`` is rebound, so there is
        # no need to duplicate the original GraphDef just to discard it.
        opt_graph = copy.copy(graph)
        opt_graph.frozen = tftrt_graph
        super(TftrtEngine, self).__init__(opt_graph)
        self.batch_size = batch_size

    def infer(self, x):
        """Run inference on *x* in chunks of at most ``self.batch_size`` rows.

        Args:
            x: array whose first axis indexes samples.

        Returns:
            float32 array with one row per sample. The trailing dimensions
            follow the model output; an empty *x* yields shape ``(0, 1)``.
        """
        num_tests = x.shape[0]
        batch_size = self.batch_size
        y = None
        for i in range(0, num_tests, batch_size):
            x_part = x[i: i + batch_size]
            y_part = self.sess.run(self.y_tensor,
                                   feed_dict={self.x_tensor: x_part})
            if y is None:
                # Size the buffer from the first result instead of assuming
                # a single output column; outputs without trailing
                # dimensions keep the historical (N, 1) layout.
                tail = tuple(np.shape(y_part)[1:]) or (1,)
                y = np.empty((num_tests,) + tail, np.float32)
            y[i: i + batch_size] = y_part
        if y is None:
            # No samples: keep the shape that was returned before.
            y = np.empty((0, 1), np.float32)
        return y
# Benchmark: ImageNet MobileNet features with a one-unit head, run through
# plain TensorFlow and through TF-TRT at FP32 and FP16.
# NOTE(review): `pooling` is documented as applying only when include_top is
# False, so 'max' presumably has no effect here -- confirm.
mobilenet = keras.applications.mobilenet.MobileNet(input_shape=(224, 224, 3), weights='imagenet', pooling='max')
# NOTE(review): this layer name looks auto-generated; it may differ if other
# models were built earlier in the same process -- confirm.
new_output = mobilenet.get_layer('global_average_pooling2d_1').output
# Softmax over a single unit is identically 1.0, which would make every
# engine print the same constant; sigmoid yields an input-dependent value.
new_output = Dense(1, activation='sigmoid')(new_output)
model = Model(inputs=mobilenet.input, outputs=new_output)
model.summary()

frozen_graph = FrozenGraph(model, (224, 224, 3))
x_test = np.random.random((10, 224, 224, 3))

# NOTE(review): each timing below covers the engine's first call, so it
# presumably includes one-time initialisation -- confirm before comparing.
# The clock is stopped before printing so console I/O is not measured.
tf_engine = TfEngine(frozen_graph)
t0 = time.time()
y_tf = tf_engine.infer(x_test)
t1 = time.time()
print(y_tf)
print('Tensorflow time', t1 - t0)

batch_size = 1
tftrt_engine = TftrtEngine(frozen_graph, batch_size, 'FP32')
t0 = time.time()
y_tftrt = tftrt_engine.infer(x_test)
t1 = time.time()
print(y_tftrt)
print('TFTRT time', t1 - t0)

tftrt_engine = TftrtEngine(frozen_graph, batch_size, 'FP16')
t0 = time.time()
y_tftrt = tftrt_engine.infer(x_test)
t1 = time.time()
print(y_tftrt)
print('TFTRT_FP16 time', t1 - t0)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment