Last active
December 13, 2017 20:01
-
-
Save yaboo-oyabu/f27296db3624ac2edb1571eaa9768c96 to your computer and use it in GitHub Desktop.
Vectorize images with inception_v3 or mobilenet
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from __future__ import absolute_import | |
from __future__ import division | |
from __future__ import print_function | |
import json | |
import numpy as np | |
import os | |
import tarfile | |
import tensorflow as tf | |
from six.moves import urllib | |
from tensorflow.python.platform import gfile | |
flags = tf.app.flags | |
flags.DEFINE_string('output_json', None, 'specify output json file path') | |
flags.DEFINE_string('jpeg_pattern', None, 'specify target jpeg file pattern') | |
flags.DEFINE_string('model_dir', '/tmp/inception_v3', 'specify model directory') | |
flags.DEFINE_string('architecture', 'inception_v3', 'specify model architecture') | |
FLAGS = tf.app.flags.FLAGS | |
def maybe_download_and_extract(data_url): | |
"""Download and extract model tar file. | |
If the pretrained model we're using doesn't already exist, this function | |
downloads it from the TensorFlow.org website and unpacks it into a directory. | |
Args: | |
data_url: Web location of the tar file containing the pretrained model. | |
""" | |
dest_directory = FLAGS.model_dir | |
if not os.path.exists(dest_directory): | |
os.makedirs(dest_directory) | |
filename = data_url.split('/')[-1] | |
filepath = os.path.join(dest_directory, filename) | |
if not os.path.exists(filepath): | |
filepath, _ = urllib.request.urlretrieve(data_url, filepath) | |
tarfile.open(filepath, 'r:gz').extractall(dest_directory) | |
else: | |
tf.logging.info('Not extracting or downloading files, model already present in disk') | |
def create_model_info(architecture='inception_v3'): | |
architecture = architecture.lower() | |
is_quantized = False | |
if architecture == 'inception_v3': | |
# pylint: disable=line-too-long | |
data_url = 'http://download.tensorflow.org/models/image/imagenet/inception-2015-12-05.tgz' | |
# pylint: enable=line-too-long | |
logits_tensor_name = 'softmax/logits:0' | |
probabilities_tensor_name = 'softmax:0' | |
bottleneck_tensor_size = 1008 | |
input_width = 299 | |
input_height = 299 | |
input_depth = 3 | |
resized_input_tensor_name = 'Mul:0' | |
model_file_name = 'classify_image_graph_def.pb' | |
input_mean = 128 | |
input_std = 128 | |
elif architecture.startswith('mobilenet_'): | |
parts = architecture.split('_') | |
if len(parts) != 3 and len(parts) != 4: | |
tf.logging.error("Couldn't understand architecture name '%s'", | |
architecture) | |
return None | |
version_string = parts[1] | |
if (version_string != '1.0' and version_string != '0.75' and | |
version_string != '0.50' and version_string != '0.25'): | |
tf.logging.error( | |
""""The Mobilenet version should be '1.0', '0.75', '0.50', or '0.25', | |
but found '%s' for architecture '%s'""", | |
version_string, architecture) | |
return None | |
size_string = parts[2] | |
if (size_string != '224' and size_string != '192' and | |
size_string != '160' and size_string != '128'): | |
tf.logging.error( | |
"""The Mobilenet input size should be '224', '192', '160', or '128', | |
but found '%s' for architecture '%s'""", | |
size_string, architecture) | |
return None | |
if len(parts) == 3: | |
is_quantized = False | |
else: | |
if parts[3] != 'quantized': | |
tf.logging.error( | |
"Couldn't understand architecture suffix '%s' for '%s'", parts[3], | |
architecture) | |
return None | |
is_quantized = True | |
if is_quantized: | |
data_url = 'http://download.tensorflow.org/models/mobilenet_v1_' | |
data_url += version_string + '_' + size_string + '_quantized_frozen.tgz' | |
logits_tensor_name = 'MobilenetV1/Predictions/Reshape:0' | |
probabilities_tensor_name = 'MobilenetV1/Predictions/Reshape_1:0' | |
resized_input_tensor_name = 'Placeholder:0' | |
model_dir_name = ('mobilenet_v1_' + version_string + '_' + size_string + | |
'_quantized_frozen') | |
model_base_name = 'quantized_frozen_graph.pb' | |
else: | |
data_url = 'http://download.tensorflow.org/models/mobilenet_v1_' | |
data_url += version_string + '_' + size_string + '_frozen.tgz' | |
logits_tensor_name = 'MobilenetV1/Predictions/Reshape:0' | |
probabilities_tensor_name = 'MobilenetV1/Predictions/Reshape_1:0' | |
resized_input_tensor_name = 'input:0' | |
model_dir_name = 'mobilenet_v1_' + version_string + '_' + size_string | |
model_base_name = 'frozen_graph.pb' | |
bottleneck_tensor_size = 1001 | |
input_width = int(size_string) | |
input_height = int(size_string) | |
input_depth = 3 | |
model_file_name = os.path.join(model_dir_name, model_base_name) | |
input_mean = 127.5 | |
input_std = 127.5 | |
else: | |
tf.logging.error("Couldn't understand architecture name '%s'", architecture) | |
raise ValueError('Unknown architecture', architecture) | |
return { | |
'data_url': data_url, | |
'logits_tensor_name': logits_tensor_name, | |
'probabilities_tensor_name': probabilities_tensor_name, | |
'bottleneck_tensor_size': bottleneck_tensor_size, | |
'input_width': input_width, | |
'input_height': input_height, | |
'input_depth': input_depth, | |
'resized_input_tensor_name': resized_input_tensor_name, | |
'model_file_name': model_file_name, | |
'input_mean': input_mean, | |
'input_std': input_std, | |
'quantize_layer': is_quantized, | |
} | |
def create_model_graph(graph, model_info): | |
""""Creates a graph from saved GraphDef file and returns a Graph object. | |
Args: | |
model_info: Dictionary containing information about the model architecture. | |
Returns: | |
Graph holding the trained Inception network, and various tensors we'll be | |
manipulating. | |
""" | |
with graph.as_default(): | |
model_path = os.path.join(FLAGS.model_dir, model_info['model_file_name']) | |
tf.logging.info('Model path: {}'.format(model_path)) | |
with gfile.FastGFile(model_path, 'rb') as f: | |
graph_def = tf.GraphDef() | |
graph_def.ParseFromString(f.read()) | |
logits_tensor, probabilities_tensor, resized_input_tensor = ( | |
tf.import_graph_def( | |
graph_def, | |
name='', | |
return_elements=[ | |
model_info['logits_tensor_name'], | |
model_info['probabilities_tensor_name'], | |
model_info['resized_input_tensor_name'], | |
])) | |
return logits_tensor, probabilities_tensor, resized_input_tensor | |
def add_jpeg_decoding(input_width, input_height, input_depth, input_mean, | |
input_std): | |
"""Adds operations that perform JPEG decoding and resizing to the graph.. | |
Args: | |
input_width: Desired width of the image fed into the recognizer graph. | |
input_height: Desired width of the image fed into the recognizer graph. | |
input_depth: Desired channels of the image fed into the recognizer graph. | |
input_mean: Pixel value that should be zero in the image for the graph. | |
input_std: How much to divide the pixel values by before recognition. | |
Returns: | |
Tensors for the node to feed JPEG data into, and the output of the | |
preprocessing steps. | |
""" | |
jpeg_data = tf.placeholder(tf.string, name='DecodeJPGInput') | |
decoded_image = tf.image.decode_jpeg(jpeg_data, channels=input_depth) | |
decoded_image_as_float = tf.cast(decoded_image, dtype=tf.float32) | |
decoded_image_4d = tf.expand_dims(decoded_image_as_float, 0) | |
resize_shape = tf.stack([input_height, input_width]) | |
resize_shape_as_int = tf.cast(resize_shape, dtype=tf.int32) | |
resized_image = tf.image.resize_bilinear(decoded_image_4d, | |
resize_shape_as_int) | |
offset_image = tf.subtract(resized_image, input_mean) | |
mul_image = tf.multiply(offset_image, 1.0 / input_std) | |
return jpeg_data, mul_image | |
def main(): | |
output_json = FLAGS.output_json | |
target_jpeg = FLAGS.jpeg_pattern | |
architecture = FLAGS.architecture | |
tf.logging.set_verbosity(tf.logging.INFO) | |
tf.logging.info('TensorFlow version: {}'.format(tf.__version__)) | |
tf.logging.info('output_json: {}'.format(output_json)) | |
tf.logging.info('target_jpeg: {}'.format(target_jpeg)) | |
tf.logging.info('architecture: {}'.format(architecture)) | |
model_info = create_model_info(architecture) | |
maybe_download_and_extract(model_info['data_url']) | |
graph = tf.Graph() | |
logits_tensor, probabilities_tensor, resized_input_tensor = ( | |
create_model_graph(graph, model_info)) | |
with graph.as_default(): | |
jpeg_data_tensor, decoded_image_tensor = add_jpeg_decoding( | |
model_info['input_width'], model_info['input_height'], | |
model_info['input_depth'], model_info['input_mean'], | |
model_info['input_std']) | |
values = [] | |
with open(output_json, 'w') as json_file: | |
with tf.Session(graph=graph) as sess: | |
image_paths = gfile.Glob(target_jpeg) | |
group_by = 100 | |
groups = [image_paths[i:i + group_by] for i in range(0, len(image_paths), group_by)] | |
for sub_image_paths in groups: | |
for image_path in sub_image_paths: | |
image_data = gfile.FastGFile(image_path, 'rb').read() | |
resized_input_values = sess.run(decoded_image_tensor, | |
{jpeg_data_tensor: image_data}) | |
logits_values, probabilities_values = sess.run( | |
[logits_tensor, probabilities_tensor], | |
{resized_input_tensor: resized_input_values}) | |
logits_values = np.squeeze(logits_values) | |
probabilities_values = np.squeeze(probabilities_values) | |
jobj = { | |
'key': os.path.splitext(os.path.basename(image_path))[0], | |
'vector': logits_values.tolist(), | |
'prob': probabilities_values.tolist() | |
} | |
json_file.write(json.dumps(jobj) + '\n') | |
json_file.flush() | |
if __name__ == '__main__': | |
main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Thanks for the above simple version of the retrain.py
I was trying to reduce the vectore size. but i am getting same size 2016 even after changing "bottleneck_tensor_size = 1008"
can you please tell me how to reduce vector size to 100. Thanks