Skip to content

Instantly share code, notes, and snippets.

@txizzle
Created August 4, 2017 23:45
Show Gist options
  • Save txizzle/6654a79086762fb75584fdbdbd0b2e04 to your computer and use it in GitHub Desktop.
kratzert_finetune_tf_vs_cv2
Display the source blob
Display the rendered blob
Raw
import cv2
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
# %matplotlib inline
# NOTE(review): the line above is an IPython/Jupyter magic, not valid Python
# syntax in a plain .py file -- it is commented out here. Restore it (without
# the leading '#') when running this code inside a notebook.

# Path to one ImageNet validation image; replace the placeholder directory.
filepath = '<directory goes here>/imagenet/ilsvrc2012-val/n02397096/ILSVRC2012_val_00046916.JPEG'

#########################
# Approach 1: Using cv2 #
#########################

# Per-channel ImageNet mean. Order matches cv2's BGR channel layout.
mean = np.array([104., 117., 124.]).astype(np.float32)

# Read the .JPEG image; cv2.imread returns an HxWx3 uint8 array in BGR order.
cv2_decoded = cv2.imread(filepath)
# Rescale to the network input size (227x227) and convert to float.
cv2_resized = cv2.resize(cv2_decoded, (227, 227))
cv2_float = cv2_resized.astype(np.float32)
# Subtract the ImageNet mean.
cv2_centered = cv2_float - mean
# The final image sent to the network during validation/training.
cv2_img = cv2_centered
# Visualization: add the mean back and convert to uint8.
cv2_img_viz = cv2_centered + mean
cv2_img_viz = cv2_img_viz.astype(np.uint8)
# cvtColor flips BGR -> RGB because matplotlib expects RGB.
plt.imshow(cv2.cvtColor(cv2_img_viz, cv2.COLOR_BGR2RGB))
plt.title('Image Processed with CV2')
plt.show()
###############################
# Approach 2: Using tf.image #
#############################
# Per-channel ImageNet mean (BGR order) as a TF constant.
IMAGENET_MEAN = tf.constant([104., 117., 124.], dtype=tf.float32)
# Convert the filepath string to a string tensor.
tf_filepath = tf.convert_to_tensor(filepath, dtype=tf.string)
# Read and decode the .JPEG image; decode_jpeg yields uint8 RGB.
tf_img_string = tf.read_file(tf_filepath)
tf_decoded = tf.image.decode_jpeg(tf_img_string, channels=3)

# Approach 2a): Rescale, convert to BGR, subtract mean.
# Rescale the image; resize_images returns float32 for the default
# bilinear method, the to_float below is kept for safety.
tf_resized = tf.image.resize_images(tf_decoded, [227, 227])
tf_float = tf.to_float(tf_resized)
# Convert RGB -> BGR by reversing the channel axis.
tf_bgr = tf_float[:, :, ::-1]
# Subtract the ImageNet mean.
# FIX: the original subtracted the numpy `mean` from approach 1 and left
# IMAGENET_MEAN unused; the values are identical, but use the TF constant
# defined for this purpose.
tf_centered = tf.subtract(tf_bgr, IMAGENET_MEAN)
# The final image sent to the network during validation/training.
tf_img = tf_centered

# Approach 2b): Convert to BGR, rescale, subtract mean.
# Convert to BGR first...
tf_decoded_bgr = tf_decoded[:, :, ::-1]
# ...then rescale and convert to float.
tf_resized_bgr = tf.image.resize_images(tf_decoded_bgr, [227, 227])
tf_float_bgr = tf.to_float(tf_resized_bgr)
# Subtract the ImageNet mean (same fix as above: use IMAGENET_MEAN).
tf_centered_bgr = tf.subtract(tf_float_bgr, IMAGENET_MEAN)
# The final image sent to the network during validation/training.
tf_img_bgr = tf_centered_bgr
# Evaluate all intermediate tensors.
# FIX: the original gist lost the indentation of the `with` body, which is a
# syntax error in a plain .py file. Also fetch every tensor in a single
# sess.run so the decode/resize graph executes once instead of once per call.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    (tf_img_decoded, tf_img_resized, tf_img_float, tf_img,
     tf_img_decoded_bgr, tf_img_resized_bgr, tf_img_float_bgr,
     tf_img_bgr) = sess.run([tf_decoded, tf_resized, tf_float, tf_centered,
                             tf_decoded_bgr, tf_resized_bgr, tf_float_bgr,
                             tf_centered_bgr])
# Visualization for approach 2a): add the mean back and convert to uint8.
tf_img_copy = tf_img + mean
tf_img_viz = tf_img_copy.astype(np.uint8)
plt.imshow(cv2.cvtColor(tf_img_viz, cv2.COLOR_BGR2RGB))
plt.title('Image Processed with TF, Approach 2a)')
plt.show()
# Visualization for approach 2b): add the mean back and convert to uint8.
tf_img_bgr_copy = tf_img_bgr + mean
tf_img_bgr_viz = tf_img_bgr_copy.astype(np.uint8)
# BUG FIX: the original re-displayed tf_img_viz (approach 2a) under the
# "Approach 2b)" title; show the 2b image that was just computed.
plt.imshow(cv2.cvtColor(tf_img_bgr_viz, cv2.COLOR_BGR2RGB))
plt.title('Image Processed with TF, Approach 2b)')
plt.show()
##########
# Compare the CV2-produced and TF-produced image arrays -- the arrays that
# would actually be fed to the network -- and their uint8 visualizations.
print("TF: ")
print(f"dtype: {tf_img.dtype}")
print(f"shape: {np.shape(tf_img)}")
print(tf_img[:,:,0])
print("\nCV2: ")
print(f"dtype: {cv2_img.dtype}")
print(f"shape: {np.shape(cv2_img)}")
print(cv2_img[:,:,0])
print("\nDifferences for pre-viz images (images that are outputted to the neural network): ")
# print(tf_img - cv2_img)
print(np.sum(np.abs(tf_img - cv2_img)))
print("\nDifferences for final visualized images (after adding mean and converting to uint8): ")
# print(tf_img_viz - cv2_img_viz)
print(np.sum(np.abs(tf_img_viz - cv2_img_viz)))
##########
# Show that TF approaches 2a) and 2b) are equivalent: converting to BGR and
# then resizing matches resizing and then converting to BGR.
print("TF Approach 2a): ")
print(f"dtype: {tf_img.dtype}")
print(f"shape: {np.shape(tf_img)}")
print(tf_img[:,:,0])
print("\nTF Approach 2b): ")
print(f"dtype: {tf_img_bgr.dtype}")
print(f"shape: {np.shape(tf_img_bgr)}")
print(tf_img_bgr[:,:,0])
print("\nDifferences between TF Approach 2a) and 2b):")
print(np.sum(np.abs(tf_img - tf_img_bgr)))
#######
# This cell shows where TF and CV2 operations start to diverge.
# We see these intermediate differences in two places: tf.image.decode_jpeg() and tf.image.resize_images()
# tf.image.decode_jpeg() differs from cv2.imread() in almost every pixel by a small amount
# tf.image.resize_images() automatically converts the dtype to float32, which may or not worsen the inaccuracy
print("tf_img_decoded_bgr: ")
print("dtype: " + str(tf_img_decoded_bgr.dtype))
print("shape: " + str(np.shape(tf_img_decoded_bgr)))
print(tf_img_decoded_bgr[:,:,0])
print("\ncv2_decoded:")
print("dtype: " + str(cv2_decoded.dtype))
# Consistency fix: prefix the shape like the other printouts.
print("shape: " + str(np.shape(cv2_decoded)))
print(cv2_decoded[:,:,0])
print("\n\ntf_img_resized_bgr: ")
print("dtype: " + str(tf_img_resized_bgr.dtype))
print("shape: " + str(np.shape(tf_img_resized_bgr)))
# BUG FIX: the original printed tf_img_resized (approach 2a) here although
# the label above says tf_img_resized_bgr.
print(tf_img_resized_bgr[:,:,0])
# BUG FIX: the original label said "cv2_decoded:" but printed cv2_resized.
print("\ncv2_resized:")
print("dtype: " + str(cv2_resized.dtype))
print("shape: " + str(np.shape(cv2_resized)))
print(cv2_resized[:,:,0])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment