Created
August 4, 2017 23:45
-
-
Save txizzle/6654a79086762fb75584fdbdbd0b2e04 to your computer and use it in GitHub Desktop.
kratzert_finetune_tf_vs_cv2
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import cv2 | |
import matplotlib.pyplot as plt | |
import numpy as np | |
import tensorflow as tf | |
%matplotlib inline | |
# Path to one ImageNet validation image — edit to match your local setup.
filepath = '<directory goes here>/imagenet/ilsvrc2012-val/n02397096/ILSVRC2012_val_00046916.JPEG'

#########################
# Approach 1: Using cv2 #
#########################
# Per-channel ImageNet mean, in BGR order (cv2.imread returns BGR images).
mean = np.array([104., 117., 124.]).astype(np.float32)

# Decode the .JPEG from disk (uint8, BGR channel order).
cv2_decoded = cv2.imread(filepath)
# Resize to the network's 227x227 input resolution.
cv2_resized = cv2.resize(cv2_decoded, (227, 227))
# Promote to float32 and subtract the ImageNet mean; this is the final
# image that would be fed to the network during validation/training.
cv2_img = cv2_resized.astype(np.float32) - mean

# Visualization only: undo the mean subtraction, go back to uint8, and
# convert BGR -> RGB so matplotlib displays the colors correctly.
cv2_img_viz = (cv2_img + mean).astype(np.uint8)
plt.imshow(cv2.cvtColor(cv2_img_viz, cv2.COLOR_BGR2RGB))
plt.title('Image Processed with CV2')
plt.show()
##############################
# Approach 2: Using tf.image #
##############################
# Same per-channel ImageNet mean as the cv2 cell, as a TF constant in
# BGR channel order.
IMAGENET_MEAN = tf.constant([104., 117., 124.], dtype=tf.float32)

# Convert the filepath string to a string tensor.
tf_filepath = tf.convert_to_tensor(filepath, dtype=tf.string)
# Read and decode the .JPEG (uint8, RGB channel order, 3 channels).
tf_img_string = tf.read_file(tf_filepath)
tf_decoded = tf.image.decode_jpeg(tf_img_string, channels=3)

# Approach 2a): Rescale, Convert to BGR, Subtract mean
# Rescale image and convert to float
# (tf.image.resize_images already outputs float32, so tf.to_float is a
# no-op cast kept for parity with the original gist.)
tf_resized = tf.image.resize_images(tf_decoded, [227, 227])
tf_float = tf.to_float(tf_resized)
# Convert to BGR by reversing the channel axis.
tf_bgr = tf_float[:, :, ::-1]
# Subtract the ImageNet mean. Fix: use the IMAGENET_MEAN constant defined
# above — the original subtracted the numpy `mean` from the cv2 cell and
# left IMAGENET_MEAN unused (the values are identical, so behavior is
# unchanged, but the dead constant / cross-cell dependency is removed).
tf_centered = tf.subtract(tf_bgr, IMAGENET_MEAN)
# The final image sent to the network during validation/training
tf_img = tf_centered

# Approach 2b): Convert to BGR, Rescale, Subtract mean
# Convert to BGR
tf_decoded_bgr = tf_decoded[:, :, ::-1]
# Rescale image and convert to float
tf_resized_bgr = tf.image.resize_images(tf_decoded_bgr, [227, 227])
tf_float_bgr = tf.to_float(tf_resized_bgr)
# Subtract the ImageNet mean (same fix as 2a).
tf_centered_bgr = tf.subtract(tf_float_bgr, IMAGENET_MEAN)
# The final image sent to the network during validation/training
tf_img_bgr = tf_centered_bgr
with tf.Session() as sess:
    # There are no tf.Variables in this graph; initializing is harmless
    # and kept for parity with the original gist.
    sess.run(tf.global_variables_initializer())
    # Fetch every intermediate tensor in ONE sess.run call. The original
    # issued eight separate sess.run calls, each of which re-executed the
    # whole graph (re-reading and re-decoding the JPEG eight times); a
    # single fetch list evaluates the graph once and returns all results.
    (tf_img_decoded, tf_img_resized, tf_img_float, tf_img,
     tf_img_decoded_bgr, tf_img_resized_bgr, tf_img_float_bgr,
     tf_img_bgr) = sess.run([
        tf_decoded, tf_resized, tf_float, tf_centered,
        tf_decoded_bgr, tf_resized_bgr, tf_float_bgr, tf_centered_bgr])
# Visualization of Approach 2a): add the mean back and convert to uint8.
tf_img_copy = tf_img + mean
tf_img_viz = tf_img_copy.astype(np.uint8)
plt.imshow(cv2.cvtColor(tf_img_viz, cv2.COLOR_BGR2RGB))
plt.title('Image Processed with TF, Approach 2a)')
plt.show()
# Visualization of Approach 2b): add the mean back and convert to uint8.
tf_img_bgr_copy = tf_img_bgr + mean
tf_img_bgr_viz = tf_img_bgr_copy.astype(np.uint8)
# Bug fix: the original passed tf_img_viz here, so the "Approach 2b)"
# figure silently re-displayed the 2a image instead of tf_img_bgr_viz.
plt.imshow(cv2.cvtColor(tf_img_bgr_viz, cv2.COLOR_BGR2RGB))
plt.title('Image Processed with TF, Approach 2b)')
plt.show()
##########
# This cell illustrates the differences between the CV2 produced image batches
# and TF produced image batches. These are the images actually passed to the NN.
print("TF: ")
print(f"dtype: {tf_img.dtype}")
print(f"shape: {np.shape(tf_img)}")
print(tf_img[:, :, 0])

print("\nCV2: ")
print(f"dtype: {cv2_img.dtype}")
print(f"shape: {np.shape(cv2_img)}")
print(cv2_img[:, :, 0])

# Total absolute deviation between the two pre-visualization images
# (what the network would actually see).
print("\nDifferences for pre-viz images (images that are outputted to the neural network): ")
print(np.sum(np.abs(tf_img - cv2_img)))
# Same comparison after the viz round-trip (add mean, cast to uint8).
print("\nDifferences for final visualized images (after adding mean and converting to uint8): ")
print(np.sum(np.abs(tf_img_viz - cv2_img_viz)))
##########
# This cell shows that the TF 2a) and TF 2b) approaches are equivalent:
# converting to BGR then resizing matches resizing then converting to BGR.
print("TF Approach 2a): ")
print(f"dtype: {tf_img.dtype}")
print(f"shape: {np.shape(tf_img)}")
print(tf_img[:, :, 0])

print("\nTF Approach 2b): ")
print(f"dtype: {tf_img_bgr.dtype}")
print(f"shape: {np.shape(tf_img_bgr)}")
print(tf_img_bgr[:, :, 0])

# Total absolute deviation between the two TF pipelines (expected: 0).
print("\nDifferences between TF Approach 2a) and 2b):")
print(np.sum(np.abs(tf_img - tf_img_bgr)))
#######
# This cell shows where TF and CV2 operations start to diverge.
# We see these intermediate differences in two places: tf.image.decode_jpeg()
# and tf.image.resize_images().
# - tf.image.decode_jpeg() differs from cv2.imread() in almost every pixel by
#   a small amount.
# - tf.image.resize_images() automatically converts the dtype to float32,
#   which may or may not worsen the inaccuracy.
print("tf_img_decoded_bgr: ")
print(f"dtype: {tf_img_decoded_bgr.dtype}")
print(f"shape: {np.shape(tf_img_decoded_bgr)}")
print(tf_img_decoded_bgr[:, :, 0])

print("\ncv2_decoded:")
print(f"dtype: {cv2_decoded.dtype}")
# Fix: prefix the shape with "shape: " like every other cell (the original
# printed the bare tuple here).
print(f"shape: {np.shape(cv2_decoded)}")
print(cv2_decoded[:, :, 0])

print("\n\ntf_img_resized_bgr: ")
print(f"dtype: {tf_img_resized_bgr.dtype}")
print(f"shape: {np.shape(tf_img_resized_bgr)}")
# Bug fix: the original printed tf_img_resized (the 2a tensor) under the
# tf_img_resized_bgr heading; print the tensor the heading names.
print(tf_img_resized_bgr[:, :, 0])

# Bug fix: the original heading said "cv2_decoded:" here, but this section
# prints cv2_resized — label it correctly.
print("\ncv2_resized:")
print(f"dtype: {cv2_resized.dtype}")
print(f"shape: {np.shape(cv2_resized)}")
print(cv2_resized[:, :, 0])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment