Skip to content

Instantly share code, notes, and snippets.

@txizzle
Created August 4, 2017 23:45
Show Gist options
  • Save txizzle/6654a79086762fb75584fdbdbd0b2e04 to your computer and use it in GitHub Desktop.
kratzert_finetune_tf_vs_cv2
Display the source blob
Display the rendered blob
Raw
import cv2
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
# %matplotlib inline
# NOTE(review): the line above is an IPython/Jupyter magic, not valid Python
# syntax in a plain .py file -- it is commented out here. Restore it (without
# the leading '#') when running this code inside a notebook.

# Path to one ImageNet validation image; replace the placeholder directory.
filepath = '<directory goes here>/imagenet/ilsvrc2012-val/n02397096/ILSVRC2012_val_00046916.JPEG'

#########################
# Approach 1: Using cv2 #
#########################

# Per-channel ImageNet mean. Order matches cv2's BGR channel layout.
mean = np.array([104., 117., 124.]).astype(np.float32)

# Read the .JPEG image; cv2.imread returns an HxWx3 uint8 array in BGR order.
cv2_decoded = cv2.imread(filepath)
# Rescale to the network input size (227x227) and convert to float.
cv2_resized = cv2.resize(cv2_decoded, (227, 227))
cv2_float = cv2_resized.astype(np.float32)
# Subtract the ImageNet mean.
cv2_centered = cv2_float - mean
# The final image sent to the network during validation/training.
cv2_img = cv2_centered
# Visualization: add the mean back and convert to uint8.
cv2_img_viz = cv2_centered + mean
cv2_img_viz = cv2_img_viz.astype(np.uint8)
# cvtColor flips BGR -> RGB because matplotlib expects RGB.
plt.imshow(cv2.cvtColor(cv2_img_viz, cv2.COLOR_BGR2RGB))
plt.title('Image Processed with CV2')
plt.show()
###############################
# Approach 2: Using tf.image #
#############################
# Per-channel ImageNet mean (BGR order) as a TF constant.
IMAGENET_MEAN = tf.constant([104., 117., 124.], dtype=tf.float32)
# Convert the filepath string to a string tensor.
tf_filepath = tf.convert_to_tensor(filepath, dtype=tf.string)
# Read and decode the .JPEG image; decode_jpeg yields uint8 RGB.
tf_img_string = tf.read_file(tf_filepath)
tf_decoded = tf.image.decode_jpeg(tf_img_string, channels=3)

# Approach 2a): Rescale, convert to BGR, subtract mean.
# Rescale the image; resize_images returns float32 for the default
# bilinear method, the to_float below is kept for safety.
tf_resized = tf.image.resize_images(tf_decoded, [227, 227])
tf_float = tf.to_float(tf_resized)
# Convert RGB -> BGR by reversing the channel axis.
tf_bgr = tf_float[:, :, ::-1]
# Subtract the ImageNet mean.
# FIX: the original subtracted the numpy `mean` from approach 1 and left
# IMAGENET_MEAN unused; the values are identical, but use the TF constant
# defined for this purpose.
tf_centered = tf.subtract(tf_bgr, IMAGENET_MEAN)
# The final image sent to the network during validation/training.
tf_img = tf_centered

# Approach 2b): Convert to BGR, rescale, subtract mean.
# Convert to BGR first...
tf_decoded_bgr = tf_decoded[:, :, ::-1]
# ...then rescale and convert to float.
tf_resized_bgr = tf.image.resize_images(tf_decoded_bgr, [227, 227])
tf_float_bgr = tf.to_float(tf_resized_bgr)
# Subtract the ImageNet mean (same fix as above: use IMAGENET_MEAN).
tf_centered_bgr = tf.subtract(tf_float_bgr, IMAGENET_MEAN)
# The final image sent to the network during validation/training.
tf_img_bgr = tf_centered_bgr
# Evaluate all intermediate tensors.
# FIX: the original gist lost the indentation of the `with` body, which is a
# syntax error in a plain .py file. Also fetch every tensor in a single
# sess.run so the decode/resize graph executes once instead of once per call.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    (tf_img_decoded, tf_img_resized, tf_img_float, tf_img,
     tf_img_decoded_bgr, tf_img_resized_bgr, tf_img_float_bgr,
     tf_img_bgr) = sess.run([tf_decoded, tf_resized, tf_float, tf_centered,
                             tf_decoded_bgr, tf_resized_bgr, tf_float_bgr,
                             tf_centered_bgr])
# Visualization for approach 2a): add the mean back and convert to uint8.
tf_img_copy = tf_img + mean
tf_img_viz = tf_img_copy.astype(np.uint8)
plt.imshow(cv2.cvtColor(tf_img_viz, cv2.COLOR_BGR2RGB))
plt.title('Image Processed with TF, Approach 2a)')
plt.show()
# Visualization for approach 2b): add the mean back and convert to uint8.
tf_img_bgr_copy = tf_img_bgr + mean
tf_img_bgr_viz = tf_img_bgr_copy.astype(np.uint8)
# BUG FIX: the original re-displayed tf_img_viz (approach 2a) under the
# "Approach 2b)" title; show the 2b image that was just computed.
plt.imshow(cv2.cvtColor(tf_img_bgr_viz, cv2.COLOR_BGR2RGB))
plt.title('Image Processed with TF, Approach 2b)')
plt.show()
##########
# Compare the CV2-produced and TF-produced image arrays -- the arrays that
# would actually be fed to the network -- and their uint8 visualizations.
print("TF: ")
print(f"dtype: {tf_img.dtype}")
print(f"shape: {np.shape(tf_img)}")
print(tf_img[:,:,0])
print("\nCV2: ")
print(f"dtype: {cv2_img.dtype}")
print(f"shape: {np.shape(cv2_img)}")
print(cv2_img[:,:,0])
print("\nDifferences for pre-viz images (images that are outputted to the neural network): ")
# print(tf_img - cv2_img)
print(np.sum(np.abs(tf_img - cv2_img)))
print("\nDifferences for final visualized images (after adding mean and converting to uint8): ")
# print(tf_img_viz - cv2_img_viz)
print(np.sum(np.abs(tf_img_viz - cv2_img_viz)))
##########
# Show that TF approaches 2a) and 2b) are equivalent: converting to BGR and
# then resizing matches resizing and then converting to BGR.
print("TF Approach 2a): ")
print(f"dtype: {tf_img.dtype}")
print(f"shape: {np.shape(tf_img)}")
print(tf_img[:,:,0])
print("\nTF Approach 2b): ")
print(f"dtype: {tf_img_bgr.dtype}")
print(f"shape: {np.shape(tf_img_bgr)}")
print(tf_img_bgr[:,:,0])
print("\nDifferences between TF Approach 2a) and 2b):")
print(np.sum(np.abs(tf_img - tf_img_bgr)))
#######
# This cell shows where TF and CV2 operations start to diverge.
# We see these intermediate differences in two places: tf.image.decode_jpeg() and tf.image.resize_images()
# tf.image.decode_jpeg() differs from cv2.imread() in almost every pixel by a small amount
# tf.image.resize_images() automatically converts the dtype to float32, which may or not worsen the inaccuracy
print("tf_img_decoded_bgr: ")
print("dtype: " + str(tf_img_decoded_bgr.dtype))
print("shape: " + str(np.shape(tf_img_decoded_bgr)))
print(tf_img_decoded_bgr[:,:,0])
print("\ncv2_decoded:")
print("dtype: " + str(cv2_decoded.dtype))
# Consistency fix: prefix the shape like the other printouts.
print("shape: " + str(np.shape(cv2_decoded)))
print(cv2_decoded[:,:,0])
print("\n\ntf_img_resized_bgr: ")
print("dtype: " + str(tf_img_resized_bgr.dtype))
print("shape: " + str(np.shape(tf_img_resized_bgr)))
# BUG FIX: the original printed tf_img_resized (approach 2a) here although
# the label above says tf_img_resized_bgr.
print(tf_img_resized_bgr[:,:,0])
# BUG FIX: the original label said "cv2_decoded:" but printed cv2_resized.
print("\ncv2_resized:")
print("dtype: " + str(cv2_resized.dtype))
print("shape: " + str(np.shape(cv2_resized)))
print(cv2_resized[:,:,0])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment