@maciej-adamiak
Created February 13, 2021 20:45
Parallel TensorFlow image read: a tf.data pipeline that loads, resizes, and normalizes images with OpenCV, interleaving per-file reads in parallel.
import glob
from typing import Tuple

import cv2
import numpy as np
import tensorflow as tf


def process_image(image_size, path, preserve_file=False):
    # Generator invoked by tf.data.Dataset.from_generator; `path` arrives as bytes.
    path = path.decode('utf-8')
    image = cv2.imread(path, cv2.IMREAD_UNCHANGED)
    image = cv2.resize(image, (image_size, image_size))
    # Scale pixel values to [-1, 1] and match the declared float32 output type.
    image = (image / 127.5 - 1).astype(np.float32)
    if preserve_file:
        # Keep the source path alongside the image (requires matching output_types/shapes).
        yield path, image
    else:
        yield image
def dataset(image_path, image_size, channels=4, batch_size=1, labels=False, prefetch_size=32,
            num_parallel_calls=tf.data.experimental.AUTOTUNE, output_types=np.float32,
            shuffle=False) -> Tuple[tf.data.Dataset, int]:
    # Collect and shuffle the file paths matching the glob pattern.
    files = glob.glob(image_path)
    np.random.shuffle(files)
    file_count = len(files)
    tf.print(f'Processing {file_count} files')

    def aux(path):
        # Wrap the OpenCV-based generator so each file becomes a one-element dataset.
        return tf.data.Dataset.from_generator(
            process_image,
            output_types=output_types,
            output_shapes=tf.TensorShape([image_size, image_size, channels]),
            args=(image_size, path, labels))

    # Interleave the per-file datasets so images are read and decoded in parallel.
    dataset = tf.data.Dataset.from_tensor_slices(files)
    dataset = dataset.interleave(aux, cycle_length=num_parallel_calls,
                                 num_parallel_calls=num_parallel_calls)
    if shuffle:
        dataset = dataset.shuffle(buffer_size=file_count, reshuffle_each_iteration=True)
    dataset = dataset.batch(batch_size=batch_size, drop_remainder=False)
    dataset = dataset.prefetch(prefetch_size)
    return dataset, file_count
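
A minimal usage sketch follows; the glob pattern, image size, and batch size are illustrative, and channels=4 assumes RGBA inputs such as PNGs with an alpha channel.

# Usage sketch: build the pipeline and pull one batch (paths and sizes are hypothetical).
ds, count = dataset('images/*.png', image_size=256, channels=4, batch_size=8, shuffle=True)
for batch in ds.take(1):
    print(batch.shape)  # (8, 256, 256, 4) when at least 8 files match the pattern

Note that passing labels=True makes the generator yield (path, image) pairs, so output_types and output_shapes would then need to be given as matching tuples rather than the single float32 type used above.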