@maciej-adamiak
Created February 13, 2021 20:45
Parallel TensorFlow image read: a tf.data pipeline that loads, resizes, and normalizes images with OpenCV, interleaving per-file reads in parallel.
import glob
from typing import Tuple

import cv2
import numpy as np
import tensorflow as tf


def process_image(image_size, path, preserve_file=False):
    # Generator invoked by tf.data.Dataset.from_generator; `path` arrives as bytes.
    path = path.decode('utf-8')
    image = cv2.imread(path, cv2.IMREAD_UNCHANGED)
    image = cv2.resize(image, (image_size, image_size))
    # Scale pixel values to [-1, 1] and match the declared float32 output type.
    image = (image / 127.5 - 1).astype(np.float32)
    if preserve_file:
        # Keep the source path alongside the image (requires matching output_types/shapes).
        yield path, image
    else:
        yield image
def dataset(image_path, image_size, channels=4, batch_size=1, labels=False, prefetch_size=32,
            num_parallel_calls=tf.data.experimental.AUTOTUNE, output_types=np.float32,
            shuffle=False) -> Tuple[tf.data.Dataset, int]:
    # Collect and shuffle the file paths matching the glob pattern.
    files = glob.glob(image_path)
    np.random.shuffle(files)
    file_count = len(files)
    tf.print(f'Processing {file_count} files')

    def aux(path):
        # Wrap the OpenCV-based generator so each file becomes a one-element dataset.
        return tf.data.Dataset.from_generator(
            process_image,
            output_types=output_types,
            output_shapes=tf.TensorShape([image_size, image_size, channels]),
            args=(image_size, path, labels))

    # Interleave the per-file datasets so images are read and decoded in parallel.
    dataset = tf.data.Dataset.from_tensor_slices(files)
    dataset = dataset.interleave(aux, cycle_length=num_parallel_calls,
                                 num_parallel_calls=num_parallel_calls)
    if shuffle:
        dataset = dataset.shuffle(buffer_size=file_count, reshuffle_each_iteration=True)
    dataset = dataset.batch(batch_size=batch_size, drop_remainder=False)
    dataset = dataset.prefetch(prefetch_size)
    return dataset, file_count
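
A minimal usage sketch follows; the glob pattern, image size, and batch size are illustrative, and channels=4 assumes RGBA inputs such as PNGs with an alpha channel.

# Usage sketch: build the pipeline and pull one batch (paths and sizes are hypothetical).
ds, count = dataset('images/*.png', image_size=256, channels=4, batch_size=8, shuffle=True)
for batch in ds.take(1):
    print(batch.shape)  # (8, 256, 256, 4) when at least 8 files match the pattern

Note that passing labels=True makes the generator yield (path, image) pairs, so output_types and output_shapes would then need to be given as matching tuples rather than the single float32 type used above.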