Skip to content

Instantly share code, notes, and snippets.

@pbruneau
Created September 8, 2021 15:55
Show Gist options
  • Save pbruneau/2f76d33e15e2a3673825cc08bcc2415b to your computer and use it in GitHub Desktop.
Save pbruneau/2f76d33e15e2a3673825cc08bcc2415b to your computer and use it in GitHub Desktop.
Trial of a DALIDataset with external input for Keras
import glob
import numpy as np
import cupy as cp
import imageio
from random import shuffle
from nvidia.dali import Pipeline
import nvidia.dali.fn as fn
import nvidia.dali.plugin.tf as dali_tf
import tensorflow as tf
# Number of samples per batch; the same value is passed to the input iterator,
# the DALI pipeline and the TensorFlow dataset below.
BATCH_SIZE = 16
# Glob pattern selecting the input image files read by ExternalInputIterator.
FILE_PATH_GLOB = "/files/data/pokemon_jpg/*.jpg" # replace with a local path
class ExternalInputIterator(object):
    """Endless batch source producing ``(images, labels)`` lists.

    Every call to ``next()`` reads ``batch_size`` image files, converts each
    one to a ``float32`` array divided by 255.0, and pairs it with a constant
    ``uint8`` label ``[1]``. The file index wraps around, so the iterator
    never raises ``StopIteration``.
    """

    def __init__(self, batch_size, file_glob=None):
        """Collect and shuffle the files to serve.

        Args:
            batch_size: Number of samples returned by each ``next()`` call.
            file_glob: Glob pattern of the image files. Defaults to the
                module-level ``FILE_PATH_GLOB`` when omitted.

        Raises:
            FileNotFoundError: If the pattern matches no file. Failing here
                avoids an opaque ``IndexError`` on the first batch.
        """
        pattern = FILE_PATH_GLOB if file_glob is None else file_glob
        self.files = glob.glob(pattern)
        if not self.files:
            raise FileNotFoundError(
                "no input files match pattern {!r}".format(pattern))
        self.batch_size = batch_size
        shuffle(self.files)
        # Initialise the cursor here as well, so next() is usable even when
        # the caller never went through iter() first.
        self.i = 0
        self.n = len(self.files)

    def __iter__(self):
        """Rewind to the first file and return the iterator itself."""
        self.i = 0
        self.n = len(self.files)
        return self

    def __next__(self):
        """Return one batch as ``(list of images, list of labels)``."""
        batch = []
        labels = []
        for _ in range(self.batch_size):
            im = imageio.imread(self.files[self.i])
            # NOTE(review): assumes 8-bit images, so values land in [0, 1];
            # channel count is whatever the file holds - confirm inputs.
            im = np.asarray(im, dtype=np.float32)
            im = im / 255.0
            batch.append(im)
            labels.append(np.array([1], dtype=np.uint8))
            # Wrap around so that batches can be produced indefinitely.
            self.i = (self.i + 1) % self.n
        return batch, labels
# Create the iterator and smoke-test it on its own: fetch one batch and print
# the type of its first image.
eii = ExternalInputIterator(BATCH_SIZE)
print(type(next(iter(eii))[0][0]))

# Create the pipeline: the iterator feeds two outputs (images, labels) through
# a CPU external source, and the images are resized to 256x256.
pipe = Pipeline(batch_size=BATCH_SIZE, num_threads=2, device_id=0)
with pipe:
    images, labels = fn.external_source(source=eii, num_outputs=2, device="cpu")
    images = fn.resize(images, size=[256, 256])
    pipe.set_outputs(images, labels)
pipe.build()

# Test the pipeline: run it once and print the shape of the first image.
pipe_out = pipe.run()
batch = pipe_out[0]
img = batch.at(0)
print(img.shape)

# Try to create the DALIDataset.
# The label shape is spelled as a one-element tuple; a bare ``(BATCH_SIZE)``
# is just an int in parentheses.
# NOTE(review): each label sample produced by the iterator has shape (1,);
# confirm the dataset reconciles that with the declared (BATCH_SIZE,) shape.
shapes = ((BATCH_SIZE, 256, 256, 3), (BATCH_SIZE,))
dtypes = (tf.float32, tf.uint8)
with tf.device('/cpu:0'):
    dataset = dali_tf.experimental.DALIDatasetWithInputs(
        pipeline=pipe,
        batch_size=BATCH_SIZE,
        output_shapes=shapes,
        output_dtypes=dtypes,
        device_id=0)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment