Last active
December 18, 2017 22:35
-
-
Save OluwoleOyetoke/eee86af592b61a312e532ba743807d47 to your computer and use it in GitHub Desktop.
TFRecord Creation Find images
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def _find_image_files(data_dir, labels_file):
    """Build shuffled, parallel lists of image paths, label texts and label ids.

    Args:
      data_dir: string, path to the root directory of images.

        Assumes that the image data set resides in JPEG files located in
        the following directory structure.

          data_dir/dog/another-image.JPEG
          data_dir/dog/my-image.jpg

        where 'dog' is the label associated with these images. Every entry
        matched by 'data_dir/<label>/*' is collected; no filtering by file
        extension is performed here.

      labels_file: string, path to the labels file.

        The list of valid labels are held in this file. Assumes that the file
        contains entries as such:

          dog
          cat
          flower

        where each line corresponds to a label. Blank lines are ignored.
        Each label is mapped to an integer starting with 1 for the label on
        the first non-blank line; index 0 is left unused as a background
        class.

    Returns:
      filenames: list of strings; each string is a path to an image file.
      texts: list of strings; each string is the class, e.g. 'dog'
      labels: list of integer; each integer identifies the ground truth.

      The three lists have the same length and are shuffled with the same
      fixed-seed permutation, so entries at the same position belong together.
    """
    print('Determining list of input files and labels from %s ' % labels_file)

    # Read inside a context manager so the file handle is closed promptly
    # instead of being left for the garbage collector.
    with tf.gfile.FastGFile(labels_file, 'r') as labels_handle:
        stripped_lines = [line.strip() for line in labels_handle.readlines()]

    # Skip blank lines: an empty label would produce the pattern
    # 'data_dir//*', which matches the class directories themselves rather
    # than image files.
    unique_labels = [label for label in stripped_lines if label]

    labels = []
    filenames = []
    texts = []

    # Construct the list of JPEG files and labels.
    # Label indices start at 1; index 0 is left empty as a background class.
    for label_index, text in enumerate(unique_labels, start=1):
        jpeg_file_path = '%s/%s/*' % (data_dir, text)
        print("File path %s \n" % jpeg_file_path)
        matching_files = tf.gfile.Glob(jpeg_file_path)

        labels.extend([label_index] * len(matching_files))
        texts.extend([text] * len(matching_files))
        filenames.extend(matching_files)

        if not label_index % 100:
            # Report progress against the number of classes, not the number
            # of files collected so far.
            print('Finished finding files in %d of %d classes.' % (
                label_index, len(unique_labels)))

    # Shuffle the ordering of all image files in order to guarantee
    # random ordering of the images with respect to label in the
    # saved TFRecord files. Make the randomization repeatable.
    shuffled_index = list(range(len(filenames)))
    random.seed(12345)
    random.shuffle(shuffled_index)

    filenames = [filenames[i] for i in shuffled_index]
    texts = [texts[i] for i in shuffled_index]
    labels = [labels[i] for i in shuffled_index]

    print('Found %d JPEG files across %d labels inside %s' %
          (len(filenames), len(unique_labels), data_dir))
    return filenames, texts, labels
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment