Skip to content

Instantly share code, notes, and snippets.

@BenjaminFraser
Forked from ardamavi/get_dataset.py
Last active March 24, 2020 19:14
Show Gist options
  • Save BenjaminFraser/64f3929b61395f99799f2ab4b5def04f to your computer and use it in GitHub Desktop.
Save BenjaminFraser/64f3929b61395f99799f2ab4b5def04f to your computer and use it in GitHub Desktop.
For reading datasets and converting to numpy files.
# Arda Mavi
import os
import numpy as np
from os import listdir
from scipy.misc import imread, imresize
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split
# Settings:
# Edge length in pixels; get_img uses it for both the height and width of the resize target.
img_size = 64
# Passed to imread as `flatten` and selects 1 vs 3 as the last element of the resize shape.
grayscale_images = True
# Class count handed to to_categorical when get_dataset encodes the labels.
num_class = 10
# Passed to train_test_split as `test_size` in get_dataset
# (scikit-learn interprets a float here as the held-out proportion).
test_size = 0.2
def get_img(data_path):
    """Read one image file and return it resized to the configured size.

    The file at ``data_path`` is loaded with ``imread`` (flattened when the
    module-level ``grayscale_images`` flag is set) and then handed to
    ``imresize`` together with a ``(img_size, img_size, channels)`` shape,
    where ``channels`` is 1 for grayscale and 3 otherwise.
    """
    channels = 1 if grayscale_images else 3
    # NOTE(review): imresize documents `size` as (height, width); the third
    # element looks unused by it -- confirm before relying on a channel axis.
    target_shape = (img_size, img_size, channels)
    pixels = imread(data_path, flatten=grayscale_images)
    return imresize(pixels, target_shape)
def _label_sort_key(name):
    """Order integer-named labels numerically, then all other names alphabetically."""
    try:
        return (0, int(name), name)
    except ValueError:
        return (1, 0, name)


def get_dataset(dataset_path='Dataset'):
    """Return the dataset as a train/test split of images and labels.

    The arrays are first looked up in the on-disk cache (``npy_dataset/X.npy``
    and ``npy_dataset/Y.npy``). When the cache is missing or unreadable, the
    dataset is rebuilt from ``dataset_path``, which is expected to contain one
    sub-directory per class with that class's image files inside, and the
    result is written back to the cache.

    Note that an existing cache is used as-is: it is not refreshed when
    ``dataset_path`` or the module-level settings change.

    Args:
        dataset_path: Root directory holding one sub-directory per label.

    Returns:
        A tuple ``(X, X_test, Y, Y_test)`` as produced by
        ``train_test_split(X, Y, test_size=test_size, random_state=42)``:
        training images, test images, training labels, test labels.
    """
    cache_dir = 'npy_dataset/'
    x_cache = 'npy_dataset/X.npy'
    y_cache = 'npy_dataset/Y.npy'
    try:
        X = np.load(x_cache)
        Y = np.load(y_cache)
    except (IOError, OSError, ValueError, EOFError):
        # Cache is absent or corrupt: rebuild it from the image folders.
        # Only these load failures trigger a rebuild, so KeyboardInterrupt
        # and unrelated errors are no longer silently swallowed.

        # Getting labels: keep directories only, so stray files in the
        # dataset root (e.g. .DS_Store) neither crash listdir below nor
        # consume a class index. Integer-named folders are ordered
        # numerically ("2" before "10") so the class index follows the
        # folder number.
        labels = sorted(
            (name for name in listdir(dataset_path)
             if os.path.isdir(os.path.join(dataset_path, name))),
            key=_label_sort_key
        )
        X = []
        Y = []
        for i, label in enumerate(labels):
            datas_path = os.path.join(dataset_path, label)
            for data in listdir(datas_path):
                img = get_img(os.path.join(datas_path, data))
                X.append(img)
                Y.append(i)
        # Create dataset: scale the pixel values by 1/255 and invert them
        # (1 - x); labels are one-hot encoded over num_class classes.
        X = 1 - np.array(X).astype('float32') / 255.
        Y = np.array(Y).astype('float32')
        Y = to_categorical(Y, num_class)
        if not os.path.exists(cache_dir):
            os.makedirs(cache_dir)
        np.save(x_cache, X)
        np.save(y_cache, Y)
    # Fixed random_state keeps the split reproducible between runs.
    X, X_test, Y, Y_test = train_test_split(X, Y, test_size=test_size, random_state=42)
    return X, X_test, Y, Y_test
if __name__ == '__main__':
    # Run as a script: load the cached dataset, or build and cache it.
    # The returned split is discarded.
    get_dataset()
@BenjaminFraser
Copy link
Author

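Changed the label-listing line in get_dataset so that the label directories are passed through sorted(). This gives a deterministic order (numerical for single-digit folder names such as 0–9), which makes it easy to map labels back to data after preprocessing.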

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment