
@ardamavi
Last active June 7, 2023 02:20
For reading image datasets and converting them to NumPy files.
# Arda Mavi
import os
import numpy as np
from os import listdir
from scipy.misc import imread, imresize
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split

# Settings:
img_size = 64
grayscale_images = True
num_class = 10
test_size = 0.2

def get_img(data_path):
    # Getting image array from path:
    img = imread(data_path, flatten=grayscale_images)
    img = imresize(img, (img_size, img_size, 1 if grayscale_images else 3))
    return img

def get_dataset(dataset_path='Dataset'):
    # Getting all data from data path:
    try:
        X = np.load('npy_dataset/X.npy')
        Y = np.load('npy_dataset/Y.npy')
    except:
        labels = listdir(dataset_path)  # Getting labels
        X = []
        Y = []
        for i, label in enumerate(labels):
            datas_path = dataset_path + '/' + label
            for data in listdir(datas_path):
                img = get_img(datas_path + '/' + data)
                X.append(img)
                Y.append(i)
        # Create dataset:
        X = 1 - np.array(X).astype('float32') / 255.
        Y = np.array(Y).astype('float32')
        Y = to_categorical(Y, num_class)
        if not os.path.exists('npy_dataset/'):
            os.makedirs('npy_dataset/')
        np.save('npy_dataset/X.npy', X)
        np.save('npy_dataset/Y.npy', Y)
    X, X_test, Y, Y_test = train_test_split(X, Y, test_size=test_size, random_state=42)
    return X, X_test, Y, Y_test

if __name__ == '__main__':
    get_dataset()
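
For context, get_dataset() expects one sub-folder per class under the dataset path ('Dataset' by default); every image found in a sub-folder gets that folder's enumeration index as its label. A minimal sketch of the assumed layout, with placeholder file names, and a snippet that creates the empty skeleton:

# Assumed directory layout (file names are placeholders, not from the gist):
#
#   Dataset/
#       0/    example_001.jpg  example_002.jpg  ...
#       1/    example_001.jpg  ...
#       ...
#       9/    example_001.jpg  ...
#
# Creating the empty skeleton of this structure:
import os

for label in range(10):  # num_class = 10 in the settings above
    os.makedirs(os.path.join('Dataset', str(label)), exist_ok=True)
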
@Ehsan-Yaghoubi commented Jun 19, 2020

I think some of the functions used here have been removed from their libraries, so this is an updated version. Hope it works.

import os
import numpy as np
from os import listdir
from matplotlib.pyplot import imread
from skimage.transform import resize
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split


# Settings:
img_size = 64
grayscale_images = True
num_class = 10
test_size = 0.2


def get_img(data_path):
    # Getting image array from path:
    img = imread(data_path)
    img = resize(img, (img_size, img_size, 1 if grayscale_images else 3))
    return img


def get_dataset(dataset_path='Dataset'):
    # Getting all data from data path:
    try:
        X = np.load('npy_dataset/X.npy')
        Y = np.load('npy_dataset/Y.npy')
    except:
        labels = listdir(dataset_path)  # Getting labels
        X = []
        Y = []
        for i, label in enumerate(labels):
            datas_path = dataset_path + '/' + label
            for data in listdir(datas_path):
                img = get_img(datas_path + '/' + data)
                X.append(img)
                Y.append(i)
        # Create dataset:
        X = 1 - np.array(X).astype('float32') / 255.
        Y = np.array(Y).astype('float32')
        Y = to_categorical(Y, num_class)
        if not os.path.exists('npy_dataset/'):
            os.makedirs('npy_dataset/')
        np.save('npy_dataset/X.npy', X)
        np.save('npy_dataset/Y.npy', Y)
    X, X_test, Y, Y_test = train_test_split(X, Y, test_size=test_size, random_state=42)
    return X, X_test, Y, Y_test


if __name__ == '__main__':
    X, X_test, Y, Y_test = get_dataset()
    print(X)
    print(X_test)
    print(Y)
    print(Y_test)

To make this code work properly, you should change the following lines (a sketch applying both changes follows the list):

  1. Replace the line "labels = listdir(dataset_path)" with "labels = ['0','1','2','3','4','5','6','7','8','9']"
  2. Replace the line "X = 1 - np.array(X).astype('float32') / 255." with "X = np.array(X).astype('float32')"
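
Applied to the updated script above, the two changes would look roughly like this (a sketch only; get_dataset_fixed is a hypothetical name, and the .npy caching is omitted for brevity):

# Sketch (not the author's code): get_dataset() with both suggested changes.
def get_dataset_fixed(dataset_path='Dataset'):
    # Change 1: fixed label list for the ten digit classes instead of
    # listdir(), so class indices do not depend on directory listing order.
    labels = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
    X, Y = [], []
    for i, label in enumerate(labels):
        class_path = dataset_path + '/' + label
        for name in listdir(class_path):
            X.append(get_img(class_path + '/' + name))
            Y.append(i)
    # Change 2: matplotlib's imread / skimage's resize already return arrays
    # scaled to [0, 1], so the "1 - x / 255." rescaling is dropped:
    X = np.array(X).astype('float32')
    Y = to_categorical(np.array(Y).astype('float32'), num_class)
    return train_test_split(X, Y, test_size=test_size, random_state=42)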

@BhoomiBM

How can I download this dataset ("Sign Language Digits Dataset")?
I tried this code on Google Colab, but it gave errors.
Please help.

@ardamavi (Author)

You can use the Kaggle API to download and use the dataset.
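
For example, with the kaggle package installed and an API token in ~/.kaggle/kaggle.json, something along these lines should fetch it (the dataset slug below is an assumption; check it on the Kaggle dataset page):

# Sketch: download the Sign Language Digits Dataset via the Kaggle API.
# Requires "pip install kaggle" and a Kaggle API token in ~/.kaggle/kaggle.json.
# The dataset slug is an assumption; verify it on Kaggle before running.
from kaggle.api.kaggle_api_extended import KaggleApi

api = KaggleApi()
api.authenticate()
api.dataset_download_files('ardamavi/sign-language-digits-dataset',
                           path='Dataset', unzip=True)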
