Last active
October 14, 2018 12:17
-
-
Save pmarcelino/e7009ba7da7a8a8b54f9c7ed66c22997 to your computer and use it in GitHub Desktop.
Data preparation for Dogs vs. Cats
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Create smaller dataset for Dogs vs. Cats
import os
import shutil

# Folder holding the original images, named 'cat.<i>.jpg' / 'dog.<i>.jpg'.
original_dataset_dir = '/Users/macbook/dogs_cats_dataset/train/'
# Root folder of the smaller dataset built by this script.
base_dir = '/Users/macbook/book/dogs_cats/data'

# Directory layout: <base_dir>/<split>/<class>.
# Only the paths are computed here; build_small_dataset() creates them.
train_dir = os.path.join(base_dir, 'train')
validation_dir = os.path.join(base_dir, 'validation')
test_dir = os.path.join(base_dir, 'test')
train_cats_dir = os.path.join(train_dir, 'cats')
train_dogs_dir = os.path.join(train_dir, 'dogs')
validation_cats_dir = os.path.join(validation_dir, 'cats')
validation_dogs_dir = os.path.join(validation_dir, 'dogs')
test_cats_dir = os.path.join(test_dir, 'cats')
test_dogs_dir = os.path.join(test_dir, 'dogs')

# (split directory, label used in the sanity-check output, image indices).
# train: first 100 images; validation: images 200-249; test: images 250-299.
# NOTE: images 100-199 are not copied into any split.
_SPLITS = (
    ('train', 'training', range(100)),
    ('validation', 'validation', range(200, 250)),
    ('test', 'test', range(250, 300)),
)
_ANIMALS = ('cat', 'dog')


def _copy_images(src_dir, dst_dir, animal, indices):
    """Copy '<animal>.<i>.jpg' from src_dir to dst_dir for every i in indices."""
    for i in indices:
        fname = '{}.{}.jpg'.format(animal, i)
        shutil.copyfile(os.path.join(src_dir, fname),
                        os.path.join(dst_dir, fname))


def build_small_dataset(src_dir=None, dst_dir=None):
    """Build the train/validation/test subset of the Dogs vs. Cats images.

    Creates ``<dst_dir>/<split>/<cats|dogs>`` for each split, copies the
    images listed in ``_SPLITS`` from ``src_dir`` into them, and prints the
    number of files found in each resulting directory.

    Args:
        src_dir: Folder containing 'cat.<i>.jpg' and 'dog.<i>.jpg' files.
            Defaults to the module-level ``original_dataset_dir``.
        dst_dir: Root folder of the smaller dataset. Defaults to the
            module-level ``base_dir``.

    Returns:
        A dict mapping ``(split, class_dir)`` (e.g. ``('train', 'cats')``)
        to the number of entries in that directory after copying.

    Raises:
        FileNotFoundError: If an expected source image does not exist.
    """
    # Resolve defaults at call time so later reassignment of the module-level
    # paths is honoured.
    if src_dir is None:
        src_dir = original_dataset_dir
    if dst_dir is None:
        dst_dir = base_dir

    # Create directories. makedirs(exist_ok=True) also creates missing
    # parents and tolerates directories left over from a previous run.
    for split, _, _ in _SPLITS:
        for animal in _ANIMALS:
            os.makedirs(os.path.join(dst_dir, split, animal + 's'),
                        exist_ok=True)

    # Copy images: all cat splits first, then all dog splits.
    for animal in _ANIMALS:
        for split, _, indices in _SPLITS:
            target = os.path.join(dst_dir, split, animal + 's')
            _copy_images(src_dir, target, animal, indices)

    # Sanity checks
    counts = {}
    for split, label, _ in _SPLITS:
        for animal in _ANIMALS:
            class_dir = animal + 's'
            total = len(os.listdir(os.path.join(dst_dir, split, class_dir)))
            print('total {} {} images:'.format(label, animal), total)
            counts[(split, class_dir)] = total
    return counts


if __name__ == '__main__':
    build_small_dataset()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment