Skip to content

Instantly share code, notes, and snippets.

@hsahovic
Last active December 9, 2019 02:52
Show Gist options
  • Save hsahovic/a769d1ff47e83a1bf0c9f68eb0f2587a to your computer and use it in GitHub Desktop.
Save hsahovic/a769d1ff47e83a1bf0c9f68eb0f2587a to your computer and use it in GitHub Desktop.
Split files from a directory into two directories with the same structure.
import os
from tqdm.notebook import tqdm
from shutil import copyfile
def split_dir(source_dir, target_dir_train, target_dir_test, ratio=.2):
    """Copy the files of ``source_dir`` into a train and a test directory.

    The regular files found directly inside ``source_dir`` are taken in
    sorted name order. The first ``int(len(files) * ratio + .5)`` of them
    are copied to ``target_dir_test`` and the remaining ones to
    ``target_dir_train``. Source files are copied, not moved, and
    sub-directories of ``source_dir`` are ignored.

    Args:
        source_dir: Directory whose files are split.
        target_dir_train: Directory receiving the train share; created if
            it does not exist yet.
        target_dir_test: Directory receiving the test share; created if
            it does not exist yet.
        ratio: Fraction of the files, between 0 and 1, sent to the test
            directory.

    Raises:
        ValueError: If ``ratio`` is not within ``[0, 1]``.
    """
    # A negative ratio would yield a negative slice index below and silently
    # send almost every file to the test set, so reject it up front.
    if not 0 <= ratio <= 1:
        raise ValueError(
            "ratio must be between 0 and 1, got {!r}".format(ratio)
        )

    # os.listdir returns entries in arbitrary order; sorting makes the split
    # reproducible. Non-files are skipped because copyfile cannot copy
    # directories.
    files = sorted(
        name
        for name in os.listdir(source_dir)
        if os.path.isfile(os.path.join(source_dir, name))
    )
    # Adding .5 before truncating rounds the test share half-up.
    to_test = int(len(files) * ratio + .5)

    os.makedirs(target_dir_train, exist_ok=True)
    os.makedirs(target_dir_test, exist_ok=True)

    for name in files[:to_test]:
        copyfile(
            os.path.join(source_dir, name),
            os.path.join(target_dir_test, name),
        )
    for name in files[to_test:]:
        copyfile(
            os.path.join(source_dir, name),
            os.path.join(target_dir_train, name),
        )
# Output roots for the two splits and the input root that is scanned.
TRAIN_DIR = 'Train'
TEST_DIR = 'Test'
SOURCE_DIR = 'Images'

# exist_ok avoids the check-then-create race of isdir() followed by mkdir().
os.makedirs(TRAIN_DIR, exist_ok=True)
os.makedirs(TEST_DIR, exist_ok=True)

# Only sub-directories of SOURCE_DIR are split: a stray file at the top level
# would otherwise get Train/<name> and Test/<name> directories created for it
# and then make os.listdir fail inside split_dir. Sorting keeps the
# processing order stable between runs.
sub_dirs = sorted(
    entry
    for entry in os.listdir(SOURCE_DIR)
    if os.path.isdir(os.path.join(SOURCE_DIR, entry))
)

# Mirror each sub-directory under both output roots, then split its files.
for target_dir in tqdm(sub_dirs):
    train_subdir = os.path.join(TRAIN_DIR, target_dir)
    test_subdir = os.path.join(TEST_DIR, target_dir)
    os.makedirs(train_subdir, exist_ok=True)
    os.makedirs(test_subdir, exist_ok=True)
    split_dir(
        os.path.join(SOURCE_DIR, target_dir),
        train_subdir,
        test_subdir,
    )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment