Skip to content

Instantly share code, notes, and snippets.

@simgt
Created October 10, 2017 08:48
Show Gist options
  • Save simgt/e642989a2bae318523b556ccbc5b1e87 to your computer and use it in GitHub Desktop.
Save simgt/e642989a2bae318523b556ccbc5b1e87 to your computer and use it in GitHub Desktop.
A Python script to split an image dataset
#! /usr/bin/env python3
import argparse
from pathlib import Path
from random import shuffle
import shutil
def split_dataset(input_dir, output_dir, ratio=0.66):
    """Copy a class-per-directory dataset into ``train`` and ``test`` subsets.

    Every sub-directory of *input_dir* is treated as one class. The files of
    each class are shuffled and the first ``int(len(files) * ratio)`` of them
    are copied to ``output_dir/train/<class>/``; the remaining files are
    copied to ``output_dir/test/<class>/``. The input is never modified.

    Args:
        input_dir: ``pathlib.Path`` of the dataset root (one directory per
            class).
        output_dir: ``pathlib.Path`` under which ``train`` and ``test`` are
            created as needed.
        ratio: Fraction of each class that goes to ``train``; must lie in
            ``[0, 1]``.

    Returns:
        A dict ``{'train': n, 'test': m}`` with the number of files copied
        into each subset.

    Raises:
        ValueError: If *ratio* is outside ``[0, 1]`` (NaN included).
    """
    if not 0.0 <= ratio <= 1.0:
        raise ValueError(
            'ratio must be between 0 and 1, got {}'.format(ratio))

    counts = {'train': 0, 'test': 0}
    for class_path in input_dir.iterdir():
        # Stray files next to the class directories (README, .DS_Store, ...)
        # are not classes; calling iterdir() on them would raise
        # NotADirectoryError.
        if not class_path.is_dir():
            continue

        # Keep regular files only: shutil.copy cannot copy a directory.
        samples = [p.relative_to(input_dir)
                   for p in class_path.iterdir() if p.is_file()]
        shuffle(samples)
        split = int(len(samples) * ratio)

        for name, subset in (('train', samples[:split]),
                             ('test', samples[split:])):
            for rel in subset:
                target = output_dir / name / rel
                target.parent.mkdir(parents=True, exist_ok=True)
                # str() keeps this working on interpreters whose shutil
                # does not accept path-like objects.
                shutil.copy(str(input_dir / rel), str(target))
            counts[name] += len(subset)
    return counts


if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='Split a dataset in two given a ratio.')
    parser.add_argument('input', type=Path,
                        help='dataset root containing one directory per class')
    parser.add_argument('output', type=Path,
                        help='directory receiving the train/ and test/ trees')
    parser.add_argument('--ratio', type=float, default=0.66,
                        help='fraction of each class copied to train/ '
                             '(default: %(default)s)')
    args = parser.parse_args()

    # Report bad arguments as usage errors rather than raw tracebacks.
    if not args.input.is_dir():
        parser.error('input is not a directory: {}'.format(args.input))
    if not 0.0 <= args.ratio <= 1.0:
        parser.error('--ratio must be between 0 and 1')

    split_dataset(args.input, args.output, args.ratio)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment