Skip to content

Instantly share code, notes, and snippets.

@otamajakusi
Created March 20, 2021 13:35
Show Gist options
  • Save otamajakusi/3a96951aef344c3b959b472d2f4e47cb to your computer and use it in GitHub Desktop.
Save otamajakusi/3a96951aef344c3b959b472d2f4e47cb to your computer and use it in GitHub Desktop.
yolov5 data split script
import glob
import os
import random
import shutil
import sys
# Source directory holding the images and their "*.txt" label files.
# It is taken from the first command-line argument; fail with a usage hint
# instead of a bare IndexError when the argument is missing.
if len(sys.argv) < 2:
    sys.exit("usage: python <this script> <dataset_path>")
dataset_path = sys.argv[1]
# Percentage of files sent to the validation split. Each file is assigned
# independently at random, so the realised share is only approximately
# this value.
percentage_test = 20
# Notebook-style commands that create the output tree the copy loop writes
# into (paths are relative to the current working directory).
"""
!mkdir tiles
!mkdir tiles/images
!mkdir tiles/labels
!mkdir tiles/images/train
!mkdir tiles/images/valid
!mkdir tiles/labels/train
!mkdir tiles/labels/valid
"""
def get_ext(basename, exts=("jpg", "png")):
    """Return the extension of the first existing image file for *basename*.

    For every entry of *exts*, in order, the upper-case spelling is probed
    first and the lower-case spelling second (``<basename>.JPG`` before
    ``<basename>.jpg``). Mixed-case spellings such as ``.Jpg`` are not probed.

    Args:
        basename: Path of the image without its extension and without the
            trailing dot.
        exts: Candidate extensions, without the leading dot. The default is
            an immutable tuple so it cannot be altered between calls.

    Returns:
        The matching extension exactly as it was found on disk (upper- or
        lower-case), or ``None`` when no candidate file exists.
    """
    for ext in exts:
        # Keep the original probe order: upper-case first, then lower-case.
        for candidate in (ext.upper(), ext.lower()):
            if os.path.exists(f"{basename}.{candidate}"):
                return candidate
    return None
# Populate the folders
# Probability with which a single label/image pair goes to the validation set.
p = percentage_test / 100

# Make sure the output tree exists (relative to the current working
# directory); copying into a missing directory would otherwise fail or
# create a plain file with the directory's name.
for subset in ("train", "valid"):
    for kind in ("images", "labels"):
        os.makedirs(os.path.join("tiles", kind, subset), exist_ok=True)

for pathAndFilename in glob.iglob(os.path.join(dataset_path, "*.txt")):
    title = os.path.splitext(os.path.basename(pathAndFilename))[0]
    img_ext = get_ext(f"{dataset_path}/{title}")
    if img_ext is None:
        # A label file without a matching image is skipped, not copied.
        print(f"WARN: image not found for {pathAndFilename}")
        continue
    # Independent random draw per file: "valid" with probability p.
    split = "valid" if random.random() <= p else "train"
    # shutil.copy instead of a shell "cp" string: works with spaces and
    # shell metacharacters in paths, is portable, and raises on failure.
    shutil.copy(
        os.path.join(dataset_path, f"{title}.{img_ext}"),
        os.path.join("tiles", "images", split),
    )
    shutil.copy(
        os.path.join(dataset_path, f"{title}.txt"),
        os.path.join("tiles", "labels", split),
    )
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment