# yolov5 data split script
# Source: GitHub gist otamajakusi/3a96951aef344c3b959b472d2f4e47cb
# Created: March 20, 2021 13:35
import glob
import os
import random
import shutil
import sys
# Directory that holds the images and their "*.txt" label files.
# It is taken from the first command-line argument; when no argument is
# given the current directory is used instead of failing with IndexError.
dataset_path = sys.argv[1] if len(sys.argv) > 1 else "."
# Percentage of images to be used for the validation set
percentage_test = 20
# Output layout, relative to the current working directory. The copy step
# of this script writes into "tiles/", so these are the directories that
# must exist (in a notebook: `!mkdir -p <dir>` for each of them):
#   tiles/images/train
#   tiles/images/valid
#   tiles/labels/train
#   tiles/labels/valid
def get_ext(basename, exts=("jpg", "png")):
    """Return the extension of the image file that belongs to *basename*.

    Each candidate in *exts* is probed in order, first in its upper-case
    spelling and then in its lower-case spelling, by checking whether
    ``f"{basename}.{candidate}"`` exists on disk.

    Args:
        basename: Path of the file without the trailing ``.ext`` part.
        exts: Extensions to try (without the leading dot). The default is an
            immutable tuple so it cannot be altered between calls.

    Returns:
        The matching extension exactly as it was probed (all upper-case or
        all lower-case), or ``None`` when no candidate file exists.
    """
    for ext in exts:
        # Upper-case is checked before lower-case, so "X.JPG" wins over
        # "X.jpg" when both files are present.
        for candidate in (ext.upper(), ext.lower()):
            if os.path.exists(f"{basename}.{candidate}"):
                return candidate
    return None
# Populate the folders: every label file that has a matching image is copied,
# together with that image, into either the validation or the training split
# under "tiles/" in the current working directory.
p = percentage_test / 100

# Create the destination directories up front so the copies cannot fail (or
# land in a plain file named "train"/"valid") because a directory is missing.
for kind_dir in ("images", "labels"):
    for split_dir in ("train", "valid"):
        os.makedirs(os.path.join("tiles", kind_dir, split_dir), exist_ok=True)

# glob.escape keeps "[", "*" or "?" inside dataset_path from being treated as
# wildcard syntax.
for pathAndFilename in glob.iglob(os.path.join(glob.escape(dataset_path), "*.txt")):
    title = os.path.splitext(os.path.basename(pathAndFilename))[0]
    img_ext = get_ext(f"{dataset_path}/{title}")
    if img_ext is None:
        print(f"WARN: image not found for {pathAndFilename}")
        continue
    # Each sample is assigned independently, so the validation share is only
    # approximately percentage_test percent.
    split = "valid" if random.random() <= p else "train"
    # shutil.copy replaces the shell "cp" call: no shell quoting problems
    # with spaces or metacharacters in paths, and a failed copy raises
    # instead of being silently ignored.
    shutil.copy(
        os.path.join(dataset_path, f"{title}.{img_ext}"),
        os.path.join("tiles", "images", split),
    )
    shutil.copy(
        os.path.join(dataset_path, f"{title}.txt"),
        os.path.join("tiles", "labels", split),
    )
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment