Last active
May 16, 2025 04:26
-
-
Save AdamDimech/7977dda447bef9dca7074cda41f50ba2 to your computer and use it in GitHub Desktop.
Divide Label Studio YOLO annotation files into training and validation sets. More information at https://code.adonline.id.au/divide-label-studio-yolo-annotation-files-into-training-and-validation-sets/
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env python | |
| # Splitting of image data sets generated from Label Studio | |
| # Adam Dimech | |
| # November 2024 | |
| import os, shutil, random, argparse | |
def options(argv=None):
    """Parse the command-line options of the dataset splitter.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) ``sys.argv[1:]`` is used, exactly as before.

    Returns:
        An ``argparse.Namespace`` with ``folder`` (str), ``extension`` (str),
        ``split`` (int, 0-100) and ``seed`` (int, or ``None`` if not given).

    Raises:
        SystemExit: via ``argparse`` when a required option is missing, a
            numeric option is not an integer, or ``--split`` is outside
            0-100.
    """
    parser = argparse.ArgumentParser(
        description="Divide Label Studio YOLO annotation files into training and validation sets"
    )
    parser.add_argument(
        "-f", "--folder",
        help="Dataset folder containing the images/ and labels/ sub-folders.",
        required=True,
    )
    parser.add_argument("-e", "--extension", help="File extension of images", required=True)
    # type=int lets argparse reject non-numeric input with a usage message
    # instead of a ValueError traceback later on.
    parser.add_argument(
        "-s", "--split",
        type=int,
        help="Percentage of files assigned to the training set; the remainder "
             "is used for validation (typically 90)",
        required=True,
    )
    parser.add_argument("-r", "--seed", type=int, help="Set seed for randomisation", required=False)
    args = parser.parse_args(argv)
    # A percentage outside 0-100 would make the split index fall outside the
    # file list, so refuse it up front.
    if not 0 <= args.split <= 100:
        parser.error("--split must be a percentage between 0 and 100")
    return args
def main():
    """Split a Label Studio YOLO export into training and validation sets.

    Expects ``<folder>/images`` and ``<folder>/labels`` to exist. Image files
    with the requested extension that sit directly inside ``images`` are
    shuffled with a seeded generator; the first ``split`` percent are moved
    to ``images/train`` (labels to ``labels/train``) and the rest to
    ``images/val`` / ``labels/val``.

    Differences from a naive implementation, all deliberate:

    * Input is validated before anything on disk is touched.
    * Existing ``train``/``val`` folders are emptied by moving their files
      back into the parent folder, not by deleting them. The files were
      *moved* there by an earlier run, so deleting would destroy the data.
    * File names are sorted before shuffling so that a given seed yields
      the same split regardless of filesystem listing order.
    * Only ``images`` itself is scanned, because that is the only place
      files are moved from.
    * An image without a ``.txt`` label is still moved and a warning is
      printed, instead of aborting half-way through the move.

    Raises:
        SystemExit: if the folder layout, extension or split is invalid.
        shutil.Error: if restoring files from an earlier split would
            overwrite a file of the same name.
    """
    # Get options
    args = options()

    folder = args.folder
    images_dir = os.path.join(folder, "images")
    labels_dir = os.path.join(folder, "labels")
    for required in (images_dir, labels_dir):
        if not os.path.isdir(required):
            raise SystemExit("Expected folder not found: {}".format(required))

    # Add leading dot to file extension if missing
    ext = args.extension
    if not ext.startswith("."):
        ext = "." + ext
    if ext == ".":
        raise SystemExit("The image file extension must not be empty")

    # int() tolerates the values arriving either as strings or as integers.
    split_percentage = int(args.split)
    if not 0 <= split_percentage <= 100:
        raise SystemExit("--split must be a percentage between 0 and 100")
    seed = 100 if args.seed is None else int(args.seed)

    print("Images are located at", images_dir, "\nLabels are located at", labels_dir)

    # (Re)create empty train/val folders under both images/ and labels/
    for pool_dir in (images_dir, labels_dir):
        for subset in ("train", "val"):
            _reset_subset_dir(os.path.join(pool_dir, subset), pool_dir)

    # Randomise files, using seed (default 100)
    stems = _collect_stems(images_dir, ext)
    random.Random(seed).shuffle(stems)
    split = split_percentage * len(stems) // 100

    print("Moving training data...")
    for stem in stems[:split]:
        _move_pair(stem, ext, images_dir, labels_dir, "train")

    print("Moving validation data...")
    for stem in stems[split:]:
        _move_pair(stem, ext, images_dir, labels_dir, "val")

    print("Complete.")


def _reset_subset_dir(subset_dir, pool_dir):
    """Make *subset_dir* exist and be empty, returning old contents to *pool_dir*."""
    if os.path.isdir(subset_dir):
        for name in os.listdir(subset_dir):
            # With a directory as destination shutil.move refuses to
            # overwrite an existing entry, so nothing is silently lost.
            shutil.move(os.path.join(subset_dir, name), pool_dir)
        os.rmdir(subset_dir)
    os.mkdir(subset_dir)


def _collect_stems(images_dir, ext):
    """Return the sorted base names (without *ext*) of the image files in *images_dir*."""
    stems = [
        name[:-len(ext)]
        for name in os.listdir(images_dir)
        if name.endswith(ext) and os.path.isfile(os.path.join(images_dir, name))
    ]
    # Sorting gives the seeded shuffle a deterministic starting order.
    stems.sort()
    return stems


def _move_pair(stem, ext, images_dir, labels_dir, subset):
    """Move one image and, if present, its ``.txt`` label into the *subset* folders."""
    image_name = stem + ext
    shutil.move(
        os.path.join(images_dir, image_name),
        os.path.join(images_dir, subset, image_name),
    )
    label_name = stem + ".txt"
    label_src = os.path.join(labels_dir, label_name)
    if os.path.isfile(label_src):
        shutil.move(label_src, os.path.join(labels_dir, subset, label_name))
    else:
        print("Warning: no label file found for", image_name)
# Run the splitter only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment