Skip to content

Instantly share code, notes, and snippets.

@AdamDimech
Last active May 16, 2025 04:26
Show Gist options
  • Select an option

  • Save AdamDimech/7977dda447bef9dca7074cda41f50ba2 to your computer and use it in GitHub Desktop.

Select an option

Save AdamDimech/7977dda447bef9dca7074cda41f50ba2 to your computer and use it in GitHub Desktop.
Divide Label Studio YOLO annotation files into training and validation sets. More information at https://code.adonline.id.au/divide-label-studio-yolo-annotation-files-into-training-and-validation-sets/
#!/usr/bin/env python
# Splitting of image data sets generated from Label Studio
# Adam Dimech
# November 2024
import os, shutil, random, argparse
def options():
    """Parse the command-line arguments for the data set splitter.

    Reads ``sys.argv`` through ``argparse``. Invalid or missing arguments
    make ``argparse`` print a usage message and exit.

    Returns:
        argparse.Namespace: ``folder`` and ``extension`` as strings,
        ``split`` as an int, and ``seed`` as an int (``None`` when the
        option is omitted).
    """
    parser = argparse.ArgumentParser(
        description="Divide Label Studio YOLO images and annotations into training and validation sets"
    )
    parser.add_argument("-f", "--folder", help="Target folder of images.", required=True)
    parser.add_argument("-e", "--extension", help="File extension of images", required=True)
    # type=int makes argparse reject non-numeric values with a usage error
    # instead of failing later with a bare ValueError.
    parser.add_argument(
        "-s",
        "--split",
        type=int,
        help="Percentage of files assigned to training; the rest go to validation (typically 90)",
        required=True,
    )
    parser.add_argument("-r", "--seed", type=int, help="Set seed for randomisation", required=False)
    return parser.parse_args()
def _reset_directory(path):
    """Create an empty directory at *path*, deleting any existing one first."""
    if os.path.exists(path):
        shutil.rmtree(path)
    os.mkdir(path)


def _move_pairs(names, ext, folder_images, folder_labels, subset):
    """Move each image and its ``.txt`` annotation into the *subset* subfolders."""
    for name in names:
        img = name + ext
        shutil.move(
            os.path.join(folder_images, img),
            os.path.join(folder_images, subset, img),
        )
        annot = name + ".txt"
        src_label = os.path.join(folder_labels, annot)
        if os.path.isfile(src_label):
            shutil.move(src_label, os.path.join(folder_labels, subset, annot))
        else:
            # Report the gap instead of aborting with the data set half moved.
            print("Warning: no annotation file found for", img)


def main():
    """Split the images and labels of a data set into ``train`` and ``val``.

    Expects ``<folder>/images`` and ``<folder>/labels`` to exist. Image files
    with the requested extension that sit directly in ``images`` are shuffled
    with a seeded random generator (seed 100 unless ``--seed`` is given); the
    first ``split`` percent are moved to ``images/train`` and the remainder
    to ``images/val``. The ``.txt`` file with the same base name is moved
    from ``labels`` to the corresponding ``labels`` subfolder.

    WARNING: existing ``train`` and ``val`` subfolders are deleted together
    with their contents before the split, so re-running the script on an
    already split data set discards the previously moved files.

    Raises:
        SystemExit: if the images or labels folder is missing, or the split
            percentage is outside 0-100.
    """
    # Get options
    args = options()

    # Preparing the folder structure
    folder = args.folder
    folder_images = os.path.join(folder, "images")
    folder_labels = os.path.join(folder, "labels")
    for required in (folder_images, folder_labels):
        if not os.path.isdir(required):
            raise SystemExit("Folder not found: " + required)

    # Add leading dot to file extension if missing
    ext = args.extension
    if not ext.startswith("."):
        ext = "." + ext

    # Validate the split before anything on disk is touched
    split_percentage = int(args.split)
    if not 0 <= split_percentage <= 100:
        raise SystemExit("Split percentage must be between 0 and 100.")

    print("Images are located at", folder_images, "\nLabels are located at", folder_labels)

    # Create new folders even if folders already exist
    for parent in (folder_images, folder_labels):
        for subset in ("train", "val"):
            _reset_directory(os.path.join(parent, subset))

    # Only files directly inside images/ can be moved, so only those are
    # listed. Sorting first makes the seeded shuffle reproducible, because
    # directory listing order is not guaranteed.
    files = sorted(
        entry[:-len(ext)]
        for entry in os.listdir(folder_images)
        if entry.endswith(ext) and os.path.isfile(os.path.join(folder_images, entry))
    )

    # Randomise files, using seed (if supplied)
    seed = int(args.seed) if args.seed is not None else 100
    random.Random(seed).shuffle(files)

    split = split_percentage * len(files) // 100

    print("Moving training data...")
    _move_pairs(files[:split], ext, folder_images, folder_labels, "train")

    print("Moving validation data...")
    _move_pairs(files[split:], ext, folder_images, folder_labels, "val")

    print("Complete.")
# Run the splitter only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment