Skip to content

Instantly share code, notes, and snippets.

@iaverypadberg
Created April 25, 2022 21:01
Show Gist options
  • Save iaverypadberg/1a3b8ea6f89915ca4529f3396eb72614 to your computer and use it in GitHub Desktop.
Save iaverypadberg/1a3b8ea6f89915ca4529f3396eb72614 to your computer and use it in GitHub Desktop.
Copy corresponding XML and JPG files (predicated on labels)
import os
import shutil
import subprocess
import sys
import xml.etree.ElementTree as ET
from glob import glob
from re import T
# Object names to look for: an annotation is selected when at least one of
# its <object>/<name> entries equals one of these strings.
labels = [
    "some",
    "labels",
    "here",
    "hia",
]
image_dir = "/directory/to/copy/images/from/"
files = glob('/directory/with/xml/files/*.xml')
# annotations_folder and image_folder can point to the same folder
annotations_folder = "output_annotations_folder"
image_folder = "output_images_folder"


def _copy_into(src, dst_dir):
    """Copy *src* into directory *dst_dir*; return True on success.

    The destination directory is created on demand. A failed copy is
    reported on stderr instead of raising, so one bad file does not abort
    the whole run (the previous ``os.system('cp ...')`` call behaved the
    same way: ``cp`` printed an error and the script carried on).
    """
    try:
        os.makedirs(dst_dir, exist_ok=True)
        shutil.copy(src, dst_dir)
    except OSError as err:
        print("could not copy {!r} to {!r}: {}".format(src, dst_dir, err),
              file=sys.stderr)
        return False
    return True


def copy_labeled_files(xml_files, wanted_labels, source_image_dir,
                       annotations_out, images_out):
    """Copy every annotation that mentions a wanted label, plus its image.

    Each XML file is parsed; if any ``<object>/<name>`` text is in
    *wanted_labels*, the XML file is copied to *annotations_out* and the
    image named by the root's ``<filename>`` element (looked up inside
    *source_image_dir*) is copied to *images_out*.

    Files are copied with ``shutil.copy`` rather than through a shell, so
    names containing spaces, quotes or other shell metacharacters are
    handled verbatim.

    Args:
        xml_files: iterable of paths to XML annotation files.
        wanted_labels: iterable of label strings to select on.
        source_image_dir: directory holding the images referenced by the
            annotations (a trailing slash is optional).
        annotations_out: directory receiving the selected XML files.
        images_out: directory receiving the matching images; may be the
            same directory as *annotations_out*.

    Returns:
        The number of annotation files that were successfully copied.

    Raises:
        xml.etree.ElementTree.ParseError: if an XML file is malformed.
        OSError: if an XML file cannot be opened for parsing.
    """
    wanted = set(wanted_labels)
    copied = 0
    for xml_path in xml_files:
        root = ET.parse(xml_path).getroot()

        # findtext() yields None for an <object> without <name>, which is
        # simply "not a wanted label" rather than an AttributeError.
        if not any(obj.findtext('name') in wanted
                   for obj in root.findall('object')):
            continue

        image_name = root.findtext('filename')
        if not image_name:
            print("skipping {!r}: no <filename> element".format(xml_path),
                  file=sys.stderr)
            continue

        if _copy_into(xml_path, annotations_out):
            copied += 1
        _copy_into(os.path.join(source_image_dir, image_name), images_out)
    return copied


copy_labeled_files(files, labels, image_dir, annotations_folder, image_folder)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment