Skip to content

Instantly share code, notes, and snippets.

@e96031413
Created March 29, 2023 09:33
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save e96031413/55197e110fb6447e0c59e833438c236f to your computer and use it in GitHub Desktop.
Save e96031413/55197e110fb6447e0c59e833438c236f to your computer and use it in GitHub Desktop.
import os
import xml.etree.ElementTree as ET
import csv
# Hard-coded locations of the PASCALRAW annotations, image folders and split lists
voc_path = '/home/Yanwei_Liu/Datasets/PASCALRAW/annotations/'
train_img_path = '/home/Yanwei_Liu/Datasets/PASCALRAW/images/train/'
val_img_path = '/home/Yanwei_Liu/Datasets/PASCALRAW/images/val/'
train_file_path = '/home/Yanwei_Liu/Datasets/PASCALRAW/trainval/train.txt'
val_file_path = '/home/Yanwei_Liu/Datasets/PASCALRAW/trainval/val.txt'

# Load each split list: one entry per line, with surrounding whitespace removed.
# The entries are later compared against annotation file names without their extension.
with open(train_file_path, 'r') as train_split:
    train_file_list = [entry.strip() for entry in train_split]
with open(val_file_path, 'r') as val_split:
    val_file_list = [entry.strip() for entry in val_split]
# Class labels accepted in the annotations (the 20 PASCAL VOC categories);
# a label's position in this list is its class index.
class_names = [
    "aeroplane", "bicycle", "bird", "boat", "bottle",
    "bus", "car", "cat", "chair", "cow",
    "diningtable", "dog", "horse", "motorbike", "person",
    "pottedplant", "sheep", "sofa", "train", "tvmonitor",
]
def _write_annotation_csv(csv_path, annotation_dir, img_dir, stems, known_classes):
    """Write one CSV row per annotated object for the images of one split.

    Every ``.xml`` file in ``annotation_dir`` whose base name is listed in
    ``stems`` is parsed, and each ``<object>`` element yields a row
    ``img_path, xmin, ymin, xmax, ymax, class_label``. ``img_path`` is
    ``img_dir`` joined with the annotation's base name plus ``.png``; the
    coordinates are the integer values stored in ``<bndbox>``.

    Args:
        csv_path: Destination CSV file; truncated if it already exists.
        annotation_dir: Directory containing the XML annotation files.
        img_dir: Directory used as the prefix of every image path.
        stems: Base names (without extension) of the files in this split.
        known_classes: Labels that may appear in an object's ``<name>``.

    Raises:
        ValueError: If an object has a label outside ``known_classes`` or a
            bounding-box coordinate that is not an integer literal.
    """
    suffix = '.xml'
    # Build the lookup sets once instead of scanning a list for every file/object.
    wanted = set(stems)
    accepted = set(known_classes)

    with open(csv_path, 'w', newline='') as csvfile:
        writer = csv.writer(csvfile)
        # os.listdir() returns entries in arbitrary order; sorting keeps the
        # CSV row order reproducible between runs and machines.
        for filename in sorted(os.listdir(annotation_dir)):
            if not filename.endswith(suffix):
                continue
            # Strip only the trailing extension so that names containing
            # additional dots (e.g. "a.b.xml") keep their full base name.
            stem = filename[:-len(suffix)]
            if stem not in wanted:
                continue

            root = ET.parse(os.path.join(annotation_dir, filename)).getroot()
            img_path = os.path.join(img_dir, stem + ".png")
            for obj in root.iter('object'):
                class_label = obj.find('name').text
                # Fail loudly on labels outside the known class list.
                if class_label not in accepted:
                    raise ValueError(
                        "unknown class label %r in %s" % (class_label, filename)
                    )
                bbox = obj.find('bndbox')
                x1 = int(bbox.find('xmin').text)
                y1 = int(bbox.find('ymin').text)
                x2 = int(bbox.find('xmax').text)
                y2 = int(bbox.find('ymax').text)
                writer.writerow([img_path, x1, y1, x2, y2, class_label])


# Convert the XML annotations of each split into a flat CSV listing
# (image path, absolute pixel box corners, class label).
_write_annotation_csv('./pascalraw_annotation_train.csv', voc_path, train_img_path, train_file_list, class_names)
_write_annotation_csv('./pascalraw_annotation_val.csv', voc_path, val_img_path, val_file_list, class_names)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment