# e96031413/VOC_to_COCO_annotation.py

## VOC_to_COCO_annotation.py
"""
https://github.com/d246810g2000/YOLOX/blob/main/datasets/train_val_data_split_coco.py
"""
import os
import cv2
import json
import random
import shutil
import xml.etree.ElementTree as ET
from tqdm import tqdm

# Dataset split to convert. It selects the image sub-folder below and is
# embedded in the name of the JSON file that gets written.
split = 'val'

# Classes to export. COCO category ids follow this order, starting at 1.
class_names = ['person', 'bicycle', 'car']

# Dataset root; the generated JSON is written to its 'annotations/' sub-folder.
output_dir = '/home/Yanwei_Liu/New_Datasets/PASCALRAW/'
jsons_path = os.path.join(output_dir, 'annotations/')

# Folder holding the images of the selected split.
data_path = f'/home/Yanwei_Liu/New_Datasets/PASCALRAW/images_crop_new/{split}'

def get_annotations(xml_path, class_names):
    """Read the object boxes from one Pascal VOC XML file.

    Args:
        xml_path: Path of the VOC annotation XML file to parse.
        class_names: Ordered sequence of class names. An object's category
            id is the 1-based position of its name in this sequence.

    Returns:
        A list with one ``(x, y, width, height, category_id)`` tuple of ints
        per ``<object>`` element. ``(x, y)`` is the ``xmin``/``ymin`` corner;
        width and height are ``xmax - xmin`` and ``ymax - ymin``.

    Raises:
        ValueError: If an object's name is not in ``class_names`` or a
            coordinate is not numeric.
    """
    def _coord(bndbox, tag):
        # Plain integers take the exact int() path. Float-formatted values
        # such as "48.0" are rejected by int(), so they fall back to
        # float() and are truncated toward zero.
        text = bndbox.find(tag).text
        try:
            return int(text)
        except ValueError:
            return int(float(text))

    root = ET.parse(xml_path).getroot()
    annotations = []
    # Named `obj_node` so the builtin `object` is not shadowed.
    for obj_node in root.findall('object'):
        # Tolerate whitespace/newlines around the class name in the XML.
        cls = (obj_node.find('name').text or '').strip()
        try:
            category_id = class_names.index(cls) + 1
        except ValueError:
            # Same exception type as before, but say which class and file.
            raise ValueError(
                f'unknown class {cls!r} in {xml_path}') from None

        bndbox = obj_node.find('bndbox')
        xmin = _coord(bndbox, 'xmin')
        ymin = _coord(bndbox, 'ymin')
        xmax = _coord(bndbox, 'xmax')
        ymax = _coord(bndbox, 'ymax')
        annotations.append((xmin, ymin, xmax - xmin, ymax - ymin, category_id))
    return annotations


# Make sure the folder that receives the JSON file exists. The check used to
# test output_dir but create jsons_path, so a dataset root that already
# existed without an 'annotations/' sub-folder made the final open() fail.
os.makedirs(jsons_path, exist_ok=True)

# Sections of the COCO file that do not depend on the images.
data = {}
data['info'] = {
    'description': '',
    'url': '',
    'version': '1.0',
    'year': 2024,
    'contributor': 'Yanwei',
    'date_created': ''}
# Category ids are 1-based and follow the order of class_names.
data['categories'] = [{'supercategory': 'deterioration', 'id': i+1, 'name': cls} for i, cls in enumerate(class_names)]
data['licenses'] = []

annotation_data = data.copy()

# Only files whose last dot-separated part is one of these extensions
# (case-sensitive) are treated as images; sorted for stable image ids.
imgs_list = sorted([file for file in os.listdir(data_path) if file.split('.')[-1] in ['jpg', 'png', 'tiff']])

# The XML files are looked up in a parallel tree: the image path with
# 'images_crop_new' swapped for 'annotations'. Loop-invariant, so computed once.
xml_dir = data_path.replace('images_crop_new', 'annotations')

img_id = 1
ann_id = 1
all_imgs = []
all_anns = []

for img in tqdm(imgs_list):
    img_path = os.path.join(data_path, img)
    image = cv2.imread(img_path)
    if image is None:
        # cv2.imread reports an unreadable file by returning None instead of
        # raising; fail with the offending path rather than an opaque
        # AttributeError on `.shape`.
        raise ValueError(f'cannot read image: {img_path}')
    h, w = image.shape[:2]
    # splitext keeps dots inside the stem ('a.b.jpg' -> 'a.b'), whereas
    # split('.')[0] would have looked for 'a.xml'.
    xml_path = os.path.join(xml_dir, os.path.splitext(img)[0] + '.xml')
    all_imgs.append({'id': img_id,
                     'width': w,
                     'height': h,
                     'license': 1,
                     'file_name': img})
    annotations = get_annotations(xml_path, class_names)
    for ann in annotations:
        # Each tuple is (x, y, width, height, category_id).
        box_x, box_y, box_w, box_h, category_id = ann
        all_anns.append({'id': ann_id,
                         'image_id': img_id,
                         'category_id': category_id,
                         'segmentation': [],
                         'bbox': [box_x, box_y, box_w, box_h],
                         'area': box_w * box_h,
                         'iscrowd': 0})
        ann_id += 1
    img_id += 1


annotation_data['images'] = all_imgs
annotation_data['annotations'] = all_anns


with open(os.path.join(jsons_path, f'instances_{split}2017.json'), 'w') as json_file:
    json.dump(annotation_data, json_file)
	"""
	https://github.com/d246810g2000/YOLOX/blob/main/datasets/train_val_data_split_coco.py
	"""
	import os
	import cv2
	import json
	import random
	import shutil
	import xml.etree.ElementTree as ET
	from tqdm import tqdm

	# Dataset split to convert. It selects the image sub-folder below and is
	# embedded in the name of the JSON file that gets written.
	split = 'val'

	# Classes to export. COCO category ids follow this order, starting at 1.
	class_names = ['person', 'bicycle', 'car']

	# Dataset root; the generated JSON is written to its 'annotations/' sub-folder.
	output_dir = '/home/Yanwei_Liu/New_Datasets/PASCALRAW/'
	jsons_path = os.path.join(output_dir, 'annotations/')

	# Folder holding the images of the selected split.
	data_path = f'/home/Yanwei_Liu/New_Datasets/PASCALRAW/images_crop_new/{split}'

	def get_annotations(xml_path, class_names):
	    """Read the object boxes from one Pascal VOC XML file.

	    Args:
	        xml_path: Path of the VOC annotation XML file to parse.
	        class_names: Ordered sequence of class names. An object's category
	            id is the 1-based position of its name in this sequence.

	    Returns:
	        A list with one ``(x, y, width, height, category_id)`` tuple of ints
	        per ``<object>`` element. ``(x, y)`` is the ``xmin``/``ymin`` corner;
	        width and height are ``xmax - xmin`` and ``ymax - ymin``.

	    Raises:
	        ValueError: If an object's name is not in ``class_names`` or a
	            coordinate is not numeric.
	    """
	    def _coord(bndbox, tag):
	        # Plain integers take the exact int() path. Float-formatted values
	        # such as "48.0" are rejected by int(), so they fall back to
	        # float() and are truncated toward zero.
	        text = bndbox.find(tag).text
	        try:
	            return int(text)
	        except ValueError:
	            return int(float(text))

	    root = ET.parse(xml_path).getroot()
	    annotations = []
	    # Named `obj_node` so the builtin `object` is not shadowed.
	    for obj_node in root.findall('object'):
	        # Tolerate whitespace/newlines around the class name in the XML.
	        cls = (obj_node.find('name').text or '').strip()
	        try:
	            category_id = class_names.index(cls) + 1
	        except ValueError:
	            # Same exception type as before, but say which class and file.
	            raise ValueError(
	                f'unknown class {cls!r} in {xml_path}') from None

	        bndbox = obj_node.find('bndbox')
	        xmin = _coord(bndbox, 'xmin')
	        ymin = _coord(bndbox, 'ymin')
	        xmax = _coord(bndbox, 'xmax')
	        ymax = _coord(bndbox, 'ymax')
	        annotations.append((xmin, ymin, xmax - xmin, ymax - ymin, category_id))
	    return annotations


	# Make sure the folder that receives the JSON file exists. The check used to
	# test output_dir but create jsons_path, so a dataset root that already
	# existed without an 'annotations/' sub-folder made the final open() fail.
	os.makedirs(jsons_path, exist_ok=True)

	# Sections of the COCO file that do not depend on the images.
	data = {}
	data['info'] = {
	    'description': '',
	    'url': '',
	    'version': '1.0',
	    'year': 2024,
	    'contributor': 'Yanwei',
	    'date_created': ''}
	# Category ids are 1-based and follow the order of class_names.
	data['categories'] = [{'supercategory': 'deterioration', 'id': i+1, 'name': cls} for i, cls in enumerate(class_names)]
	data['licenses'] = []

	annotation_data = data.copy()

	# Only files whose last dot-separated part is one of these extensions
	# (case-sensitive) are treated as images; sorted for stable image ids.
	imgs_list = sorted([file for file in os.listdir(data_path) if file.split('.')[-1] in ['jpg', 'png', 'tiff']])

	# The XML files are looked up in a parallel tree: the image path with
	# 'images_crop_new' swapped for 'annotations'. Loop-invariant, so computed once.
	xml_dir = data_path.replace('images_crop_new', 'annotations')

	img_id = 1
	ann_id = 1
	all_imgs = []
	all_anns = []

	for img in tqdm(imgs_list):
	    img_path = os.path.join(data_path, img)
	    image = cv2.imread(img_path)
	    if image is None:
	        # cv2.imread reports an unreadable file by returning None instead of
	        # raising; fail with the offending path rather than an opaque
	        # AttributeError on `.shape`.
	        raise ValueError(f'cannot read image: {img_path}')
	    h, w = image.shape[:2]
	    # splitext keeps dots inside the stem ('a.b.jpg' -> 'a.b'), whereas
	    # split('.')[0] would have looked for 'a.xml'.
	    xml_path = os.path.join(xml_dir, os.path.splitext(img)[0] + '.xml')
	    all_imgs.append({'id': img_id,
	                     'width': w,
	                     'height': h,
	                     'license': 1,
	                     'file_name': img})
	    annotations = get_annotations(xml_path, class_names)
	    for ann in annotations:
	        # Each tuple is (x, y, width, height, category_id).
	        box_x, box_y, box_w, box_h, category_id = ann
	        all_anns.append({'id': ann_id,
	                         'image_id': img_id,
	                         'category_id': category_id,
	                         'segmentation': [],
	                         'bbox': [box_x, box_y, box_w, box_h],
	                         'area': box_w * box_h,
	                         'iscrowd': 0})
	        ann_id += 1
	    img_id += 1



	annotation_data['images'] = all_imgs
	annotation_data['annotations'] = all_anns


	with open(os.path.join(jsons_path, f'instances_{split}2017.json'), 'w') as json_file:
	    json.dump(annotation_data, json_file)