Skip to content

Instantly share code, notes, and snippets.

Created May 25, 2023 11:21
Show Gist options
  • Save diramazioni/69c4340e6663bb3e5c7193b860fce04e to your computer and use it in GitHub Desktop.
Save diramazioni/69c4340e6663bb3e5c7193b860fce04e to your computer and use it in GitHub Desktop.
The DIANNE annotation tool outputs XML annotations; this script converts the dataset of the ROSE challenge to COCO format.
import os
import xml.etree.ElementTree as ET
import json
import cv2
from PIL import Image
def _point_xy(point):
    """Return the ``(x, y)`` pair of a ``<point>`` element, or ``None``.

    NOTE(review): the exact point schema is not visible in this file, so both
    ``<point x=".." y=".."/>`` attributes and ``<x>``/``<y>`` child elements
    are accepted -- confirm against a real DIANNE annotation.
    """
    x_raw = point.get('x')
    y_raw = point.get('y')
    if x_raw is None or y_raw is None:
        x_raw = point.findtext('x')
        y_raw = point.findtext('y')
    if x_raw is None or y_raw is None:
        return None
    return float(x_raw), float(y_raw)


def _category_id_for(label, categories):
    """Map a clipping class label to a COCO category id, or ``None``."""
    if not label:
        return None
    if label == "unknown":
        # unknown are classified as weed, we avoid distinguishing between known weeds
        return 2
    for cat in categories:
        # Substring match kept from the original script; the first hit wins so
        # that exactly one id is produced per clipping.
        if label in cat['name']:
            return cat['id']
    return None


def _polygon_area(flat_polygon):
    """Return the area of a flat ``[x0, y0, x1, y1, ...]`` polygon (shoelace)."""
    xs = flat_polygon[::2]
    ys = flat_polygon[1::2]
    count = len(xs)
    twice_area = sum(
        xs[i] * ys[(i + 1) % count] - xs[(i + 1) % count] * ys[i]
        for i in range(count)
    )
    return abs(twice_area) / 2.0


def _read_clippings(root, categories):
    """Collect ``(category_id, flat_polygon)`` pairs from an annotation tree."""
    regions = []
    for clipping in root.findall('.//clipping'):
        points = clipping.find('points')
        if points is None:
            continue
        segmentation = []
        for point in points.findall('point'):
            xy = _point_xy(point)
            if xy is not None:
                segmentation.extend(xy)
        if len(segmentation) < 6:
            # Fewer than three vertices is not a polygon (and min()/max()
            # below would fail on an empty list).
            continue
        # Extract the category information
        category_id = _category_id_for(clipping.findtext('class'), categories)
        if category_id is None:
            print(f"unknown category {clipping.findtext('class')}")
            continue
        # Keeping id and polygon in one tuple guarantees they stay aligned.
        regions.append((category_id, segmentation))
    return regions


def _locate_image(image_set_dir, stem):
    """Return ``(file_name, file_path)`` of the JPEG for *stem*, or ``None``.

    When only a PNG exists it is converted to a JPEG stored next to it.
    NOTE(review): the conversion step is reconstructed from the surviving
    ``img_png =`` / ``.replace('.png', '.jpg')`` lines -- confirm that saving
    the JPEG beside the source image is the intended behaviour.
    """
    jpg_name = stem + '.jpg'
    jpg_path = os.path.realpath(os.path.join(image_set_dir, jpg_name))
    if os.path.exists(jpg_path):
        return jpg_name, jpg_path
    png_path = os.path.realpath(os.path.join(image_set_dir, stem + '.png'))
    if not os.path.exists(png_path):
        print(f"image not found {png_path}")
        return None
    # Swap only the extension; str.replace could also hit a directory name.
    jpg_path = os.path.splitext(png_path)[0] + '.jpg'
    with Image.open(png_path) as img_png:
        # JPEG has no alpha channel, so normalise to RGB before saving.
        img_png.convert('RGB').save(jpg_path)
    return jpg_name, jpg_path


def rose2COCO(coco_annotations, root_dir, name_ds, name_cls):
    """Add the images and annotations of one ROSE dataset/class to a COCO dict.

    For every year in 2019-2021 the XML files found in
    ``<root_dir>/<year>/ref/<name_ds>/<name_cls>`` are parsed, the matching
    image is looked up in ``<root_dir>/<year>/src/<name_ds>/<name_cls>`` and
    symlinked into ``<root_dir>/COCO/<name_cls>/data``.

    Args:
        coco_annotations: COCO dictionary with ``'categories'``, ``'images'``
            and ``'annotations'`` keys; it is extended in place.
        root_dir: base directory of the dataset.
        name_ds: name of the dataset.
        name_cls: name of the class.

    Returns:
        The same ``coco_annotations`` object that was passed in.
    """
    # Define the directory paths
    cls_dir = os.path.realpath(f'{root_dir}/COCO/{name_cls}')
    image_dir = 'src'
    annotation_dir = 'ref'
    years = ["2019", "2020", "2021"]
    img_data = os.path.realpath(f'{cls_dir}/data')
    # make the dir for the output
    os.makedirs(img_data, exist_ok=True)

    # Loop over the directories
    for year in years:
        year_dir = os.path.join(root_dir, year)
        image_set_dir = os.path.join(year_dir, image_dir, name_ds, name_cls)
        annotation_set_dir = os.path.join(year_dir, annotation_dir, name_ds, name_cls)
        if not os.path.isdir(annotation_set_dir):
            # Not every dataset has data for every year.
            continue
        for xml_ann in sorted(os.listdir(annotation_set_dir)):
            stem, extension = os.path.splitext(xml_ann)
            if extension.lower() != '.xml':
                continue
            tree = ET.parse(os.path.join(annotation_set_dir, xml_ann))
            # Extract the segmentation and category information
            regions = _read_clippings(tree.getroot(), coco_annotations['categories'])

            # Extract the image information
            located = _locate_image(image_set_dir, stem)
            if located is None:
                continue
            file_name, file_path = located
            img = cv2.imread(file_path)
            if img is None:
                print(f"image not found {file_path}")
                continue
            height, width = img.shape[:2]
            image_id = len(coco_annotations['images']) + 1
            coco_annotations['images'].append({
                'id': image_id,
                'file_name': file_name,
                'height': height,
                'width': width,
            })
            target = f"{img_data}/{file_name}"
            # lexists also sees dangling links, which os.symlink refuses to
            # overwrite.
            if not os.path.lexists(target):
                os.symlink(file_path, target)

            # Extract the annotation information
            for category_id, segmentation in regions:
                # Compute the bounding box
                xmin = min(segmentation[::2])
                xmax = max(segmentation[::2])
                ymin = min(segmentation[1::2])
                ymax = max(segmentation[1::2])
                coco_annotations['annotations'].append({
                    'id': len(coco_annotations['annotations']) + 1,
                    'image_id': image_id,
                    'category_id': category_id,
                    'segmentation': [segmentation],
                    'bbox': [xmin, ymin, xmax - xmin, ymax - ymin],
                    # Area of the region itself, not of the whole image.
                    'area': _polygon_area(segmentation),
                    'iscrowd': 0,
                })
    return coco_annotations
if __name__ == "__main__":
    # Initialize the COCO annotations; only two categories are used because
    # every weed species is collapsed into the single "weed" category.
    coco_annotations = {
        'licenses': [],
        'info': {},
        "categories": [
            {
                "id": 1,
                "name": "crop",
                "supercategory": ""
            },
            {
                "id": 2,
                "name": "weed",
                "supercategory": ""
            },
        ],
        'images': [],
        'annotations': [],
    }
    root_dir = './dataset/rose'
    ds_names = ['weedelec', 'bipbip', 'pead', 'roseau']
    cls_names = ['bean', 'maize']
    for name_cls in cls_names:
        # reset annotations for each class: one labels.json is written per class
        coco_annotations['images'] = []
        coco_annotations['annotations'] = []
        for name_ds in ds_names:
            print(" " + name_ds)
            coco_annotations = rose2COCO(coco_annotations, root_dir, name_ds, name_cls)
        # NOTE(review): printing the image count once per class (after all
        # datasets) is assumed; the original nesting of this line was lost.
        print("\n " + str(len(coco_annotations['images'])))
        # Save the COCO annotations to a JSON file
        cls_dir = os.path.realpath(f'{root_dir}/COCO/{name_cls}')
        # Make sure the output directory exists before opening the file.
        os.makedirs(cls_dir, exist_ok=True)
        label_file = os.path.realpath(f'{cls_dir}/labels.json')
        with open(label_file, 'w') as f:
            json.dump(coco_annotations, f, indent=1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment