Skip to content

Instantly share code, notes, and snippets.

Last active July 22, 2022 09:25
  • Star 4 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
Star You must be signed in to star a gist
What would you like to do?
Convert MS COCO Annotation to Pascal VOC format for python3 and with tqdm
import json
import os
from collections import deque
from itertools import starmap
from pathlib import Path

import baker
from cytoolz import merge, join, groupby
from cytoolz.compatibility import iteritems
from cytoolz.curried import update_in
from lxml import etree, objectify
from scipy.io import savemat
# NOTE(review): scipy.ndimage.imread was removed in SciPy 1.2; this import
# only works with an older SciPy release.
from scipy.ndimage import imread
from tqdm import tqdm
def keyjoin(leftkey, leftseq, rightkey, rightseq):
    """Join two sequences of dicts on a key and merge every matched pair.

    ``join`` pairs each item of ``leftseq`` with each item of ``rightseq``
    whose ``rightkey`` value equals the left item's ``leftkey`` value; every
    resulting ``(left, right)`` pair is then combined into one dict by
    ``merge``.

    Returns a lazy iterator over the merged dicts.
    """
    matched_pairs = join(leftkey, leftseq, rightkey, rightseq)
    return starmap(merge, matched_pairs)
def root(folder, filename, width, height):
    """Build the root ``<annotation>`` element of a Pascal VOC file.

    folder:   value written to the ``<folder>`` element.
    filename: value written to the ``<filename>`` element.
    width:    image width in pixels, written to ``<size>/<width>``.
    height:   image height in pixels, written to ``<size>/<height>``.

    Returns an lxml objectify element to which ``<object>`` children can be
    appended.
    """
    E = objectify.ElementMaker(annotate=False)
    return E.annotation(
        E.folder(folder),
        E.filename(filename),
        E.source(
            E.database('MS COCO 2014'),
            E.annotation('MS COCO 2014'),
        ),
        E.size(
            E.width(width),
            E.height(height),
            # NOTE(review): depth is fixed at 3 on the assumption that only
            # colour images are converted -- confirm against the caller.
            E.depth(3),
        ),
        # No segmentation masks are emitted, so the flag is always 0.
        E.segmented(0),
    )
def instance_to_xml(anno):
    """Build a Pascal VOC ``<object>`` element for one annotation dict.

    anno: mapping with a ``'bbox'`` entry that unpacks to
          ``(xmin, ymin, width, height)`` and a ``'category_id'`` entry that
          is written to ``<name>``.

    Returns an lxml objectify element holding ``<name>`` and ``<bndbox>``.
    """
    E = objectify.ElementMaker(annotate=False)
    xmin, ymin, width, height = anno['bbox']
    return E.object(
        E.name(anno['category_id']),
        E.bndbox(
            E.xmin(xmin),
            E.ymin(ymin),
            # VOC stores the opposite corner instead of the box extent.
            E.xmax(xmin + width),
            E.ymax(ymin + height),
        ),
    )
def write_categories(coco_annotation, dst):
    """Save the category names of a COCO annotation file to a .mat file.

    coco_annotation: path to a COCO ``.json`` file with a ``'categories'``
                     list whose items have a ``'name'`` entry.
    dst:             path of the MATLAB file to write; the names are stored
                     under the variable ``'categories'``.
    """
    # JSON is UTF-8 by specification; do not rely on the platform default.
    with open(os.path.abspath(coco_annotation), encoding='utf-8') as file:
        content = json.load(file)
    categories = tuple(d['name'] for d in content['categories'])
    savemat(os.path.abspath(dst), {'categories': categories})
def get_instances(coco_annotation):
    """Load a COCO annotation file and join annotations with their images.

    coco_annotation: path to a COCO ``.json`` file containing the
                     ``'categories'``, ``'images'`` and ``'annotations'``
                     lists.

    Returns ``(categories, instances)``: ``categories`` maps category id to
    category name, and ``instances`` is a tuple of dicts, one per annotation,
    each merged with the image record whose ``'id'`` equals the annotation's
    ``'image_id'``.
    """
    coco_annotation = os.path.abspath(coco_annotation)
    # JSON is UTF-8 by specification; do not rely on the platform default.
    with open(coco_annotation, encoding='utf-8') as file:
        content = json.load(file)
    categories = {d['id']: d['name'] for d in content['categories']}
    instances = tuple(
        keyjoin('id', content['images'], 'image_id', content['annotations'])
    )
    return categories, instances
def rename(name, year=2014):
    """Return ``name`` with its file extension removed.

    ``year`` is accepted for backward compatibility but is not used.
    """
    stem, _extension = os.path.splitext(name)
    return stem
def create_imageset(annotations, dst):
    """Append the names of converted annotations to val.txt / train.txt.

    annotations: folder whose entries are scanned; names matching ``*val*``
                 go to ``val.txt`` and names matching ``*train*`` go to
                 ``train.txt``.
    dst:         existing folder in which the two list files are written.

    Each matching entry contributes one line holding its name without the
    file extension. Lines are appended, so existing list files are extended
    rather than overwritten; a list file is only touched when at least one
    entry matches.
    """
    annotations = Path(os.path.abspath(annotations))
    dst = Path(os.path.abspath(dst))
    for pattern, list_name in (('*val*', 'val.txt'), ('*train*', 'train.txt')):
        # Sort so the output does not depend on directory listing order.
        stems = [
            os.path.splitext(entry.name)[0]
            for entry in sorted(annotations.glob(pattern))
        ]
        if not stems:
            continue
        with open(dst / list_name, 'a', encoding='utf-8') as handle:
            handle.writelines('{}\n'.format(stem) for stem in stems)
def create_annotations(dbPath, subset, dst='annotations_voc'):
    """Convert annotations from COCO to Pascal VOC, one XML file per image.

    dbPath: folder which contains the annotations subfolder which contains
            the annotations file in .json format.
            Note: the corresponding images should be in the train2014 or
            val2014 subfolder.
    subset: which of the .json files should be opened, e.g. train for the
            "instances_train2014.json" file.
    dst:    destination folder for the annotations. Will be created if it
            doesn't exist, e.g. "annotations_voc".
    """
    dst = os.path.abspath(dst)
    os.makedirs(dst, exist_ok=True)

    annotations_path = os.path.join(
        os.path.abspath(dbPath), 'annotations',
        'instances_' + str(subset) + '2014.json')
    images_Path = os.path.join(os.path.abspath(dbPath), str(subset) + '2014')

    print("reading data...")
    categories, instances = get_instances(annotations_path)
    print("finished reading data")

    # Replace each numeric category id by its name, in place.
    for instance in tqdm(instances, desc="rewriting categories"):
        instance['category_id'] = categories[instance['category_id']]

    # Group once and reuse the result for both the iteration and the total.
    grouped = groupby('file_name', instances)
    for name, group in tqdm(grouped.items(), total=len(grouped),
                            desc="processing annotations"):
        img = imread(os.path.abspath(os.path.join(images_Path, name)))
        # Only images whose array has three dimensions are converted.
        if img.ndim != 3:
            continue
        out_name = rename(name)
        # Keyword arguments keep width and height from being swapped.
        annotation = root('VOC2014', '{}.jpg'.format(out_name),
                          width=group[0]['width'],
                          height=group[0]['height'])
        for instance in group:
            annotation.append(instance_to_xml(instance))
        etree.ElementTree(annotation).write(
            os.path.join(dst, '{}.xml'.format(out_name)))
if __name__ == '__main__':
    # Expose the conversion functions as baker sub-commands, then dispatch on
    # the command line, e.g.: create_annotations /path/to/coco train
    for _command in (write_categories, create_imageset, create_annotations):
        baker.command(_command)
    baker.run()
Copy link

AlexeyGy commented Aug 7, 2018

This is a Python 3.x version of the Python 2 script that converts COCO labels to VOC format, e.g. for testing this YOLO implementation:

Copy link

AlexeyGy commented Aug 7, 2018

update: now creates the output dir automatically

Copy link

Ah, thanks! This is useful.
Do you have any license or copyright snippet?

Copy link

Hey Samuel, I would use the same license as the creator, the MIT license.

Copy link

@AlexeyGy, can you write a command file that shows how to use this script? Thank you so much.

Copy link

Check out the baker docs here.

An example invocation could be: create_annotations /home/somePathContainingAnnotationsFile train

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment