Skip to content

Instantly share code, notes, and snippets.

@digitalbrain79
Last active March 16, 2023 07:18
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save digitalbrain79/089b699f9ca209fd212c975755e12d22 to your computer and use it in GitHub Desktop.
Save digitalbrain79/089b699f9ca209fd212c975755e12d22 to your computer and use it in GitHub Desktop.
Convert MS COCO Annotation to Pascal VOC format
function coco(n, k, note, dotrainval, testyear)
% Train and evaluate one model per selected row of 'categories'.
% coco(n, k, note, dotrainval, testyear)
%
% The model will be a mixture of n star models, each of which
% has 2 latent orientations.
%
% Arguments
% n Number of aspect ratio clusters to use
% (The final model has 2*n components)
% k Index of the first row of 'categories' to process; the main
% loop visits rows k, k+8, k+16, ... up to 80
% (NOTE(review): presumably lets 8 runs, k = 1..8, split 80
% categories between them -- confirm)
% note Save a note in the model.note field that describes this model
% (defaults to the timestamp of this run)
% dotrainval Also evaluate on the trainval dataset
% This is used to collect training data for context rescoring
% (defaults to false)
% testyear Test set year (e.g., '2007', '2011')
% (defaults to conf.pascal.year)
% AUTORIGHTS
% -------------------------------------------------------
% Copyright (C) 2011-2012 Ross Girshick
% Copyright (C) 2008, 2009, 2010 Pedro Felzenszwalb, Ross Girshick
%
% This file is part of the voc-releaseX code
% (http://people.cs.uchicago.edu/~rbg/latent/)
% and is available under the terms of an MIT-like license
% provided in COPYING. Please retain this notice and
% COPYING if you use this file (or a portion of it) in
% your project.
% -------------------------------------------------------
startup;
conf = voc_config();
% NOTE(review): cachedir is assigned but not read anywhere in this function
cachedir = conf.paths.model_dir;
testset = conf.eval.test_set;
% Loads the variables stored in categories.mat into the workspace; the loop
% below reads 'categories' from it.
% NOTE(review): presumably the file written by write_categories in the
% Python script -- confirm
load('categories.mat')
% TODO: should save entire code used for this run
% Take the code, zip it into an archive named by date
% print the name of the code archive to the log file
% add the code name to the training note
timestamp = datestr(datevec(now()), 'dd.mmm.yyyy:HH.MM.SS');
% Set the note to the training time if none is given
if nargin < 3
note = timestamp;
end
% Don't evaluate trainval by default
if nargin < 4
dotrainval = false;
end
if nargin < 5
% which year to test on -- a string, e.g., '2007'.
testyear = conf.pascal.year;
end
% Process every 8th row of 'categories', starting at row k and stopping at 80
for i = k:8:80
% Record a log of the training and test procedure
% Row i of 'categories' is the class name. There is no trailing semicolon,
% so MATLAB echoes cls to the console.
% NOTE(review): if 'categories' is a space-padded char matrix, cls carries
% trailing blanks -- confirm whether strtrim is needed
cls = categories(i, :)
diary(conf.training.log([cls '-' timestamp]));
% Train a model (and record how long it took)
th = tic;
model = pascal_train(cls, n, note);
toc(th);
% Free the feature vector cache memory
fv_cache('free');
% Lower threshold to get high recall
model.thresh = min(conf.eval.max_thresh, model.thresh);
model.interval = conf.eval.interval;
% The test year doubles as the suffix passed to the test/eval helpers
suffix = testyear;
% Collect detections on the test set
ds = pascal_test(model, testset, testyear, suffix);
% Evaluate the model without bounding box prediction
ap1 = pascal_eval(cls, ds, testset, testyear, suffix);
fprintf('AP = %.4f (without bounding box prediction)\n', ap1)
% Recompute AP after applying bounding box prediction
% (ap1 is overwritten here, so it is printed a second time below)
[ap1, ap2] = bboxpred_rescore(cls, testset, testyear, suffix);
fprintf('AP = %.4f (without bounding box prediction)\n', ap1)
fprintf('AP = %.4f (with bounding box prediction)\n', ap2)
% Compute detections on the trainval dataset (used for context rescoring)
if dotrainval
trainval(cls);
end
end
import baker
import json
from path import path
from cytoolz import merge, join, groupby
from cytoolz.compatibility import iteritems
from cytoolz.curried import update_in
from itertools import starmap
from collections import deque
from lxml import etree, objectify
from scipy.io import savemat
from scipy.ndimage import imread
def keyjoin(leftkey, leftseq, rightkey, rightseq):
    """Join two sequences on a key and merge every matched pair.

    The pairs produced by ``join(leftkey, leftseq, rightkey, rightseq)``
    are each combined into one mapping with ``merge(left, right)``.

    :returns: a lazy iterator over the merged mappings.
    """
    matched_pairs = join(leftkey, leftseq, rightkey, rightseq)
    return (merge(left, right) for left, right in matched_pairs)
def root(folder, filename, height, width):
    """Build the top-level ``annotation`` element for one image.

    The element holds ``folder``, ``filename``, a fixed ``source`` section
    ('MS COCO 2014' / 'Flickr'), a ``size`` section (``width``, ``height``
    and a constant depth of 3) and ``segmented`` set to 0.

    :param folder: text for the ``folder`` element.
    :param filename: text for the ``filename`` element.
    :param height: value for ``size/height``.
    :param width: value for ``size/width``.
    :returns: the ``annotation`` element; object elements can be appended
        to it by the caller.
    """
    make = objectify.ElementMaker(annotate=False)
    source = make.source(
        make.database('MS COCO 2014'),
        make.annotation('MS COCO 2014'),
        make.image('Flickr'),
    )
    size = make.size(
        make.width(width),
        make.height(height),
        make.depth(3),
    )
    return make.annotation(
        make.folder(folder),
        make.filename(filename),
        source,
        size,
        make.segmented(0),
    )
def instance_to_xml(anno):
    """Convert one annotation dict into an ``object`` XML element.

    ``anno['bbox']`` is unpacked as ``(xmin, ymin, width, height)``; each
    value is truncated with ``int`` before the far corner is computed as
    ``xmin + width`` / ``ymin + height``. ``anno['category_id']`` becomes
    the text of the ``name`` element.

    :param anno: mapping with at least ``'bbox'`` and ``'category_id'``.
    :returns: an ``object`` element containing ``name`` and ``bndbox``.
    """
    make = objectify.ElementMaker(annotate=False)
    left, top, box_w, box_h = anno['bbox']
    left, top, box_w, box_h = int(left), int(top), int(box_w), int(box_h)
    bndbox = make.bndbox(
        make.xmin(left),
        make.ymin(top),
        make.xmax(left + box_w),
        make.ymax(top + box_h),
    )
    return make.object(make.name(anno['category_id']), bndbox)
@baker.command
def write_categories(coco_annotation, dst):
    """Save the category names of a COCO annotation file to a .mat file.

    Reads the JSON file at ``coco_annotation``, collects the ``'name'`` of
    every entry in its ``'categories'`` list (in file order) and stores the
    resulting tuple under the variable name ``categories`` via ``savemat``.

    :param coco_annotation: path to the COCO annotation JSON file.
    :param dst: path of the .mat file to write.
    """
    annotation_file = path(coco_annotation).expand()
    content = json.loads(annotation_file.text())
    names = tuple(category['name'] for category in content['categories'])
    target = path(dst).expand()
    savemat(target, {'categories': names})
def get_instances(coco_annotation):
    """Load a COCO annotation file and attach image records to annotations.

    :param coco_annotation: path to the COCO annotation JSON file.
    :returns: a ``(categories, instances)`` pair, where ``categories`` maps
        each category ``'id'`` to its ``'name'`` and ``instances`` is a
        tuple produced by joining ``content['images']`` (on ``'id'``) with
        ``content['annotations']`` (on ``'image_id'``) through ``keyjoin``.
    """
    content = json.loads(path(coco_annotation).expand().text())
    id_to_name = dict((entry['id'], entry['name'])
                      for entry in content['categories'])
    joined = keyjoin('id', content['images'],
                     'image_id', content['annotations'])
    return id_to_name, tuple(joined)
def rename(name, year=2014):
    """Return ``name`` with its file extension stripped.

    :param name: file name (or path) to strip.
    :param year: accepted but currently unused; the name is not prefixed
        with the year.
    :returns: ``path(name).stripext()``.
    """
    return path(name).stripext()
@baker.command
def create_imageset(annotations, dst):
    """Append annotation base names to ``val.txt`` and ``train.txt``.

    Every entry of the ``annotations`` directory matching ``*val*`` has its
    extension-less base name appended as one line to ``dst/val.txt``;
    entries matching ``*train*`` go to ``dst/train.txt`` the same way.
    Both files are opened in append mode, so existing content is kept.

    :param annotations: directory holding the annotation files.
    :param dst: directory that receives ``val.txt`` and ``train.txt``.
    """
    annotations = path(annotations).expand()
    dst = path(dst).expand()
    # Validation entries are written first, then training entries.
    for pattern, list_name in (('*val*', 'val.txt'), ('*train*', 'train.txt')):
        list_file = dst / list_name
        for entry in annotations.listdir(pattern):
            stem = entry.basename().stripext()
            list_file.write_text('{}\n'.format(stem), append=True)
@baker.command
def create_annotations(dbpath, subset, dst):
    """Write one VOC-style XML annotation file per image of a COCO subset.

    Instances are read from
    ``<dbpath>/annotations/instances_<subset>2014.json`` and grouped by
    ``'file_name'``. Each image is loaded from ``<dbpath>/images/<subset>2014``;
    images whose array is not 3-dimensional are skipped and their file name
    is printed. For every other image an XML file named after the image
    (extension replaced by ``.xml``) is written to ``dst`` and the output
    name is printed.

    :param dbpath: root directory of the COCO dataset.
    :param subset: subset name inserted into the paths (NOTE(review):
        presumably ``'train'`` or ``'val'`` -- confirm).
    :param dst: directory that receives the XML files.
    """
    db_root = path(dbpath).expand()
    annotations_path = db_root / 'annotations/instances_{}2014.json'.format(subset)
    images_path = db_root / 'images/{}2014'.format(subset)
    categories, instances = get_instances(annotations_path)
    dst = path(dst).expand()

    # Replace each numeric category id by its name, in place, so that
    # instance_to_xml writes the name into the XML.
    for instance in instances:
        instance['category_id'] = categories[instance['category_id']]

    for name, group in iteritems(groupby('file_name', instances)):
        img = imread(images_path / name)
        if img.ndim != 3:
            # Report the image that is actually being skipped. The loop
            # variable `instance` still refers to an item from an earlier
            # loop here, so it must not be used to identify this image.
            print(name)
            continue

        out_name = rename(name)
        # All instances of a group come from the same image, so the first
        # one supplies the image height and width.
        annotation = root('VOC2014', '{}.jpg'.format(out_name),
                          group[0]['height'], group[0]['width'])
        for instance in group:
            annotation.append(instance_to_xml(instance))
        etree.ElementTree(annotation).write(dst / '{}.xml'.format(out_name))
        print(out_name)
# Script entry point: only when the file is executed directly, hand control
# to baker (expected to run the @baker.command function selected on the
# command line).
if __name__ == '__main__':
    baker.run()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment