Convert MS COCO annotations to Pascal VOC format
function coco(n, k, note, dotrainval, testyear)
% Train and evaluate one model per selected category.
%   coco(n, k, note, dotrainval, testyear)
%
% Each model will be a mixture of n star models, each of which
% has 2 latent orientations.
%
% Arguments
%   n           Number of aspect ratio clusters to use
%               (The final model has 2*n components)
%   k           Row index of the first category to process; this call
%               handles rows k, k+8, k+16, ... (up to 80) of the
%               'categories' variable loaded from categories.mat
%   note        Save a note in the model.note field that describes this model
%               (defaults to the timestamp of this run)
%   dotrainval  Also evaluate on the trainval dataset
%               This is used to collect training data for context rescoring
%               (defaults to false)
%   testyear    Test set year (e.g., '2007', '2011')
%               (defaults to conf.pascal.year)

% AUTORIGHTS
% -------------------------------------------------------
% Copyright (C) 2011-2012 Ross Girshick
% Copyright (C) 2008, 2009, 2010 Pedro Felzenszwalb, Ross Girshick
%
% This file is part of the voc-releaseX code
% (http://people.cs.uchicago.edu/~rbg/latent/)
% and is available under the terms of an MIT-like license
% provided in COPYING. Please retain this notice and
% COPYING if you use this file (or a portion of it) in
% your project.
% -------------------------------------------------------

startup;

conf = voc_config();
cachedir = conf.paths.model_dir;
testset = conf.eval.test_set;

% Brings the variable 'categories' into the workspace.
% NOTE(review): presumably this is the file produced by the Python
% write_categories command -- confirm.
load('categories.mat')

% TODO: should save entire code used for this run
% Take the code, zip it into an archive named by date
% print the name of the code archive to the log file
% add the code name to the training note
timestamp = datestr(datevec(now()), 'dd.mmm.yyyy:HH.MM.SS');

% Set the note to the training time if none is given
if nargin < 3
  note = timestamp;
end

% Don't evaluate trainval by default
if nargin < 4
  dotrainval = false;
end

if nargin < 5
  % which year to test on -- a string, e.g., '2007'.
  testyear = conf.pascal.year;
end

% Visit every 8th category starting at row k.
for i = k:8:80
  % Select the category for this iteration (no semicolon: the name is
  % echoed to the command window).
  % NOTE(review): if 'categories' is a space-padded char matrix, cls will
  % carry trailing blanks -- confirm whether deblank() is needed.
  cls = categories(i, :)

  % Record a log of the training and test procedure
  diary(conf.training.log([cls '-' timestamp]));

  % Train a model (and record how long it took)
  th = tic;
  model = pascal_train(cls, n, note);
  toc(th);

  % Free the feature vector cache memory
  fv_cache('free');

  % Lower threshold to get high recall
  model.thresh = min(conf.eval.max_thresh, model.thresh);
  model.interval = conf.eval.interval;

  suffix = testyear;

  % Collect detections on the test set
  ds = pascal_test(model, testset, testyear, suffix);

  % Evaluate the model without bounding box prediction
  ap1 = pascal_eval(cls, ds, testset, testyear, suffix);
  fprintf('AP = %.4f (without bounding box prediction)\n', ap1)

  % Recompute AP after applying bounding box prediction
  [ap1, ap2] = bboxpred_rescore(cls, testset, testyear, suffix);
  fprintf('AP = %.4f (without bounding box prediction)\n', ap1)
  fprintf('AP = %.4f (with bounding box prediction)\n', ap2)

  % Compute detections on the trainval dataset (used for context rescoring)
  if dotrainval
    trainval(cls);
  end
end
import baker | |
import json | |
from path import path | |
from cytoolz import merge, join, groupby | |
from cytoolz.compatibility import iteritems | |
from cytoolz.curried import update_in | |
from itertools import starmap | |
from collections import deque | |
from lxml import etree, objectify | |
from scipy.io import savemat | |
from scipy.ndimage import imread | |
def keyjoin(leftkey, leftseq, rightkey, rightseq):
    """Join two sequences on the given keys and merge each matched pair.

    The four arguments are forwarded unchanged to ``join``. Every
    ``(left, right)`` pair it yields is combined with ``merge(left, right)``.

    Returns a lazy iterator over the merged records.
    """
    matched_pairs = join(leftkey, leftseq, rightkey, rightseq)
    return (merge(left, right) for left, right in matched_pairs)
def root(folder, filename, width, height):
    """Build the top-level ``<annotation>`` element for one image.

    The element contains ``folder``, ``filename``, a fixed ``source``
    section (database/annotation 'MS COCO 2014', image 'Flickr'), a
    ``size`` section holding ``width``, ``height`` and a depth of 3, and
    ``segmented`` set to 0.

    Returns the element; object entries can be appended to it afterwards.
    """
    maker = objectify.ElementMaker(annotate=False)

    source = maker.source(
        maker.database('MS COCO 2014'),
        maker.annotation('MS COCO 2014'),
        maker.image('Flickr'),
    )
    size = maker.size(
        maker.width(width),
        maker.height(height),
        maker.depth(3),
    )

    return maker.annotation(
        maker.folder(folder),
        maker.filename(filename),
        source,
        size,
        maker.segmented(0),
    )
def instance_to_xml(anno):
    """Build the ``<object>`` element for a single annotated instance.

    ``anno['bbox']`` is unpacked as ``(xmin, ymin, width, height)`` and
    written out as corner coordinates (``xmax = xmin + width``,
    ``ymax = ymin + height``). ``anno['category_id']`` becomes the
    object's ``name``.
    """
    maker = objectify.ElementMaker(annotate=False)
    left, top, box_width, box_height = anno['bbox']

    bndbox = maker.bndbox(
        maker.xmin(left),
        maker.ymin(top),
        maker.xmax(left + box_width),
        maker.ymax(top + box_height),
    )
    return maker.object(maker.name(anno['category_id']), bndbox)
@baker.command
def write_categories(coco_annotation, dst):
    """Save the category names of a COCO annotation file to a .mat file.

    Reads the JSON file at ``coco_annotation``, collects the ``name`` of
    every entry under ``categories`` (in file order) and stores them under
    the variable ``categories`` in the MAT-file ``dst``.
    """
    annotation_file = path(coco_annotation).expand()
    content = json.loads(annotation_file.text())

    names = [category['name'] for category in content['categories']]
    savemat(path(dst).expand(), {'categories': tuple(names)})
def get_instances(coco_annotation):
    """Load a COCO annotation file and pair every annotation with its image.

    Returns a 2-tuple:

    * a dict mapping category id to category name;
    * a tuple of records, one per image/annotation match, produced by
      joining ``images`` (on ``id``) with ``annotations`` (on ``image_id``).
    """
    annotation_file = path(coco_annotation).expand()
    content = json.loads(annotation_file.text())

    id_to_name = dict(
        (category['id'], category['name'])
        for category in content['categories']
    )
    joined = keyjoin('id', content['images'], 'image_id', content['annotations'])
    return id_to_name, tuple(joined)
def rename(name, year=2014):
    """Return ``name`` with its file extension removed.

    ``year`` is accepted for interface compatibility but is not used.
    """
    return path(name).stripext()
@baker.command
def create_imageset(annotations, dst):
    """Write image-set listings for the val and train subsets.

    Entries of the ``annotations`` directory matching ``*val*`` are appended
    (extension stripped, one per line) to ``dst/val.txt``; entries matching
    ``*train*`` are appended to ``dst/train.txt``. Both files are opened in
    append mode, so existing content is kept.
    """
    annotations = path(annotations).expand()
    dst = path(dst).expand()

    # Same order as before: the val listing is written first, then train.
    subsets = (
        ('*val*', dst / 'val.txt'),
        ('*train*', dst / 'train.txt'),
    )
    for pattern, listing in subsets:
        for entry in annotations.listdir(pattern):
            line = '{}\n'.format(entry.basename().stripext())
            listing.write_text(line, append=True)
@baker.command
def create_annotations(dbpath, subset, dst):
    """Write one Pascal VOC style XML annotation file per COCO image.

    Arguments:
        dbpath: Root of the COCO database. The instance file is read from
            ``annotations/instances_<subset>2014.json`` and the images from
            ``images/<subset>2014`` below it.
        subset: Subset name used to build those paths (e.g. ``train``).
        dst: Directory that receives the ``<image name>.xml`` files.

    Only images that load as 3-dimensional arrays are converted. The name
    of every processed image is printed; for skipped images the original
    file name is printed instead.
    """
    db_root = path(dbpath).expand()
    annotations_path = db_root / 'annotations/instances_{}2014.json'.format(subset)
    images_path = db_root / 'images/{}2014'.format(subset)
    categories, instances = get_instances(annotations_path)
    dst = path(dst).expand()

    # Replace each numeric category id with its category name, in place.
    for instance in instances:
        instance['category_id'] = categories[instance['category_id']]

    for name, group in iteritems(groupby('file_name', instances)):
        img = imread(images_path / name)
        if img.ndim != 3:
            # Report the image actually being skipped. The original printed
            # a stale loop variable that referred to a different instance.
            print(name)
            continue

        out_name = rename(name)
        # root() takes (folder, filename, width, height); the original call
        # passed height and width in swapped order.
        annotation = root('VOC2014', '{}.jpg'.format(out_name),
                          group[0]['width'], group[0]['height'])
        for instance in group:
            annotation.append(instance_to_xml(instance))
        etree.ElementTree(annotation).write(dst / '{}.xml'.format(out_name))
        print(out_name)
if __name__ == '__main__':
    # Start the baker command-line interface when executed as a script.
    baker.run()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment