Transform Tsinghua-Tencent 100K Dataset Annotations to COCO Format
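The script below reads the TT100K annotation file (a single JSON whose 'imgs' dictionary maps image ids to their path and traffic-sign boxes) and writes a COCO-style JSON for one split at a time, keeping only images whose path contains the -t value ('train' by default). Assuming the gist is saved as tt100k_to_coco.py (the file name is only an example), a typical run looks like:

    python tt100k_to_coco.py -f data.json -t train -o train_coco.json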
import json
import argparse


def load_json(file_name):
    # Read and parse the TT100K annotation file; the handle is closed by the with block.
    with open(file_name, 'r') as f:
        return json.load(f)


def parse(data, data_type, output_name):
    # Skeleton of the output file in COCO format
    result = {
        "info": {
            "description": "TT100K Dataset COCO Format",
            "url": "https://github.com/zhaoweizhong",
            "version": "1.0",
            "year": 2020,
            "contributor": "Zhaowei Zhong",
            "date_created": "2020/01/04"
        },
        "licenses": [
            {
                "url": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
                "id": 1,
                "name": "Attribution-NonCommercial-ShareAlike 4.0 International License (CC BY-NC-SA 4.0)"
            }
        ],
        "images": [],
        "annotations": [],
        "categories": []
    }
    # Categories: the TT100K sign classes; COCO category ids start at 1
    categories = ["i1", "i10", "i11", "i12", "i13", "i14", "i15", "i2", "i3", "i4", "i5", "il100", "il110", "il50", "il60", "il70", "il80", "il90", "io", "ip", "p1", "p10", "p11", "p12", "p13", "p14", "p15", "p16", "p17", "p18", "p19", "p2", "p20", "p21", "p22", "p23", "p24", "p25", "p26", "p27", "p28", "p3", "p4", "p5", "p6", "p7", "p8", "p9", "pa10", "pa12", "pa13", "pa14", "pa8", "pb", "pc", "pg", "ph1.5", "ph2", "ph2.1", "ph2.2", "ph2.4", "ph2.5", "ph2.8", "ph2.9", "ph3", "ph3.2", "ph3.5", "ph3.8", "ph4", "ph4.2", "ph4.3", "ph4.5", "ph4.8", "ph5", "ph5.3", "ph5.5", "pl10", "pl100", "pl110", "pl120", "pl15", "pl20", "pl25", "pl30", "pl35", "pl40", "pl5", "pl50", "pl60", "pl65", "pl70", "pl80", "pl90", "pm10", "pm13", "pm15", "pm1.5", "pm2", "pm20", "pm25", "pm30", "pm35", "pm40", "pm46", "pm5", "pm50", "pm55", "pm8", "pn", "pne", "po", "pr10", "pr100", "pr20", "pr30", "pr40", "pr45", "pr50", "pr60", "pr70", "pr80", "ps", "pw2", "pw2.5", "pw3", "pw3.2", "pw3.5", "pw4", "pw4.2", "pw4.5", "w1", "w10", "w12", "w13", "w16", "w18", "w20", "w21", "w22", "w24", "w28", "w3", "w30", "w31", "w32", "w34", "w35", "w37", "w38", "w41", "w42", "w43", "w44", "w45", "w46", "w47", "w48", "w49", "w5", "w50", "w55", "w56", "w57", "w58", "w59", "w60", "w62", "w63", "w66", "w8", "wo", "i6", "i7", "i8", "i9", "ilx", "p29", "w29", "w33", "w36", "w39", "w4", "w40", "w51", "w52", "w53", "w54", "w6", "w61", "w64", "w65", "w67", "w7", "w9", "pax", "pd", "pe", "phx", "plx", "pmx", "pnl", "prx", "pwx", "w11", "w14", "w15", "w17", "w19", "w2", "w23", "w25", "w26", "w27", "pl0", "pl4", "pl3", "pm2.5", "ph4.4", "pn40", "ph3.3", "ph2.6"]
    i = 1
    for category in categories:
        result['categories'].append({
            "id": i,
            "name": category
        })
        i = i + 1
    # Images: keep only those whose path contains the requested split
    # (e.g. 'train' or 'test'); every TT100K image is 2048x2048
    for img in data['imgs']:
        if str(data['imgs'][img]['path']).find(data_type) != -1:
            result['images'].append({
                "license": 1,
                "file_name": data['imgs'][img]['path'][len(data_type) + 1:],  # strip the 'train/' / 'test/' prefix
                "height": 2048,
                "width": 2048,
                "id": data['imgs'][img]['id']
            })
    # Annotations: one entry per labelled traffic-sign box, bbox as [x, y, width, height]
    i = 0
    for img in data['imgs']:
        if str(data['imgs'][img]['path']).find(data_type) != -1:
            for box in data['imgs'][img]['objects']:
                result['annotations'].append({
                    "segmentation": [[]],
                    "area": (box['bbox']['xmax'] - box['bbox']['xmin']) * (box['bbox']['ymax'] - box['bbox']['ymin']),
                    "iscrowd": 0,
                    "image_id": data['imgs'][img]['id'],
                    "bbox": [
                        box['bbox']['xmin'],
                        box['bbox']['ymin'],
                        box['bbox']['xmax'] - box['bbox']['xmin'],
                        box['bbox']['ymax'] - box['bbox']['ymin']
                    ],
                    "category_id": categories.index(box['category']) + 1,
                    "id": i
                })
                # If an outline is available, flatten its points into the COCO
                # polygon format [x1, y1, x2, y2, ...]
                if 'ellipse_org' in box:
                    for xy in box['ellipse_org']:
                        result['annotations'][i]['segmentation'][0].append(xy[0])
                        result['annotations'][i]['segmentation'][0].append(xy[1])
                elif 'polygon' in box:
                    for xy in box['polygon']:
                        result['annotations'][i]['segmentation'][0].append(xy[0])
                        result['annotations'][i]['segmentation'][0].append(xy[1])
                i = i + 1
    with open(output_name, "w") as f:
        json.dump(result, f)


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('-t', '--data_type', type=str, default='train')
    parser.add_argument('-f', '--file_name', type=str, default='data.json')
    parser.add_argument('-o', '--output_name', type=str, default='output.json')
    args = parser.parse_args()
    data_type = args.data_type
    file_name = args.file_name
    output_name = args.output_name
    data = load_json(file_name)
    parse(data, data_type, output_name)
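A quick way to sanity-check the output is to load it with pycocotools (not used by the script itself). This is only a sketch; the file name matches the -o value from the example invocation above.

    # Sketch: verify the generated COCO file with pycocotools (assumes it is installed
    # and that the script was run with -o train_coco.json).
    from pycocotools.coco import COCO

    coco = COCO('train_coco.json')                  # parses and indexes the JSON
    print(len(coco.getImgIds()), 'images')          # images kept for the chosen split
    print(len(coco.getAnnIds()), 'annotations')     # one entry per traffic-sign box
    print(len(coco.getCatIds()), 'categories')      # the sign classes hard-coded in the script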
Thanks a lot for this! Saved me the work to write it on my own. Works perfectly.
I just had to adjust the classes, because I'm working with the new 2021 dataset.
I'm glad it helped you. :)
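For newer releases such as TT100K 2021, one way to avoid editing the hard-coded list is to build the categories from the annotation file itself. The sketch below assumes the JSON exposes a top-level 'types' key (present in the TT100K annotation files I have seen); if yours does not, it falls back to collecting the classes from the boxes. The result can be passed to parse() in place of the hard-coded list, as long as the same list is used for both the 'categories' section and the category_id lookups.

    # Sketch: derive the category list from the annotation file instead of hard-coding it.
    def build_categories(data):
        if 'types' in data:
            # Use the class list shipped with the annotations.
            names = list(data['types'])
        else:
            # Fallback: collect every class that actually appears in the boxes.
            names = sorted({box['category']
                            for img in data['imgs'].values()
                            for box in img['objects']})
        return names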