Skip to content

Instantly share code, notes, and snippets.

@zhaoweizhong
Created January 4, 2020 22:53
Show Gist options
  • Save zhaoweizhong/7ca1f4d4fdcb0aa198732a0e7cc9b908 to your computer and use it in GitHub Desktop.
Save zhaoweizhong/7ca1f4d4fdcb0aa198732a0e7cc9b908 to your computer and use it in GitHub Desktop.
Transform Tsinghua-Tencent 100K Dataset Annotations to COCO Format
import json
import argparse
def load_json(file_name):
    """Read a JSON file and return the decoded Python object.

    Args:
        file_name: Path of the JSON file to read.

    Returns:
        The object produced by decoding the file's JSON content.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # The context manager closes the handle deterministically; the explicit
    # encoding keeps decoding independent of the platform's default locale.
    with open(file_name, 'r', encoding='utf-8') as f:
        return json.load(f)
def parse(data, data_type, output_name):
    """Convert TT100K annotations into a COCO-format JSON file.

    Args:
        data: Decoded TT100K annotation dict. It must contain an ``'imgs'``
            mapping whose values provide ``'path'``, ``'id'`` and
            ``'objects'``. Each object provides ``'category'`` and a
            ``'bbox'`` dict with ``xmin``/``ymin``/``xmax``/``ymax``, and
            may provide an ``'ellipse_org'`` or ``'polygon'`` list of points.
        data_type: Split name (for example ``'train'``). An image is
            selected when this string occurs anywhere in its path.
        output_name: Path of the JSON file to write.

    Raises:
        ValueError: If an object's category is not in the built-in list.
    """
    # File Format
    result = {
        "info": {
            "description": "TT100K Dataset COCO Format",
            "url": "https://github.com/zhaoweizhong",
            "version": "1.0",
            "year": 2020,
            "contributor": "Zhaowei Zhong",
            "date_created": "2020/01/04"
        },
        "licenses": [
            {
                "url": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
                "id": 1,
                "name": "Attribution-NonCommercial-ShareAlike 4.0 International License (CC BY-NC-SA 4.0)"
            }
        ],
        "images": [],
        "annotations": [],
        "categories": []
    }

    # Categories (the COCO category id is the 1-based position in this list)
    categories = [
        "i1", "i10", "i11", "i12", "i13", "i14", "i15", "i2", "i3", "i4",
        "i5", "il100", "il110", "il50", "il60", "il70", "il80", "il90", "io",
        "ip", "p1", "p10", "p11", "p12", "p13", "p14", "p15", "p16", "p17",
        "p18", "p19", "p2", "p20", "p21", "p22", "p23", "p24", "p25", "p26",
        "p27", "p28", "p3", "p4", "p5", "p6", "p7", "p8", "p9", "pa10",
        "pa12", "pa13", "pa14", "pa8", "pb", "pc", "pg", "ph1.5", "ph2",
        "ph2.1", "ph2.2", "ph2.4", "ph2.5", "ph2.8", "ph2.9", "ph3", "ph3.2",
        "ph3.5", "ph3.8", "ph4", "ph4.2", "ph4.3", "ph4.5", "ph4.8", "ph5",
        "ph5.3", "ph5.5", "pl10", "pl100", "pl110", "pl120", "pl15", "pl20",
        "pl25", "pl30", "pl35", "pl40", "pl5", "pl50", "pl60", "pl65", "pl70",
        "pl80", "pl90", "pm10", "pm13", "pm15", "pm1.5", "pm2", "pm20",
        "pm25", "pm30", "pm35", "pm40", "pm46", "pm5", "pm50", "pm55", "pm8",
        "pn", "pne", "po", "pr10", "pr100", "pr20", "pr30", "pr40", "pr45",
        "pr50", "pr60", "pr70", "pr80", "ps", "pw2", "pw2.5", "pw3", "pw3.2",
        "pw3.5", "pw4", "pw4.2", "pw4.5", "w1", "w10", "w12", "w13", "w16",
        "w18", "w20", "w21", "w22", "w24", "w28", "w3", "w30", "w31", "w32",
        "w34", "w35", "w37", "w38", "w41", "w42", "w43", "w44", "w45", "w46",
        "w47", "w48", "w49", "w5", "w50", "w55", "w56", "w57", "w58", "w59",
        "w60", "w62", "w63", "w66", "w8", "wo", "i6", "i7", "i8", "i9", "ilx",
        "p29", "w29", "w33", "w36", "w39", "w4", "w40", "w51", "w52", "w53",
        "w54", "w6", "w61", "w64", "w65", "w67", "w7", "w9", "pax", "pd",
        "pe", "phx", "plx", "pmx", "pnl", "prx", "pwx", "w11", "w14", "w15",
        "w17", "w19", "w2", "w23", "w25", "w26", "w27", "pl0", "pl4", "pl3",
        "pm2.5", "ph4.4", "pn40", "ph3.3", "ph2.6",
    ]
    result['categories'].extend(
        {"id": category_id, "name": name}
        for category_id, name in enumerate(categories, start=1)
    )
    # Name -> id table built once so each box costs one hash lookup instead
    # of a linear scan. setdefault keeps the first occurrence, which is the
    # id list.index() would have produced.
    category_ids = {}
    for category_id, name in enumerate(categories, start=1):
        category_ids.setdefault(name, category_id)

    # Images and annotations, collected in a single pass over the dataset.
    next_annotation_id = 0  # annotation ids are 0-based and sequential
    for img in data['imgs'].values():
        # NOTE(review): this is a substring match and the file name is cut at
        # a fixed offset, so paths are presumably of the form
        # "<data_type>/<file>" -- confirm before using other layouts.
        if data_type not in str(img['path']):
            continue

        result['images'].append({
            "license": 1,
            "file_name": img['path'][len(data_type) + 1:],
            "height": 2048,
            "width": 2048,
            "id": img['id']
        })

        for box in img['objects']:
            bbox = box['bbox']
            box_width = bbox['xmax'] - bbox['xmin']
            box_height = bbox['ymax'] - bbox['ymin']

            category = box['category']
            try:
                category_id = category_ids[category]
            except (KeyError, TypeError):
                raise ValueError(
                    "unknown category %r in image %r" % (category, img['id'])
                ) from None

            # Prefer the ellipse outline; fall back to the polygon; otherwise
            # the single segmentation ring stays empty.
            if 'ellipse_org' in box:
                outline = box['ellipse_org']
            elif 'polygon' in box:
                outline = box['polygon']
            else:
                outline = ()
            # Flatten [[x, y], ...] into [x, y, x, y, ...].
            ring = [coord for xy in outline for coord in (xy[0], xy[1])]

            result['annotations'].append({
                "segmentation": [ring],
                "area": box_width * box_height,
                "iscrowd": 0,
                "image_id": img['id'],
                "bbox": [bbox['xmin'], bbox['ymin'], box_width, box_height],
                "category_id": category_id,
                "id": next_annotation_id
            })
            next_annotation_id += 1

    with open(output_name, "w") as f:
        json.dump(result, f)
if __name__ == '__main__':
    # Command-line entry point: read the TT100K annotation file, keep the
    # requested split, and write the COCO-format result.
    cli = argparse.ArgumentParser()
    option_table = (
        ('-t', '--data_type', 'train'),
        ('-f', '--file_name', 'data.json'),
        ('-o', '--output_name', 'output.json'),
    )
    for short_flag, long_flag, fallback in option_table:
        cli.add_argument(short_flag, long_flag, type=str, default=fallback)
    options = cli.parse_args()

    parse(load_json(options.file_name), options.data_type, options.output_name)
@davesie
Copy link

davesie commented Jan 25, 2021

Thanks a lot for this! Saved me the work to write it on my own. Works perfectly.
I just had to adjust the classes, because I'm working with the new 2021 dataset.

@zhaoweizhong
Copy link
Author

> Thanks a lot for this! Saved me the work to write it on my own. Works perfectly.
> I just had to adjust the classes, because I'm working with the new 2021 dataset.

I'm glad it helped you. :)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment