# davesie/tt100k2coco.py

## tt100k2coco.py
'''
Transform Tsinghua-Tencent 100K Dataset Annotations to COCO Format
Source:
https://gist.github.com/zhaoweizhong/7ca1f4d4fdcb0aa198732a0e7cc9b908#file-tt100k2coco-py
'''

import json
import argparse


def load_json(file_name):
    '''
    Read the JSON document stored at ``file_name`` and return the decoded object.

    The file is opened inside a ``with`` block so its handle is closed as soon
    as parsing finishes (or fails) instead of being left open until garbage
    collection.

    Raises whatever ``open`` raises for an unreadable path and
    ``json.JSONDecodeError`` for malformed content.
    '''
    # JSON interchange files are UTF-8 (RFC 8259); do not depend on the locale.
    with open(file_name, 'r', encoding='utf-8') as f:
        return json.load(f)


def _segmentation(box):
    '''
    Build the COCO ``segmentation`` value for one annotated object.

    Points are taken from ``box['ellipse_org']`` when that key exists,
    otherwise from ``box['polygon']``. Each point is read as ``xy[0], xy[1]``
    and the pairs are flattened into a single ``[x0, y0, x1, y1, ...]`` list
    wrapped in an outer list. Objects without either key, or with no points,
    yield ``[]``.
    '''
    if 'ellipse_org' in box:
        points = box['ellipse_org']
    elif 'polygon' in box:
        points = box['polygon']
    else:
        return []

    flat = []
    for xy in points:
        flat.append(xy[0])
        flat.append(xy[1])
    return [flat] if flat else []


def parse(data, data_type, output_name):
    '''
    Convert TT100K annotations to a COCO-style dict and write it as JSON.

    Parameters:
        data: decoded annotation file. ``data['imgs']`` must be a mapping whose
            values each provide ``'path'``, ``'id'`` and ``'objects'``; every
            object provides ``'category'`` and a ``'bbox'`` with
            ``xmin``/``ymin``/``xmax``/``ymax``, plus optionally
            ``'ellipse_org'`` or ``'polygon'`` point lists.
        data_type: ``'all'`` to export every image, otherwise a string that
            must occur in an image's path for the image to be exported
            (e.g. ``'train'``).
        output_name: path of the JSON file to write.

    For a specific ``data_type`` the image ``file_name`` is the path with its
    first ``len(data_type) + 1`` characters removed (the ``"<data_type>/"``
    prefix). For ``'all'`` the full path is kept, because images of several
    sub-directories end up in the same output.

    Raises:
        ValueError: if an object's category is not in the built-in list.
        KeyError: if a required key is missing from ``data``.
    '''
    # File Format
    result = {
        "info": {
            "description": "TT100K Dataset COCO Format",
            "url": "https://github.com/zhaoweizhong",
            "version": "1.0",
            "year": 2020,
            "contributor": "Zhaowei Zhong",
            "date_created": "2020/01/04"
        },
        "licenses": [
            {
                "url": " ",
                "id": 1,
                "name": " "
            }
        ],
        "images": [],
        "annotations": [],
        "categories": []
    }

    # Categories (the COCO category id is the 1-based position in this list)
    categories = ["pl80", "w9", "p6", "ph4.2", "i8", "w14", "w33", "pa13", "im", "pl90", "w58", "il70", "p5", "pm55",
                  "pl60", "ip", "p11", "pdd", "wc", "i2r", "w30", "pmr", "p23", "pl15", "pm10", "pss", "w1", "p4",
                  "w38", "w50", "w34", "pw3.5", "iz", "w39", "w11", "p1n", "pr70", "pd", "pnl", "pg", "ph5.3", "w66",
                  "il80", "pb", "pbm", "pm5", "w24", "w67", "w49", "pm40", "ph4", "w45", "i4", "w37", "ph2.6", "pl70",
                  "ph5.5", "i14", "i11", "p7", "p29", "pne", "pr60", "pm13", "ph4.5", "p12", "p3", "w40", "pl5", "w13",
                  "pr10", "p14", "i4l", "pr30", "pw4.2", "w16", "p17", "ph3", "i9", "w15", "w35", "pa8", "pt", "pr45",
                  "w17", "pl30", "pcs", "pctl", "pr50", "ph4.4", "pm46", "pm35", "i15", "pa12", "pclr", "i1", "pcd",
                  "pbp", "pcr", "w28", "ps", "pm8", "w18", "w2", "w52", "ph2.9", "ph1.8", "pe", "p20", "w36", "p10",
                  "pn", "pa14", "w54", "ph3.2", "p2", "ph2.5", "w62", "w55", "pw3", "pw4.5", "i12", "ph4.3", "phclr",
                  "i10", "pr5", "i13", "w10", "p26", "w26", "p8", "w5", "w42", "il50", "p13", "pr40", "p25", "w41",
                  "pl20", "ph4.8", "pnlc", "ph3.3", "w29", "ph2.1", "w53", "pm30", "p24", "p21", "pl40", "w27", "pmb",
                  "pc", "i6", "pr20", "p18", "ph3.8", "pm50", "pm25", "i2", "w22", "w47", "w56", "pl120", "ph2.8", "i7",
                  "w12", "pm1.5", "pm2.5", "w32", "pm15", "ph5", "w19", "pw3.2", "pw2.5", "pl10", "il60", "w57", "w48",
                  "w60", "pl100", "pr80", "p16", "pl110", "w59", "w64", "w20", "ph2", "p9", "il100", "w31", "w65",
                  "ph2.4", "pr100", "p19", "ph3.5", "pa10", "pcl", "pl35", "p15", "w7", "pa6", "phcs", "w43", "p28",
                  "w6", "w3", "w25", "pl25", "il110", "p1", "w46", "pn-2", "w51", "w44", "w63", "w23", "pm20", "w8",
                  "pmblr", "w4", "i5", "il90", "w21", "p27", "pl50", "pl65", "w61", "ph2.2", "pm2", "i3", "pa18", "pw4"]

    result['categories'] = [
        {"id": category_id, "name": name}
        for category_id, name in enumerate(categories, start=1)
    ]

    # Images and annotations are collected in a single pass over the images.
    export_all = data_type == 'all'
    # NOTE(review): annotation ids start at 0 as before; some COCO tooling is
    # reported to expect ids starting at 1 -- confirm before changing.
    next_annotation_id = 0
    for img in data['imgs'].values():
        path = img['path']
        if export_all:
            # Slicing by len('all') + 1 would chop arbitrary characters off
            # the path, so the path is kept unchanged in this mode.
            file_name = path
        elif str(path).find(data_type) != -1:
            # Drop the "<data_type>/" prefix.
            file_name = path[len(data_type) + 1:]
        else:
            continue

        result['images'].append({
            "license": 1,
            "file_name": file_name,
            # Image size is a fixed 2048x2048; it is not read from `data`.
            "height": 2048,
            "width": 2048,
            "id": img['id']
        })

        for box in img['objects']:
            bbox = box['bbox']
            box_width = bbox['xmax'] - bbox['xmin']
            box_height = bbox['ymax'] - bbox['ymin']
            result['annotations'].append({
                "segmentation": _segmentation(box),
                "area": box_width * box_height,
                "iscrowd": 0,
                "image_id": img['id'],
                # COCO boxes are [x, y, width, height].
                "bbox": [bbox['xmin'], bbox['ymin'], box_width, box_height],
                # list.index raises ValueError for an unknown category.
                "category_id": categories.index(box['category']) + 1,
                "id": next_annotation_id
            })
            next_annotation_id += 1

    with open(output_name, "w") as f:
        json.dump(result, f)


if __name__ == '__main__':
    # Command-line entry point: read the TT100K annotation file, convert the
    # requested subset and write the COCO-style JSON.
    print('--- START ---')

    # (short flag, long flag, default) for each string-valued option.
    option_specs = (
        ('-t', '--data_type', 'train'),
        ('-f', '--file_name', 'data.json'),
        ('-o', '--output_name', 'output.json'),
    )
    cli = argparse.ArgumentParser()
    for short_flag, long_flag, fallback in option_specs:
        cli.add_argument(short_flag, long_flag, type=str, default=fallback)
    options = cli.parse_args()

    annotations = load_json(options.file_name)
    parse(annotations, options.data_type, options.output_name)
    print('--- DONE ---')
	'''
	Transform Tsinghua-Tencent 100K Dataset Annotations to COCO Format
	Source:
	https://gist.github.com/zhaoweizhong/7ca1f4d4fdcb0aa198732a0e7cc9b908#file-tt100k2coco-py
	'''

	import json
	import argparse


	def load_json(file_name):
	    '''
	    Read the JSON document stored at ``file_name`` and return the decoded object.

	    The file is opened inside a ``with`` block so its handle is closed as soon
	    as parsing finishes (or fails) instead of being left open until garbage
	    collection.

	    Raises whatever ``open`` raises for an unreadable path and
	    ``json.JSONDecodeError`` for malformed content.
	    '''
	    # JSON interchange files are UTF-8 (RFC 8259); do not depend on the locale.
	    with open(file_name, 'r', encoding='utf-8') as f:
	        return json.load(f)


	def _segmentation(box):
	    '''
	    Build the COCO ``segmentation`` value for one annotated object.

	    Points are taken from ``box['ellipse_org']`` when that key exists,
	    otherwise from ``box['polygon']``. Each point is read as ``xy[0], xy[1]``
	    and the pairs are flattened into a single ``[x0, y0, x1, y1, ...]`` list
	    wrapped in an outer list. Objects without either key, or with no points,
	    yield ``[]``.
	    '''
	    if 'ellipse_org' in box:
	        points = box['ellipse_org']
	    elif 'polygon' in box:
	        points = box['polygon']
	    else:
	        return []

	    flat = []
	    for xy in points:
	        flat.append(xy[0])
	        flat.append(xy[1])
	    return [flat] if flat else []


	def parse(data, data_type, output_name):
	    '''
	    Convert TT100K annotations to a COCO-style dict and write it as JSON.

	    Parameters:
	        data: decoded annotation file. ``data['imgs']`` must be a mapping whose
	            values each provide ``'path'``, ``'id'`` and ``'objects'``; every
	            object provides ``'category'`` and a ``'bbox'`` with
	            ``xmin``/``ymin``/``xmax``/``ymax``, plus optionally
	            ``'ellipse_org'`` or ``'polygon'`` point lists.
	        data_type: ``'all'`` to export every image, otherwise a string that
	            must occur in an image's path for the image to be exported
	            (e.g. ``'train'``).
	        output_name: path of the JSON file to write.

	    For a specific ``data_type`` the image ``file_name`` is the path with its
	    first ``len(data_type) + 1`` characters removed (the ``"<data_type>/"``
	    prefix). For ``'all'`` the full path is kept, because images of several
	    sub-directories end up in the same output.

	    Raises:
	        ValueError: if an object's category is not in the built-in list.
	        KeyError: if a required key is missing from ``data``.
	    '''
	    # File Format
	    result = {
	        "info": {
	            "description": "TT100K Dataset COCO Format",
	            "url": "https://github.com/zhaoweizhong",
	            "version": "1.0",
	            "year": 2020,
	            "contributor": "Zhaowei Zhong",
	            "date_created": "2020/01/04"
	        },
	        "licenses": [
	            {
	                "url": " ",
	                "id": 1,
	                "name": " "
	            }
	        ],
	        "images": [],
	        "annotations": [],
	        "categories": []
	    }

	    # Categories (the COCO category id is the 1-based position in this list)
	    categories = ["pl80", "w9", "p6", "ph4.2", "i8", "w14", "w33", "pa13", "im", "pl90", "w58", "il70", "p5", "pm55",
	                  "pl60", "ip", "p11", "pdd", "wc", "i2r", "w30", "pmr", "p23", "pl15", "pm10", "pss", "w1", "p4",
	                  "w38", "w50", "w34", "pw3.5", "iz", "w39", "w11", "p1n", "pr70", "pd", "pnl", "pg", "ph5.3", "w66",
	                  "il80", "pb", "pbm", "pm5", "w24", "w67", "w49", "pm40", "ph4", "w45", "i4", "w37", "ph2.6", "pl70",
	                  "ph5.5", "i14", "i11", "p7", "p29", "pne", "pr60", "pm13", "ph4.5", "p12", "p3", "w40", "pl5", "w13",
	                  "pr10", "p14", "i4l", "pr30", "pw4.2", "w16", "p17", "ph3", "i9", "w15", "w35", "pa8", "pt", "pr45",
	                  "w17", "pl30", "pcs", "pctl", "pr50", "ph4.4", "pm46", "pm35", "i15", "pa12", "pclr", "i1", "pcd",
	                  "pbp", "pcr", "w28", "ps", "pm8", "w18", "w2", "w52", "ph2.9", "ph1.8", "pe", "p20", "w36", "p10",
	                  "pn", "pa14", "w54", "ph3.2", "p2", "ph2.5", "w62", "w55", "pw3", "pw4.5", "i12", "ph4.3", "phclr",
	                  "i10", "pr5", "i13", "w10", "p26", "w26", "p8", "w5", "w42", "il50", "p13", "pr40", "p25", "w41",
	                  "pl20", "ph4.8", "pnlc", "ph3.3", "w29", "ph2.1", "w53", "pm30", "p24", "p21", "pl40", "w27", "pmb",
	                  "pc", "i6", "pr20", "p18", "ph3.8", "pm50", "pm25", "i2", "w22", "w47", "w56", "pl120", "ph2.8", "i7",
	                  "w12", "pm1.5", "pm2.5", "w32", "pm15", "ph5", "w19", "pw3.2", "pw2.5", "pl10", "il60", "w57", "w48",
	                  "w60", "pl100", "pr80", "p16", "pl110", "w59", "w64", "w20", "ph2", "p9", "il100", "w31", "w65",
	                  "ph2.4", "pr100", "p19", "ph3.5", "pa10", "pcl", "pl35", "p15", "w7", "pa6", "phcs", "w43", "p28",
	                  "w6", "w3", "w25", "pl25", "il110", "p1", "w46", "pn-2", "w51", "w44", "w63", "w23", "pm20", "w8",
	                  "pmblr", "w4", "i5", "il90", "w21", "p27", "pl50", "pl65", "w61", "ph2.2", "pm2", "i3", "pa18", "pw4"]

	    result['categories'] = [
	        {"id": category_id, "name": name}
	        for category_id, name in enumerate(categories, start=1)
	    ]

	    # Images and annotations are collected in a single pass over the images.
	    export_all = data_type == 'all'
	    # NOTE(review): annotation ids start at 0; some COCO tooling is reported
	    # to expect ids starting at 1 -- confirm before changing.
	    next_annotation_id = 0
	    for img in data['imgs'].values():
	        path = img['path']
	        if export_all:
	            # Slicing by len('all') + 1 would chop arbitrary characters off
	            # the path, so the path is kept unchanged in this mode.
	            file_name = path
	        elif str(path).find(data_type) != -1:
	            # Drop the "<data_type>/" prefix.
	            file_name = path[len(data_type) + 1:]
	        else:
	            continue

	        result['images'].append({
	            "license": 1,
	            "file_name": file_name,
	            # Image size is a fixed 2048x2048; it is not read from `data`.
	            "height": 2048,
	            "width": 2048,
	            "id": img['id']
	        })

	        for box in img['objects']:
	            bbox = box['bbox']
	            box_width = bbox['xmax'] - bbox['xmin']
	            box_height = bbox['ymax'] - bbox['ymin']
	            result['annotations'].append({
	                "segmentation": _segmentation(box),
	                "area": box_width * box_height,
	                "iscrowd": 0,
	                "image_id": img['id'],
	                # COCO boxes are [x, y, width, height].
	                "bbox": [bbox['xmin'], bbox['ymin'], box_width, box_height],
	                # list.index raises ValueError for an unknown category.
	                "category_id": categories.index(box['category']) + 1,
	                "id": next_annotation_id
	            })
	            next_annotation_id += 1

	    with open(output_name, "w") as f:
	        json.dump(result, f)


	if __name__ == '__main__':
	    # Command-line entry point: read the TT100K annotation file, convert the
	    # requested subset and write the COCO-style JSON.
	    print('--- START ---')
	    parser = argparse.ArgumentParser()
	    parser.add_argument('-t', '--data_type', type=str, default='train')
	    parser.add_argument('-f', '--file_name', type=str, default='data.json')
	    parser.add_argument('-o', '--output_name', type=str, default='output.json')

	    args = parser.parse_args()
	    data_type = args.data_type
	    file_name = args.file_name
	    output_name = args.output_name

	    data = load_json(file_name)
	    parse(data, data_type, output_name)
	    print('--- DONE ---')