zengyu714/convert_to_odgt.py

## convert_to_odgt.py
import json

from pathlib import Path
from tqdm import tqdm
from itertools import groupby

# Directory holding the training images; convert_to_odgt() joins each image's
# 'file_name' onto it to build (and check) the image path.
train_data_root = Path('/data1/kimmyzeng/dataset/Detect_COCO/train/JPEGIMAGES')
# Annotation file that convert_to_odgt() loads with json.
train_json_file = Path('../data/Detect_COCO/train_instances.json')
# odgt file with every converted image: written by convert_to_odgt() and read
# back by split_train_val().
trainall_odgt = Path('../data/Detect_COCO/odformat/vehicle_trainall.odgt')

# Train / validation subsets written by split_train_val().
trainout_odgt = Path('../data/Detect_COCO/odformat/vehicle_train.odgt')
trainval_odgt = Path('../data/Detect_COCO/odformat/vehicle_val.odgt')

# Create the output directory together with any missing ancestors. exist_ok
# makes this a no-op when the directory is already there, which also removes
# the gap between a separate existence check and the creation.
trainall_odgt.parent.mkdir(parents=True, exist_ok=True)


def to_index(image_id, first_image_id=2018042400001):
    """Translate an image id into a zero-based list index.

    Args:
        image_id: <int>, say, 2018042400001
        first_image_id: <int>, the id that maps to index 0
            (defaults to 2018042400001)
    Returns:
        List index, i.e., 2018042400001 - 2018042400001 = 0
    """
    return image_id - first_image_id


def convert_to_odgt():
    """Convert the json annotations into one odgt line per image.

    Loads ``train_json_file``, groups its annotations by ``image_id`` and
    writes one JSON object per image to ``trainall_odgt``. Images whose file
    does not exist under ``train_data_root`` are skipped; the number of
    skipped images is printed at the end.

    json_file: categories
        [{'id': 1, 'name': 'car', 'supercategory': 'none'},
         {'id': 2, 'name': 'pedestrian', 'supercategory': 'none'},
         {'id': 3, 'name': 'others', 'supercategory': 'none'},
         {'id': 4, 'name': 'bus', 'supercategory': 'none'},
         {'id': 5, 'name': 'van', 'supercategory': 'none'}]
    :return: None
    """
    with train_json_file.open() as fi:
        json_file = json.load(fi)

    images = json_file['images']
    # Look tags up by the category 'id' instead of by list position, so the
    # order of the 'categories' entries does not matter and an unknown id
    # raises KeyError rather than silently picking another tag.
    tag_by_id = {cat['id']: cat['name'] for cat in json_file['categories']}

    def by_image_id(anno):
        return anno['image_id']

    # groupby only merges adjacent items, so sort by the grouping key first.
    annos = sorted(json_file['annotations'], key=by_image_id)

    img_not_found_count = 0

    with trainall_odgt.open(mode='w') as fo:
        for image_id, objects in tqdm(groupby(annos, key=by_image_id)):
            # NOTE(review): this relies on 'images' being ordered by
            # consecutive ids starting at the base id used by to_index().
            image_info = images[to_index(image_id)]

            # Check for the image first so no boxes are built for a record
            # that will not be written.
            img_path = train_data_root / image_info['file_name']
            if not img_path.exists():
                img_not_found_count += 1
                continue

            # Build a fresh record per image so nothing can leak over from
            # the previous iteration.
            od_line = {
                'gtboxes': [
                    {
                        'box': obj['bbox'],
                        'occ': 0,
                        'tag': tag_by_id[obj['category_id']],
                        'extra': {'ignore': 0},
                    }
                    for obj in objects
                ],
                'fpath': str(img_path),
                # Plain string: a trailing comma here would create a tuple,
                # which json serializes as the list ["COCO"].
                'dbName': 'COCO',
                'dbInfo': {'vID': 'vehicle_train', 'frameID': -1},
                'width': image_info['width'],
                'height': image_info['height'],
                'ID': image_info['file_name'],
            }
            fo.write(f'{json.dumps(od_line)}\n')
    print(f'Can\'t find {img_not_found_count} images')


def split_train_val(train_ratio=0.9):
    """Split the lines of ``trainall_odgt`` into a train and a val file.

    The first ``int(total * train_ratio)`` lines are written to
    ``trainout_odgt`` and the remaining lines to ``trainval_odgt``. Lines are
    stripped and joined with newlines; the order of the input is kept.

    Args:
        train_ratio: fraction of the lines that goes into the train file.
    """
    with trainall_odgt.open() as src:
        records = [row.strip() for row in src]

    n_total = len(records)
    n_train = int(n_total * train_ratio)
    print(f'Total nums of data: {n_total}\tTrain: {n_train}\tVal: {n_total - n_train}')

    # Leading slice goes to the train file, the rest to the val file.
    splits = (
        (trainout_odgt, records[:n_train]),
        (trainval_odgt, records[n_train:]),
    )
    for target, subset in splits:
        with target.open(mode='w+') as sink:
            sink.write('\n'.join(subset))


if __name__ == '__main__':
    # Run the pipeline in order: build the full odgt file first, then split
    # it into the train and val files.
    for step in (convert_to_odgt, split_train_val):
        step()

"""
# odgt example
{"gtboxes": [{"box": [250, 168, 70, 65], "occ": 0, "tag": "person", "extra": {"ignore": 0}},
               {"box": [435, 294, 13, 8], "occ": 0, "tag": "car", "extra": {"ignore": 0}},
               {"box": [447, 293, 12, 8], "occ": 0, "tag": "car", "extra": {"ignore": 0}},
               {"box": [460, 291, 13, 9], "occ": 0, "tag": "car", "extra": {"ignore": 0}},
               {"box": [407, 287, 12, 10], "occ": 0, "tag": "car", "extra": {"ignore": 0}},
               {"box": [618, 289, 11, 8], "occ": 0, "tag": "car", "extra": {"ignore": 0}},
               {"box": [512, 294, 21, 5], "occ": 0, "tag": "car", "extra": {"ignore": 0}},
               {"box": [285, 370, 12, 19], "occ": 0, "tag": "person", "extra": {"ignore": 0}},
               {"box": [61, 43, 46, 79], "occ": 0, "tag": "kite", "extra": {"ignore": 0}},
               {"box": [238, 158, 61, 55], "occ": 0, "tag": "surfboard", "extra": {"ignore": 0}}],
    "fpath"  : "/val2014/COCO_val2014_000000532481.jpg",
    "dbName" : "COCO",
    "dbInfo" : {"vID": "COCO_trainval2014_womini", "frameID": -1},
    "width"  : 640,
    "height" : 426,
    "ID"     : "COCO_val2014_000000532481.jpg"}
"""
	import json

	from pathlib import Path
	from tqdm import tqdm
	from itertools import groupby

	# Directory holding the training images; convert_to_odgt() joins each image's
	# 'file_name' onto it to build (and check) the image path.
	train_data_root = Path('/data1/kimmyzeng/dataset/Detect_COCO/train/JPEGIMAGES')
	# Annotation file that convert_to_odgt() loads with json.
	train_json_file = Path('../data/Detect_COCO/train_instances.json')
	# odgt file with every converted image: written by convert_to_odgt() and read
	# back by split_train_val().
	trainall_odgt = Path('../data/Detect_COCO/odformat/vehicle_trainall.odgt')

	# Train / validation subsets written by split_train_val().
	trainout_odgt = Path('../data/Detect_COCO/odformat/vehicle_train.odgt')
	trainval_odgt = Path('../data/Detect_COCO/odformat/vehicle_val.odgt')

	# Create the output directory together with any missing ancestors. exist_ok
	# makes this a no-op when the directory is already there, which also removes
	# the gap between a separate existence check and the creation.
	trainall_odgt.parent.mkdir(parents=True, exist_ok=True)


	def to_index(image_id, first_image_id=2018042400001):
	    """Translate an image id into a zero-based list index.

	    Args:
	        image_id: <int>, say, 2018042400001
	        first_image_id: <int>, the id that maps to index 0
	            (defaults to 2018042400001)
	    Returns:
	        List index, i.e., 2018042400001 - 2018042400001 = 0
	    """
	    return image_id - first_image_id


	def convert_to_odgt():
	    """Convert the json annotations into one odgt line per image.

	    Loads ``train_json_file``, groups its annotations by ``image_id`` and
	    writes one JSON object per image to ``trainall_odgt``. Images whose file
	    does not exist under ``train_data_root`` are skipped; the number of
	    skipped images is printed at the end.

	    json_file: categories
	        [{'id': 1, 'name': 'car', 'supercategory': 'none'},
	         {'id': 2, 'name': 'pedestrian', 'supercategory': 'none'},
	         {'id': 3, 'name': 'others', 'supercategory': 'none'},
	         {'id': 4, 'name': 'bus', 'supercategory': 'none'},
	         {'id': 5, 'name': 'van', 'supercategory': 'none'}]
	    :return: None
	    """
	    with train_json_file.open() as fi:
	        json_file = json.load(fi)

	    images = json_file['images']
	    # Look tags up by the category 'id' instead of by list position, so the
	    # order of the 'categories' entries does not matter and an unknown id
	    # raises KeyError rather than silently picking another tag.
	    tag_by_id = {cat['id']: cat['name'] for cat in json_file['categories']}

	    def by_image_id(anno):
	        return anno['image_id']

	    # groupby only merges adjacent items, so sort by the grouping key first.
	    annos = sorted(json_file['annotations'], key=by_image_id)

	    img_not_found_count = 0

	    with trainall_odgt.open(mode='w') as fo:
	        for image_id, objects in tqdm(groupby(annos, key=by_image_id)):
	            # NOTE(review): this relies on 'images' being ordered by
	            # consecutive ids starting at the base id used by to_index().
	            image_info = images[to_index(image_id)]

	            # Check for the image first so no boxes are built for a record
	            # that will not be written.
	            img_path = train_data_root / image_info['file_name']
	            if not img_path.exists():
	                img_not_found_count += 1
	                continue

	            # Build a fresh record per image so nothing can leak over from
	            # the previous iteration.
	            od_line = {
	                'gtboxes': [
	                    {
	                        'box': obj['bbox'],
	                        'occ': 0,
	                        'tag': tag_by_id[obj['category_id']],
	                        'extra': {'ignore': 0},
	                    }
	                    for obj in objects
	                ],
	                'fpath': str(img_path),
	                # Plain string: a trailing comma here would create a tuple,
	                # which json serializes as the list ["COCO"].
	                'dbName': 'COCO',
	                'dbInfo': {'vID': 'vehicle_train', 'frameID': -1},
	                'width': image_info['width'],
	                'height': image_info['height'],
	                'ID': image_info['file_name'],
	            }
	            fo.write(f'{json.dumps(od_line)}\n')
	    print(f'Can\'t find {img_not_found_count} images')


	def split_train_val(train_ratio=0.9):
	    """Split the lines of ``trainall_odgt`` into a train and a val file.

	    The first ``int(total * train_ratio)`` lines are written to
	    ``trainout_odgt`` and the remaining lines to ``trainval_odgt``. Lines are
	    stripped and joined with newlines; the order of the input is kept.

	    Args:
	        train_ratio: fraction of the lines that goes into the train file.
	    """
	    with trainall_odgt.open() as src:
	        records = [row.strip() for row in src]

	    n_total = len(records)
	    n_train = int(n_total * train_ratio)
	    print(f'Total nums of data: {n_total}\tTrain: {n_train}\tVal: {n_total - n_train}')

	    # Leading slice goes to the train file, the rest to the val file.
	    splits = (
	        (trainout_odgt, records[:n_train]),
	        (trainval_odgt, records[n_train:]),
	    )
	    for target, subset in splits:
	        with target.open(mode='w+') as sink:
	            sink.write('\n'.join(subset))


	if __name__ == '__main__':
	    # Run the pipeline in order: build the full odgt file first, then split
	    # it into the train and val files.
	    for step in (convert_to_odgt, split_train_val):
	        step()

	"""
	# odgt example
	{"gtboxes": [{"box": [250, 168, 70, 65], "occ": 0, "tag": "person", "extra": {"ignore": 0}},
	{"box": [435, 294, 13, 8], "occ": 0, "tag": "car", "extra": {"ignore": 0}},
	{"box": [447, 293, 12, 8], "occ": 0, "tag": "car", "extra": {"ignore": 0}},
	{"box": [460, 291, 13, 9], "occ": 0, "tag": "car", "extra": {"ignore": 0}},
	{"box": [407, 287, 12, 10], "occ": 0, "tag": "car", "extra": {"ignore": 0}},
	{"box": [618, 289, 11, 8], "occ": 0, "tag": "car", "extra": {"ignore": 0}},
	{"box": [512, 294, 21, 5], "occ": 0, "tag": "car", "extra": {"ignore": 0}},
	{"box": [285, 370, 12, 19], "occ": 0, "tag": "person", "extra": {"ignore": 0}},
	{"box": [61, 43, 46, 79], "occ": 0, "tag": "kite", "extra": {"ignore": 0}},
	{"box": [238, 158, 61, 55], "occ": 0, "tag": "surfboard", "extra": {"ignore": 0}}],
	"fpath" : "/val2014/COCO_val2014_000000532481.jpg",
	"dbName" : "COCO",
	"dbInfo" : {"vID": "COCO_trainval2014_womini", "frameID": -1},
	"width" : 640,
	"height" : 426,
	"ID" : "COCO_val2014_000000532481.jpg"}
	"""