Skip to content

Instantly share code, notes, and snippets.

@zengyu714
Created April 25, 2018 08:05
Show Gist options
  • Save zengyu714/f2d23161e4e530dbc4c39d0c398c890e to your computer and use it in GitHub Desktop.
Save zengyu714/f2d23161e4e530dbc4c39d0c398c890e to your computer and use it in GitHub Desktop.
light-head-rcnn
import json
from pathlib import Path
from tqdm import tqdm
from itertools import groupby
# Directory that holds the training images; each image's `file_name` from the
# annotation JSON is resolved relative to this directory.
train_data_root = Path('/data1/kimmyzeng/dataset/Detect_COCO/train/JPEGIMAGES')
# Annotation file loaded with `json.load` by `convert_to_odgt`.
train_json_file = Path('../data/Detect_COCO/train_instances.json')
# Combined odgt file: written by `convert_to_odgt`, read by `split_train_val`.
trainall_odgt = Path('../data/Detect_COCO/odformat/vehicle_trainall.odgt')
# Train / validation outputs written by `split_train_val`.
trainout_odgt = Path('../data/Detect_COCO/odformat/vehicle_train.odgt')
trainval_odgt = Path('../data/Detect_COCO/odformat/vehicle_val.odgt')

# All three odgt files share one parent directory, so a single call suffices.
# `parents=True` also creates missing ancestors (a bare `mkdir()` raises
# FileNotFoundError when `../data/Detect_COCO` does not exist yet), and
# `exist_ok=True` replaces the racy "check, then create" sequence.
trainall_odgt.parent.mkdir(parents=True, exist_ok=True)
def to_index(image_id, first_image_id=2018042400001):
    """Convert an image id into an offset from the first image id.

    Args:
        image_id: <int>, say, 2018042400003
        first_image_id: <int>, the id that maps to index 0. Defaults to
            2018042400001, the base id this script was written for.
    Returns:
        List index, i.e., 2018042400003 - 2018042400001 = 2.
        The result is negative when `image_id` is smaller than
        `first_image_id`; no range check is performed here.
    """
    return image_id - first_image_id
def convert_to_odgt():
    """Convert the annotation JSON into one combined odgt file.

    Loads `train_json_file`, groups its annotations by `image_id` and writes
    one JSON object per image (one per line) to `trainall_odgt`. Images whose
    file does not exist under `train_data_root` are skipped and counted; the
    count is printed at the end.

    json_file: categories
        [{'id': 1, 'name': 'car', 'supercategory': 'none'},
         {'id': 2, 'name': 'pedestrian', 'supercategory': 'none'},
         {'id': 3, 'name': 'others', 'supercategory': 'none'},
         {'id': 4, 'name': 'bus', 'supercategory': 'none'},
         {'id': 5, 'name': 'van', 'supercategory': 'none'}]
    :return: None
    """
    def by_image(anno):
        return anno['image_id']

    with train_json_file.open() as fi:
        json_file = json.load(fi)

    images = json_file['images']
    # Resolve tag names by category id rather than by list position, so the
    # result does not depend on the categories being stored in id order.
    tag_names = {cat['id']: cat['name'] for cat in json_file['categories']}
    # groupby only merges adjacent items, so sort by the grouping key first.
    annos = sorted(json_file['annotations'], key=by_image)

    img_not_found_count = 0
    with trainall_odgt.open(mode='w') as fo:
        for image_id, objects in tqdm(groupby(annos, key=by_image)):
            # NOTE(review): assumes `images` is ordered so that the position
            # returned by to_index matches the image id - confirm for new data.
            image_info = images[to_index(image_id)]

            img_path = train_data_root / image_info['file_name']
            if not img_path.exists():
                # Skip before building the boxes; nothing is written for
                # images that are missing on disk.
                img_not_found_count += 1
                continue

            gtboxes = [
                {
                    'box': obj['bbox'],
                    'occ': 0,
                    'tag': tag_names[obj['category_id']],
                    'extra': {'ignore': 0},
                }
                for obj in objects
            ]

            # A fresh dict per image, so no state leaks between lines.
            od_line = {
                'gtboxes': gtboxes,
                'fpath': str(img_path),
                # Must be a plain string: the previous trailing comma turned
                # this into a 1-tuple, which json.dumps emits as ["COCO"].
                'dbName': 'COCO',
                'dbInfo': {'vID': 'vehicle_train', 'frameID': -1},
                'width': image_info['width'],
                'height': image_info['height'],
                'ID': image_info['file_name'],
            }
            fo.write(f'{json.dumps(od_line)}\n')

    print(f'Can\'t find {img_not_found_count} images')
def split_train_val(train_ratio=0.9):
    """Split the combined odgt file into a train file and a validation file.

    Reads `trainall_odgt`; the first `int(total * train_ratio)` records, in
    file order (no shuffling), are written to `trainout_odgt` and the
    remaining records to `trainval_odgt`.

    Args:
        train_ratio: fraction of the records that goes to the train file.
    """
    with trainall_odgt.open() as fi:
        stripped = (line.strip() for line in fi)
        # Drop blank lines so they are neither counted nor written as records.
        all_odgt = [record for record in stripped if record]

    total = len(all_odgt)
    train_num = int(total * train_ratio)
    print(f'Total nums of data: {total}\tTrain: {train_num}\tVal: {total - train_num}')

    # Write every record newline-terminated so each output is a well-formed
    # line-per-record file; an empty split yields an empty file.
    with trainout_odgt.open(mode='w') as train_out:
        train_out.writelines(f'{record}\n' for record in all_odgt[:train_num])
    with trainval_odgt.open(mode='w') as val_out:
        val_out.writelines(f'{record}\n' for record in all_odgt[train_num:])
def _main():
    """Run the conversion, then split its output into train and val files."""
    # Order matters: the split step reads the file the conversion writes.
    convert_to_odgt()
    split_train_val()


if __name__ == '__main__':
    _main()
"""
# odgt example
{"gtboxes": [{"box": [250, 168, 70, 65], "occ": 0, "tag": "person", "extra": {"ignore": 0}},
{"box": [435, 294, 13, 8], "occ": 0, "tag": "car", "extra": {"ignore": 0}},
{"box": [447, 293, 12, 8], "occ": 0, "tag": "car", "extra": {"ignore": 0}},
{"box": [460, 291, 13, 9], "occ": 0, "tag": "car", "extra": {"ignore": 0}},
{"box": [407, 287, 12, 10], "occ": 0, "tag": "car", "extra": {"ignore": 0}},
{"box": [618, 289, 11, 8], "occ": 0, "tag": "car", "extra": {"ignore": 0}},
{"box": [512, 294, 21, 5], "occ": 0, "tag": "car", "extra": {"ignore": 0}},
{"box": [285, 370, 12, 19], "occ": 0, "tag": "person", "extra": {"ignore": 0}},
{"box": [61, 43, 46, 79], "occ": 0, "tag": "kite", "extra": {"ignore": 0}},
{"box": [238, 158, 61, 55], "occ": 0, "tag": "surfboard", "extra": {"ignore": 0}}],
"fpath" : "/val2014/COCO_val2014_000000532481.jpg",
"dbName" : "COCO",
"dbInfo" : {"vID": "COCO_trainval2014_womini", "frameID": -1},
"width" : 640,
"height" : 426,
"ID" : "COCO_val2014_000000532481.jpg"}
"""
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment