Skip to content

Instantly share code, notes, and snippets.

@zhaoweizhong
Created May 4, 2021 18:26
Show Gist options
  • Save zhaoweizhong/053ce08beb9047b710b3616f75130c31 to your computer and use it in GitHub Desktop.
Save zhaoweizhong/053ce08beb9047b710b3616f75130c31 to your computer and use it in GitHub Desktop.
Transform GTSDB Dataset Annotations to COCO Format
import argparse
import copy
import json
import os

from rich.progress import track
def load_txt(file_name):
    """Read a text file and return its lines without the trailing newline.

    Args:
        file_name: Path of the text file to read.

    Returns:
        A list with one string per line of the file, in file order. Empty
        lines are kept as empty strings.
    """
    # The context manager closes the handle even if reading raises; the
    # handle was previously left open until garbage collection.
    with open(file_name, 'r') as file:
        # In text mode every line carries at most one newline, at its end,
        # so stripping it is all that is needed to drop line terminators.
        return [line.rstrip('\n') for line in file]
def parse(data, *, image_count=900, train_ratio=0.7, num_classes=43,
          image_width=1360, image_height=800, output_dir='.',
          show_progress=True):
    """Convert GTSDB annotation lines to COCO JSON, split into train and test.

    Every non-blank entry of *data* is read as six ``;``-separated fields:
    ``<image file>;<xmin>;<ymin>;<xmax>;<ymax>;<class id>``. The first five
    characters of the image file field are used both as the integer image id
    and, with ``.jpg`` appended, as the COCO ``file_name``.

    Images whose id is below ``int(image_count * train_ratio)`` go to the
    train split, all others to the test split. Annotation ids come from one
    counter shared by both splits, so they are unique across the two files.

    The results are written to ``train.json`` and ``test.json`` inside
    *output_dir*, and summary counts are printed.

    Args:
        data: Iterable of annotation lines (strings without newlines).
        image_count: Total number of images assumed for the split threshold.
        train_ratio: Fraction of ``image_count`` assigned to the train split.
        num_classes: Number of categories; ids and names are ``0..n-1``.
        image_width: Width recorded for every image entry.
        image_height: Height recorded for every image entry.
        output_dir: Directory that receives ``train.json`` and ``test.json``.
        show_progress: Wrap the iteration in ``rich.progress.track`` when
            true; iterate *data* directly when false.

    Returns:
        A ``(train, test)`` tuple holding the two COCO dictionaries that were
        written to disk.

    Raises:
        ValueError: If a numeric field of a non-blank line is not an integer.
        IndexError: If a non-blank line has fewer than six fields.
    """
    # File Format
    result_train = {
        "info": {
            "description": "GTSDB Dataset COCO Format",
            "url": "https://github.com/zhaoweizhong",
            "version": "1.0",
            "year": 2021,
            "contributor": "Zhaowei Zhong",
            "date_created": "2021/05/05"
        },
        "licenses": [
            {
                "url": "https://creativecommons.org/licenses/by-nc-sa/4.0/",
                "id": 1,
                "name": "Attribution-NonCommercial-ShareAlike 4.0 International License (CC BY-NC-SA 4.0)"
            }
        ],
        "images": [],
        "annotations": [],
        "categories": [{"id": i, "name": str(i)} for i in range(num_classes)]
    }
    # Both splits share the same header; deep-copy so their lists stay separate.
    result_test = copy.deepcopy(result_train)

    # Images and Annotations
    count_train = int(image_count * train_ratio)
    # An image id belongs to exactly one split, so a single set is enough to
    # make sure each image is registered once (O(1) per annotation).
    seen_image_ids = set()
    anno_id = 0
    lines = track(data) if show_progress else data
    for annotation in lines:
        # Skip blank lines (e.g. a trailing empty line in the input file)
        # instead of failing on int('').
        if not annotation.strip():
            continue

        fields = annotation.split(';')
        stem = fields[0][:5]
        img_id = int(stem)
        xmin, ymin, xmax, ymax, class_id = (
            int(fields[i]) for i in range(1, 6)
        )
        box_width = xmax - xmin
        box_height = ymax - ymin

        target = result_train if img_id < count_train else result_test

        if img_id not in seen_image_ids:
            seen_image_ids.add(img_id)
            target['images'].append({
                "license": 1,
                "file_name": stem + '.jpg',
                "height": image_height,
                "width": image_width,
                "id": img_id
            })

        target['annotations'].append({
            "segmentation": [[]],
            "area": box_width * box_height,
            "iscrowd": 0,
            "image_id": img_id,
            # Box stored as [x, y, width, height] derived from the corners.
            "bbox": [xmin, ymin, box_width, box_height],
            "category_id": class_id,
            "id": anno_id
        })
        anno_id += 1

    print('Train Images: ' + str(len(result_train['images'])))
    print('Test Images: ' + str(len(result_test['images'])))
    print('Train Annotations: ' + str(len(result_train['annotations'])))
    print('Test Annotations: ' + str(len(result_test['annotations'])))

    with open(os.path.join(output_dir, 'train.json'), "w") as f:
        json.dump(result_train, f)
    with open(os.path.join(output_dir, 'test.json'), "w") as f:
        json.dump(result_test, f)

    return result_train, result_test
if __name__ == '__main__':
    # Script entry point: take the annotation file path from the command
    # line (-f/--file_name, 'gt.txt' when omitted), load its lines and
    # convert them.
    cli = argparse.ArgumentParser()
    cli.add_argument('-f', '--file_name', type=str, default='gt.txt')
    options = cli.parse_args()
    parse(load_txt(options.file_name))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment