Skip to content

Instantly share code, notes, and snippets.

@harrisonford
Last active December 12, 2019 19:34
Show Gist options
  • Save harrisonford/925decadd643dea481f3f6f40ee96fbb to your computer and use it in GitHub Desktop.
Save harrisonford/925decadd643dea481f3f6f40ee96fbb to your computer and use it in GitHub Desktop.
import json
import numpy as np
import pandas as pd
from pycocotools import mask as cocomask
# Order in which parts were clicked during annotation. Annotators did not type
# an id for each tag (it is faster that way) but always tagged in this exact
# sequence, so the id can be recovered from the position inside a tag set.
tag_order = [
    'right_ankle', 'right_knee', 'right_hip',
    'left_hip', 'left_knee', 'left_ankle',
    'pelvis', 'thorax', 'neck', 'chin', 'forehead',
    'right_wrist', 'right_elbow', 'right_shoulder',
    'left_shoulder', 'left_elbow', 'left_wrist',
    'right_ear', 'right_eye', 'left_eye', 'left_ear', 'nose',
    'box_head', 'box_body', 'segm',
]

# Keypoint order that models tend to use for the MSCOCO format.
coco_order = [
    'nose',
    'right_shoulder', 'right_elbow', 'right_wrist',
    'left_shoulder', 'left_elbow', 'left_wrist',
    'right_hip', 'right_knee', 'right_ankle',
    'left_hip', 'left_knee', 'left_ankle',
    'right_eye', 'left_eye', 'right_ear', 'left_ear',
]

# MSCOCO parts that get saved, mapped to their slot in coco_order
# (example: forehead is not saved, that one belongs to the MPII database).
coco_parts = {part_name: slot for slot, part_name in enumerate(coco_order)}
# convert VIA (VGG Image Annotator) video annotations to a dataframe (and maybe later to MSCOCO format)
# "loaded_via_json" is the dict returned by json.load() on the opened VIA project file
def via_video2df(loaded_via_json):
    """Flatten VIA video annotations into a pandas DataFrame.

    The annotations carry no explicit part id, so each one is labelled by its
    position inside the current tag set, following the module-level
    ``tag_order``. A new tag set starts when the timestamp advances by at
    least 0.15 over the set's first timestamp, or when the annotation belongs
    to a different video than the previous one.

    Args:
        loaded_via_json: dict parsed from the annotation file; its
            'metadata' and 'file' entries are read.

    Returns:
        pandas.DataFrame with columns 'file', 'timestamp', 'id', 'xy' and
        'visible', one row per annotation. 'timestamp' is the timestamp of
        the tag set the row was assigned to, not necessarily its own.

    Raises:
        IndexError: if a single tag set holds more annotations than
            ``tag_order`` has entries.
    """
    # saving output and raw annotations input
    extracted_data = []
    annotation_data = loaded_via_json['metadata']
    file_dict = loaded_via_json['file']

    # iterators for current tags (needed because some tags are in a neighbor frame)
    current_frame = 0
    current_vid = None
    tag_position = 0

    # iterate over data and make their real ids and visible value
    for value in annotation_data.values():
        frame = value['z'][0]
        vid = value['vid']

        # A different video restarts the tag cycle even when its timestamp is
        # not past the previous set's; otherwise tags would keep counting
        # from the previous video's position.
        switched_video = current_vid is not None and vid != current_vid
        # TODO: make a more robust way of cycling through annotations
        if switched_video or frame >= current_frame + 0.15:
            # next tag set, reset id count and update frame
            tag_position = 0
            current_frame = frame
        current_vid = vid

        tag = tag_order[tag_position]
        xy = value['xy']

        # THIS PART IS JUST FOR MY DATA:
        # av['3'] has visible value, if empty it's a true, option '0' is true, '1' is false
        attributes = value['av']
        visible = ('3' not in attributes) or not bool(int(attributes['3']))

        file_id = file_dict[vid]['fname']
        # xy[0] is dropped (presumably the VIA shape-type id -- confirm); the rest are coordinates
        extracted_data.append([file_id, current_frame, tag, xy[1:], visible])
        tag_position += 1
        print("tagging {} as {} for frame {} with xy {}".format(tag, visible, current_frame, xy))

    # return as a dataframe
    return pd.DataFrame(extracted_data, columns=['file', 'timestamp', 'id', 'xy', 'visible'])
def _new_coco_cycle(file_id, frame_id):
    """Return an empty accumulator for one annotated frame (one tag cycle)."""
    return dict(file=file_id, frame=frame_id, bbox=[], segm=[], area=0,
                keypoints=np.zeros(len(coco_order) * 3))


def via_video_df2coco(video_df):
    """Build an MSCOCO-style keypoint dataset dict from an annotation DataFrame.

    Rows are consumed in order and grouped into cycles: a keypoint row whose
    slot is already filled in the current cycle starts a new one. Every cycle
    with at least one keypoint produces one entry in 'annotations' and one in
    'images', including the last cycle of the DataFrame.

    Args:
        video_df: DataFrame with columns 'file', 'timestamp', 'id', 'xy' and
            'visible' (the layout produced by ``via_video2df``).

    Returns:
        dict with the keys 'categories', 'annotations', 'info', 'images' and
        'licenses'. In each annotation:
          * 'image_id' / 'id' are the file name and timestamp of the row that
            opened the cycle;
          * 'keypoints' is a flat [x, y, v] list ordered as ``coco_order``,
            with v = 0 (no data), 1 (not visible) or 2 (visible);
          * 'num_keypoints' is the number of keypoints with v > 0;
          * 'bbox' comes from ``box_approx`` and 'area' from ``segm_area``.
    """
    # first: 'categories' data
    categories = [dict(supercategory='person', id=1, skeleton=[],
                       keypoints=coco_order, name='baby')]

    # second: group the rows into per-frame cycles
    cycles = []
    cycle = None
    rows = zip(video_df['file'], video_df['timestamp'], video_df['id'],
               video_df['xy'], video_df['visible'])
    for file_id, frame_id, part_id, xy, visible in rows:
        is_keypoint = part_id in coco_parts
        if not (is_keypoint or part_id in ('segm', 'box_body')):
            continue  # box_head and non-COCO parts are not exported for now

        slot = coco_parts[part_id] * 3 if is_keypoint else None
        # The visibility flag is 1 or 2 once a keypoint is stored, so it marks
        # a filled slot even when the x coordinate itself is exactly 0.
        if cycle is None or (is_keypoint and cycle['keypoints'][slot + 2] != 0):
            cycle = _new_coco_cycle(file_id, frame_id)
            cycles.append(cycle)

        if part_id == 'segm':
            cycle['segm'] = xy
            cycle['area'] = segm_area(xy)
        elif part_id == 'box_body':
            # we approximate the silly polygon to a square (very roughly)
            cycle['bbox'] = box_approx(xy)  # x0, y0, w, h
        else:
            # 1 = not visible, 2 = visible, 0 = no data
            cycle['keypoints'][slot:slot + 3] = (xy[0], xy[1], int(bool(visible)) + 1)

    # third: 'annotations' and 'images' data, one entry of each per cycle
    annotations = []
    images = []
    for done in cycles:
        keypoints = done['keypoints']
        labelled = int(np.count_nonzero(keypoints[2::3]))
        if not labelled:
            continue  # only a box/segmentation was seen, nothing to export
        annotations.append(dict(image_id=done['file'], category_id=1, iscrowd=0,
                                num_keypoints=labelled, id=done['frame'],
                                segmentation=done['segm'], area=done['area'],
                                keypoints=keypoints.tolist(), bbox=done['bbox']))
        # TODO: We add a lot of dummy numbers to image dict (check wid-hei most importantly)
        images.append(dict(date_captured='secret_hehexd', id=done['frame'], coco_url='no-coco',
                           height=0, width=0, license=0, file_name=done['file'],
                           flickr_url='who_uses_flicker_these_days?'))

    # fourth: 'info' data
    info = dict(url='https://github.com/harrisonford', contributor='harrisonford', year=2019,
                description='alpha', date_created='2019', version=1.0)

    # fifth: 'licenses' data
    licenses = ['private']

    # put data in final dictionary
    return dict(categories=categories, annotations=annotations, info=info,
                images=images, licenses=licenses)
# a very rough way to approximate a polygon with a rectangle; input format is [x1, y1, ..., xn, yn]
def box_approx(xy):
    """Roughly approximate a polygon with an axis-aligned box.

    Args:
        xy: flat coordinate sequence [x1, y1, ..., xn, yn].

    Returns:
        [x0, y0, w, h] where (x0, y0) holds the smallest x and the smallest y,
        and w / h are twice the distance from that corner to the mean x / y
        of the vertices.
    """
    xs, ys = xy[0::2], xy[1::2]

    # corner of the box: minimum of each coordinate
    left, top = np.min(xs), np.min(ys)

    # the averaged vertex is taken as the box centre, so each extent is
    # double the corner-to-centre distance
    width = 2 * (np.mean(xs) - left)
    height = 2 * (np.mean(ys) - top)

    return [left, top, width, height]
def segm_area(xy, im_wid=1920, im_hei=1080):
    """Return the area of a polygon as an int, computed with pycocotools.

    Args:
        xy: a single polygon (presumably a flat [x1, y1, ..., xn, yn] list --
            confirm against the caller).
        im_wid: image width handed to pycocotools.
        im_hei: image height handed to pycocotools.
    """
    # using pycocotools area api: encode the polygon, merge it into one mask, measure it
    polygons = [xy]
    encoded = cocomask.frPyObjects(polygons, im_hei, im_wid)
    return int(cocomask.area(cocomask.merge(encoded)))
def load_annotations(json_path):
    """Load a JSON annotation file and return the parsed object.

    The file is decoded as UTF-8 rather than with the platform's locale
    encoding, so non-ASCII text (e.g. file names) loads the same everywhere.

    Args:
        json_path: path of the JSON file to read.

    Returns:
        Whatever ``json.load`` yields for the file's content.
    """
    with open(json_path, encoding='utf-8') as f:
        return json.load(f)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment