Last active
December 12, 2019 19:34
-
-
Save harrisonford/925decadd643dea481f3f6f40ee96fbb to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import numpy as np | |
import pandas as pd | |
from pycocotools import mask as cocomask | |
# Order in which annotators placed their tags. The tags were never labelled by
# hand (it is faster that way); everybody always tagged in this same order, so
# the part name can be recovered from the position of a tag inside its set.
tag_order = [
    'right_ankle', 'right_knee', 'right_hip',
    'left_hip', 'left_knee', 'left_ankle',
    'pelvis', 'thorax', 'neck', 'chin', 'forehead',
    'right_wrist', 'right_elbow', 'right_shoulder',
    'left_shoulder', 'left_elbow', 'left_wrist',
    'right_ear', 'right_eye', 'left_eye', 'left_ear', 'nose',
    'box_head', 'box_body', 'segm',
]

# Keypoint order that models tend to use for the MSCOCO format.
coco_order = [
    'nose',
    'right_shoulder', 'right_elbow', 'right_wrist',
    'left_shoulder', 'left_elbow', 'left_wrist',
    'right_hip', 'right_knee', 'right_ankle',
    'left_hip', 'left_knee', 'left_ankle',
    'right_eye', 'left_eye', 'right_ear', 'left_ear',
]

# MSCOCO parts to be saved, mapped to their position in ``coco_order``
# (example: 'forehead' is not saved, that one is for the MPII database).
coco_parts = {part: position for position, part in enumerate(coco_order)}
def via_video2df(loaded_via_json):
    """Convert video VIA-VGG annotations to a dataframe.

    The part name of every annotation is recovered from its position inside
    the current tag set, following ``tag_order``.

    Args:
        loaded_via_json: the result of ``json.load`` on a VIA project file;
            its ``'metadata'`` and ``'file'`` entries are read.

    Returns:
        A ``pandas.DataFrame`` with one row per metadata entry and the columns
        ``file``, ``timestamp``, ``id``, ``xy`` and ``visible``.

    Raises:
        IndexError: if a single tag set holds more annotations than there are
            names in ``tag_order``.
    """
    metadata = loaded_via_json['metadata']
    files = loaded_via_json['file']

    rows = []
    # State of the tag set being read (needed because some tags of a set are
    # stored on a neighbouring frame).
    set_start = 0
    next_tag = 0
    for entry in metadata.values():
        timestamp = entry['z'][0]
        # TODO: make a more robust way of cycling through annotations
        if timestamp >= set_start + 0.15:
            # A new tag set begins: restart the naming and remember its time.
            next_tag, set_start = 0, timestamp

        tag = tag_order[next_tag]
        next_tag += 1

        xy = entry['xy']

        # THIS PART IS JUST FOR MY DATA:
        # attribute '3' carries the visibility flag. A missing attribute or
        # option '0' means visible, option '1' means not visible.
        attributes = entry['av']
        if '3' in attributes:
            visible = int(attributes['3']) == 0
        else:
            visible = True

        file_id = files[entry['vid']]['fname']
        # The first element of xy is dropped (presumably the VIA shape id —
        # confirm against the VIA project format).
        rows.append([file_id, set_start, tag, xy[1:], visible])
        print("tagging {} as {} for frame {} with xy {}".format(tag, visible, set_start, xy))

    return pd.DataFrame(rows, columns=['file', 'timestamp', 'id', 'xy', 'visible'])
def _iter_tag_cycles(video_df):
    """Yield ``((file, timestamp), keypoints, bbox, segm, area)`` per tag cycle.

    A cycle is closed when a keypoint that was already stored shows up again,
    and once more at the end of the dataframe so the last cycle is not lost.
    """
    n_values = len(coco_order) * 3
    keypoints = np.zeros(n_values)  # empty container, .tolist() later so it's serializable
    bbox = []
    segm = []
    area = 0  # this is the segmentation area
    origin = None  # (file, timestamp) of the first keypoint of the open cycle

    # Iterate by position so a dataframe with a non-default index also works.
    rows = zip(video_df['file'], video_df['timestamp'], video_df['id'],
               video_df['xy'], video_df['visible'])
    for file_id, timestamp, part_id, xy, visible in rows:
        if part_id == 'segm':
            segm = xy
            area = segm_area(xy)
        elif part_id == 'box_body':
            bbox = box_approx(xy)  # x0, y0, w, h
        elif part_id in coco_parts:  # box_head and non-COCO parts are ignored
            slot = coco_parts[part_id] * 3
            # The visibility slot is 1 or 2 once a keypoint is stored, so it
            # marks "already set" even for a keypoint located at x == 0.
            if keypoints[slot + 2] != 0:
                yield origin, keypoints, bbox, segm, area
                keypoints = np.zeros(n_values)
                bbox, segm, area, origin = [], [], 0, None
            if origin is None:
                origin = (file_id, timestamp)
            keypoints[slot] = xy[0]
            keypoints[slot + 1] = xy[1]
            # 1 = not visible, 2 = visible, 0 = no data
            keypoints[slot + 2] = int(visible) + 1

    if origin is not None:
        yield origin, keypoints, bbox, segm, area


def via_video_df2coco(video_df):
    """Build an MSCOCO-style dictionary from a ``via_video2df`` dataframe.

    Args:
        video_df: dataframe with the columns ``file``, ``timestamp``, ``id``,
            ``xy`` and ``visible``.

    Returns:
        A dict with the keys ``categories``, ``annotations``, ``info``,
        ``images`` and ``licenses``. One annotation and one image entry are
        produced per tag cycle, including the last one, and both carry the
        file name and timestamp of the cycle they were built from.
        ``num_keypoints`` is the number of keypoints stored in the cycle.
    """
    # first: 'categories' data
    categories = [dict(supercategory='person', id=1, skeleton=[],
                       keypoints=coco_order, name='baby')]

    # second and fourth: 'annotations' and 'images' data, one of each per cycle
    annotations = []
    images = []
    for (video_id, frame_id), keypoints, bbox, segm, area in _iter_tag_cycles(video_df):
        # NOTE(review): segmentation is stored as the flat polygon exactly as
        # tagged; confirm whether consumers expect a list of polygons instead.
        annotations.append(dict(
            image_id=video_id, category_id=1, iscrowd=0,
            num_keypoints=int(np.count_nonzero(keypoints[2::3])),
            id=frame_id, segmentation=segm, area=area,
            keypoints=keypoints.tolist(), bbox=bbox))
        # TODO: We add a lot of dummy numbers to image dict (check wid-hei most importantly)
        images.append(dict(
            date_captured='secret_hehexd', id=frame_id, coco_url='no-coco',
            height=0, width=0, license=0, file_name=video_id,
            flickr_url='who_uses_flicker_these_days?'))

    # third: 'info' data
    info = dict(url='https://github.com/harrisonford', contributor='harrisonford', year=2019,
                description='alpha', date_created='2019', version=1.0)

    # fifth: 'licenses' data
    licenses = ['private']

    # put data in final dictionary
    return dict(categories=categories, annotations=annotations, info=info,
                images=images, licenses=licenses)
def box_approx(xy):
    """Return the axis-aligned bounding box of a flat polygon.

    Args:
        xy: polygon coordinates in the format ``[x1, y1, ..., xn, yn]``.

    Returns:
        ``[x0, y0, w, h]`` as plain Python floats, where ``(x0, y0)`` is the
        corner with the smallest x and the smallest y, and ``w``/``h`` span to
        the largest x and y. Plain floats keep the result JSON-serializable
        even when the input holds NumPy integers.

    Raises:
        ValueError: if ``xy`` does not contain at least one x and one y value.
    """
    xs = xy[0::2]
    ys = xy[1::2]
    if len(xs) == 0 or len(ys) == 0:
        raise ValueError("box_approx needs at least one (x, y) point, got {!r}".format(xy))

    x0 = float(min(xs))
    y0 = float(min(ys))
    # Use the real extent rather than twice the distance to the mean vertex:
    # the mean only gives the right size when the vertices are evenly spread.
    w = float(max(xs)) - x0
    h = float(max(ys)) - y0
    return [x0, y0, w, h]
def segm_area(xy, im_wid=1920, im_hei=1080):
    """Return the area of a polygon as an int, using the pycocotools mask API.

    Args:
        xy: flat polygon coordinates; passed to pycocotools as a single polygon.
        im_wid: width of the image the polygon was drawn on.
        im_hei: height of the image the polygon was drawn on.
    """
    polygons = [xy]
    # pycocotools takes the image size as (height, width).
    merged_rle = cocomask.merge(cocomask.frPyObjects(polygons, im_hei, im_wid))
    return int(cocomask.area(merged_rle))
def load_annotations(json_path):
    """Load a JSON annotation file and return the parsed object.

    Args:
        json_path: path of the JSON file to read.

    Returns:
        Whatever ``json.load`` produces for the file content.
    """
    # Read as UTF-8 explicitly: the platform default encoding can mis-decode
    # non-ASCII text such as file names.
    with open(json_path, encoding="utf-8") as f:
        return json.load(f)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment