Skip to content

Instantly share code, notes, and snippets.

@iamarchisha
Last active July 10, 2021 03:13
Show Gist options
  • Save iamarchisha/f9fe4934cb39951e515be00ebde6c66d to your computer and use it in GitHub Desktop.
Save iamarchisha/f9fe4934cb39951e515be00ebde6c66d to your computer and use it in GitHub Desktop.
Convert a Ground Truth manifest to PASCAL format. The manifest must first be converted to a NumPy ".npy" file, and the categories must also be stored as an ".npy" file. Use an interactive Python shell to run the script.
import re
import os
import json
import glob
import shutil
import logging
import boto3
import pickle
import collections
import numpy as np
import pandas as pd
from itertools import islice
##############################################
# Manifest files (already converted to ".npy"), in the order they are loaded.
manifest_list = [
    'food_manifest_dict_1.npy',
    'zee-kitchen-food-2.npy',
    'zee-outfits-accessories-1.npy',
    'zee-outfits-accessories-2.npy',
]
def manifest_to_list(manifest):
    """Group manifest records by the image they refer to.

    Args:
        manifest: Iterable of manifest records (mappings); each record must
            have a ``'source-ref'`` key.

    Returns:
        A list of lists: one inner list per distinct ``'source-ref'`` value,
        in first-seen order, containing that image's records in input order.

    Raises:
        KeyError: If a record has no ``'source-ref'`` key.
    """
    grouped = collections.defaultdict(list)
    for record in manifest:
        grouped[record['source-ref']].append(record)
    return list(grouped.values())
# Load every manifest and group its records per image.
# NOTE(security): allow_pickle=True unpickles arbitrary objects, which can
# execute code -- only load ".npy" files that come from a trusted source.
result_list_1 = manifest_to_list(np.load(manifest_list[0], allow_pickle=True))
result_list_2 = manifest_to_list(np.load(manifest_list[1], allow_pickle=True))
result_list_3 = manifest_to_list(np.load(manifest_list[2], allow_pickle=True))
result_list_4 = manifest_to_list(np.load(manifest_list[3], allow_pickle=True))
category_map = np.load("category_map.npy", allow_pickle=True)
# A dict written with np.save comes back wrapped in a 0-d object array, on
# which `label in category_map` / `category_map[label]` do not behave like
# dict operations; unwrap it so the name -> class id lookups work.
if category_map.ndim == 0:
    category_map = category_map.item()
# category_map.npy is expected to hold a dict that looks something like this:
# category_map = {
# 'cigarette':0,
# 'pipe':1,
# 'bong':2,
# 'ashtray':3
# }
##############################################
def pascal_sagemaker(json_file, zee_object_name, categories=None):
    """Convert one manifest record into a JSON-serialisable annotation document.

    Args:
        json_file: One manifest record (a dict). It must contain
            ``'source-ref'`` (the image URI), ``zee_object_name`` (a dict with
            ``'image_size'`` and ``'annotations'``) and
            ``zee_object_name + "-metadata"`` (a dict with ``'class-map'``).
        zee_object_name: Key of the labelling attribute inside the record.
        categories: Optional mapping of label name -> output class id.
            Defaults to the module-level ``category_map``.

    Returns:
        ``(json_document, image_filename)``, or ``None`` when the record's
        class-map is empty. ``json_document`` has the keys ``'file'``,
        ``'image_size'``, ``'annotations'`` and ``'categories'``; labels that
        are not present in ``categories`` (and the boxes carrying them) are
        left out.

    Raises:
        KeyError: If a required key is missing from the record or from a box.
        IndexError: If ``'image_size'`` is an empty list.
    """
    if categories is None:
        # Fall back to the mapping loaded at module level.
        categories = category_map

    labelling = json_file[zee_object_name]
    class_map = json_file[zee_object_name + "-metadata"]['class-map']
    image_filename = json_file['source-ref'].split('/')[-1]
    size = labelling['image_size'][0]

    if not class_map:
        # No labels were recorded for this image: nothing to convert.
        return None

    # Keep only the labels we know an output class id for.
    # NOTE(review): assumes class-map keys are the string form of each box's
    # 'class_id', as in Ground Truth bounding-box output -- confirm against
    # your manifests.
    known_labels = {
        str(source_id): name
        for source_id, name in class_map.items()
        if name in categories
    }

    json_document = {
        'file': image_filename,
        'image_size': [{
            'width': size['width'],
            'height': size['height'],
            'depth': 3,  # depth is always written as 3, regardless of the record
        }],
        'annotations': [],
        'categories': [
            {'class_id': categories[name], 'name': name}
            for name in known_labels.values()
        ],
    }

    for box in labelling['annotations']:
        # Resolve the label per box, so images with several classes do not
        # get every box tagged with the same (or a duplicated) class.
        name = known_labels.get(str(box['class_id']))
        if name is None:
            continue
        json_document['annotations'].append({
            'class_id': categories[name],
            'top': box['top'],
            'left': box['left'],
            'width': box['width'],
            'height': box['height'],
        })

    return json_document, image_filename
##############################################
# Initiate the S3 resource.
s3 = boto3.resource('s3')
# Select the bucket.
bucket_name = "zee-objects"
bucket = s3.Bucket(bucket_name)
# Key prefix under which the converted JSON documents are written.
dest_path = "all-sagemaker-json/"
# Convert each image of the first manifest and upload one JSON per image.
for jsons in result_list_1:
    try:
        # Only the first record of each per-image group is converted.
        json_file = jsons[0]
        converted = pascal_sagemaker(json_file, 'zee-kitchen-food-1')
        if converted is None:
            # The converter found nothing to write for this record.
            continue
        data_dict, json_name = converted
        # splitext drops the extension whatever its length (".jpg", ".jpeg").
        key = dest_path + "{}.json".format(os.path.splitext(json_name)[0])
        # Write the JSON document to S3.
        s3.Object(bucket_name, key).put(Body=json.dumps(data_dict))
    except Exception:
        # Stay best-effort (one bad record must not stop the run), but leave
        # a trace instead of silently discarding the failure.
        logging.exception("Skipping a manifest record that could not be converted or uploaded")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment