Skip to content

Instantly share code, notes, and snippets.

@anthony-sarkis
Created May 18, 2022 20:58
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save anthony-sarkis/fd20eac85d52f7d66e26289ac619afd2 to your computer and use it in GitHub Desktop.
Save anthony-sarkis/fd20eac85d52f7d66e26289ac619afd2 to your computer and use it in GitHub Desktop.
cloud convert gcp
import json
import requests
import time
from PIL import Image, ExifTags
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from random import randrange
# Bucket name plus object prefix under which the images live in Cloud Storage.
# convert() builds each image URI as gs://{GCP_BUCKET_NAME}/{file_id}.jpg, so a
# trailing slash here would produce a double slash in the URI.
# NOTE(review): the value looks like a placeholder -- replace before running.
GCP_BUCKET_NAME = 'cloud-ai-platform-something-something/images'
def _read_exif_orientation(img):
    """Return the EXIF ``Orientation`` value of *img*, or ``None`` if unavailable."""
    try:
        raw_exif = img._getexif()
    except Exception:
        # Best effort: images without (or with unreadable) EXIF data are
        # simply treated as not rotated.
        return None
    if not raw_exif:
        return None
    exif = {
        ExifTags.TAGS[k]: v
        for k, v in raw_exif.items()
        if k in ExifTags.TAGS
    }
    return exif.get('Orientation')


def convert(diffgram_all_data_path):
    """Convert a Diffgram JSON export into a Google Cloud bounding-box JSONL file.

    For every file entry in the export that has a ``file`` record and a
    non-empty ``instance_list``, one JSON line is written containing the
    image's ``gs://`` URI and its boxes with coordinates divided by the image
    dimensions. The matching local image ``images/{file_id}.jpg`` is opened to
    read its EXIF orientation; boxes of images with Orientation 6 are passed
    through ``rotate_box`` first.

    The result is written to ``google_format_{timestamp}.jsonl`` in the
    current directory and a few sanity counters are printed.

    Args:
        diffgram_all_data_path: Path to the Diffgram export JSON. It must
            contain a top-level ``label_map`` mapping label file ids (as
            strings) to display names.

    Raises:
        KeyError: If ``label_map`` is missing or an instance references a
            label id that is not in it.
        OSError: If the export or one of the local images cannot be opened.
    """
    with open(diffgram_all_data_path, 'r') as readfile:
        all_data = json.load(readfile)

    label_map = all_data.pop('label_map')
    # These top-level keys are export metadata, not file entries; tolerate
    # exports that do not carry them.
    all_data.pop('export_info', None)
    all_data.pop('attribute_groups_reference', None)

    export_data = []
    annotation = 0   # total boxes seen
    over = 0         # boxes with any normalized coordinate > 1.0
    rotated = 0      # boxes that went through rotate_box
    yissue = 0       # boxes where yMax < yMin
    xissue = 0       # boxes where xMax < xMin

    for file_id, item in all_data.items():
        if not item.get('file'):
            continue
        instance_list = item.get('instance_list')
        if not instance_list:
            continue

        single_file = {
            "imageGcsUri": F"gs://{GCP_BUCKET_NAME}/{file_id}.jpg",
            "boundingBoxAnnotations": []
        }
        width = item.get('image').get('width')
        height = item.get('image').get('height')

        # Read the orientation fresh for every image so a file without EXIF
        # data never inherits the orientation of the previous one, and close
        # the image handle as soon as the metadata has been read.
        with Image.open(f'images/{file_id}.jpg') as img:
            orientation = _read_exif_orientation(img)

        for instance in instance_list:
            annotation += 1
            if orientation == 6:
                instance = rotate_box(instance, width)
                rotated += 1
                # rotate_box swaps the axes, so x is normalized by the
                # height and y by the width.
                x_scale, y_scale = height, width
            else:
                x_scale, y_scale = width, height

            xMin = instance.get('x_min') / x_scale
            yMin = instance.get('y_min') / y_scale
            xMax = instance.get('x_max') / x_scale
            yMax = instance.get('y_max') / y_scale

            new_item = {
                "displayName": label_map[str(instance.get('label_file_id'))],
                "xMin": xMin,
                "yMin": yMin,
                "xMax": xMax,
                "yMax": yMax
            }
            single_file['boundingBoxAnnotations'].append(new_item)

            if xMin > 1 or yMin > 1 or xMax > 1 or yMax > 1:
                over += 1
            if yMax < yMin:
                yissue += 1
            if xMax < xMin:
                xissue += 1

        export_data.append(single_file)

    # Guard the ratio so an export without any annotations still reports.
    percent_over = over / annotation if annotation else 0.0
    print(F"Annotations {annotation} Over 1.0 {over} Percent {percent_over} ")
    print(F"rotated {rotated} ")
    print(F"yissue {yissue} xissue {xissue} ")
    print(len(export_data))

    with open(F'google_format_{time.time()}.jsonl', 'w') as outfile:
        for entry in export_data:
            json.dump(entry, outfile)
            outfile.write('\n')
def rotate_box(instance: dict, width):
    """Swap a box's axes in place and mirror the old x-range against *width*.

    The previous ``y_min``/``y_max`` become the new ``x_min``/``x_max``; the
    new ``y_min``/``y_max`` are ``width - old x_max`` and ``width - old x_min``.
    The original author describes this as a 90 degree counter-clockwise
    rotation.

    Args:
        instance: Box dict with ``x_min``, ``x_max``, ``y_min``, ``y_max``;
            it is modified in place.
        width: Value the old x coordinates are mirrored against.

    Returns:
        The same ``instance`` object, after modification.
    """
    x_lo, x_hi = instance.get('x_min'), instance.get('x_max')
    y_lo, y_hi = instance.get('y_min'), instance.get('y_max')

    # Old vertical extent becomes the horizontal extent.
    instance['x_min'] = y_lo
    instance['x_max'] = y_hi
    # Old horizontal extent is mirrored, so min and max trade places.
    instance['y_min'] = width - x_hi
    instance['y_max'] = width - x_lo
    return instance
from threading import Thread
from queue import Queue
class EventsWorker(Thread):
    """Worker thread that downloads ``(path, file_id)`` jobs taken from a queue.

    The worker loops forever; it is expected to be started as a daemon thread
    and to be waited on through ``queue.join()``.
    """

    def __init__(self, queue):
        """Store the job queue this worker consumes from."""
        super().__init__()
        self.queue = queue

    def run(self):
        """Process jobs until the interpreter exits.

        A failing job is reported and skipped instead of terminating the
        thread; otherwise enough failures would kill every worker and leave
        ``queue.join()`` waiting on items nobody consumes.
        """
        while True:
            job = self.queue.get()
            try:
                path, file_id = job
                download_one(path, file_id)
            except Exception as exc:
                print(F"Download failed for {job!r}: {exc!r}")
            finally:
                # Always acknowledge the job so queue.join() can return.
                self.queue.task_done()
def download_one(path, file_id, timeout=30):
    """Download the image at *path* and save it as ``images/{file_id}.jpg``.

    Network errors and non-success HTTP responses are reported and the file
    is skipped, so an error page is never written to disk as if it were an
    image.

    Args:
        path: URL of the image to fetch.
        file_id: Identifier used to build the local file name.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot block a worker indefinitely.
    """
    try:
        r = requests.get(path, timeout=timeout)
        r.raise_for_status()
    except requests.RequestException as exc:
        print(F"Skipping {file_id}: download failed ({exc!r})")
        return
    with open(F'images/{file_id}.jpg', 'wb') as f:
        f.write(r.content)
def download_to_folder(diffgram_all_data_path):
    """Download every annotated image of a Diffgram export into ``images/``.

    Each export entry that has a ``file`` record with a ``blob_url`` and a
    non-empty ``instance_list`` is queued for download; 25 daemon worker
    threads consume the queue and the call returns once all queued jobs have
    been acknowledged.

    Args:
        diffgram_all_data_path: Path to the Diffgram export JSON.
    """
    import os

    with open(diffgram_all_data_path) as export_file:
        all_data = json.load(export_file)

    # Top-level metadata keys are not file entries.
    all_data.pop('export_info', None)
    all_data.pop('attribute_groups_reference', None)
    all_data.pop('label_map', None)

    # The workers write to images/{file_id}.jpg, so the folder must exist.
    os.makedirs('images', exist_ok=True)

    queue = Queue()
    for _ in range(25):
        worker = EventsWorker(queue)
        worker.daemon = True
        worker.start()

    for file_id, item in all_data.items():
        file_info = item.get('file')
        if not file_info:
            continue
        if not item.get('instance_list'):
            continue
        blob_url = file_info.get('blob_url')
        if not blob_url:
            # Nothing to fetch for this entry.
            continue
        queue.put((blob_url, file_id))

    queue.join()
# Optional first step: uncomment to fetch the annotated images into images/
# before converting (convert() opens images/<file_id>.jpg for each file).
#download_to_folder("export.json")
# NOTE: there is no __main__ guard, so this runs on import as well as when the
# file is executed as a script.
convert("export.json")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment