Skip to content

Instantly share code, notes, and snippets.

@devarshi16
Created January 13, 2020 09:53
Show Gist options
  • Save devarshi16/864790d9ee0a1cdfb6b91c5edb1dce6c to your computer and use it in GitHub Desktop.
Save devarshi16/864790d9ee0a1cdfb6b91c5edb1dce6c to your computer and use it in GitHub Desktop.
Convert Form Labeller output JSON to the NAF dataset JSON format
import os
import json
import cv2
import sys
# Form Labeller labels, split by the output list ("textBBs" / "fieldBBs")
# that a box carrying the label is appended to.
textBB_items = ["Printed Key", "Written Key", "Check Box Key", "Comment/Info"]
fieldBB_items = [None, "detectorPrediction", "None", "Printed Value", "Written Value",
                 "Check Box Value", "Logo", "Signature", "Photograph"]
# Image extensions probed, in this order, next to every annotation file.
extensions = ['.JPEG', '.png', '.jpg', '.tif', '.tiff']
# Folders processed when the script is run (trim the list to debug one folder).
folder_names = ['internal28', 'internal17', 'internal7', 'internal8', 'internal9']

# Form Labeller label -> value written under the "type" key of a converted box.
_NAF_TYPES = {
    None: "text",
    "None": "text",
    "detectorPrediction": "text",
    "Printed Key": "text",
    "Check Box Key": "text",
    "Written Key": "field",
    "Printed Value": "field",
    "Written Value": "field",
    "Signature": "field",
    "Comment/Info": "comment",
    "Logo": "graphic",
    "Photograph": "graphic",
    "Check Box Value": "fieldCheckBox",
}

# "isBlank" value for boxes stored in "fieldBBs"; labels not listed get 1.
_FIELD_IS_BLANK = {"Signature": 4, "Logo": 3}


def _convert_box(index, item):
    """Convert a single Form Labeller box.

    Args:
        index: position of the box in the input list; stored as its "id".
        item: input box, a dict with "poly_points" and "type" keys.

    Returns:
        ``(list_name, box)`` where ``list_name`` is "textBBs" or "fieldBBs",
        or ``None`` when the box is dropped.
    """
    points = item["poly_points"]
    # Only four-point polygons are kept.
    if len(points) != 4:
        return None
    # Drop boxes whose first three corners share the same x coordinate.
    if points[0][0] == points[1][0] == points[2][0]:
        return None

    item_type = item["type"]
    if item_type in textBB_items:
        list_name = "textBBs"
        is_blank = 1 if item_type == "Written Key" else 0
    elif item_type in fieldBB_items:
        list_name = "fieldBBs"
        is_blank = _FIELD_IS_BLANK.get(item_type, 1)
    else:
        print("Belongs to no group", item_type)
        return None

    # Keys are inserted in the same order as before: poly_points, type, id, isBlank.
    box = {"poly_points": points}
    if item_type in _NAF_TYPES:
        box["type"] = _NAF_TYPES[item_type]
    box["id"] = index
    box["isBlank"] = is_blank
    return list_name, box


def convert_annotations(file_data, height, width):
    """Build the output annotation dict from one loaded Form Labeller file.

    Args:
        file_data: parsed input JSON; its "textBBs" list holds every box.
        height: image height in pixels, stored under "height".
        width: image width in pixels, stored under "width".

    Returns:
        A dict with "textBBs", "fieldBBs", "pairs", "samePairs", "height"
        and "width". "pairs" and "samePairs" are always empty lists.
    """
    data = {
        "textBBs": [],
        "fieldBBs": [],
        "pairs": [],
        "samePairs": [],
        "height": height,
        # This key used to be written as the misspelled "widhth".
        "width": width,
    }
    for index, item in enumerate(file_data["textBBs"]):
        converted = _convert_box(index, item)
        if converted is not None:
            list_name, box = converted
            data[list_name].append(box)
    return data


def find_image(folder, stem):
    """Return the path of the first existing ``stem + ext`` in *folder*, else None."""
    for ext in extensions:
        candidate = os.path.join(folder, stem + ext)
        if os.path.exists(candidate):
            return candidate
    return None


def convert_folder(folder):
    """Convert every ``*.json`` annotation file found directly in *folder*.

    NOTE(review): ``save_dir`` is the input folder itself, so each annotation
    file is overwritten in place by its converted version.
    """
    json_files = [name for name in os.listdir(folder)
                  if os.path.splitext(name)[1] == '.json']
    save_dir = os.path.join(folder)
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    for jf in json_files:
        json_file_path = os.path.join(folder, jf)
        print("Reading json:", json_file_path)
        with open(json_file_path, 'r') as f:
            file_data = json.load(f)

        image_loc = find_image(folder, os.path.splitext(jf)[0])
        print("Image Loc", image_loc)
        # cv2.imread returns None instead of raising when it cannot read a file.
        img = cv2.imread(image_loc) if image_loc is not None else None
        if img is None:
            # Without the image there is no height/width; leave the
            # annotation file untouched rather than aborting the whole batch.
            print("Skipping, no readable image for", json_file_path)
            continue

        h, w = img.shape[:2]
        data = convert_annotations(file_data, h, w)
        with open(os.path.join(save_dir, jf), 'w') as new_f:
            json.dump(data, new_f)


def main():
    """Convert every folder listed in ``folder_names``."""
    for folder in folder_names:
        convert_folder(folder)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment