Skip to content

Instantly share code, notes, and snippets.

@devarshi16
Created January 13, 2020 09:56
Show Gist options
  • Save devarshi16/b3b4f7b741a79460a5f035df3d428302 to your computer and use it in GitHub Desktop.
Save devarshi16/b3b4f7b741a79460a5f035df3d428302 to your computer and use it in GitHub Desktop.
Create the train_valid_test_split.json file for the Visual template-free parsing model. (Expects packets of 25 images + JSON files per directory; the counts can be changed.)
import os
import json
import sys
import random
# Image file extensions recognised by the script; compared case-insensitively
# against the end of each file name.
IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.tiff')
# Number of images drawn from every directory for each split.
TRAIN_COUNT = 20
VAL_COUNT = 5
# Destination of the generated split description (relative to the CWD).
OUTPUT_PATH = '../train_valid_test_split.json'


def filter_images(file_names):
    """Return the entries of *file_names* that end in an image extension.

    The extension must be the actual suffix of the name (case-insensitive),
    so a companion file such as ``scan.jpg.json`` is not mistaken for an
    image, while ``scan.JPG`` is accepted.
    """
    return [name for name in file_names
            if name.lower().endswith(IMAGE_EXTENSIONS)]


def split_images(imgs, train_count=TRAIN_COUNT, val_count=VAL_COUNT):
    """Randomly pick disjoint training and validation subsets of *imgs*.

    Args:
        imgs: image file names to draw from; the list is not modified.
        train_count: number of names to put in the training subset.
        val_count: number of names to put in the validation subset.

    Returns:
        A ``(train, val)`` tuple of lists with ``train_count`` and
        ``val_count`` names respectively, sharing no element.

    Raises:
        ValueError: if *imgs* holds fewer than ``train_count + val_count``
            names.
    """
    needed = train_count + val_count
    if len(imgs) < needed:
        raise ValueError(
            'need at least {} images ({} train + {} valid), found {}'.format(
                needed, train_count, val_count, len(imgs)))
    # One draw without replacement; any slice of the result is itself a
    # valid random sample, so the two parts are random and disjoint.
    picked = random.sample(list(imgs), needed)
    return picked[:train_count], picked[train_count:]


def build_split(dir_names, train_count=TRAIN_COUNT, val_count=VAL_COUNT):
    """Build the split dictionary for every directory in *dir_names*.

    Returns:
        ``{"test": {}, "train": {...}, "valid": {...}}`` where the train and
        valid mappings go from each directory name (exactly as given) to the
        list of image file names selected from it.

    Raises:
        ValueError: if a directory contains too few images; the message
            names the offending directory.
    """
    train = {}
    val = {}
    for dir_name in dir_names:
        imgs = filter_images(os.listdir(dir_name))
        try:
            train[dir_name], val[dir_name] = split_images(
                imgs, train_count, val_count)
        except ValueError as err:
            # Re-raise with the directory so the user knows what to fix.
            raise ValueError('{}: {}'.format(dir_name, err))
    return {"test": {}, "train": train, "valid": val}


def main(dir_names, output_path=OUTPUT_PATH):
    """Create the split for *dir_names*, print it and write it as JSON.

    Args:
        dir_names: directories to sample images from.
        output_path: file that receives the JSON document.

    Returns:
        The split dictionary that was written.
    """
    data = build_split(dir_names)
    print(data)
    with open(output_path, 'w') as json_file:
        json.dump(data, json_file, indent=4)
    return data


if __name__ == '__main__':
    # Every command-line argument is a directory holding one packet of images.
    main(sys.argv[1:])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment