Skip to content

Instantly share code, notes, and snippets.

@allenday
Forked from sameerg07/tensorflow_json_parser.py
Last active January 2, 2020 09:56
Show Gist options
  • Save allenday/22da30122526b321e32e8b8967a26efb to your computer and use it in GitHub Desktop.
Save allenday/22da30122526b321e32e8b8967a26efb to your computer and use it in GitHub Desktop.
Converts the JSON file downloaded from the Dataturks image classifier tool into a dataset folder.
#This script has been solely created under dataturks. Copyrights are reserved
#EXAMPLE USAGE
#python3 tensorflow_json_parser.py --json_file "flower.json" --dataset_path "Dataset5/"
import json
import glob
import urllib.request
import argparse
import random
import os
from pathlib import Path
def downloader(image_url, i):
    """Download ``image_url`` and save it as ``<i>.jpg``.

    The URL is split into scheme, host and path. Only the path is
    percent-escaped (e.g. spaces become ``%20``); the scheme and host are
    passed through untouched so that ports (``host:8080``) and credentials
    (``user@host``) are not mangled.

    Args:
        image_url: URL of the image. If it carries no ``scheme://`` prefix,
            ``http`` is assumed.
        i: Base name (or path without extension) of the output file;
            ``'.jpg'`` is always appended to ``str(i)``.

    Raises:
        Whatever ``urllib.request.urlretrieve`` raises when the download or
        the local write fails.
    """
    # Function-scope import: ``quote`` officially lives in urllib.parse;
    # ``urllib.request.quote`` is only an undocumented re-export.
    from urllib.parse import quote

    full_file_name = str(i) + '.jpg'

    scheme, separator, remainder = image_url.partition('://')
    if not (separator and scheme.isalnum()):
        # No recognisable scheme: keep the whole string and default to http
        # instead of blindly chopping off the first characters.
        scheme, remainder = 'http', image_url

    # Everything up to the first '/' is the host part and must stay verbatim.
    host, slash, path = remainder.partition('/')

    # NOTE: like the original, '?', '#' and '%' inside the path are escaped
    # too, so an already percent-encoded URL will be encoded a second time.
    escaped_url = scheme + '://' + host + slash + quote(path)
    print(escaped_url)
    urllib.request.urlretrieve(escaped_url, full_file_name)
if __name__ == "__main__":
    # Script entry point: read a line-delimited JSON export, group the image
    # URLs by their first label and download every image into
    # <dataset_path>/<label>/.

    # Local import: sys.exit() is used below but the file's import block
    # does not import sys.
    import sys

    a = argparse.ArgumentParser()
    a.add_argument("--json_file", help="path to json")
    a.add_argument("--dataset_path", help="path to the dataset")
    args = a.parse_args()
    # Both arguments are needed; bail out if either one is missing.
    if args.json_file is None or args.dataset_path is None:
        a.print_help()
        sys.exit(1)

    # Map each label to the list of image URLs annotated with it. Only the
    # first label of a record is used.
    label_to_urls = {}
    with open(args.json_file) as file1:
        for line in file1:
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            record = json.loads(line)
            # Skip records whose annotation is missing, null or has no labels.
            labels = (record.get('annotation') or {}).get('labels') or []
            if not labels:
                continue
            label_to_urls.setdefault(labels[0], []).append(record['content'])
    print(label_to_urls.keys())

    dataset_dir = Path(args.dataset_path)
    dataset_dir.mkdir(parents=True, exist_ok=True)
    for label, urls in label_to_urls.items():
        # Build explicit paths instead of chdir-ing in and out of each label
        # directory; "../" only climbs one level even if the label created
        # nested directories.
        label_dir = dataset_dir / str(label)
        label_dir.mkdir(parents=True, exist_ok=True)
        for url in urls:
            # Keep the original file name from the URL.
            target = label_dir / os.path.basename(url)
            # downloader() appends '.jpg' to the name it is given, so that is
            # the file whose existence decides whether to download again.
            if not Path(str(target) + '.jpg').exists():
                downloader(url, target)
@allenday
Copy link
Author

allenday commented Jan 2, 2020

skip empty label sets

@allenday
Copy link
Author

allenday commented Jan 2, 2020

escape urls as needed

@allenday
Copy link
Author

allenday commented Jan 2, 2020

detect protocol

@allenday
Copy link
Author

allenday commented Jan 2, 2020

only download if not exists

@allenday
Copy link
Author

allenday commented Jan 2, 2020

keep original filename

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment