Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Converts the JSON file downloaded using the image classifier tool of Dataturks into a dataset folder
#This script has been solely created under dataturks. Copyrights are reserved
#EXAMPLE USAGE
#python3 tensorflow_json_parser.py --json_file "flower.json" --dataset_path "Dataset5/"
import json
import glob
import urllib.request
import argparse
import random
import os
from pathlib import Path
def downloader(image_url , i):
    """Download ``image_url`` into the current working directory as ``<i>.jpg``.

    The URL is percent-escaped component by component before it is fetched:
    the host part is left untouched, while unsafe characters (spaces,
    non-ASCII characters, ...) in the path, query string and fragment are
    escaped.  Existing ``%xx`` escapes are preserved rather than escaped a
    second time.

    Args:
        image_url: Address of the image.  An ``http://`` or ``https://``
            prefix is honoured; any other input is treated as a scheme-less
            ``host/path`` and fetched over plain HTTP.
        i: Base name for the output file; ``'.jpg'`` is always appended.

    Raises:
        Whatever ``urllib.request.urlretrieve`` raises when the download
        fails (for example ``urllib.error.URLError``).
    """
    # Imported locally so the function does not depend on names that
    # urllib.request merely happens to re-export.
    from urllib.parse import quote, urlsplit, urlunsplit

    full_file_name = str(i) + '.jpg'

    # Detect the protocol; only strip a prefix that is really there so that
    # scheme-less URLs do not lose their first characters.
    lowered = image_url.lower()
    if lowered.startswith('https://'):
        prefix, remainder = 'https://', image_url[8:]
    elif lowered.startswith('http://'):
        prefix, remainder = 'http://', image_url[7:]
    else:
        prefix, remainder = 'http://', image_url

    parts = urlsplit(prefix + remainder)
    # Escape each component with its own set of safe characters.  Quoting
    # the whole remainder at once would also escape ':' (port), '?', '=' and
    # '&' (query) and '%' (existing escapes), yielding a different URL.
    escaped_url = urlunsplit((
        parts.scheme,
        parts.netloc,
        quote(parts.path, safe='/%'),
        quote(parts.query, safe='=&%+/:,;@'),
        quote(parts.fragment, safe='%'),
    ))
    print(escaped_url)
    urllib.request.urlretrieve(escaped_url, full_file_name)
if __name__ == "__main__":
    # Command-line entry point: read a line-delimited JSON export, group the
    # image URLs by their first label and download every image into
    # <dataset_path>/<label>/.
    a = argparse.ArgumentParser()
    a.add_argument("--json_file", help="path to json")
    a.add_argument("--dataset_path", help="path to the dataset")
    args = a.parse_args()
    # Both options are needed below, so bail out when either one is missing.
    # SystemExit is raised directly because this file does not import sys.
    if args.json_file is None or args.dataset_path is None:
        a.print_help()
        raise SystemExit(1)

    # Map each label to the list of image URLs carrying it.  Only the first
    # label of a record is used.
    label_to_urls = {}
    with open(args.json_file, encoding="utf-8") as file1:
        for line in file1:
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            record = json.loads(line)
            # Skip records without an annotation or with an empty label set.
            annotation = record.get('annotation') or {}
            labels = annotation.get('labels') or []
            if not labels:
                continue
            label_to_urls.setdefault(labels[0], []).append(record['content'])
    print(label_to_urls.keys())

    # Work with an absolute root so returning to it never depends on how
    # deep a label directory is nested.
    dataset_root = Path(args.dataset_path).resolve()
    dataset_root.mkdir(parents=True, exist_ok=True)
    os.chdir(dataset_root)
    for label, urls in label_to_urls.items():
        label_dir = dataset_root / str(label)
        label_dir.mkdir(parents=True, exist_ok=True)
        # downloader() writes into the current working directory.
        os.chdir(label_dir)
        for url in urls:
            b = os.path.basename(url)
            # downloader() stores the image as "<b>.jpg", so that is the
            # name to look for when deciding whether to download again.
            if not Path(b + '.jpg').exists():
                downloader(url, b)
        os.chdir(dataset_root)
@allenday

This comment has been minimized.

Copy link
Owner Author

allenday commented Jan 2, 2020

skip empty label sets

@allenday

This comment has been minimized.

Copy link
Owner Author

allenday commented Jan 2, 2020

escape urls as needed

@allenday

This comment has been minimized.

Copy link
Owner Author

allenday commented Jan 2, 2020

detect protocol

@allenday

This comment has been minimized.

Copy link
Owner Author

allenday commented Jan 2, 2020

only download if not exists

@allenday

This comment has been minimized.

Copy link
Owner Author

allenday commented Jan 2, 2020

keep original filename

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.