Skip to content

Instantly share code, notes, and snippets.

@allenday
Forked from sameerg07/tensorflow_json_parser.py
Last active January 2, 2020 09:56
Show Gist options
  • Select an option

  • Save allenday/22da30122526b321e32e8b8967a26efb to your computer and use it in GitHub Desktop.

Select an option

Save allenday/22da30122526b321e32e8b8967a26efb to your computer and use it in GitHub Desktop.
Converts the JSON file downloaded with the Dataturks image classifier tool into a dataset folder.
#This script has been solely created under dataturks. Copyrights are reserved
#EXAMPLE USAGE
#python3 tensorflow_json_parser.py --json_file "flower.json" --dataset_path "Dataset5/"
import json
import glob
import urllib.request
import argparse
import random
import os
from pathlib import Path
def downloader(image_url , i):
    """Download ``image_url`` and save it as ``str(i) + '.jpg'``.

    The URL is split into its components and only the path, query and
    fragment are percent-escaped. The scheme and the network location
    (host, port, credentials) are passed through untouched, so URLs with a
    port or a query string keep their ``:``, ``?``, ``=`` and ``&``
    separators.

    Args:
        image_url: URL of the image. When it contains no ``://`` it is
            treated as an ``http://`` URL.
        i: Target file name without extension (anything ``str()`` accepts).
            A relative name is resolved against the current directory.

    Raises:
        Whatever ``urllib.request.urlretrieve`` raises when the fetch or the
        write to disk fails.
    """
    # Function-scope import keeps this block independent of the file's
    # top-level import list.
    from urllib.parse import quote, urlsplit, urlunsplit

    full_file_name = str(i) + '.jpg'

    if '://' not in image_url:
        image_url = 'http://' + image_url
    parts = urlsplit(image_url)

    # Characters that structure a query string must survive escaping; '%' is
    # kept so pre-encoded query values (e.g. signed URLs) are not re-encoded.
    query_safe = '=&%+/:,;@?'
    escaped_url = urlunsplit((
        parts.scheme,
        parts.netloc,
        quote(parts.path),
        quote(parts.query, safe=query_safe),
        quote(parts.fragment, safe=query_safe),
    ))
    print(escaped_url)
    urllib.request.urlretrieve(escaped_url, full_file_name)
if __name__ == "__main__":
    # Reads a JSON-lines export (one JSON object per line), groups the image
    # URLs by the first label of each record, and downloads every image into
    # <dataset_path>/<label>/.
    a = argparse.ArgumentParser()
    a.add_argument("--json_file", help="path to json")
    a.add_argument("--dataset_path", help="path to the dataset")
    args = a.parse_args()
    # Both arguments are needed below, so bail out if either one is missing.
    if args.json_file is None or args.dataset_path is None:
        a.print_help()
        # SystemExit is what sys.exit() raises; raising it directly avoids
        # relying on a `sys` import.
        raise SystemExit(1)

    # label -> list of image URLs, in the order they appear in the file.
    label_to_urls = {}
    with open(args.json_file) as file1:
        for line in file1:
            if not line.strip():
                continue  # tolerate blank lines in the export
            record = json.loads(line)
            # Records without an annotation (null) or with an empty label
            # list carry no class to file the image under; skip them.
            annotation = record.get('annotation') or {}
            labels = annotation.get('labels') or []
            if not labels:
                continue
            label_to_urls.setdefault(labels[0], []).append(record['content'])
    print(label_to_urls.keys())

    dataset_dir = Path(args.dataset_path)
    dataset_dir.mkdir(parents=True, exist_ok=True)
    for label, urls in label_to_urls.items():
        label_dir = dataset_dir / str(label)
        label_dir.mkdir(parents=True, exist_ok=True)
        for url in urls:
            # Keep the original file name from the URL.
            target = os.path.join(str(label_dir), os.path.basename(url))
            # downloader() saves to `<name>.jpg`, so that is the path to
            # test when deciding whether the image was already fetched.
            if not os.path.exists(target + '.jpg'):
                downloader(url, target)
@allenday
Copy link
Copy Markdown
Author

allenday commented Jan 2, 2020

skip empty label sets

@allenday
Copy link
Copy Markdown
Author

allenday commented Jan 2, 2020

escape urls as needed

@allenday
Copy link
Copy Markdown
Author

allenday commented Jan 2, 2020

detect protocol

@allenday
Copy link
Copy Markdown
Author

allenday commented Jan 2, 2020

only download if not exists

@allenday
Copy link
Copy Markdown
Author

allenday commented Jan 2, 2020

keep original filename

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment