Skip to content

Instantly share code, notes, and snippets.

@darcyliu
Created November 16, 2016 00:26
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save darcyliu/849c92a664539b60bc729b3de3cfd075 to your computer and use it in GitHub Desktop.
Save darcyliu/849c92a664539b60bc729b3de3cfd075 to your computer and use it in GitHub Desktop.
openimages dataset downloader
#!/usr/bin/env python
# encoding: utf-8
"""
openimages_dataset_downloader.py
Created by Darcy on 15/11/2016.
Copyright (c) 2016 Darcy. All rights reserved.
"""
import argparse
import base64
import csv
import hashlib
import os
import sys
from six.moves import urllib
def md5(fname):
    """Return the base64-encoded MD5 digest of the file at ``fname``.

    The file is opened in binary mode and consumed in 4096-byte pieces, so
    the whole file is never held in memory at once.  The raw 16-byte digest
    is base64-encoded and returned as an ASCII text string.
    """
    digest = hashlib.md5()
    with open(fname, "rb") as stream:
        while True:
            block = stream.read(4096)
            if not block:
                # An empty read marks end of file.
                break
            digest.update(block)
    encoded = base64.b64encode(digest.digest())
    return encoded.decode('ascii')
def download(filename, output_dir='data'):
    """Download every image listed in a CSV file and verify its checksum.

    The first CSV row is treated as a header and skipped.  For each
    remaining row, column 0 is used as the local file name stem, column 2 as
    the source URL, and column 9 as the expected checksum, which is compared
    against the value ``md5`` returns for the local file.  A file that is
    already present with a matching checksum is not fetched again.

    Progress and errors are reported on standard output.  A row with too few
    columns, or a download that fails, is reported and skipped so that one
    bad entry does not abort the rest of the run.

    Args:
        filename: Path of the CSV file to read.
        output_dir: Directory the images are written to; it is created when
            missing.  Defaults to ``'data'``.

    Returns:
        None.
    """
    # urlretrieve does not create missing parent directories.
    if output_dir and not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    total = 0
    with open(filename, 'r') as csvfile:
        reader = csv.reader(csvfile, delimiter=',')
        next(reader, None)  # skip the header row
        for row in reader:
            total += 1
            if len(row) < 10:
                # Columns 0, 2 and 9 are required below.
                print('Error: malformed row', total, row)
                continue
            image_id, source, expected = row[0], row[2], row[9]

            # Take the extension from the URL path only, so that a query
            # string or fragment cannot leak into the local file name.
            url_path = urllib.parse.urlsplit(source).path
            _, file_ext = os.path.splitext(url_path)
            file_path = os.path.join(output_dir, image_id + file_ext)

            if os.path.exists(file_path) and md5(file_path) == expected:
                print('skiped', source)
                continue

            print('Download:', file_path, source)
            try:
                file_path, _ = urllib.request.urlretrieve(source, file_path)
            except (IOError, OSError) as err:
                # URL/HTTP errors derive from IOError (Python 2) / OSError
                # (Python 3); report the failure and move on to the next row.
                print('Error:', file_path, source, err)
                print('')
                continue

            actual = md5(file_path)
            print(actual, expected)
            if actual == expected:
                size = os.stat(file_path).st_size
                print('Successfully downloaded', file_path, size, 'bytes.')
            else:
                print('Error:', file_path, source)
            print('')
    print('Total:', total)
def main():
    """Parse the command line and download the images listed in the CSV.

    The required ``-i`` option names the input CSV file, which is passed
    straight to ``download``.
    """
    cli = argparse.ArgumentParser(description='')
    cli.add_argument(
        '-i',
        dest='input',
        required=True,
        help='input images.csv',
    )
    options = cli.parse_args()
    download(options.input)
# Run the downloader only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment