Skip to content

Instantly share code, notes, and snippets.

@larryv
Created May 26, 2011 06:42
Show Gist options
  • Save larryv/992662 to your computer and use it in GitHub Desktop.
Save larryv/992662 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3.2
import csv
import datetime
import glob
import io
import os
import os.path
import sys
import tempfile
import time
import threading
from urllib.parse import unquote_plus, urlparse
import urllib.request
import zipfile
class DownloadWebRes(threading.Thread):
    """Background thread that copies a readable resource into a local file.

    ``remote`` must provide ``read(n)`` returning bytes (empty when exhausted)
    and ``local`` must provide ``write(data)``.  The whole payload is buffered
    in ``data`` and written to ``local`` in one call once ``remote`` is
    exhausted.

    Attributes:
        remote: the object being read from.
        local: the object being written to.
        data: buffer holding every byte received so far.
        written: value returned by ``local.write`` (0 until the copy is done).
    """

    def __init__(self, remote, local):
        super().__init__()
        self.remote = remote
        self.local = local
        # A bytearray grows in place; ``bytes += chunk`` would re-copy all the
        # data received so far for every chunk (quadratic on big downloads).
        self.data = bytearray()
        self.written = 0

    @property
    def progress(self):
        """Return the number of bytes received from ``remote`` so far."""
        return len(self.data)

    def run(self):
        """Read ``remote`` until it is exhausted, then write it to ``local``."""
        chunk = self.remote.read(io.DEFAULT_BUFFER_SIZE)
        while chunk:
            self.data += chunk
            chunk = self.remote.read(io.DEFAULT_BUFFER_SIZE)
        self.written = self.local.write(self.data)
def download_web_res(url, dest):
    """Download *url* into the local file *dest*, printing a progress bar.

    The transfer runs in a ``DownloadWebRes`` thread while this function
    prints one dot per completed tenth of the download; ten dots are always
    printed by the time the download finishes.

    Args:
        url: location to fetch; anything ``urllib.request.urlopen`` accepts.
        dest: path of the file to create (an existing file is truncated).

    Raises:
        urllib.error.URLError: if *url* cannot be opened.
        OSError: if *dest* cannot be opened for writing.
    """
    # Set up remote location.  The context manager guarantees the connection
    # is closed even when opening or writing the local file fails.
    with urllib.request.urlopen(url) as fremote:
        # Content-Length is optional (e.g. chunked responses), and only HTTP
        # responses offer getheader(); ``headers`` exists for every scheme.
        try:
            size = int(fremote.headers.get('Content-Length'))
        except (TypeError, ValueError):
            size = None
        if size is None:
            print('Downloading from "{}" (size unknown)'.format(url))
        else:
            print('Downloading {} bytes from "{}"'.format(size, url))
        # Incremental progress needs a positive total to divide by.
        size_known = size is not None and size > 0

        # Download!
        with open(dest, 'w+b') as flocal:
            decile = 0
            dl_thread = DownloadWebRes(fremote, flocal)
            dl_thread.start()
            while dl_thread.is_alive():
                if size_known:
                    # Clamp to 10 in case more data arrives than advertised.
                    reached = min(10, dl_thread.progress * 10 // size)
                    if reached > decile:
                        print('.' * (reached - decile), end='')
                        sys.stdout.flush()
                        decile = reached
                # Wait up to 0.1 s, but return at once when the thread ends.
                dl_thread.join(0.1)
            # Fill in whatever part of the bar has not been printed yet.
            print('.' * (10 - decile))

    # Wrap up
    print('Wrote {} bytes to "{}"'.format(dl_thread.written, dest))
def go():
    """Download every data file listed in the ``update_data_sources`` CSVs.

    Each file in the ``update_data_sources`` directory (relative to the
    current working directory) is read as a CSV file with a header row; its
    base name without extension is used as the agency name.  Every row must
    provide the columns ``name``, ``url`` and ``dest``.

    A download is stored in ``data/<today>/<agency>/<dest>/``.  A ``.zip``
    download is instead saved in the parent of that directory and then
    extracted into it.
    """
    # Read in data
    sources = {}
    # ``join`` must be qualified: the file never imports a bare ``join``.
    for src in glob.iglob(os.path.join('update_data_sources', '*')):
        agency = os.path.splitext(os.path.basename(src))[0]
        with open(src, newline='') as f:
            sources[agency] = list(csv.DictReader(f))

    # Write out data
    today = datetime.date.today().isoformat()
    for agency, data_files in sources.items():
        for data_file in data_files:
            # Heading: the name underlined with '=' characters
            print(data_file['name'] + '\n' + '=' * len(data_file['name']))

            # Create requested directory
            dest = os.path.join('data', today, agency,
                                os.path.normpath(data_file['dest']))
            os.makedirs(dest, mode=0o700, exist_ok=True)

            # Determine file path to save to: the last component of the
            # URL's (percent-decoded) path is used as the file name
            url_path = unquote_plus(urlparse(data_file['url']).path)
            url_file = url_path.rpartition('/')[2]
            is_zip = os.path.splitext(url_file)[1].lower() == '.zip'
            path = os.path.join(os.path.dirname(dest) if is_zip else dest,
                                url_file)
            download_web_res(data_file['url'], path)

            # If zip file, extract members
            if is_zip:
                with zipfile.ZipFile(path) as fzip:
                    fzip.extractall(dest)
                print('Extracted "{}" to "{}"'.format(url_file, dest))
            print()
# Script entry point: fetch all configured data files when this module is
# executed directly; importing the module does not trigger any downloads.
if __name__ == "__main__":
    go()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment