Skip to content

Instantly share code, notes, and snippets.

@mcgill-a
Last active May 20, 2020 11:43
Show Gist options
  • Save mcgill-a/d61d27618a35e631f2dbae572fd1b907 to your computer and use it in GitHub Desktop.
Save mcgill-a/d61d27618a35e631f2dbae572fd1b907 to your computer and use it in GitHub Desktop.
Download and extract ZIP files from URLs in a CSV file
import sys
import csv
from tqdm import tqdm
import requests, zipfile, io
def load_data(filename):
    """Read a CSV file and return all of its rows.

    Args:
        filename: Path to a UTF-8 encoded CSV file.

    Returns:
        A list with one entry per CSV record, in file order. Each entry is
        the list of string fields produced by ``csv.reader``; the header
        row (if the file has one) is included as the first entry.
    """
    # newline='' hands newline handling to the csv module, as its
    # documentation requires; otherwise line breaks embedded in quoted
    # fields are translated by the text layer and no longer round-trip.
    with open(filename, 'r', encoding='UTF-8', newline='') as file:
        return list(csv.reader(file))
def get_links(arr, column=3):
    """Collect the link values from parsed CSV rows.

    The first row is treated as the column header and is never returned.

    Args:
        arr: Iterable of rows, each a sequence of string fields (for
            example the rows produced by ``csv.reader``).
        column: Zero-based index of the field that holds the link.
            Defaults to 3, the column this script has always read.

    Returns:
        A list of the link values from every row after the header, in
        input order. Rows that have no field at ``column`` (such as the
        empty rows ``csv.reader`` yields for blank lines) and rows whose
        link field is blank are skipped. An empty input gives ``[]``.
    """
    rows = iter(arr)
    # drop the column header; the default keeps empty input from raising
    next(rows, None)

    links = []
    for row in rows:
        # blank or truncated rows carry no link to download
        if len(row) <= column:
            continue
        link = row[column]
        if not link.strip():
            continue
        links.append(link)
    return links
# parameters
in_filename = 'data/epa-coal.csv'
out_directory = 'data/downloads/'
# seconds to wait for the server before giving up; without a timeout
# requests can block forever on a stalled connection
request_timeout = 60

# variables
data = load_data(in_filename)
links = get_links(data)

for link in tqdm(links):
    # download the zip file; stop with a clear HTTP error instead of a
    # confusing BadZipFile when the server answers with an error page
    r = requests.get(link, timeout=request_timeout)
    r.raise_for_status()
    # extract straight from memory; the context manager closes the archive
    with zipfile.ZipFile(io.BytesIO(r.content)) as z:
        z.extractall(out_directory)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment