Skip to content

Instantly share code, notes, and snippets.

@rfmcnally
Last active March 19, 2018 18:43
Show Gist options
  • Save rfmcnally/f07fd1d9d326776b94b97559fcc2d2da to your computer and use it in GitHub Desktop.
Save rfmcnally/f07fd1d9d326776b94b97559fcc2d2da to your computer and use it in GitHub Desktop.
Script for downloading the latest CSVs from a list of Import.io extractors, with the API key passed in through the IMPORT_IO_API_KEY environment variable
""" Script for downloading latest CSVs from a list of Import.io extractors """
import os
import sys
import csv
import datetime
import requests
# Path of the text file listing the extractor IDs to download, one ID per
# line; passed to main() when the file is run as a script.
EXTRACTORS_FILE = 'importio_extractors.txt'
def main(text_file):
    """Download the latest CSV of every extractor listed in *text_file*.

    A timestamped ``<YYYY-MM-DDTHH-MM>_output`` folder is created next to this
    script and one ``<extractor name>.csv`` file is written into it per
    extractor ID.

    Args:
        text_file: Path to a text file holding one extractor ID per line.
            Blank lines and surrounding whitespace are ignored.

    Returns:
        None. Progress is reported on stdout.

    Raises:
        OSError: If *text_file* cannot be read or the output cannot be written.
        KeyError: If the IMPORT_IO_API_KEY environment variable is unset
            (raised by ``Extractor`` when at least one ID is listed).
    """
    # Context manager closes the handle; blank lines would otherwise become
    # bogus extractor IDs and trigger pointless API calls.
    with open(text_file) as id_file:
        extractor_ids = [line.strip() for line in id_file if line.strip()]

    # strftime() instead of slicing isoformat(): isoformat() omits the
    # fractional part when microseconds are exactly 0, so a fixed-width slice
    # would then cut into the date itself.
    current_time = datetime.datetime.now().strftime('%Y-%m-%dT%H-%M')
    output_dir = '{0}_output'.format(current_time)
    curr_dir = os.path.dirname(os.path.abspath(__file__))
    new_dir = os.path.join(curr_dir, output_dir)
    # Create the folder where the files are actually written (next to the
    # script, not the current working directory) and tolerate a rerun within
    # the same minute.
    os.makedirs(new_dir, exist_ok=True)

    for extractor_id in extractor_ids:
        download_extractor = Extractor(extractor_id)
        # get_name() returns None when the name lookup fails; fall back to the
        # ID so the CSV is still saved under a usable filename.
        extractor_name = download_extractor.get_name() or extractor_id
        extractor_csv = download_extractor.get_csv()
        filename = extractor_name.replace('/', '-') + '.csv'
        new_file = os.path.join(new_dir, filename)
        # newline='' lets the csv module control line endings itself (avoids
        # blank rows on Windows); utf-8-sig writes a BOM at the start.
        with open(new_file, 'w', encoding='utf-8-sig', newline='') as output:
            reader = csv.reader(extractor_csv, delimiter=',')
            csv.writer(output, delimiter=',').writerows(reader)
        print(extractor_name + ' saved to ' + output_dir)
    print('All CSVs saved.')
class Extractor(object):
    """Small client for one Import.io extractor.

    The API key is read from the IMPORT_IO_API_KEY environment variable when
    the object is created; a missing variable raises KeyError.
    """

    # Seconds passed to requests as the timeout; without one a stalled
    # connection would block the script indefinitely.
    REQUEST_TIMEOUT = 60

    def __init__(self, extractor_id):
        """Remember the extractor ID and load the API key from the environment."""
        self._extractor_id = extractor_id
        self._api_key = os.environ['IMPORT_IO_API_KEY']

    def _get(self, url, accept):
        """Send an authenticated GET to *url* asking for the *accept* media type."""
        return requests.get(
            url,
            params={"_apikey": self._api_key},
            headers={'Accept': accept},
            timeout=self.REQUEST_TIMEOUT,
        )

    def get_csv(self):
        """Call the Extractor API to download the latest CSV.

        Returns:
            The response body decoded as UTF-8 and split into a list of lines.

        Raises:
            requests.HTTPError: If the server answers with an error status, so
                an error payload is never mistaken for CSV data.
        """
        url = "https://data.import.io/extractor/{0}/csv/latest".format(self._extractor_id)
        response = self._get(url, "text/csv")
        response.raise_for_status()
        return response.content.decode('utf-8').splitlines()

    def get_name(self):
        """Call the Extractor API to get the Extractor's name.

        Returns:
            The value of the ``name`` field of the JSON response, or None when
            the body is not valid JSON, is not a JSON object, or has no
            ``name`` field.
        """
        url = "https://store.import.io/store/extractor/{0}".format(self._extractor_id)
        response = self._get(url, "application/json")
        try:
            results = response.json()
        except ValueError:
            # Body was not JSON; the name lookup is best-effort.
            return None
        if not isinstance(results, dict):
            return None
        return results.get('name')
if __name__ == '__main__':
    # An optional first command-line argument overrides the default list of
    # extractor IDs; with no argument the behaviour is unchanged.
    main(sys.argv[1] if len(sys.argv) > 1 else EXTRACTORS_FILE)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment