Last active
March 19, 2018 18:43
-
-
Save rfmcnally/f07fd1d9d326776b94b97559fcc2d2da to your computer and use it in GitHub Desktop.
Script for downloading the latest CSVs from a list of Import.io Extractors, with the API key passed in as an environment variable
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" Script for downloading latest CSVs from a list of Import.io extractors """ | |
import os | |
import sys | |
import csv | |
import datetime | |
import requests | |
# Path of the text file handed to main() when the script is run directly;
# main() reads it and treats each line as one extractor ID.
EXTRACTORS_FILE = 'importio_extractors.txt'
def main(text_file):
    """Download the latest CSV for every extractor ID listed in a text file.

    A timestamped ``<YYYY-MM-DDTHH-MM>_output`` directory is created next to
    this script, and one ``<extractor name>.csv`` file is written into it for
    each extractor.

    Args:
        text_file: Path to a text file holding one extractor ID per line.
            Blank lines are ignored.

    Returns:
        None.
    """
    # Read the IDs with a context manager so the handle is closed promptly,
    # and drop blank lines so they are not treated as extractor IDs.
    with open(text_file) as id_file:
        extractor_ids = [line.strip() for line in id_file if line.strip()]

    # Format the timestamp explicitly: slicing isoformat() breaks when the
    # microsecond field happens to be zero (it is then omitted entirely).
    current_time = datetime.datetime.now().strftime('%Y-%m-%dT%H-%M')
    output_dir = '{0}_output'.format(current_time)
    curr_dir = os.path.dirname(os.path.abspath(__file__))
    new_dir = os.path.join(curr_dir, output_dir)
    # Create the directory the files are actually written to (beside the
    # script, not relative to the working directory) and tolerate a second
    # run within the same minute.
    os.makedirs(new_dir, exist_ok=True)

    for extractor_id in extractor_ids:
        download_extractor = Extractor(extractor_id)
        # get_name() may return None; fall back to the ID so a file name can
        # still be built.
        extractor_name = download_extractor.get_name() or extractor_id
        extractor_csv = download_extractor.get_csv()
        reader = csv.reader(extractor_csv, delimiter=',')
        # A '/' in the name would otherwise be read as a path separator.
        filename = extractor_name.replace('/', '-') + '.csv'
        new_file = os.path.join(new_dir, filename)
        # newline='' lets the csv module control line endings (avoids blank
        # rows on Windows); utf-8-sig writes a BOM at the start of the file.
        with open(new_file, 'w', encoding='utf-8-sig', newline='') as output:
            writer = csv.writer(output, delimiter=',')
            writer.writerows(reader)
        print(extractor_name + ' saved to ' + output_dir)
    print('All CSVs saved.')
class Extractor(object):
    """Client for a single Import.io extractor.

    The API key is read from the ``IMPORT_IO_API_KEY`` environment variable
    when the instance is created; a missing variable raises ``KeyError``.
    """

    # Seconds passed to requests as the timeout, so a stalled connection
    # cannot hang the script indefinitely.
    REQUEST_TIMEOUT = 60

    def __init__(self, extractor_id):
        """Store the extractor ID and load the API key from the environment."""
        self._extractor_id = extractor_id
        self._api_key = os.environ['IMPORT_IO_API_KEY']

    def get_csv(self):
        """Call the Extractor API to download the latest CSV.

        Returns:
            list of str: The response body split into lines, without line
            terminators.
        """
        url = "https://data.import.io/extractor/{0}/csv/latest".format(self._extractor_id)
        querystring = {"_apikey": self._api_key}
        headers = {'Accept': "text/csv"}
        response = requests.get(url, params=querystring, headers=headers,
                                timeout=self.REQUEST_TIMEOUT)
        # utf-8-sig decodes plain UTF-8 and also strips a leading BOM, which
        # would otherwise end up inside the first header cell.
        csv_resp = response.content.decode('utf-8-sig')
        return csv_resp.splitlines()

    def get_name(self):
        """Call the Extractor API to get the Extractor's name.

        Returns:
            The value of the ``name`` field of the JSON response, or ``None``
            when the body is not valid JSON, is not a JSON object, or has no
            ``name`` field.
        """
        url = "https://store.import.io/store/extractor/{0}".format(self._extractor_id)
        querystring = {"_apikey": self._api_key}
        headers = {'Accept': "application/json"}
        response = requests.get(url, params=querystring, headers=headers,
                                timeout=self.REQUEST_TIMEOUT)
        try:
            results = response.json()
        except ValueError:
            # Body was not JSON; the name lookup is best-effort.
            return None
        if not isinstance(results, dict):
            # e.g. a JSON array or string: there is no 'name' key to read.
            return None
        return results.get('name')
if __name__ == '__main__':
    # An alternative ID list may be given as the first command-line argument;
    # without one, the default EXTRACTORS_FILE is used as before.
    main(sys.argv[1] if len(sys.argv) > 1 else EXTRACTORS_FILE)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment