# rfmcnally/importio_latest_2.py

## importio_latest_2.py
""" Script for downloading latest CSVs from a list of Import.io extractors """
import os
import sys
import csv
import datetime
import requests

# Default text file holding one Import.io extractor ID per line.
EXTRACTORS_FILE = "importio_extractors.txt"

def main(text_file):
    """Download the latest CSV for every extractor ID listed in a text file.

    Creates a ``<YYYY-MM-DDTHH-MM>_output`` directory next to this script and
    writes one CSV per extractor into it (UTF-8 with BOM), named after the
    extractor.

    Args:
        text_file: Path to a text file with one extractor ID per line.
            Blank lines and surrounding whitespace are ignored.
    """
    # Close the ID file deterministically and drop empty lines so they are
    # not sent to the API as extractor IDs.
    with open(text_file) as id_file:
        extractor_ids = [line.strip() for line in id_file if line.strip()]

    # Build the minute-resolution stamp explicitly: slicing isoformat() is
    # wrong whenever microseconds are 0, because the fraction is then omitted.
    current_time = datetime.datetime.now().strftime('%Y-%m-%dT%H-%M')
    output_dir = '{0}_output'.format(current_time)
    curr_dir = os.path.dirname(os.path.abspath(__file__))
    new_dir = os.path.join(curr_dir, output_dir)
    # Create the directory that is actually written to (beside the script,
    # not relative to the CWD) and tolerate a re-run within the same minute.
    os.makedirs(new_dir, exist_ok=True)

    for extractor_id in extractor_ids:
        download_extractor = Extractor(extractor_id)
        # get_name() returns None when the lookup fails; fall back to the ID
        # so a file name can still be built.
        extractor_name = download_extractor.get_name() or extractor_id
        extractor_csv = download_extractor.get_csv()
        filename = extractor_name.replace('/', '-') + '.csv'
        new_file = os.path.join(new_dir, filename)
        # newline='' lets the csv module control line endings itself
        # (otherwise Windows output gets an extra blank line per row).
        with open(new_file, 'w', encoding='utf-8-sig', newline='') as output:
            writer = csv.writer(output, delimiter=',')
            writer.writerows(csv.reader(extractor_csv, delimiter=','))
        print(extractor_name + ' saved to ' + output_dir)
    print('All CSVs saved.')


class Extractor:
    """Small client for a single Import.io extractor.

    The API key is read from the ``IMPORT_IO_API_KEY`` environment variable
    when the instance is created.
    """

    def __init__(self, extractor_id):
        """Remember the extractor ID and load the API key.

        Args:
            extractor_id: ID of the extractor to query.

        Raises:
            KeyError: If ``IMPORT_IO_API_KEY`` is not set in the environment.
        """
        self._extractor_id = extractor_id
        self._api_key = os.environ['IMPORT_IO_API_KEY']

    def _request(self, url, accept):
        """Send an authenticated GET to ``url`` with the given Accept header."""
        return requests.get(
            url,
            params={"_apikey": self._api_key},
            headers={'Accept': accept},
            stream=True,
        )

    @staticmethod
    def _name_from_payload(payload):
        """Return ``payload['name']`` when payload is a dict that has it, else None."""
        # A JSON body may decode to a list, string or None; indexing those
        # with 'name' would raise TypeError, so only dicts are consulted.
        if isinstance(payload, dict):
            return payload.get('name')
        return None

    def get_csv(self):
        """Call the Extractor API to download the latest CSV.

        Returns:
            The response body decoded as UTF-8 and split into a list of lines
            (without line terminators).
        """
        url = "https://data.import.io/extractor/{0}/csv/latest".format(self._extractor_id)
        response = self._request(url, "text/csv")
        return response.content.decode('utf-8').splitlines()

    def get_name(self):
        """Call the Extractor API to get the Extractor's name.

        Returns:
            The ``name`` field of the JSON response, or None when the body is
            not valid JSON, is not a JSON object, or has no ``name`` key.
        """
        url = "https://store.import.io/store/extractor/{0}".format(self._extractor_id)
        response = self._request(url, "application/json")
        try:
            payload = response.json()
        except ValueError:
            # Body was not valid JSON; the name is simply unknown.
            return None
        return self._name_from_payload(payload)


if __name__ == '__main__':
    # Run the download only when executed as a script, not on import.
    main(EXTRACTORS_FILE)