Skip to content

Instantly share code, notes, and snippets.

@d136o
Last active August 29, 2015 14:01
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save d136o/ad79dcffded4dd442993 to your computer and use it in GitHub Desktop.
Save d136o/ad79dcffded4dd442993 to your computer and use it in GitHub Desktop.
Fetched data from Full Contact API and throws it into SQLite
import argparse
import codecs
import json
import os.path
import requests
import sys
import sqlite3
import time
import traceback
# three columns:
# email (with unique constraint)
# status
# information
def main(args=None):
FULL_CONTACT_URL = "https://api.fullcontact.com/v2/person.json"
FULL_CONTACT_EMAIL_TEMPLATE_URL = "https://api.fullcontact.com/v2/person.json?email={0}"
FULL_CONTACT_BATCH_URL = "https://api.fullcontact.com/v2/batch.json"
BATCH_SIZE = 20
if args is None:
args = sys.argv
parser = argparse.ArgumentParser(description='Gets FullContact demographic information for every email in a list of emails')
parser.add_argument('sqlite_filename',
help='Sqlite db File containing user emails')
parser.add_argument('apiKey',
help='the full contact apikey that allows us to make api requests')
args = parser.parse_args(args[1:])
db_connection = sqlite3.connect(args.sqlite_filename)
db_cursor = db_connection.cursor()
db_inner_cursor = db_connection.cursor()
db_cursor.execute('SELECT email,status,data from full_contact_info WHERE status=0;')
rows = db_cursor.fetchmany(BATCH_SIZE)
count = 0
while len(rows) > 0:
urls_to_emails = { FULL_CONTACT_EMAIL_TEMPLATE_URL.format(r[0]) : r[0] for r in rows }
individual_query_urls = list(urls_to_emails.keys())
payload = json.dumps({"requests":individual_query_urls})
fc_response = requests.post(FULL_CONTACT_BATCH_URL,
params={'apiKey':args.apiKey},
data=payload)
response_json = json.loads(fc_response.text.encode('utf-8'))
if response_json['status'] == 200:
for query_url, response in response_json['responses'].iteritems():
email = urls_to_emails[query_url]
db_inner_cursor.execute('UPDATE full_contact_info SET status=?,data=? WHERE email=?',
(str(response['status']).encode('utf-8'),
json.dumps(response).encode('utf-8'),
email.encode('utf-8')))
else:
print 'bad batch response!'
db_connection.commit()
time.sleep(.1) #throttling requests for a total of 600 per minute max
if count % 600*BATCH_SIZE == 0:
print 'queried emails: {0}'.format(count)
#statuses should be one of 200,202,404
s_list = ('200','202','404',)
for s in s_list:
total = db_connection.execute('SELECT COUNT(*) from full_contact_info where status=:status ;',
{'status':s}).fetchone()[0]
print 'total queries with {0}: {1}'.format(s,total)
total = db_connection.execute('SELECT COUNT(*) from full_contact_info where status!=0;').fetchone()[0]
print 'total nonzero status: {0}'.format(total)
count = count + len(rows)
rows = db_cursor.fetchmany(BATCH_SIZE)
print 'queried for {0} emails'.format(count)
def initial_import(db_fname,emails_fname):
with open(emails_fname) as f:
emails = f.readlines()
db = sqlite3.connect(db_fname)
cursor = db.cursor()
for email in emails:
cursor.execute('INSERT INTO full_contact_info (email,status,data) VALUES (?,?,?)',
(email.strip(), 0, None));
db.commit()
db.close()
if __name__=='__main__':
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment