Skip to content

Instantly share code, notes, and snippets.

@chrislawlor
Last active October 2, 2017 19:23
Show Gist options
  • Save chrislawlor/5c8458c4e02dfcd7b045acb11d24cd84 to your computer and use it in GitHub Desktop.
Save chrislawlor/5c8458c4e02dfcd7b045acb11d24cd84 to your computer and use it in GitHub Desktop.
Foursquare category update to PostgreSQL table.
"""
Download the category list from Foursquare, and upsert it into
a flat table in a PostgreSQL database.
See https://developer.foursquare.com/docs/venues/categories
1. Create the table with the following DDL statement:
CREATE TABLE foursquare_categories
(
id TEXT NOT NULL
CONSTRAINT foursquare_categories_pkey
PRIMARY KEY
CONSTRAINT foursquare_categories_parent_fk
REFERENCES foursquare_categories
ON DELETE CASCADE,
name TEXT,
plural_name TEXT,
short_name TEXT,
icon TEXT,
parent TEXT
);
2. Install dependencies:
pip install sqlalchemy requests psycopg2
3. Set the following environment variables:
FOURSQUARE_CLIENT_ID
FOURSQUARE_CLIENT_SECRET
FOURSQUARE_VERSION (optional, see https://developer.foursquare.com/overview/versioning)
DATABASE_URL (e.g postgresql://username:password@hostname:port/db)
4. Run `python foursquare_category_update.py`, via weekly cron job
Written for Python 3.6, though it should be trivial to adapt to older versions.
"""
import os
from urllib.parse import urlencode, urljoin
import requests
from sqlalchemy import text as sa_text, create_engine
from sqlalchemy.orm import sessionmaker, scoped_session
from sqlalchemy.exc import (DataError, IntegrityError, OperationalError,
ProgrammingError)
FOURSQUARE_CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID')
FOURSQUARE_CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET')
FOURSQUARE_VERSION = os.environ.get('FOURSQUARE_VERSION', '20170823')
DATABASE_URL = os.environ.get('DATABASE_URL')
BASE = 'https://api.foursquare.com/v2/'
class FoursquareError(Exception):
pass
# NOTE: The `get_session` and `get_foursquare_url` functions are
# more decomposed than they need to be for the purposes of this script,
# to allow them to be used as utility functions elsewhere.
def get_session():
"""
Returns a session with Foursquare API request parameters set.
"""
s = requests.Session()
s.params = dict(
client_id=FOURSQUARE_CLIENT_ID,
client_secret=FOURSQUARE_CLIENT_SECRET,
v=FOURSQUARE_VERSION
)
return s
def get_foursquare_url(endpoint, **params):
"""
Given an andpoint, and query string parameters as
keyword arguments, return a URL suitable for submission
to the Foursquare API.
Does NOT automatically add authentication parameters,
use the session returned by `foursquare.get_session`.
"""
path = urljoin(BASE, endpoint)
qs = urlencode(params)
return f'{path}?{qs}'
def _get_categories():
"""
Returns the JSON-formatted nested list of Foursquare
categories.
"""
s = get_session()
url = get_foursquare_url('venues/categories')
resp = s.get(url)
if resp.status_code == 200:
return resp.json()['response']['categories']
raise FoursquareError(resp.text)
def _bulk_upsert_categories(categories):
"""
Do a bulk upsert of the given categories.
"""
# Get a SQLAlchemy session
engine = create_engine(DATABASE_URL)
session = scoped_session(sessionmaker(bind=engine))
# Convert the category list of dicts into a dict of lists,
# keyed by column name
values = defaultdict(list)
for c in categories:
for attr in ('id', 'name', 'plural_name', 'short_name',
'icon', 'parent'):
values[attr].append(c[attr])
# Upsert the categories, all in one query!
q = sa_text("""
insert into foursquare_categories(
id, name, plural_name, short_name, icon, parent
)
select
unnest(:id)
, unnest(:name)
, unnest(:plural_name)
, unnest(:short_name)
, unnest(:icon)
, unnest(:parent)
on conflict (id) do update set
name=excluded.name
, plural_name=excluded.plural_name
, short_name=excluded.short_name
, icon=excluded.icon
, parent=excluded.parent
""").bindparams(**values)
try:
session.execute(q)
session.commit()
except (ProgrammingError, DataError, OperationalError, IntegrityError):
session.rollback()
raise
def update_categories():
"""
Updates the Foursquare categories table in the Data Science
DB, with a fresh list from the Foursquare API.
Foursquare recommends running this weekly.
"""
category_db_list = []
# Flatten the list with a simple recursive function
def transform_categories(categories, parent=None):
for c in categories:
category_db_list.append(dict(
id=c['id'],
name=c['name'],
plural_name=c['pluralName'],
short_name=c['shortName'],
icon=f"{c['icon']['prefix']}{c['icon']['suffix']}",
parent=parent))
if c['categories']:
transform_categories(c['categories'], parent=c['id'])
transform_categories(_get_categories())
_bulk_upsert_categories(category_db_list)
if __name__ == '__main__':
update_categories()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment