# aschleg/download_soda_api_data.py

## download_soda_api_data.py
"""

Simple script for extracting Socrata Open Data Access (SODA) datasets. Compatible with Python 3+; it can be made
compatible with Python 2.7 by changing the `from urllib.error import HTTPError` import to `from urllib2 import HTTPError`.

Parameters
----------
endpoint : string
  SODA API endpoint of the dataset.
count : int, default 1000
  The number of records to return in each paged result.
offset : int, default 0
  Offset the results returned. For example, if offset = 1000, the script will extract all records after the 1,000th record.
return_df : bool, default True
  Convert the returned SODA API dataset into a pandas DataFrame from JSON.

"""

import requests
import pandas as pd
from urllib.error import HTTPError # from urllib2 import HTTPError


def get_soda_api_data(endpoint, count=1000, offset=0, return_df=True):
    """Download every page of a SODA dataset, starting at ``offset``.

    The endpoint is requested repeatedly with ``$limit``/``$offset`` query
    parameters until it returns an empty page or an HTTP error status.

    Parameters
    ----------
    endpoint : str
        SODA API endpoint of the dataset.
    count : int, default 1000
        Number of records requested per page (sent as ``$limit``).
    offset : int, default 0
        Number of records to skip before the first page (sent as
        ``$offset``).
    return_df : bool, default True
        If True, flatten each page with ``pandas.json_normalize`` and return
        all pages as one DataFrame; otherwise return the decoded JSON pages.

    Returns
    -------
    pandas.DataFrame or list
        With ``return_df=True``, a DataFrame holding every downloaded record
        (empty if nothing was downloaded). Otherwise a list containing one
        decoded JSON page per successful request.

    Notes
    -----
    A 404 response ends paging silently. Any other HTTP error status is
    printed and also ends paging; the records collected so far are returned.
    """
    params = {'$limit': count, '$offset': offset}
    results = []

    while True:
        r = requests.get(endpoint, params=params)

        try:
            # requests does not raise on 4xx/5xx by itself; without this call
            # an error payload would be stored as if it were a page of data.
            r.raise_for_status()
        except requests.exceptions.HTTPError as err:
            status_code = err.response.status_code
            # status_code is an int, so it must be compared with 404, not '404'.
            if status_code != 404:
                print(status_code)
            # Stop in both cases: re-requesting the same offset after an
            # error would repeat the identical failing call forever.
            break

        rcontent = r.json()
        if not rcontent:
            # An empty page means the dataset has been exhausted.
            break

        results.append(rcontent)
        offset += count
        params['$offset'] = offset

    if not return_df:
        return results

    if not results:
        # pd.concat raises ValueError when given no frames.
        return pd.DataFrame()

    # DataFrame.append was removed in pandas 2.0. A plain concat keeps each
    # page's own 0-based index, exactly as the chained appends used to.
    return pd.concat([pd.json_normalize(page) for page in results])
	"""

	Simple script for extracting Socrata Open Data Access (SODA) datasets. Compatible with Python 3+; it can be made
	compatible with Python 2.7 by changing the `from urllib.error import HTTPError` import to `from urllib2 import HTTPError`.

	Parameters
	----------
	endpoint : string
	  SODA API endpoint of the dataset.
	count : int, default 1000
	  The number of records to return in each paged result.
	offset : int, default 0
	  Offset the results returned. For example, if offset = 1000, the script will extract all records after the 1,000th record.
	return_df : bool, default True
	  Convert the returned SODA API dataset into a pandas DataFrame from JSON.

	"""

	import requests
	import pandas as pd
	from urllib.error import HTTPError # from urllib2 import HTTPError


	def get_soda_api_data(endpoint, count=1000, offset=0, return_df=True):
		"""Download every page of a SODA dataset, starting at ``offset``.

		The endpoint is requested repeatedly with ``$limit``/``$offset`` query
		parameters until it returns an empty page or an HTTP error status.

		Parameters
		----------
		endpoint : str
			SODA API endpoint of the dataset.
		count : int, default 1000
			Number of records requested per page (sent as ``$limit``).
		offset : int, default 0
			Number of records to skip before the first page (sent as
			``$offset``).
		return_df : bool, default True
			If True, flatten each page with ``pandas.json_normalize`` and
			return all pages as one DataFrame; otherwise return the decoded
			JSON pages.

		Returns
		-------
		pandas.DataFrame or list
			With ``return_df=True``, a DataFrame holding every downloaded
			record (empty if nothing was downloaded). Otherwise a list
			containing one decoded JSON page per successful request.

		Notes
		-----
		A 404 response ends paging silently. Any other HTTP error status is
		printed and also ends paging; the records collected so far are
		returned.
		"""
		params = {'$limit': count, '$offset': offset}
		results = []

		while True:
			r = requests.get(endpoint, params=params)

			try:
				# requests does not raise on 4xx/5xx by itself; without this
				# call an error payload would be stored as a page of data.
				r.raise_for_status()
			except requests.exceptions.HTTPError as err:
				status_code = err.response.status_code
				# status_code is an int, so compare with 404, not '404'.
				if status_code != 404:
					print(status_code)
				# Stop in both cases: re-requesting the same offset after an
				# error would repeat the identical failing call forever.
				break

			rcontent = r.json()
			if not rcontent:
				# An empty page means the dataset has been exhausted.
				break

			results.append(rcontent)
			offset += count
			params['$offset'] = offset

		if not return_df:
			return results

		if not results:
			# pd.concat raises ValueError when given no frames.
			return pd.DataFrame()

		# DataFrame.append was removed in pandas 2.0. A plain concat keeps
		# each page's own 0-based index, as the chained appends used to.
		return pd.concat([pd.json_normalize(page) for page in results])