Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save aschleg/10b2c68953c119a734c247edee5b3049 to your computer and use it in GitHub Desktop.
Save aschleg/10b2c68953c119a734c247edee5b3049 to your computer and use it in GitHub Desktop.
Functions used in series of analyses on the Austin Animal Center's animal intakes and outcomes datasets.
import requests
import numpy as np
import pandas as pd
from six.moves.urllib.error import HTTPError
def get_soda_api_data(endpoint, count=1000, offset=0, return_df=True):
    """Download records from a SODA endpoint one page at a time.

    Pages of ``count`` records are requested with the ``$limit`` and
    ``$offset`` query parameters, starting at ``offset``. Paging stops when
    the endpoint returns an empty page or answers with an HTTP error status.

    Args:
        endpoint: URL of the endpoint to query.
        count: Number of records requested per page.
        offset: Position of the first record to request.
        return_df: When true, return the records as a single DataFrame;
            otherwise return the list of decoded pages.

    Returns:
        Either a DataFrame made by normalizing every page and concatenating
        the results (each page keeps its own 0-based index, and the frame is
        empty when nothing was downloaded), or a list holding the decoded
        JSON content of each page.
    """
    params = {'$limit': count, '$offset': offset}
    results = []

    while True:
        r = requests.get(endpoint, params=params)

        # requests does not raise for HTTP error statuses on its own, so the
        # status is inspected explicitly. Any error ends the paging; statuses
        # other than 404 are reported first. Pages collected so far are kept.
        if r.status_code == 404:
            break
        if not r.ok:
            print(r.status_code)
            break

        rcontent = r.json()
        if not rcontent:
            # An empty page means the previous request reached the end.
            break

        results.append(rcontent)
        offset += count
        params['$offset'] = offset

    if not return_df:
        return results
    if not results:
        # pd.concat rejects an empty list, so return an empty frame directly.
        return pd.DataFrame()

    # DataFrame.append no longer exists in current pandas; concatenating once
    # also avoids re-copying the accumulated frame for every page.
    return pd.concat([pd.json_normalize(page) for page in results])
def create_unique_id(df, event):
    """Number each animal's events and derive a per-event identifier.

    The frame is modified in place: its index is reset (the previous index
    becomes a column), ``<event>_number`` receives the dense rank of
    ``<event>_datetime`` within each ``animal_id`` (rank 1 is the largest
    value), and ``animal_id_new`` is ``animal_id`` joined to that rank with
    an underscore.

    Args:
        df: DataFrame with ``animal_id`` and ``<event>_datetime`` columns.
        event: Prefix selecting the datetime column and naming the rank
            column.

    Returns:
        The same DataFrame object, with the columns described above added.
    """
    number_column = event + '_number'
    datetime_column = event + '_datetime'

    df.reset_index(inplace=True)

    per_animal = df.groupby(['animal_id'])[datetime_column]
    df[number_column] = per_animal.rank(method='dense', ascending=False)

    # Ranks come back as floats; go through int so the suffix reads "1", not "1.0".
    suffix = df[number_column].astype(int).astype(str)
    df['animal_id_new'] = df['animal_id'] + '_' + suffix

    return df
def transform_age(df, age_column):
    """Break a textual age column such as ``'2 years'`` into numeric columns.

    Rows whose age is the literal string ``'NULL'`` are dropped. The input
    frame is left untouched; a filtered copy is returned with the following
    columns added, each named ``age_column`` plus the suffix shown:

    * ``_Periods``: the integer before the first space (0 when the age is
      missing).
    * ``_Period Range``: days per unit, chosen by the first of ``day`` (1),
      ``week`` (7), ``month`` (30), ``year`` (365) contained in the text after
      the first space; 0 when none matches or the unit is missing.
    * ``_(days)``: ``_Periods`` multiplied by ``_Period Range``.
    * ``_(years)``: ``_(days)`` divided by 365.
    * ``_age_group``: ``_(years)`` cut into 10 equal-width bins.

    Args:
        df: DataFrame containing ``age_column``.
        age_column: Name of the column holding ages as ``'<number> <unit>'``.

    Returns:
        A new DataFrame with the ``'NULL'`` rows removed and the columns
        above added.
    """
    unit_days = (('day', 1), ('week', 7), ('month', 30), ('year', 365))
    periods_column = age_column + '_Periods'
    range_column = age_column + '_Period Range'
    days_column = age_column + '_(days)'
    years_column = age_column + '_(years)'

    # Copy so the new columns are written to an independent frame rather
    # than to a slice of the caller's data.
    df = df.loc[df[age_column] != 'NULL'].copy()

    parts = df[age_column].str.split(' ')
    df[periods_column] = parts.str[0].fillna(0).astype(int)

    # Missing units become '' so every containment test yields a real
    # boolean; a NaN here would be treated as a match for the first unit.
    units = parts.str[1].fillna('')
    conditions = [
        np.asarray(units.str.contains(unit, regex=False), dtype=bool)
        for unit, _ in unit_days
    ]
    multipliers = [days for _, days in unit_days]
    df[range_column] = np.select(conditions, multipliers, default=0).astype(int)

    df[days_column] = df[range_column] * df[periods_column]
    df[years_column] = df[days_column] / 365
    df[age_column + '_age_group'] = pd.cut(df[years_column], 10)

    return df
def transform_date(df, event):
    """Parse the ``datetime`` column and add calendar columns for an event.

    The frame is modified in place. ``<event>_datetime`` holds the parsed
    ``datetime`` column, and ``<event>_month``, ``<event>_year``,
    ``<event>_monthyear`` (monthly period), ``<event>_weekday`` (day name)
    and ``<event>_hour`` are derived from it.

    Args:
        df: DataFrame with a ``datetime`` column that pandas can parse.
        event: Prefix used for every column that is added.

    Returns:
        The same DataFrame object, with the columns described above added.
    """
    event_date = event + '_datetime'

    df[event_date] = pd.to_datetime(df['datetime'])
    parsed = df[event_date].dt

    df[event + '_month'] = parsed.month
    df[event + '_year'] = parsed.year
    df[event + '_monthyear'] = parsed.to_period('M')
    # day_name() replaces the weekday_name attribute, which current pandas
    # no longer provides.
    df[event + '_weekday'] = parsed.day_name()
    df[event + '_hour'] = parsed.hour

    return df
def transform_birthdates(df):
    """Parse ``date_of_birth`` and add year, month and month-year columns.

    The frame is modified in place: ``date_of_birth`` is converted to
    datetimes, and ``dob_year``, ``dob_month`` and ``dob_monthyear`` (monthly
    period) are derived from it.

    Args:
        df: DataFrame with a ``date_of_birth`` column that pandas can parse.

    Returns:
        The same DataFrame object, with the columns described above added.
    """
    df['date_of_birth'] = pd.to_datetime(df['date_of_birth'])
    born = df['date_of_birth'].dt

    df['dob_year'] = born.year
    df['dob_month'] = born.month
    # Derived from the birth date itself, like the two columns above; a
    # separate 'monthyear' column is neither required nor consulted.
    df['dob_monthyear'] = born.to_period('M')

    return df
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment