Created
March 29, 2018 01:30
-
-
Save aschleg/10b2c68953c119a734c247edee5b3049 to your computer and use it in GitHub Desktop.
Functions used in a series of analyses on the Austin Animal Center's animal intakes and outcomes datasets.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
import numpy as np | |
import pandas as pd | |
from six.moves.urllib.error import HTTPError | |
def get_soda_api_data(endpoint, count=1000, offset=0, return_df=True):
    """Page through a JSON API endpoint and collect every record it returns.

    The endpoint is queried repeatedly with ``$limit``/``$offset`` query
    parameters, advancing the offset by ``count`` each time, until a page
    comes back empty or the server answers with an HTTP error.

    Parameters
    ----------
    endpoint : str
        URL of the API endpoint to query.
    count : int, default 1000
        Number of records requested per page (sent as ``$limit``).
    offset : int, default 0
        Record offset of the first page (sent as ``$offset``).
    return_df : bool, default True
        If True, return the pages combined into one DataFrame; otherwise
        return the raw list of pages.

    Returns
    -------
    pandas.DataFrame or list
        With ``return_df=True``, a DataFrame built by normalizing each page
        and concatenating them (each page keeps its own 0-based row index).
        Otherwise a list with one decoded JSON payload per page.

    Notes
    -----
    Paging stops on any HTTP error status. A 404 is treated as a silent end
    of data; any other status code is printed before stopping, and whatever
    was collected up to that point is returned.
    """
    params = {'$limit': count, '$offset': offset}
    results = []

    while True:
        try:
            r = requests.get(endpoint, params=params)
            # requests does not raise on 4xx/5xx responses by itself; without
            # this call an error payload would be stored as if it were data
            # and the loop would keep requesting pages forever.
            r.raise_for_status()
        except requests.exceptions.HTTPError as err:
            status = err.response.status_code
            if status != 404:
                print(status)
            break

        rcontent = r.json()
        if not rcontent:
            # An empty page means every record has been read.
            break

        results.append(rcontent)
        offset += count
        params['$offset'] = offset

    if not return_df:
        return results

    if not results:
        # pd.concat raises on an empty list, so return an empty frame directly.
        return pd.DataFrame()

    # pd.json_normalize is the current public name; fall back to the legacy
    # location on pandas versions that predate it.
    normalize = getattr(pd, 'json_normalize', None) or pd.io.json.json_normalize
    # DataFrame.append was removed in pandas 2.0; a single concat also avoids
    # re-copying the accumulated frame for every page.
    return pd.concat([normalize(page) for page in results])
def create_unique_id(df, event):
    """Number each animal's events and derive a per-event identifier.

    Within every ``animal_id`` group the ``<event>_datetime`` values are
    dense-ranked in descending order, so the latest event of an animal gets
    number 1. The rank is stored in ``<event>_number`` and appended to the
    animal id (``<animal_id>_<number>``) in ``animal_id_new``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``animal_id`` and ``<event>_datetime`` columns. The
        frame is modified in place: its index is reset (the previous index
        becomes a regular column) and the two new columns are added.
    event : str
        Prefix of the datetime column to rank, e.g. ``'intake'``.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object that was passed in.
    """
    number_col = event + '_number'
    datetime_col = event + '_datetime'

    df.reset_index(inplace=True)

    events_by_animal = df.groupby(['animal_id'])[datetime_col]
    df[number_col] = events_by_animal.rank(method='dense', ascending=False)

    # Ranks come back as floats; cast through int so the suffix reads "1", not "1.0".
    suffix = df[number_col].astype(int).astype(str)
    df['animal_id_new'] = df['animal_id'] + '_' + suffix

    return df
def transform_age(df, age_column):
    """Convert textual ages such as ``'2 years'`` into numeric columns.

    Rows whose age equals the literal string ``'NULL'`` are dropped. Each
    remaining age is split on a space into a count and a unit, and the
    following columns are added (all prefixed with ``age_column``):

    * ``_Periods``      -- the integer count (0 when the age is missing)
    * ``_Period Range`` -- days per unit: 1 for day, 7 for week, 30 for
      month, 365 for year, 0 when the unit is missing or unrecognized
    * ``_(days)``       -- count multiplied by days per unit
    * ``_(years)``      -- ``_(days)`` divided by 365
    * ``_age_group``    -- ``_(years)`` cut into 10 equal-width bins

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing ``age_column``. It is not modified.
    age_column : str
        Name of the column holding the textual ages.

    Returns
    -------
    pandas.DataFrame
        A new frame with the ``'NULL'`` rows removed and the columns above
        added.
    """
    # Work on an explicit copy so the column assignments below do not write
    # to a slice of the caller's frame (SettingWithCopyWarning).
    df = df.loc[df[age_column] != 'NULL'].copy()

    range_column = age_column + '_Period Range'

    # Split once and reuse both halves: "<count> <unit>".
    parts = df[age_column].str.split(' ')
    df[age_column + '_Periods'] = parts.str[0].fillna(0).astype(int)

    # Keep the unit column purely textual; filling with a number would make
    # str.contains return NaN, which np.where treats as True.
    units = parts.str[1].fillna('')
    is_day = units.str.contains('day', na=False)
    is_week = units.str.contains('week', na=False)
    is_month = units.str.contains('month', na=False)
    is_year = units.str.contains('year', na=False)

    df[range_column] = np.where(is_day, 1,
                                np.where(is_week, 7,
                                         np.where(is_month, 30,
                                                  np.where(is_year, 365, 0)))).astype(int)

    df[age_column + '_(days)'] = df[range_column] * df[age_column + '_Periods']
    df[age_column + '_(years)'] = df[age_column + '_(days)'] / 365
    df[age_column + '_age_group'] = pd.cut(df[age_column + '_(years)'], 10)

    return df
def transform_date(df, event):
    """Parse the ``datetime`` column and add calendar breakdown columns.

    The frame's ``datetime`` column is converted with ``pd.to_datetime`` and
    stored as ``<event>_datetime``; month, year, month-year period, weekday
    name and hour are then derived from it.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``datetime`` column. Modified in place.
    event : str
        Prefix for the new columns, e.g. ``'intake'`` or ``'outcome'``.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object with ``<event>_datetime``, ``<event>_month``,
        ``<event>_year``, ``<event>_monthyear``, ``<event>_weekday`` and
        ``<event>_hour`` columns added.
    """
    event_date = event + '_datetime'

    df[event_date] = pd.to_datetime(df['datetime'])
    moments = df[event_date].dt

    df[event + '_month'] = moments.month
    df[event + '_year'] = moments.year
    df[event + '_monthyear'] = moments.to_period('M')
    # Series.dt.weekday_name was removed in pandas 1.0; day_name() is its
    # replacement and yields the same English weekday names by default.
    df[event + '_weekday'] = moments.day_name()
    df[event + '_hour'] = moments.hour

    return df
def transform_birthdates(df):
    """Parse ``date_of_birth`` and add year, month and month-year columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``date_of_birth`` column. Modified in place: the
        column is converted with ``pd.to_datetime`` and ``dob_year``,
        ``dob_month`` and ``dob_monthyear`` are added.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object with the new columns.
    """
    df['date_of_birth'] = pd.to_datetime(df['date_of_birth'])
    birth = df['date_of_birth'].dt

    df['dob_year'] = birth.year
    df['dob_month'] = birth.month
    # Derive the period from the birth date itself. The previous version read
    # the frame's separate 'monthyear' column, so dob_monthyear did not
    # describe the date of birth and the call failed when that column was
    # absent.
    df['dob_monthyear'] = birth.to_period('M')

    return df
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment