Skip to content

Instantly share code, notes, and snippets.

@cordon-thiago
Created December 1, 2019 21:53
Show Gist options
  • Save cordon-thiago/7211384aaffb741a3dccb1897d1202b8 to your computer and use it in GitHub Desktop.
Save cordon-thiago/7211384aaffb741a3dccb1897d1202b8 to your computer and use it in GitHub Desktop.
Create age dataframe column
import pandas as pd
import numpy as np
from datetime import date
from functions import aux_functions
# Derive an `age` column (whole years) on `hardbounce_2` from `birthDate_n`,
# then replace implausible (< 18, including negative) and missing ages with
# the median of the plausible ones.

# Reference date for the age computation: today at midnight.
curr_date = pd.to_datetime(date.today())

# Age in fractional years. The ambiguous 'Y' timedelta unit is deprecated for
# timedeltas and rejected by recent pandas/numpy releases, so divide by an
# explicit year length instead: 365.2425 days is the mean Gregorian year.
DAYS_PER_YEAR = 365.2425
hardbounce_2['age'] = (
    (curr_date - hardbounce_2['birthDate_n']) / pd.Timedelta(days=DAYS_PER_YEAR)
)

# There are ages missing
# (percMissing is a project helper; presumably it reports missing values per
# column -- confirm against aux_functions.)
aux_functions.percMissing(hardbounce_2)

# There are negative ages (birth dates after today).
# NaN cannot be cast to int, so drop missing ages before describing.
hardbounce_2['age'].dropna().astype(int).describe()

# Verify age < 18. Missing ages compare as False, so they are not counted
# here. The mask is built once and reused for the replacement below.
under_18 = hardbounce_2['age'] < 18
total_rows = len(hardbounce_2)
under_18_count = int(under_18.sum())
# Guard the percentage against an empty frame instead of dividing by zero.
under_18_pct = (under_18_count / total_rows) * 100 if total_rows else 0.0
print(f'Qty:{under_18_count}')
print(f'%:{under_18_pct}')

# Age median excluding below 18
age_median = hardbounce_2.loc[hardbounce_2['age'] >= 18, 'age'].median()
print('Median: ' + str(age_median))

# Without at least one age >= 18 the median is NaN and the integer cast
# further down would fail with an opaque error; fail early and say why.
if total_rows and pd.isna(age_median):
    raise ValueError('Cannot impute age: no rows with age >= 18 to take the median from')

# fill with median values < 18
hardbounce_2.loc[under_18, 'age'] = age_median

# fill missing with median
hardbounce_2['age'] = hardbounce_2['age'].fillna(age_median)

# Convert to int (truncates the fractional part of the year)
hardbounce_2['age'] = hardbounce_2['age'].astype(int)

# Describe age
hardbounce_2['age'].describe()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment