Created
December 1, 2019 21:53
-
-
Save cordon-thiago/7211384aaffb741a3dccb1897d1202b8 to your computer and use it in GitHub Desktop.
Create age dataframe column
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd
import numpy as np
from datetime import date
from functions import aux_functions

# Build an integer 'age' column (in years) on the `hardbounce_2` DataFrame
# from its 'birthDate_n' column, then clean it: ages below MIN_AGE (which
# includes negative ages) and missing ages are replaced by the median age of
# the rows that are at least MIN_AGE.
#
# NOTE(review): `hardbounce_2` is defined outside this snippet; the
# subtraction below assumes 'birthDate_n' is a datetime64 column -- confirm.

# Average Gregorian year length in days. Used instead of
# np.timedelta64(1, 'Y'), which pandas 2.x rejects as a divisor because a
# calendar year is not a fixed duration.
DAYS_PER_YEAR = 365.2425

# Ages strictly below this threshold are treated as invalid and imputed.
MIN_AGE = 18

curr_date = pd.to_datetime(date.today())

# Fractional age in years; rows with a missing birth date yield NaN.
hardbounce_2['age'] = (
    (curr_date - hardbounce_2['birthDate_n']) / pd.Timedelta(days=DAYS_PER_YEAR)
)

# There are ages missing
# NOTE(review): percMissing is a project helper; presumably it reports the
# share of missing values per column -- verify against its definition.
aux_functions.percMissing(hardbounce_2)

# There are negative ages
# (bare expression: the summary is only displayed in a notebook/REPL)
hardbounce_2['age'].dropna().astype(int).describe()

# Verify age < MIN_AGE. The mask is computed once, before any imputation,
# and reused for both the report and the replacement below.
is_underage = hardbounce_2['age'] < MIN_AGE
underage_count = int(is_underage.sum())
total_rows = len(hardbounce_2)
# Guard against an empty frame, where the ratio would divide by zero.
underage_pct = underage_count / total_rows * 100 if total_rows else 0.0
print('Qty:' + str(underage_count))
print('%:' + str(underage_pct))

# Age median excluding below MIN_AGE (NaN ages are excluded by the
# comparison as well, since NaN >= MIN_AGE is False).
age_median = hardbounce_2.loc[hardbounce_2['age'] >= MIN_AGE, 'age'].median()
print('Median: ' + str(age_median))

# Fill values < MIN_AGE with the median
hardbounce_2.loc[is_underage, 'age'] = age_median

# Fill missing with the median
hardbounce_2['age'] = hardbounce_2['age'].fillna(age_median)

# Convert to int (truncates the fractional part of the age).
# If no row is >= MIN_AGE the median is NaN and this cast raises on a
# non-empty frame.
hardbounce_2['age'] = hardbounce_2['age'].astype(int)

# Describe age
# (bare expression: the summary is only displayed in a notebook/REPL)
hardbounce_2['age'].describe()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment