Skip to content

Instantly share code, notes, and snippets.

@cordon-thiago
Last active December 1, 2019 21:56
Show Gist options
  • Save cordon-thiago/68c993e27aa978cf654e7f69b6af9b09 to your computer and use it in GitHub Desktop.
Save cordon-thiago/68c993e27aa978cf654e7f69b6af9b09 to your computer and use it in GitHub Desktop.
import pandas as pd
import numpy as np
from datetime import date
from functions import aux_functions
# Derive 'monthsSinceRegDate': the number of (average-length) months elapsed
# between each row's 'regDate_n' and today, cleaned and stored as an integer.
curr_date = pd.to_datetime(date.today())

# A calendar month has no fixed duration, so the 'M' unit is ambiguous for
# timedeltas: np.timedelta64(1, 'M') was deprecated and is rejected by recent
# pandas releases. Divide by an explicit average Gregorian month instead:
# 365.2425 / 12 days = 30.436875 days = 2,629,746 seconds.
avg_month = pd.Timedelta(seconds=2629746)
hardbounce_2['monthsSinceRegDate'] = (curr_date - hardbounce_2['regDate_n']) / avg_month

# There are rows with monthsSinceRegDate missing
aux_functions.percMissing(hardbounce_2)

# There are rows with months less than 1
hardbounce_2['monthsSinceRegDate'].dropna().astype(int).describe()

# Fill with 1 if the value < 0 (i.e. the registration date lies in the future).
# NOTE(review): the comment above mentions "less than 1" but only negative
# values are replaced here - confirm which threshold is intended.
hardbounce_2.loc[hardbounce_2['monthsSinceRegDate'] < 0, 'monthsSinceRegDate'] = 1

# Fill with median when missing (median is computed once, after the fix above,
# and skips missing values).
median_months = hardbounce_2['monthsSinceRegDate'].median()
print('Median: ' + str(median_months))
hardbounce_2['monthsSinceRegDate'] = hardbounce_2['monthsSinceRegDate'].fillna(median_months)

# Convert to int (truncates the fractional part of the month count)
hardbounce_2['monthsSinceRegDate'] = hardbounce_2['monthsSinceRegDate'].astype(int)

# Describe monthsSinceRegDate
hardbounce_2['monthsSinceRegDate'].describe()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment