Skip to content

Instantly share code, notes, and snippets.

@justinhchae
justinhchae / impute_dates.py
Last active January 3, 2021 20:22
impute_dates
import pandas as pd
# col1 is the date column being repaired; col2 is the date column used
# as the reference for the replacement year
col1, col2 = 'event_date', 'received_date'
# name of the column that receives the imputed dates
col_new = f'{col1}_new'
# lower and upper year bounds used by the year comparisons
past_year, curr_year = 2010, 2020
# given a dataframe df with col1 and col2 as datetime columns
@justinhchae
justinhchae / impute_dates_iteration.py
Created December 30, 2020 22:01
impute with iteration
# iterate through a DataFrame with iterrows()
curr_year = 2020
past_year = 2010
# loop through index and rows of df
for idx, row in df.iterrows():
# compare years as integers
if row[col1].year > curr_year:
# return received_date's year as integer
new_year = row[col2].year
@justinhchae
justinhchae / impute_dates_iter_change_log.py
Last active December 30, 2020 22:07
impute dates with iterative solution and change log
# same iterrows() solution with change log
curr_year = 2020
past_year = 2010
change_log = []
# loop through index and rows of df
for idx, row in df.iterrows():
if row[col1].year > curr_year:
new_year = row[col2].year
# save reference data as tuple
records = tuple((row['case_id']
@justinhchae
justinhchae / impute_lambdas_changelog.py
Last active January 1, 2021 02:17
impute dates with apply and lambdas with change log
# keep the row-wise rule in a reusable lambda object:
# when col1's year is after curr_year or before past_year, return col1
# with its year swapped for col2's year; otherwise return col1 unchanged
impute = lambda row: (
    row[col1].replace(year=row[col2].year)
    if row[col1].year > curr_year or row[col1].year < past_year
    else row[col1]
)
# apply the stored rule to every row (axis=1) to fill the new column
df[col_new] = df.apply(impute, axis=1)
# change_log: the rows whose col1 year is greater than curr_year
change_log = df[df[col1].dt.year.gt(curr_year)]
@justinhchae
justinhchae / lambda_pandas_two_conditionals.py
Last active January 3, 2021 20:24
Lambda function with two conditional statements
# lambda with two conditional statements, evaluated per row (axis=1):
#   1) col1 year after curr_year   -> col1 with col2's year
#   2) col1 year before past_year  -> col1 with col2's year
#   otherwise                      -> col1 unchanged
df[col_new] = df.apply(
    lambda row: (
        row[col1].replace(year=row[col2].year)
        if row[col1].year > curr_year
        else (
            row[col1].replace(year=row[col2].year)
            if row[col1].year < past_year
            else row[col1]
        )
    ),
    axis=1,
)
@justinhchae
justinhchae / lambda_pandas_one_conditiona.py
Last active January 1, 2021 02:15
Lambdas on Pandas DF with apply and one conditional
# per row: when col1's year is after curr_year, swap in col2's year;
# rows that do not meet the condition keep their original col1 value
df[col_new] = df.apply(
    lambda row: (
        row[col1].replace(year=row[col2].year)
        if row[col1].year > curr_year
        else row[col1]
    ),
    axis=1,
)
# keep only the rows whose col1 year is greater than curr_year
df = df[df[col1].dt.year > curr_year]
# show the first two remaining rows: original, imputed and reference dates
print(df.loc[:, [col1, col_new, col2]].head(2))
# output
@justinhchae
justinhchae / simple_lambdas_ondf.py
Created December 30, 2020 22:14
A simple, do-nothing lambda function on a DataFrame
# name the new column after col1
col_new = f'{col1}_new'
# row-wise pass-through lambda: each row's col2 value becomes its
# col_new value
df[col_new] = df.apply(lambda row: row[col2], axis=1)
# show the first two rows of the three columns side by side
print(df.loc[:, [col1, col_new, col2]].head(2))
# output
"""
event_date event_date_new received_date
2011-01-05 2011-01-31 2011-01-31
@justinhchae
justinhchae / simple_lambdas_example.py
Last active January 1, 2021 02:15
Another example of a lambda that changes all values
# overwrite every value in col1 with the same calendar date
# (year 2109, month 1, day 1) via an element-wise lambda
df[col1] = df[col1].apply(
    lambda stamp: stamp.replace(day=1, month=1, year=2109)
)
# show the first two values of the rewritten column
print(df[col1].head(2))
# output
"""
event_date
2109-01-01
2109-01-01
# sample input: four date strings, each paired with a category label
data = {
    'dates': [
        '2012-05-04',
        '2012-05-04',
        '2012-06-04',
        '2012-08-08',
    ],
    'types': [
        'a',
        'a',
        'z',
        'z',
    ],
}
# grouping frequency passed to pd.Grouper; 'D' or 'Y' are alternatives
freq = 'M'
# bucket rows by `freq` periods of the 'dates' column, count the entries
# in each bucket, then turn the group key back into a regular column
df = (
    df[['dates', 'types']]
    .groupby([pd.Grouper(key='dates', freq=freq)])
    .agg('count')
    .reset_index()
)
"""
dates count
2 2012-07-31 0
1 2012-06-30 1
3 2012-08-31 1
0 2012-05-31 2