Last active
December 30, 2020 22:07
-
-
Save justinhchae/5c39015aa4192ecf37be297d7d089ec2 to your computer and use it in GitHub Desktop.
impute dates with iterative solution and change log
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Row-by-row (iterrows) year imputation with a traceable change log.
#
# Relies on names defined outside this snippet: `df` (a DataFrame), `col1`
# (the date column being repaired), `col2` (the date column whose year is
# borrowed), `date_type` (label used in the log's column names) and `pd`.
curr_year = 2020
past_year = 2010
change_log = []

# iterrows() hands back a snapshot of each row, so `row` still holds the
# original date after the frame itself is updated through df.loc below.
for idx, row in df.iterrows():
    old_year = row[col1].year
    # A year is implausible when it lies after curr_year or before past_year.
    # Written as two comparisons so a missing date (year is NaN) matches
    # neither side and the row is left untouched.
    if old_year > curr_year or old_year < past_year:
        new_year = row[col2].year
        # reference data for the log: which record changed, from what, to what
        records = (row['case_id'], row['event_id'], old_year, new_year)
        change_log.append(records)
        # keep month/day/time of the original date, swap only the year
        df.loc[idx, col1] = row[col1].replace(year=new_year)

# log column names carry the date type, e.g. old_<date_type> / new_<date_type>
old_date = f'old_{date_type}'
new_date = f'new_{date_type}'

# one row per corrected record
log = pd.DataFrame.from_records(
    change_log,
    columns=['case_id', 'event_id', old_date, new_date],
)

# traceable record of changes to csv
log.to_csv('change_log.csv')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment