Skip to content

Instantly share code, notes, and snippets.

@MattSandy
Last active December 3, 2016 20:27
Show Gist options
  • Save MattSandy/9112e88b8a7f103233472f5e05d73415 to your computer and use it in GitHub Desktop.
Save MattSandy/9112e88b8a7f103233472f5e05d73415 to your computer and use it in GitHub Desktop.
Update and Append Multiple Files in R
setwd("~/R/Merge Stuff")
install.packages("openxlsx")
library("openxlsx")
file <- list()
#base file is the original you are working from
#update file is the file with new information which updates base cells
#error file contains information in new columns which are appended
file$base <- read.xlsx("base.xlsx")
file$update <- read.xlsx("update.xlsx")
file$error <- read.xlsx("error.xlsx")
#set up empty columns for columns in update file which aren't in base file
for(column in names(file$update)[!names(file$update) %in% names(file$base)]) {
print(column)
file$base[,column] <- NA
}
#set up empty columns for columns in error file which aren't in base file
for(column in names(file$error)[!names(file$error) %in% names(file$base)]) {
print(column)
file$base[,column] <- NA
}
#loops through file$base looking for matches by FIRSTNAME and LASTNAME in both datasets
for(i in 1:nrow(file$base)) {
#look for rows in the update dataset which match current looped row in base dataset
match <- file$update[which(file$update$FIRSTNAME==file$base[i,"FIRSTNAME"] & file$update$LASTNAME==file$base[i,"LASTNAME"]),]
#more than one row returned in the update file
if(nrow(match)>1) {
print("Collision in file$update")
#print the row which caused collision
print(file$base[i,])
} else if(nrow(match)==1) {
#update base dataset with column info from update dataset for current row
#uses all column names from update dataset
file$base[i,names(file$update)] <- match[1,]
}
#look for rows in the error dataset which match current looped row in base dataset
match <- file$error[which(file$error$FIRSTNAME==file$base[i,"FIRSTNAME"] & file$error$LASTNAME==file$base[i,"LASTNAME"]),]
#more than one row returned
if(nrow(match)>1) {
print("Collision in file$error")
#print the row which caused collision
print(file$base[i,])
} else if(nrow(match)==1) {
#append column information from error dataset to base dataset for current row
#uses all column names from error dataset which aren't in the update dataset
file$base[i,names(file$error)[!names(file$error) %in% names(file$update)]] <- match[1,names(file$error)[!names(file$error) %in% names(file$update)]]
}
}
#export modified base file by creating excel worksheet inside a workbook
wb <- createWorkbook()
addWorksheet(wb, "Export")
writeData(wb, 1, file$base)
saveWorkbook(wb, file = "./export.xlsx", overwrite = TRUE)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment