Skip to content

Instantly share code, notes, and snippets.

@pipitone
Last active August 29, 2015 14:16
Show Gist options
  • Save pipitone/0228a24a400760eed2f0 to your computer and use it in GitHub Desktop.
Save pipitone/0228a24a400760eed2f0 to your computer and use it in GitHub Desktop.
It merges stuff. So well.
#
#
# ````````````
# ```.......................```
# ```..................................``
# `...........................................``
# `.................................................``
# ``......................................................`
# `............................................................`
# `................................................................`
# `...................................................................`
# `....---:---....................................--::---................`
# `..-/oydmmmmmdy+:-...........................-:+yhdmmmmmdyo/-..............
# `..+dNNmyso+osdMMNh+-.......................-odNNmhso++oshNMNms:.............`
# ..-yNMh:` `+dMMMMMd/....................-omMms:. .sNMMMMNy:............`
# ..-hMN+` +MMMMMMMMNs-.................:hMNs. `hMMMMMMMMm/............`
# `..yMN+ sMMMMMMMMMMo................:dMm/ `mMMMMMMMMMN/............`
# `-./NMy -dMMMMMMNmMN/...............hMN/ +NMMMMMMmmMm:............
# ...hMN- `/yhdho-:NMy............../NMy -oyddy+./MMs............`
# `..-mMm` `dMm-.............sMM: `mMd-............
# ...-mMd` `hMN-.............yMN: `dMm-.............
# `....hMN. `dMm-.............oMM/ .mMd..............
# .....oMMy++++++++++++++++++sMMy..............:NMd++++++++++++++++++++++sMMo..............`
# .....-hNNNNNNNNNNNNNNNNNNNNNNm:...............sNNNNNNNNNNNNNNNNNNNNNNNNNNd-...............
# -.....------------------------.................---------------------------...............-
# -........................................................................................-
# -........................................................................................-
# ........----------------------------------------------------------------..................
# .......-dNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNs................`
# `.......osshMMNmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmmNMN/..............-
# -.........sMMdhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhdMMh...............
# ........../NMNhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhNMN:............-`
# ..........yMMdhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhmMM/.............
# `.........:NMNhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhmMN/............
# `.........+NMNhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhNMm-...........`
# ..........oNMmhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhmMMy...........`
# `.........oNMNhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhdNMm:..........`
# `.........oNMNdhhhhhhhhhhhhhhhhhhhhhyso+/::--:::/+syhhhhhhhhhhdNMm/..........`
# `.........+mMNdhhhhhhhhhhhhhhhhhhs/-..............-:oyhhhhhhmMMd/..........
# .........:hMMmdhhhhhhhhhhhhhho:....................-+yhhdNMNy:.........`
# `........-+mMMmdhhhhhhhhhhy:......................../dNMMd/..........
# `........-smMNmdhhhhhhhs-......................-+hNMNh+-.........`
# `........:omMMNmdhhhy-...................-:ohmMNms/-.........`
# `........-+ymNMNmms--............--:/oydNMNmy/-.........``
# `........-/shmNNNmhyysoooossyhdmNNNmdyo:-..........`
# `.........-:+oyhdmmmNNNmmddhso+:-...........``
# ``.............-------...............``
# ``.........................```
# ``````````````
#
#
# Usage: datmerge("stuff.xls")
# It merges all the sheets in the workbook by the first column of each sheet.
# and it gives every column in each sheet a prefix similar to the name of the sheet.
# oh yeah.
datmerge <- function(xlsfile) {
library(XLConnect)
wb <- loadWorkbook(xlsfile, create=FALSE)
# load workbook sheets into a list of named dataframes
# names are the sheet names
data = list()
sheet_names = getSheets(wb)
for (sheet_name in sheet_names) {
data[[sheet_name]] = readWorksheet(wb, sheet=sheet_name)
}
# dat merge
# merge incrementally. Start with the first sheet.
sheet = sheet_names[1]
merged = data[[sheet]]
merge_id = colnames(merged)[1] # usually, ParticipantID
pre = gsub("\\W", "", sheet) # column name prefix is the sheet name without any non-word characters
colnames(merged) = c(merge_id, paste(pre,"_",colnames(merged)[2:length(colnames(merged))], sep=""))
# now, loop over the rest of the sheets and merge them in by merge_id, renaming columns
# as we go.
for (sheet in sheet_names[2:length(sheet_names)]) {
d = data[[sheet]]
pre = gsub("\\W", "", sheet)
colnames(d) = c(merge_id, paste(pre, "_", colnames(d)[2:length(colnames(d))], sep=""))
merged = merge(merged,d, by = merge_id, all = T)
}
# dat clean
# drop all columns that are all na
cleaned = merged[,colSums(is.na(merged)) != nrow(merged)]
return(cleaned)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment