Skip to content

Instantly share code, notes, and snippets.

@emanuelhuber
Last active December 7, 2023 09:15
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save emanuelhuber/fbe7799b1dcfae0cbb997508af283ede to your computer and use it in GitHub Desktop.
Save emanuelhuber/fbe7799b1dcfae0cbb997508af283ede to your computer and use it in GitHub Desktop.
if(!require("openxlsx")) install.packages("openxlsx")
library("openxlsx")
# this function open an explorer window.
# select the file you want to split
# the file is split into files according to the values
# in the column "Projektleitung"
splitExcelFile <- function(fpath = NULL, fpathold = TRUE, startRow = 1){
# where the file starts
# startRow = 3
if(is.null(fpath)){
fpath <- file.choose()
}else{
if(!file.exists(fpath)) stop("error: no '", fpath, "' file found!")
}
if(!is.null(fpathold)){
if(isTRUE(fpathold)){
fpathold <- file.choose()
}else{
if(!file.exists(fpathold)) stop("error: no '", fpathold, "' file found!")
}
message("I extract data from ", fpathold)
fpath <- recoverInfo(fpath = fpath, fpathold = fpathold, startRow = startRow)
}
#
# filename without extension
fname_wext <- gsub(".xlsx", "", basename(fpath))
dirpath <- dirname(fpath)
x <- read.xlsx(fpath, sheet = 1, startRow = startRow)
wb <- loadWorkbook(fpath)
x$Projektleitung[is.na(x$Projektleitung)] <- "Nobody"
xProj <- x$Projektleitung
xProj[length(xProj)] <- NA
xlist <- split(x, xProj)
for(i in seq_along(xlist)){
PL <- xlist[[i]]$Projektleitung[1]
PL2 <- trimws(gsub(" ", "_", PL))
message("PL: ", PL)
deleteData(wb, sheet = 1, cols= 1:ncol(x), rows = 1:(nrow(x)+5) + startRow , gridExpand = TRUE)
writeData(wb, sheet = 1, x = xlist[[i]], startRow = startRow + 1, startCol = 1, colNames = FALSE)
saveWorkbook(wb,
file = file.path(dirpath, paste0(fname_wext, "_", PL2, ".xlsx")),
overwrite = TRUE)
}
}
splitExcelFileBatch <- function(){
fpaths <- list.files()
k <- grep("(\\.xlsx)$", fpaths)
if(length(k) == 0){
stop("error: no '.xlsx' file found!")
}else if(length(k) > 1){
# fDir <- dirname(fpaths[[1]])
# fnames <- basename(fpaths[k])
# fnames_lmin <- min(unlist(sapply(fnames, nchar, USE.NAMES = FALSE)))
# tst <- sapply(fnames, function(x, k){unlist(strsplit(x,""))[1:k]}, fnames_lmin, USE.NAMES = FALSE)
# i <- which(sapply(apply(tst, 1, unique), length) > 1)[1]
# if(length(i) > 0 && i > 1) i <- i -1
# if(length(i) == 0) i <- nrow(tst)
# fname_combines0 <- paste0(tst[1:i, 1], collapse ="")
# fname_combines <- gsub("_$", "", fname_combines0)
# ftarget <- paste0(fname_combines, ".xlsx")
# message("There is more than one '.xlsx' file.")
# message("I will split this one: '", ftarget,"'")
# splitExcelFile(ftarget)
stop("There is more than one '.xslx' file! Please delete the other files...")
}else{
splitExcelFile(fpaths[k])
}
}
recombineExcelFilesBatch <- function(){
fpaths <- list.files()
k <- grepl("(\\.xlsx)$", fpaths) & !grepl("^(~)", fpaths) & !grepl("(recombined)", fpaths)
recombineExcelFiles(fpaths[k])
}
recombineExcelFiles <- function(fPaths = NULL, startRow = 1){
# startRow = 3
# select the excel files
if(is.null(fPaths)){
fPaths <- choose.files()
}else{
if(!all(sapply(fPaths, file.exists))) stop("Not all files exist")
}
fDir <- dirname(fPaths[[1]])
fnames <- basename(fPaths)
fnames_lmin <- min(unlist(sapply(fnames, nchar, USE.NAMES = FALSE)))
tst <- sapply(fnames, function(x, k){unlist(strsplit(x,""))[1:k]}, fnames_lmin, USE.NAMES = FALSE)
i <- which(sapply(apply(tst, 1, unique), length) > 1)[1]
if(length(i) > 0 && i > 1) i <- i -1
if(length(i) == 0) i <- nrow(tst)
fname_combines0 <- paste0(tst[1:i, 1], collapse ="")
fname_combines <- gsub("_$", "", fname_combines0)
ftarget <- paste0(fname_combines, ".xlsx")
if(!file.exists( file.path(fDir,ftarget)) ){
lf <- list.files(fDir)
j <- which.min(adist(lf, ftarget))
warning("I could not find the file '", ftarget, "'!!!\n",
"I will try with this file: '", lf[j], "'... wish me good luck!")
ftarget <- lf[j]
}else{
message("Target file = '", ftarget, "'")
}
fPaths <- fPaths[basename(fPaths) != ftarget]
x <- read.xlsx(file.path(fDir, ftarget), sheet = 1, startRow = startRow)
x$Projektleitung[is.na(x$Projektleitung) & seq_along(x$Projektleitung) < length(x$Projektleitung)] <- "Nobody"
xorder <- order(x$Projektleitung[!is.na(x$Projektleitung)])
PLlist <- unique(x$Projektleitung)
PLlist <- PLlist[!is.na(PLlist)]
PLlist <- sapply(PLlist, trimws, USE.NAMES = FALSE)
wb <- loadWorkbook(file.path(fDir, ftarget))
Y <- list()
PLflist <- character(length(fPaths))
message("PL files to recombine:")
for(i in seq_along(fPaths)){
PLf <- gsub(fname_combines0, "", basename(fPaths[[i]]))
PLf <- gsub(".xlsx", "", gsub(fname_combines0, "", basename(fPaths[[i]])))
PLf <- gsub("_", " ", PLf)
PLf <- trimws(PLf)
if(PLf != ""){
message(" - '", PLf, "'")
Y[[i]] <- read.xlsx(fPaths[[i]], sheet = 1, startRow = startRow)
}
}
Yall <- do.call(rbind, Y)
Yall_sub <- Yall#[order(xorder), ncol(Yall) - 3:0]
# writeData(wb, sheet = 1, x = Yall_sub, startRow = 4, startCol = ncol(Yall) - 3, colNames = FALSE)
writeData(wb, sheet = 1, x = Yall_sub, startRow = startRow+1, startCol = 1, colNames = FALSE)
saveWorkbook(wb,
file = file.path(fDir, paste0(fname_combines, "_recombined", ".xlsx")),
overwrite = TRUE)
message("Recombined excel file:\n'", paste0(fname_combines, "_recombined", ".xlsx"),"'")
}
recoverInfo <- function(fpath = NULL, fpathold = NULL, startRow){
# fpathold <- "2023_12b/WIP_Hydrogeo_2023_12_recombined.xlsx"
# fpath <- "2023_12b/WIP_Hydrogeo_2023_12b.xlsx"
fname_wext <- gsub(".xlsx", "", basename(fpath))
dirpath <- dirname(fpath)
wb <- loadWorkbook(fpath)
xold <- read.xlsx(fpathold, sheet = 1, startRow = startRow)
x <- read.xlsx(fpath, sheet = 1, startRow = startRow)
names(xold)
xold["Nr."]
for(i in 1:nrow(xold)){
if(!is.na(xold[i, "Nr."])){
tst <- xold[i, "Nr."] == x["Nr."]
if( sum(tst) > 0 ){
k <- which(tst)
x[k, 10:12] <- xold[i, 10:12]
writeData(wb, sheet = 1, x = xold[i, 10:12], startRow = startRow+k, startCol = 10, colNames = FALSE)
}
}
}
saveWorkbook(wb,
file = file.path(dirpath, paste0(fname_wext, "_mod", ".xlsx")),
overwrite = TRUE)
return(file.path(dirpath, paste0(fname_wext, "_mod", ".xlsx")))
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment