Last active
December 7, 2023 09:15
-
-
Save emanuelhuber/fbe7799b1dcfae0cbb997508af283ede to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
if(!require("openxlsx")) install.packages("openxlsx") | |
library("openxlsx") | |
# this function open an explorer window. | |
# select the file you want to split | |
# the file is split into files according to the values | |
# in the column "Projektleitung" | |
splitExcelFile <- function(fpath = NULL, fpathold = TRUE, startRow = 1){ | |
# where the file starts | |
# startRow = 3 | |
if(is.null(fpath)){ | |
fpath <- file.choose() | |
}else{ | |
if(!file.exists(fpath)) stop("error: no '", fpath, "' file found!") | |
} | |
if(!is.null(fpathold)){ | |
if(isTRUE(fpathold)){ | |
fpathold <- file.choose() | |
}else{ | |
if(!file.exists(fpathold)) stop("error: no '", fpathold, "' file found!") | |
} | |
message("I extract data from ", fpathold) | |
fpath <- recoverInfo(fpath = fpath, fpathold = fpathold, startRow = startRow) | |
} | |
# | |
# filename without extension | |
fname_wext <- gsub(".xlsx", "", basename(fpath)) | |
dirpath <- dirname(fpath) | |
x <- read.xlsx(fpath, sheet = 1, startRow = startRow) | |
wb <- loadWorkbook(fpath) | |
x$Projektleitung[is.na(x$Projektleitung)] <- "Nobody" | |
xProj <- x$Projektleitung | |
xProj[length(xProj)] <- NA | |
xlist <- split(x, xProj) | |
for(i in seq_along(xlist)){ | |
PL <- xlist[[i]]$Projektleitung[1] | |
PL2 <- trimws(gsub(" ", "_", PL)) | |
message("PL: ", PL) | |
deleteData(wb, sheet = 1, cols= 1:ncol(x), rows = 1:(nrow(x)+5) + startRow , gridExpand = TRUE) | |
writeData(wb, sheet = 1, x = xlist[[i]], startRow = startRow + 1, startCol = 1, colNames = FALSE) | |
saveWorkbook(wb, | |
file = file.path(dirpath, paste0(fname_wext, "_", PL2, ".xlsx")), | |
overwrite = TRUE) | |
} | |
} | |
splitExcelFileBatch <- function(){ | |
fpaths <- list.files() | |
k <- grep("(\\.xlsx)$", fpaths) | |
if(length(k) == 0){ | |
stop("error: no '.xlsx' file found!") | |
}else if(length(k) > 1){ | |
# fDir <- dirname(fpaths[[1]]) | |
# fnames <- basename(fpaths[k]) | |
# fnames_lmin <- min(unlist(sapply(fnames, nchar, USE.NAMES = FALSE))) | |
# tst <- sapply(fnames, function(x, k){unlist(strsplit(x,""))[1:k]}, fnames_lmin, USE.NAMES = FALSE) | |
# i <- which(sapply(apply(tst, 1, unique), length) > 1)[1] | |
# if(length(i) > 0 && i > 1) i <- i -1 | |
# if(length(i) == 0) i <- nrow(tst) | |
# fname_combines0 <- paste0(tst[1:i, 1], collapse ="") | |
# fname_combines <- gsub("_$", "", fname_combines0) | |
# ftarget <- paste0(fname_combines, ".xlsx") | |
# message("There is more than one '.xlsx' file.") | |
# message("I will split this one: '", ftarget,"'") | |
# splitExcelFile(ftarget) | |
stop("There is more than one '.xslx' file! Please delete the other files...") | |
}else{ | |
splitExcelFile(fpaths[k]) | |
} | |
} | |
recombineExcelFilesBatch <- function(){ | |
fpaths <- list.files() | |
k <- grepl("(\\.xlsx)$", fpaths) & !grepl("^(~)", fpaths) & !grepl("(recombined)", fpaths) | |
recombineExcelFiles(fpaths[k]) | |
} | |
recombineExcelFiles <- function(fPaths = NULL, startRow = 1){ | |
# startRow = 3 | |
# select the excel files | |
if(is.null(fPaths)){ | |
fPaths <- choose.files() | |
}else{ | |
if(!all(sapply(fPaths, file.exists))) stop("Not all files exist") | |
} | |
fDir <- dirname(fPaths[[1]]) | |
fnames <- basename(fPaths) | |
fnames_lmin <- min(unlist(sapply(fnames, nchar, USE.NAMES = FALSE))) | |
tst <- sapply(fnames, function(x, k){unlist(strsplit(x,""))[1:k]}, fnames_lmin, USE.NAMES = FALSE) | |
i <- which(sapply(apply(tst, 1, unique), length) > 1)[1] | |
if(length(i) > 0 && i > 1) i <- i -1 | |
if(length(i) == 0) i <- nrow(tst) | |
fname_combines0 <- paste0(tst[1:i, 1], collapse ="") | |
fname_combines <- gsub("_$", "", fname_combines0) | |
ftarget <- paste0(fname_combines, ".xlsx") | |
if(!file.exists( file.path(fDir,ftarget)) ){ | |
lf <- list.files(fDir) | |
j <- which.min(adist(lf, ftarget)) | |
warning("I could not find the file '", ftarget, "'!!!\n", | |
"I will try with this file: '", lf[j], "'... wish me good luck!") | |
ftarget <- lf[j] | |
}else{ | |
message("Target file = '", ftarget, "'") | |
} | |
fPaths <- fPaths[basename(fPaths) != ftarget] | |
x <- read.xlsx(file.path(fDir, ftarget), sheet = 1, startRow = startRow) | |
x$Projektleitung[is.na(x$Projektleitung) & seq_along(x$Projektleitung) < length(x$Projektleitung)] <- "Nobody" | |
xorder <- order(x$Projektleitung[!is.na(x$Projektleitung)]) | |
PLlist <- unique(x$Projektleitung) | |
PLlist <- PLlist[!is.na(PLlist)] | |
PLlist <- sapply(PLlist, trimws, USE.NAMES = FALSE) | |
wb <- loadWorkbook(file.path(fDir, ftarget)) | |
Y <- list() | |
PLflist <- character(length(fPaths)) | |
message("PL files to recombine:") | |
for(i in seq_along(fPaths)){ | |
PLf <- gsub(fname_combines0, "", basename(fPaths[[i]])) | |
PLf <- gsub(".xlsx", "", gsub(fname_combines0, "", basename(fPaths[[i]]))) | |
PLf <- gsub("_", " ", PLf) | |
PLf <- trimws(PLf) | |
if(PLf != ""){ | |
message(" - '", PLf, "'") | |
Y[[i]] <- read.xlsx(fPaths[[i]], sheet = 1, startRow = startRow) | |
} | |
} | |
Yall <- do.call(rbind, Y) | |
Yall_sub <- Yall#[order(xorder), ncol(Yall) - 3:0] | |
# writeData(wb, sheet = 1, x = Yall_sub, startRow = 4, startCol = ncol(Yall) - 3, colNames = FALSE) | |
writeData(wb, sheet = 1, x = Yall_sub, startRow = startRow+1, startCol = 1, colNames = FALSE) | |
saveWorkbook(wb, | |
file = file.path(fDir, paste0(fname_combines, "_recombined", ".xlsx")), | |
overwrite = TRUE) | |
message("Recombined excel file:\n'", paste0(fname_combines, "_recombined", ".xlsx"),"'") | |
} | |
recoverInfo <- function(fpath = NULL, fpathold = NULL, startRow){ | |
# fpathold <- "2023_12b/WIP_Hydrogeo_2023_12_recombined.xlsx" | |
# fpath <- "2023_12b/WIP_Hydrogeo_2023_12b.xlsx" | |
fname_wext <- gsub(".xlsx", "", basename(fpath)) | |
dirpath <- dirname(fpath) | |
wb <- loadWorkbook(fpath) | |
xold <- read.xlsx(fpathold, sheet = 1, startRow = startRow) | |
x <- read.xlsx(fpath, sheet = 1, startRow = startRow) | |
names(xold) | |
xold["Nr."] | |
for(i in 1:nrow(xold)){ | |
if(!is.na(xold[i, "Nr."])){ | |
tst <- xold[i, "Nr."] == x["Nr."] | |
if( sum(tst) > 0 ){ | |
k <- which(tst) | |
x[k, 10:12] <- xold[i, 10:12] | |
writeData(wb, sheet = 1, x = xold[i, 10:12], startRow = startRow+k, startCol = 10, colNames = FALSE) | |
} | |
} | |
} | |
saveWorkbook(wb, | |
file = file.path(dirpath, paste0(fname_wext, "_mod", ".xlsx")), | |
overwrite = TRUE) | |
return(file.path(dirpath, paste0(fname_wext, "_mod", ".xlsx"))) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment