Skip to content

Instantly share code, notes, and snippets.

@markheckmann
Last active February 4, 2022 05:27

Revisions

  1. markheckmann revised this gist May 5, 2018. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion remove_password_excel.R
    Original file line number Diff line number Diff line change
    @@ -38,7 +38,7 @@ for (ws in worksheet_paths) {
    writeLines(out, ws)
    }

    # create a new zip file from the unzipped and modified XML files from the Excel file
    # create a new zip, i.e. Excel file, containing the modified XML files
    old_wd <- setwd(temp_dir)
    f <- list.files(recursive = T, full.names = F, all.files = T, no..=T)
    # as the Excel file is a zip file, we can directly replace the .zip extension by .xlsx
  2. markheckmann revised this gist May 5, 2018. 1 changed file with 1 addition and 0 deletions.
    1 change: 1 addition & 0 deletions remove_password_excel.R
    Original file line number Diff line number Diff line change
    @@ -29,6 +29,7 @@ worksheet_paths <- list.files(
    pattern = ".xml")

    # remove the XML node which contains the sheet protection
    # We might of course use e.g. xml2 to parse the XML file, but this simple approach will suffice here
    for (ws in worksheet_paths) {
    x <- readLines(ws, encoding = "windows1")
    # the "sheetProtection" node contains the hashed password "<sheetProtection SOME INFO />"
  3. markheckmann created this gist May 5, 2018.
    48 changes: 48 additions & 0 deletions remove_password_excel.R
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,48 @@
    # remove sheet protection in Excel
    # Sample file: https://www.dropbox.com/s/4ul0kowrscyr8cz/excel_protected.xlsx?dl=0

    library(stringr)
    library(zip)

    # file with protected sheets
    file <- "data/excel_protected.xlsx"

    # file name and path after removing protection
    file_unlocked <- str_replace(basename(file), ".xlsx$", "_unlocked.xlsx")
    file_unlocked_path <- file.path(getwd(), "data", file_unlocked)

    # create temporary directory in project folder
    # so we see what is going on
    temp_dir <- "_tmp"

    # remove and recreate _tmp folder in case it already exists
    unlink(temp_dir, recursive = T)
    dir.create(temp_dir)

    # unzip Excel file into temp folder
    unzip(file, exdir = temp_dir)

    # get full path to XML files for all worksheets
    worksheet_paths <- list.files(
    paste0(temp_dir, "/xl/worksheets"),
    full.name = TRUE,
    pattern = ".xml")

    # remove the XML node which contains the sheet protection
    for (ws in worksheet_paths) {
    x <- readLines(ws, encoding = "windows1")
    # the "sheetProtection" node contains the hashed password "<sheetProtection SOME INFO />"
    # we simply remove the whole node
    out <- str_replace(x, "<sheetProtection.*?/>", "")
    writeLines(out, ws)
    }

    # create a new zip file from the unzipped and modified XML files from the Excel file
    old_wd <- setwd(temp_dir)
    f <- list.files(recursive = T, full.names = F, all.files = T, no..=T)
    # as the Excel file is a zip file, we can directly replace the .zip extension by .xlsx
    zip::zip(file_unlocked_path, files = f) # utils::zip does not work for some reason
    setwd(old_wd)

    # clean up and remove temporary directory
    unlink(temp_dir, recursive = T)