Skip to content

Instantly share code, notes, and snippets.

@ateucher
Last active May 19, 2022 23:48
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ateucher/a60e539f70bdaff2e13362fda4ec4deb to your computer and use it in GitHub Desktop.
Save ateucher/a60e539f70bdaff2e13362fda4ec4deb to your computer and use it in GitHub Desktop.
Extract speaker notes from the slides of a pptx file and export them to Markdown or docx
#' Extract speaker notes from a PowerPoint file
#'
#' Reads `pp_file` with officer, collects the text of each notes page and
#' writes it to `<pp_file without extension>_notes.md`, with one
#' `## Slide N:` heading per slide (slides without notes get an empty
#' section). With `format = "docx"` that markdown file is then rendered to
#' `<pp_file without extension>_notes.docx` via `rmarkdown::render()`.
#'
#' If the markdown file already exists the user is asked whether to
#' overwrite it; any answer other than "yes" aborts with an error.
#'
#' @param pp_file Path to a single existing `.pptx` file.
#' @param format Output format: `"md"` (default) or `"docx"`.
#' @return For `"md"`, the path of the markdown file that was written; for
#'   `"docx"`, the value returned by `rmarkdown::render()`.
extract_pptx_notes <- function(pp_file, format = c("md", "docx")) {
  format <- match.arg(format)

  # Validate the input up front so a bad path gives a clear message instead
  # of a failure from deep inside the pptx reader.
  if (!is.character(pp_file) || length(pp_file) != 1L || is.na(pp_file)) {
    stop("`pp_file` must be a single file path.", call. = FALSE)
  }
  if (!file.exists(pp_file)) {
    stop("File '", pp_file, "' does not exist.", call. = FALSE)
  }

  for (pkg in c("officer", "xml2", "rmarkdown", "glue")) {
    if (!requireNamespace(pkg, quietly = TRUE)) {
      stop("package '", pkg, "' required.", call. = FALSE)
    }
  }

  # requireNamespace() loads but does not attach, so every call into these
  # packages below is namespace-qualified.
  pp <- officer::read_pptx(pp_file)
  file_sans_ext <- tools::file_path_sans_ext(pp_file)

  # Map each notes page to the slide it belongs to.
  # NOTE(review): assumes the metadata row names contain "notesSlide<N>" and
  # `target` points at "slide<N>.xml" -- confirm against officer internals.
  meta <- pp$notesSlide$get_metadata()
  meta$notes <- as.numeric(
    gsub("notesSlide(\\d+).*", "\\1", rownames(meta))
  )
  meta <- meta[!grepl("notesMaster", meta$target), ]
  meta$slide <- as.numeric(
    gsub(".+slide(\\d+)\\.xml", "\\1", meta$target)
  )
  if (!nrow(meta)) stop("No notes in this presentation")

  slide_nums <- seq_len(max(meta$slide))

  # xpath search adapted from:
  # https://robaboukhalil.medium.com/your-slide-deck-is-a-zip-file-in-disguise-36bb14f11c0b
  # Paragraphs and runs are matched separately so that the runs of one
  # paragraph can be glued back together.
  para_xpath <- "//*[local-name()='txBody']/*[local-name()='p']"
  run_xpath <- "./*[local-name()='r']/*[local-name()='t']"

  notes <- lapply(slide_nums, \(x) {
    notes_slide <- meta[meta$slide == x, "notes"]
    if (!length(notes_slide)) return(character(0))
    xml <- pp$notesSlide$get_slide(notes_slide)$get()
    paragraphs <- xml2::xml_find_all(xml, para_xpath)
    text <- vapply(seq_along(paragraphs), \(i) {
      runs <- xml2::xml_find_all(paragraphs[[i]], run_xpath)
      # xml_text() yields the text content rather than serialized XML.
      paste(xml2::xml_text(runs), collapse = "")
    }, character(1))
    # Paragraphs without any text run contribute nothing.
    text[nzchar(text)]
  })
  names(notes) <- paste("Slide", slide_nums)

  out <- paste0(file_sans_ext, "_notes.md")
  if (file.exists(out)) {
    overwrite <- utils::askYesNo(
      glue::glue("File {out} already exists. Overwrite?")
    )
    # askYesNo() returns NA when cancelled; treat that like "no".
    if (!isTRUE(overwrite)) stop("Quitting", call. = FALSE)
    file.remove(out)
  }

  # YAML front matter; continuation lines stay at column 0 on purpose so the
  # template text is exactly what ends up in the file.
  header <- glue::glue('---
title: "{basename(file_sans_ext)}"
output: word_document
date: "{Sys.Date()}"
---\n\n\n')
  cat(header, file = out)

  # One section per slide: heading followed by its paragraphs. Everything is
  # separated by a blank line so each heading is preceded by one, which
  # pandoc's default markdown needs to recognise it as a heading.
  sections <- vapply(names(notes), \(n) {
    paste(c(paste0("## ", n, ":"), notes[[n]]), collapse = "\n\n")
  }, character(1))
  cat(sections, file = out, sep = "\n\n", append = TRUE)
  cat("\n", file = out, append = TRUE)

  if (format == "docx") {
    out_docx <- paste0(file_sans_ext, "_notes.docx")
    return(rmarkdown::render(out, output_file = out_docx))
  }
  out
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment