Skip to content

Instantly share code, notes, and snippets.

@h-a-graham
Last active January 5, 2023 11:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save h-a-graham/27f3fceca4616cd54809dd3c28b8689b to your computer and use it in GitHub Desktop.
Save h-a-graham/27f3fceca4616cd54809dd3c28b8689b to your computer and use it in GitHub Desktop.
Download a Google Drive Directory
#' Download drive files from a dribble with a path column
#'
#' Every row of `x` is downloaded to the local location given by its `path`
#' entry. Destination directories that do not exist yet are created first.
#'
#' @param x a dribble with a `path` column holding the local destination of
#'   each file.
#' @param .overwrite logical. Should existing local files be overwritten?
#'
#' @return The original input dribble
#' @noRd
drive_down_files <- function(x, .overwrite = TRUE) {
  # Only the file id and its destination are needed, so walk the rows by index
  # instead of splatting every column of the dribble into a callback.
  for (i in seq_len(nrow(x))) {
    dest <- x$path[[i]]
    .dir <- dirname(dest)
    if (!dir.exists(.dir)) dir.create(.dir, recursive = TRUE)
    # Mark the id explicitly: a bare character string would be interpreted by
    # drive_download() as a file name/path rather than as a file id.
    googledrive::drive_download(
      googledrive::as_id(x$id[i]),
      dest,
      overwrite = .overwrite
    )
  }
  return(x)
}
#' Search files and folders in drive
#'
#' Recursively search a Drive directory for files and retain their folder
#' structure as a `path` column in the returned dribble.
#'
#' @param drive.parent The parent drive folder from which to search for files.
#'   Passed to `googledrive::drive_ls()`.
#' @param .path Relative path recorded for the files found directly inside
#'   `drive.parent`. Defaults to `drive.parent` itself, so `drive.parent` must
#'   be a character path unless `.path` is supplied.
#'
#' @return A dribble of all non-folder files found below `drive.parent`, with
#'   an additional `path` column.
#' @noRd
split_files_n_folds <- function(drive.parent, .path = drive.parent) {
  # List the folder once and split the listing locally; this avoids a second
  # API call and a set-difference on data frames (which needs dplyr loaded).
  dr_all <- googledrive::drive_ls(drive.parent)
  is_dir <- googledrive::is_folder(dr_all)

  files <- dr_all[!is_dir, ]
  files$path <- file.path(.path, files$name)

  dr_fold <- dr_all[is_dir, ]
  if (nrow(dr_fold) < 1) {
    return(files)
  }

  # Recurse with the folder's own dribble row rather than a name-built path,
  # so sibling folders that share a name cannot be confused; the relative
  # path is carried along separately in `.path`.
  drib.deep <- lapply(seq_len(nrow(dr_fold)), function(i) {
    split_files_n_folds(
      dr_fold[i, ],
      .path = file.path(.path, dr_fold$name[[i]])
    )
  })

  do.call("rbind", c(list(files), drib.deep))
}
#' Download a drive directory
#'
#' This function downloads the entire contents of a google drive directory,
#' whilst maintaining the folder structure.
#'
#' @param drive.folder Something that identifies the folder of interest on your
#' Google Drive. Can be a name or path, a file id or URL marked with as_id(),
#' or a dribble.
#' @param dest.folder default is `getwd()`. The folder in which to save the
#' folder from google drive.
#' @param fileext default NULL. Can be any file extension, with or without the
#' leading dot (e.g. `"txt"` or `".txt"`) - if given, only files whose name
#' ends with this extension will be downloaded.
#' @param .overwrite logical, should files be overwritten. If `FALSE` and a
#' file already exists, an error is thrown.
#'
#' @return A dribble containing all files downloaded including a path column
#' detailing the location of the downloaded file.
#' @export
#'
#' @examples
#' \dontrun{
#' d <- drive_download_dir("Drive-DL-testing", fileext = "txt")
#' }
drive_download_dir <- function(drive.folder, dest.folder = getwd(),
                               fileext = NULL, .overwrite = TRUE) {
  # Resolve the folder once and reuse the result for both the validity check
  # and the name lookup.
  parent <- suppressMessages(googledrive::drive_get(drive.folder))

  # Require exactly one match: zero or several rows would otherwise hand a
  # length-0 or length-n condition to `if`.
  if (nrow(parent) != 1 || !googledrive::is_folder(parent)) {
    stop("A Google Drive folder for drive.folder is required.")
  }

  # NOTE(review): the folder is passed on by name, so it is looked up again by
  # name further down; an id/URL input pointing at a non-unique name may be
  # ambiguous - confirm against split_files_n_folds().
  .parent <- parent$name
  file.drib <- split_files_n_folds(.parent)
  file.drib$path <- file.path(dest.folder, file.drib$path)

  if (!is.null(fileext)) {
    # Match the extension literally at the end of the file name; a plain
    # grepl() would treat it as an unanchored regular expression.
    ext <- paste0(".", sub("^\\.", "", fileext))
    keep <- vapply(
      file.drib$name,
      function(nm) any(endsWith(nm, ext)),
      logical(1),
      USE.NAMES = FALSE
    )
    file.drib <- file.drib[keep, ]
  }

  if (nrow(file.drib) < 1) stop("There are no files to download!")

  drive_down_files(file.drib, .overwrite = .overwrite)
}
# Attach the packages used by the example and load the downloader functions.
library(googledrive)
library(fs)
source("drive_downloaders.R")

# Mirror the Drive folder into the working directory with progress messages
# silenced, then show the resulting dribble.
d <- suppressMessages(drive_download_dir("Drive-DL-testing"))

print(d)
#> # A dribble: 12 × 4
#>    name   id                                drive_resource    path              
#>    <chr>  <drv_id>                          <list>            <chr>             
#>  1 t3.R   1bfyrf9WslgEt7j-MMQwCJx9xlt6d5OfE <named list [42]> /tmp/RtmplJQIcX/r…
#>  2 t2.txt 1XuYOqsJ8XaxhnEF4L_1LJ8p_gK3Pexof <named list [43]> /tmp/RtmplJQIcX/r…
#>  3 t1.md  14SgS8jVuxdIr8hhrGc9FOE8giCuoSxQN <named list [42]> /tmp/RtmplJQIcX/r…
#>  4 t2.txt 1IdR7_W5msi4MqV1vYAILjwgU4kYUW2m- <named list [43]> /tmp/RtmplJQIcX/r…
#>  5 t3.R   1shqI-kkjGILqH-fiG2_PkI2sY6WLD_kH <named list [42]> /tmp/RtmplJQIcX/r…
#>  6 t1.md  1syayUnie6TM8ZaT-yuumJ3VqrXN8n22d <named list [42]> /tmp/RtmplJQIcX/r…
#>  7 t2.txt 1HDGoAh-5voxDU6LGEdkgwGfIvUIgo-dk <named list [43]> /tmp/RtmplJQIcX/r…
#>  8 t1.md  1VXkVkYeKkgQ_WSpavSUsrc_QXBmvrams <named list [42]> /tmp/RtmplJQIcX/r…
#>  9 t3.R   10FSBmGxiMGWBDaTt93ZCD2WCC4UUcf3r <named list [42]> /tmp/RtmplJQIcX/r…
#> 10 t3.R   1Fj7-8CCCtBQnkOswEaBv5EZA8OYDn_mh <named list [42]> /tmp/RtmplJQIcX/r…
#> 11 t2.txt 1tqJ95UZKKq1ldBKX_RLUMWQLunP1u09o <named list [43]> /tmp/RtmplJQIcX/r…
#> 12 t1.md  1yF9o0HTN6XPOozvGoAXPI6Q7KWywdmgS <named list [42]> /tmp/RtmplJQIcX/r…

# Show the downloaded folder structure on disk.
fs::dir_tree("Drive-DL-testing", recurse = TRUE)
#> Drive-DL-testing
#> ├── F1
#> │   ├── t1.md
#> │   ├── t2.txt
#> │   └── t3.R
#> ├── F2
#> │   ├── F2-B
#> │   │   ├── t1.md
#> │   │   ├── t2.txt
#> │   │   └── t3.R
#> │   ├── t1.md
#> │   ├── t2.txt
#> │   └── t3.R
#> ├── t1.md
#> ├── t2.txt
#> └── t3.R
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment