Skip to content

Instantly share code, notes, and snippets.

@fauxneticien
Created May 10, 2017 09:48
Show Gist options
  • Save fauxneticien/08bd16238d9e2ba4104d0f3f7f6a8178 to your computer and use it in GitHub Desktop.
Save fauxneticien/08bd16238d9e2ba4104d0f3f7f6a8178 to your computer and use it in GitHub Desktop.
Download files with names matching pattern from an Alveo list
############################## 1. Set up ######################################
# Make sure you have the necessary packages installed (see section 2).

# ID of the Alveo item list to download from: the last number in the list URL,
# e.g. 1045 for https://app.alveo.edu.au/item_lists/1045
alveo_list_id <- 904

# Regular expression used to select which documents of each item to download.
# "speaker16\\.wav$" means 'ending with speaker16.wav'; the dot is escaped so
# that it matches a literal "." rather than any single character.
alveo_search_pattern <- "speaker16\\.wav$"

# Directory to download the wav files into
output_path <- "~/Desktop/"
########################### 2. Packages & config ##############################
# Install packages if you don't have them, e.g.:
# install.packages("pbapply")
library(purrr)
library(pbapply)
library(stringr)
# For alveo, install the latest copy from my GitHub repository (for now):
# devtools::install_github("fauxneticien/alveo-r")
library(alveo)
# Abort early when no alveo.config file is found in the home folder.
if (!file.exists("~/alveo.config")) {
  stop("Do you have alveo.config file in your home folder?")
}

# Create a client for the Alveo server, then fetch the item list chosen in
# section 1 by its ID.
client <- RestClient(
  server_uri = "app.alveo.edu.au"
)
item_list <- client$get_item_list_by_id(alveo_list_id)
################################ 3. Pipeline ##################################
# The pipeline is split into stages so that each one can announce itself with
# message() and show its own progress bar.

# Stage 1: fetch every item in the list by position. seq_along() is used
# instead of 1:length() so that an empty list yields no iterations rather
# than the indices c(1, 0).
message(
  "Getting items from list at https://app.alveo.edu.au/item_lists/",
  alveo_list_id
)
items <- pblapply(
  X = seq_along(item_list$items),
  FUN = function(an_item) item_list$get_item(an_item)
)

# Stage 2: pull the `ausnc:document` metadata field out of each item.
message("Getting metadata of each item in list...")
doc_strings <- pblapply(
  X = items,
  FUN = function(alveo_doc) {
    metadata <- alveo_doc$get_metadata()
    metadata$`ausnc:document`
  }
)

# Stage 3: split each metadata string on ", ", keep the entries matching the
# search pattern, and download each of them.
message(
  "Downloading files matching search string '", alveo_search_pattern,
  "' to ", output_path
)
matching_uris <- doc_strings %>%
  map(function(doc_string) {
    uris <- str_split(string = doc_string, pattern = ", ")[[1]]
    uris[grepl(pattern = alveo_search_pattern, x = uris)]
  }) %>%
  # Flatten the per-item matches into one character vector so that every
  # matching URI gets exactly one Document: items with no match contribute
  # nothing, and items with several matches contribute one entry per match.
  unlist(use.names = FALSE)

# `result` holds the return value of each download() call, one per matching URI.
result <- matching_uris %>%
  map(~ Document(uri = ., type = "audio/wav")) %>%
  pblapply(FUN = function(alveo_obj) alveo_obj$download(output_path))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment