Skip to content

Instantly share code, notes, and snippets.

@jpshanno
Created February 9, 2021 02:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jpshanno/6f54835a6ad5bf0ab941e42d2226937a to your computer and use it in GitHub Desktop.
Save jpshanno/6f54835a6ad5bf0ab941e42d2226937a to your computer and use it in GitHub Desktop.
Lightweight Functions to Download Machine-to-Machine Earth Explorer Data
# Functions inspired by espa.tools espa_inventory_post_api
# getSpatialData provides similar functionality, but is larger & slower than
# what I need
library(httr)
library(jsonlite)
library(parallel)
build_request <-
function(target, ...){
base_url <- "https://m2m.cr.usgs.gov/api/api/json/stable/"
json <- toJSON(list(...), auto_unbox = TRUE)
url <- paste0(base_url, target)
list(url = url,
parameters = json)
}
ee_make_acquisition_filter <-
function(yr){
list(start = paste0(yr, "-01-01"),
end = paste0(yr, "-12-31"))
}
ee_get_dataset_name <-
function(yr){
dplyr::if_else(yr < 2013,
"landsat_tm_c1",
"landsat_8_c1")}
# These could self populate by running ee_post("dataset-filters")
ee_make_row_filter <-
function(ds, wrs.row){
match.arg(ds, c("landsat_tm_c1",
"landsat_8_c1"))
stopifnot(is.numeric(wrs.row))
field_id <-
if_else(ds == "landsat_tm_c1",
"5e83d08ffa032790",
"5e83d0b849ed5ee7")
list(filterType = "value",
filterId = field_id,
value = wrs.row,
operand = "=")
}
ee_and_filter <-
function(...){
list(filterType = "and",
childFilters = list(...))
}
ee_make_path_filter <-
function(ds, wrs.path){
match.arg(ds, c("landsat_tm_c1",
"landsat_8_c1"))
stopifnot(is.numeric(wrs.path))
field_id <-
if_else(ds == "landsat_tm_c1",
"5e83d08f6487afc7",
"5e83d0b81d20cee8")
list(filterType = "value",
filterId = field_id,
value = wrs.path,
operand = "=")
}
ee_make_temporal_filter <-
function(yr){
bounds <-
list(startDate = paste0(yr, "-01-01"),
endDate = paste0(yr, "-12-31"))
list(temporalFilter = bounds)
}
check_response <-
function(response){
http_status <-
status_code(response)
error_code <-
content(response)$errorCode
error_message <-
content(response)$errorMessage
if(http_status != 200 | !is.null(error_code)){
message(" The api request returned an error code of '", error_code,
"':\n ", error_message,
"\n The returned HTTP status was ", http_status)
x <- NA
attributes(x) <-
list(error_code = error_code,
error_message = error_message,
http_status = http_status)
return(x)
}
http_status
}
http_error <-
function(response){
con <-
content(response)
message <-
xml2::xml_text(xml2::xml_find_first(con, "//body//*"))
is_json <-
grepl("^\\s*\\{", message)
if(is_json){
x <- NA
attributes(x) <- fromJSON(message)
return(x)
}
x <- NA
attributes(x) <- list(error_message = message)
}
ee_api <-
function(verb, target, ..., as = "parsed"){
request <-
build_request(target, ...)
good_targets <-
c("user-clear", "order-submit", "ingest-subscription-create",
"dataset-filters", "dataset-search", "scene-search-delete",
"download-request", "download-options", "tram-queue-units",
"tram-queues", "grid2ll", "scene-list-add + scene-list-get",
"login", "logout", "notifications", "tram-order-status",
"scene-metadata", "scene-search")
match.arg(target,
good_targets)
match.arg(as,
c("raw",
"text",
"parsed"))
if(target != "login"){
args <-
list(url = request[["url"]],
request[["parameters"]],
config = content_type("application/x-www-form-urlencoded"),
add_headers("X-Auth-Token" = ee_load_key()))
} else {
args <-
list(url = request[["url"]],
request[["parameters"]],
config = content_type("application/x-www-form-urlencoded"))
}
names(args)[2] <-
switch(verb,
POST = "body",
GET = "query")
response <-
do.call(verb,
args)
# If the service is down it returns html not json
if(http_type(response) == "text/html"){
http_response <-
http_error(response)
return(http_response)
}
response_status <-
check_response(response)
if(is.na(response_status) | response_status != 200){
return(response_status)
}
con <-
content(response, as = as)
if(as == "parsed"){
return(con$data)
}
con
}
ee_post <-
function(target, ..., as = "parsed"){
verb <-
match.call()[[1]]
verb <-
toupper(sub("ee_", "", verb))
ee_api(verb = verb,
target = target,
...,
as = as)
}
ee_get <-
function(target, ..., as = "parsed"){
verb <-
match.call()[[1]]
verb <-
toupper(sub("ee_", "", verb))
ee_api(verb = verb,
target = target,
...,
as = as)
}
ee_cache_key <-
function(username = Sys.getenv("USGS_ID"),
password = Sys.getenv("USGS_PW")){
expiration_time <-
as.character(Sys.time() + 6900)
api_key <-
ee_post("login",
username = username,
password = password)
writeLines(c(api_key, expiration_time),
"api.key")
if(!is.na(api_key)){
return(TRUE)
}
api_key
}
ee_load_key <-
function(){
if(!file.exists("api.key")){
ee_cache_key()
}
key_info <-
readLines("api.key")
api_key <-
key_info[1]
expiration_time <-
as.POSIXct(key_info[2])
if(Sys.time() > key_info[2]){
api_key <-
ee_cache_key()
}
if(is.na(api_key)){
cat("\n API key request failed\n\n")
if(interactive()){
stop()
} else {
quit(save = "no",
status = Sys.getenv("E_AUTH"))
}
}
key_info[1]
}
ee_n_records <-
function(res){
pluck(res,
"data", "recordsReturned",
.default = NA_real_)
}
ee_entity_id <-
function(res){
pluck(res,
"data", "results", "entityId",
.default = NA_character_)
}
ee_geotiff_size <-
function(res){
products <-
pluck(res, "data", "productName",
.default = NA_character_)
file_sizes <-
pluck(res, "data", "filesize",
.default = NA_real_)
if(any(is.na(products) | is.na(file_sizes))) return(NA_real_)
file_sizes[which(products == "Level-1 GeoTIFF Data Product")]
}
ee_geotiff_url <-
function(res){
products <-
pluck(res, "data", "productName",
.default = NA_character_)
file_sizes <-
pluck(res, "data", "filesize",
.default = NA_real_)
if(any(is.na(products) | is.na(file_sizes))) return(NA_real_)
file_sizes[which(products == "Level-1 GeoTIFF Data Product")]
}
ee_safe_download <-
function(urls, out.paths, force.download = FALSE, download.method = "libcurl",
download.log = NULL){
if(!is.null(download.log)){
existing_files <-
read_csv(download.log, col_types = "c")$file_name
}
# Remove files that already exist
if(!force.download){
new_files <-
!(out.paths %in% existing_files)
old.urls <- urls
urls <-
urls[new_files]
if(length(urls) == 0){
# message(paste0("All images for ", ard.tile.ids, " are already downloaded", collapse = "\n"))
return(out.paths)
}
out.paths <-
out.paths[new_files]
}
log_file <-
glue("/run/media/jpshanno/Elements/landsat/download_log.txt")
write(c(paste0(rep("-", 80), collapse = ""), as.character(Sys.time()), paste0(rep("-", 80), collapse = ""), "\n"),
log_file,
append = TRUE)
capture.output(download.file(url = urls, destfile = out.paths, method = download.method),
file = log_file,
append = TRUE,
type = "message")
write_csv(data.frame(file_name = out.paths),
path = download.log,
append = TRUE)
out.paths
}
ee_download_geotiff <-
function(dat, download.log, path){
df <-
dat %>%
group_by(inventory_year, PATH, ROW) %>%
summarize(landsat_scene_identifier = list(landsat_scene_identifier),
landsat_product_identifier = list(landsat_product_identifier)) %>%
mutate(downloads = map(landsat_scene_identifier, ~cross(list(entityId = .x, productId = ifelse(str_detect(.x[1], "LC8"), "5e83d0b84df8d8c2", "5e83d08fd9932768"))))) %>%
mutate(response = map(downloads, ~ee_post("download-request", downloads = downloads[[1]], as = "text")),
parsed = map(response, fromJSON, flatten = TRUE),
urls = map(parsed, pluck, "data", "preparingDownloads", "url", .default = NA_character_)) %>%
unnest(cols = c(landsat_product_identifier, urls)) %>%
mutate(out.path = path(path,
ROW, PATH, landsat_product_identifier,
ext = "tar.gz")) %>%
ungroup() %>%
select(landsat_product_identifier, urls, out.path) %>%
filter_all(~!is.na(.)) %>%
mutate(urls = rev(urls)) # It looks like USGS returns first in last out responses
if(!all(str_detect(df$out.path, df$landsat_product_identifier))){
stop("Landsat_product_identier does not match out.path in ee_download_geotiff")
}
cat(paste0("\n Downloading images for ",
unique(dat$inventory_year),
", row ", unique(dat$ROW),
", path ", unique(dat$PATH), "\n"))
downloaded <-
ee_safe_download(urls = df$urls,
out.paths = df$out.path,
download.log = download.log)
downloaded
}
on.exit(ee_post("logout"))
on.exit(unlink("api.key"), add = TRUE)
# Logon -------------------------------------------------------------------
cat("\n Logging into EPSA\n")
key_cached <-
ee_load_key()
if(is.na(key_cached)){
cat("\n Login Failed\n\n")
if(interactive()){
stop()
}
quit(save = "no",
status = Sys.getenv("E_AUTH"))
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment