Created
February 9, 2021 02:09
-
-
Save jpshanno/6f54835a6ad5bf0ab941e42d2226937a to your computer and use it in GitHub Desktop.
Lightweight Functions to Download Machine-to-Machine Earth Explorer Data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Functions inspired by espa.tools espa_inventory_post_api | |
# getSpatialData provides similar functionality, but is larger & slower than | |
# what I need | |
library(httr) | |
library(jsonlite) | |
library(parallel) | |
build_request <- | |
function(target, ...){ | |
base_url <- "https://m2m.cr.usgs.gov/api/api/json/stable/" | |
json <- toJSON(list(...), auto_unbox = TRUE) | |
url <- paste0(base_url, target) | |
list(url = url, | |
parameters = json) | |
} | |
ee_make_acquisition_filter <- | |
function(yr){ | |
list(start = paste0(yr, "-01-01"), | |
end = paste0(yr, "-12-31")) | |
} | |
ee_get_dataset_name <- | |
function(yr){ | |
dplyr::if_else(yr < 2013, | |
"landsat_tm_c1", | |
"landsat_8_c1")} | |
# These could self populate by running ee_post("dataset-filters") | |
ee_make_row_filter <- | |
function(ds, wrs.row){ | |
match.arg(ds, c("landsat_tm_c1", | |
"landsat_8_c1")) | |
stopifnot(is.numeric(wrs.row)) | |
field_id <- | |
if_else(ds == "landsat_tm_c1", | |
"5e83d08ffa032790", | |
"5e83d0b849ed5ee7") | |
list(filterType = "value", | |
filterId = field_id, | |
value = wrs.row, | |
operand = "=") | |
} | |
ee_and_filter <- | |
function(...){ | |
list(filterType = "and", | |
childFilters = list(...)) | |
} | |
ee_make_path_filter <- | |
function(ds, wrs.path){ | |
match.arg(ds, c("landsat_tm_c1", | |
"landsat_8_c1")) | |
stopifnot(is.numeric(wrs.path)) | |
field_id <- | |
if_else(ds == "landsat_tm_c1", | |
"5e83d08f6487afc7", | |
"5e83d0b81d20cee8") | |
list(filterType = "value", | |
filterId = field_id, | |
value = wrs.path, | |
operand = "=") | |
} | |
ee_make_temporal_filter <- | |
function(yr){ | |
bounds <- | |
list(startDate = paste0(yr, "-01-01"), | |
endDate = paste0(yr, "-12-31")) | |
list(temporalFilter = bounds) | |
} | |
check_response <- | |
function(response){ | |
http_status <- | |
status_code(response) | |
error_code <- | |
content(response)$errorCode | |
error_message <- | |
content(response)$errorMessage | |
if(http_status != 200 | !is.null(error_code)){ | |
message(" The api request returned an error code of '", error_code, | |
"':\n ", error_message, | |
"\n The returned HTTP status was ", http_status) | |
x <- NA | |
attributes(x) <- | |
list(error_code = error_code, | |
error_message = error_message, | |
http_status = http_status) | |
return(x) | |
} | |
http_status | |
} | |
http_error <- | |
function(response){ | |
con <- | |
content(response) | |
message <- | |
xml2::xml_text(xml2::xml_find_first(con, "//body//*")) | |
is_json <- | |
grepl("^\\s*\\{", message) | |
if(is_json){ | |
x <- NA | |
attributes(x) <- fromJSON(message) | |
return(x) | |
} | |
x <- NA | |
attributes(x) <- list(error_message = message) | |
} | |
ee_api <- | |
function(verb, target, ..., as = "parsed"){ | |
request <- | |
build_request(target, ...) | |
good_targets <- | |
c("user-clear", "order-submit", "ingest-subscription-create", | |
"dataset-filters", "dataset-search", "scene-search-delete", | |
"download-request", "download-options", "tram-queue-units", | |
"tram-queues", "grid2ll", "scene-list-add + scene-list-get", | |
"login", "logout", "notifications", "tram-order-status", | |
"scene-metadata", "scene-search") | |
match.arg(target, | |
good_targets) | |
match.arg(as, | |
c("raw", | |
"text", | |
"parsed")) | |
if(target != "login"){ | |
args <- | |
list(url = request[["url"]], | |
request[["parameters"]], | |
config = content_type("application/x-www-form-urlencoded"), | |
add_headers("X-Auth-Token" = ee_load_key())) | |
} else { | |
args <- | |
list(url = request[["url"]], | |
request[["parameters"]], | |
config = content_type("application/x-www-form-urlencoded")) | |
} | |
names(args)[2] <- | |
switch(verb, | |
POST = "body", | |
GET = "query") | |
response <- | |
do.call(verb, | |
args) | |
# If the service is down it returns html not json | |
if(http_type(response) == "text/html"){ | |
http_response <- | |
http_error(response) | |
return(http_response) | |
} | |
response_status <- | |
check_response(response) | |
if(is.na(response_status) | response_status != 200){ | |
return(response_status) | |
} | |
con <- | |
content(response, as = as) | |
if(as == "parsed"){ | |
return(con$data) | |
} | |
con | |
} | |
ee_post <- | |
function(target, ..., as = "parsed"){ | |
verb <- | |
match.call()[[1]] | |
verb <- | |
toupper(sub("ee_", "", verb)) | |
ee_api(verb = verb, | |
target = target, | |
..., | |
as = as) | |
} | |
ee_get <- | |
function(target, ..., as = "parsed"){ | |
verb <- | |
match.call()[[1]] | |
verb <- | |
toupper(sub("ee_", "", verb)) | |
ee_api(verb = verb, | |
target = target, | |
..., | |
as = as) | |
} | |
ee_cache_key <- | |
function(username = Sys.getenv("USGS_ID"), | |
password = Sys.getenv("USGS_PW")){ | |
expiration_time <- | |
as.character(Sys.time() + 6900) | |
api_key <- | |
ee_post("login", | |
username = username, | |
password = password) | |
writeLines(c(api_key, expiration_time), | |
"api.key") | |
if(!is.na(api_key)){ | |
return(TRUE) | |
} | |
api_key | |
} | |
ee_load_key <- | |
function(){ | |
if(!file.exists("api.key")){ | |
ee_cache_key() | |
} | |
key_info <- | |
readLines("api.key") | |
api_key <- | |
key_info[1] | |
expiration_time <- | |
as.POSIXct(key_info[2]) | |
if(Sys.time() > key_info[2]){ | |
api_key <- | |
ee_cache_key() | |
} | |
if(is.na(api_key)){ | |
cat("\n API key request failed\n\n") | |
if(interactive()){ | |
stop() | |
} else { | |
quit(save = "no", | |
status = Sys.getenv("E_AUTH")) | |
} | |
} | |
key_info[1] | |
} | |
ee_n_records <- | |
function(res){ | |
pluck(res, | |
"data", "recordsReturned", | |
.default = NA_real_) | |
} | |
ee_entity_id <- | |
function(res){ | |
pluck(res, | |
"data", "results", "entityId", | |
.default = NA_character_) | |
} | |
ee_geotiff_size <- | |
function(res){ | |
products <- | |
pluck(res, "data", "productName", | |
.default = NA_character_) | |
file_sizes <- | |
pluck(res, "data", "filesize", | |
.default = NA_real_) | |
if(any(is.na(products) | is.na(file_sizes))) return(NA_real_) | |
file_sizes[which(products == "Level-1 GeoTIFF Data Product")] | |
} | |
ee_geotiff_url <- | |
function(res){ | |
products <- | |
pluck(res, "data", "productName", | |
.default = NA_character_) | |
file_sizes <- | |
pluck(res, "data", "filesize", | |
.default = NA_real_) | |
if(any(is.na(products) | is.na(file_sizes))) return(NA_real_) | |
file_sizes[which(products == "Level-1 GeoTIFF Data Product")] | |
} | |
ee_safe_download <- | |
function(urls, out.paths, force.download = FALSE, download.method = "libcurl", | |
download.log = NULL){ | |
if(!is.null(download.log)){ | |
existing_files <- | |
read_csv(download.log, col_types = "c")$file_name | |
} | |
# Remove files that already exist | |
if(!force.download){ | |
new_files <- | |
!(out.paths %in% existing_files) | |
old.urls <- urls | |
urls <- | |
urls[new_files] | |
if(length(urls) == 0){ | |
# message(paste0("All images for ", ard.tile.ids, " are already downloaded", collapse = "\n")) | |
return(out.paths) | |
} | |
out.paths <- | |
out.paths[new_files] | |
} | |
log_file <- | |
glue("/run/media/jpshanno/Elements/landsat/download_log.txt") | |
write(c(paste0(rep("-", 80), collapse = ""), as.character(Sys.time()), paste0(rep("-", 80), collapse = ""), "\n"), | |
log_file, | |
append = TRUE) | |
capture.output(download.file(url = urls, destfile = out.paths, method = download.method), | |
file = log_file, | |
append = TRUE, | |
type = "message") | |
write_csv(data.frame(file_name = out.paths), | |
path = download.log, | |
append = TRUE) | |
out.paths | |
} | |
ee_download_geotiff <- | |
function(dat, download.log, path){ | |
df <- | |
dat %>% | |
group_by(inventory_year, PATH, ROW) %>% | |
summarize(landsat_scene_identifier = list(landsat_scene_identifier), | |
landsat_product_identifier = list(landsat_product_identifier)) %>% | |
mutate(downloads = map(landsat_scene_identifier, ~cross(list(entityId = .x, productId = ifelse(str_detect(.x[1], "LC8"), "5e83d0b84df8d8c2", "5e83d08fd9932768"))))) %>% | |
mutate(response = map(downloads, ~ee_post("download-request", downloads = downloads[[1]], as = "text")), | |
parsed = map(response, fromJSON, flatten = TRUE), | |
urls = map(parsed, pluck, "data", "preparingDownloads", "url", .default = NA_character_)) %>% | |
unnest(cols = c(landsat_product_identifier, urls)) %>% | |
mutate(out.path = path(path, | |
ROW, PATH, landsat_product_identifier, | |
ext = "tar.gz")) %>% | |
ungroup() %>% | |
select(landsat_product_identifier, urls, out.path) %>% | |
filter_all(~!is.na(.)) %>% | |
mutate(urls = rev(urls)) # It looks like USGS returns first in last out responses | |
if(!all(str_detect(df$out.path, df$landsat_product_identifier))){ | |
stop("Landsat_product_identier does not match out.path in ee_download_geotiff") | |
} | |
cat(paste0("\n Downloading images for ", | |
unique(dat$inventory_year), | |
", row ", unique(dat$ROW), | |
", path ", unique(dat$PATH), "\n")) | |
downloaded <- | |
ee_safe_download(urls = df$urls, | |
out.paths = df$out.path, | |
download.log = download.log) | |
downloaded | |
} | |
on.exit(ee_post("logout")) | |
on.exit(unlink("api.key"), add = TRUE) | |
# Logon ------------------------------------------------------------------- | |
cat("\n Logging into EPSA\n") | |
key_cached <- | |
ee_load_key() | |
if(is.na(key_cached)){ | |
cat("\n Login Failed\n\n") | |
if(interactive()){ | |
stop() | |
} | |
quit(save = "no", | |
status = Sys.getenv("E_AUTH")) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment