Created
March 18, 2020 19:33
-
-
Save matt-sandgren/2434da156b98c2e743d606fe9e098f4f to your computer and use it in GitHub Desktop.
Data Management API write function
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# ------------------------------------------------------------------------------
# Helper function to create the file to POST to the source
#
# Either a dataframe or a path to a file may be passed. Accepting a path covers
# the situation where writing a temp file is not possible. Maybe on a server,
# on an account with no write permissions?
#
# If a path is passed, that existing file is wrapped for upload and returned.
#
# If a dataframe/tibble is passed, it is written to a temporary .csv and that
# file is wrapped for upload and returned.
#
# Arguments:
#   data  A dataframe/tibble, or a single path to an existing file.
#   ...   Passed on to data.table::fwrite() when `data` is a dataframe.
#
# Returns the result of httr::upload_file() for the file to send.
# Stops with an error when `data` is neither a dataframe nor a single path.
# ------------------------------------------------------------------------------
create_data_upload <- function(data, ...) {
  is_path <- is.character(data) && length(data) == 1L && !is.na(data)

  if (!is.data.frame(data) && !is_path) {
    stop(
      "`data` must be a data frame or a single file path.",
      call. = FALSE
    )
  }

  if (is_path) {
    return(httr::upload_file(data))
  }

  # The first argument of tempfile() is only a name *prefix*: random characters
  # are appended after it. The extension has to be given through `fileext`,
  # otherwise the file does not end in ".csv" and httr::upload_file() cannot
  # guess a CSV content type from the file name.
  temp_file <- tempfile("socrata_temp", fileext = ".csv")
  # write.csv(data, file, ...)
  data.table::fwrite(data, temp_file, ...)

  httr::upload_file(temp_file)
}
# This should be pretty easy to write proper unit tests for | |
# create_data_upload(appeals) | |
# create_data_upload("//isiloncifs/StrategicPlanning/DataManagement/TransformLoad/COSCAppeals/Data/COSCAppeals.csv") | |
# ------------------------------------------------------------------------------ | |
# This can, and probably should, be broken up into some smaller functions.
#
# action_type is case sensitive, so for backward compatibility with
# write.socrata() there would have to be some kind of translation going on.
# | |
# | |
# | |
#' Write data to socrata
#'
#' Opens a revision on the dataset, creates an upload source for it, uploads
#' the data as a .csv and finally applies (publishes) the revision. Progress is
#' reported with `message()`.
#'
#' @param data A dataframe, tibble, or path to a .csv
#' @param four_by_four The four by four of the dataset to be updated. Found at
#'   the end of every assets url, looks something like 'jk5h-abcd'
#' @param domain_url The base domain of your site. For us at fulton county, it's
#'   https://sharefulton.fultoncountyga.gov
#' @param action_type What to do with the dataset? Either replace to delete all
#'   rows and write, update to upsert/add rows, or delete to just delete all
#'   rows (I think)
#' @param email Your socrata email address. Alternatively, an API Key
#' @param password Your socrata password. Alternatively, an API key secret
#' @param file_name File name reported to Socrata when the source is created.
#'   Defaults to the base name of `data` when `data` is a path, otherwise to
#'   the four by four followed by ".csv".
#' @return The httr response of the last attempt to apply the revision. A
#'   warning is raised when none of the attempts returned status 200.
push_socrata_2 <- function(
    data,
    four_by_four,
    domain_url,
    action_type = c("replace", "update", "delete"),
    email,
    password,
    file_name = NULL) {
  stopifnot(
    is.data.frame(data) || (is.character(data) && length(data) == 1L),
    is.character(four_by_four),
    length(four_by_four) == 1L,
    grepl("^[[:alnum:]]{4}-[[:alnum:]]{4}$", four_by_four),
    is.character(domain_url),
    length(domain_url) == 1L,
    is.null(file_name) || (is.character(file_name) && length(file_name) == 1L)
  )
  action_type <- match.arg(action_type)

  # Fail before anything is created remotely if the file is not there.
  if (is.character(data) && !file.exists(data)) {
    stop("File not found: ", data, call. = FALSE)
  }

  # Every endpoint below is appended with a leading "/", so drop any trailing
  # slash to avoid "//" in the request URLs.
  domain_url <- sub("/+$", "", domain_url)

  revision_url_string <- "/api/publishing/v1/revision"
  source_type <- "upload" # I think it's safe to hard code this?
  source_parse <- "true" # But maybe they should be arguments

  if (is.null(file_name)) {
    file_name <- if (is.character(data)) {
      basename(data)
    } else {
      paste0(four_by_four, ".csv")
    }
  }

  # Prepare the upload first, so that a failure to write the temp file cannot
  # leave an opened revision behind.
  data_for_upload <- create_data_upload(data)
  if (is.data.frame(data)) {
    # Only the temp file written for a dataframe is ours to delete; a file the
    # caller passed in by path is left alone.
    on.exit(unlink(data_for_upload$path), add = TRUE)
  }

  auth <- httr::authenticate(email, password, type = "basic")
  json_header <- httr::add_headers("Content-Type" = "application/json")

  # Return the parsed body of `response` when it has the `expected` status;
  # stop otherwise, so later steps never work with an unparsed response.
  parse_or_stop <- function(response, expected, task) {
    status <- httr::status_code(response)
    if (status != expected) {
      # Raises a descriptive error for 4xx/5xx ("... Failed to <task>.").
      httr::stop_for_status(response, task)
      # Still here: a non-error status that is not the one we need.
      stop(
        "Unexpected HTTP status ", status, " (expected ", expected,
        "). Failed to ", task, ".",
        call. = FALSE
      )
    }
    httr::content(response)
  }

  #-----------------------------------------------------------------------------
  # POST to revision endpoint to create new revision
  #-----------------------------------------------------------------------------
  open_revision_endpoint <- paste0(
    domain_url, revision_url_string, "/", four_by_four
  )
  open_revision_response <- parse_or_stop(
    httr::POST(
      open_revision_endpoint,
      body = list(action = list(type = action_type)),
      encode = "json",
      json_header,
      auth
    ),
    expected = 201L,
    task = "open revision"
  )
  message("Opened new revision on ", four_by_four)

  #-----------------------------------------------------------------------------
  # POST to create a source, indicating where the data will come from
  #-----------------------------------------------------------------------------
  source_json <- list(
    source_type = list(
      type = source_type,
      filename = file_name
    ),
    parse_options = list(
      parse_source = source_parse
    )
  )
  create_source_url <- paste0(
    domain_url, open_revision_response$links$create_source
  )
  source_response <- parse_or_stop(
    httr::POST(
      create_source_url,
      body = source_json,
      encode = "json",
      json_header,
      auth
    ),
    expected = 201L,
    task = "create source"
  )
  message("Created source for ", four_by_four)

  #-----------------------------------------------------------------------------
  # POST to upload the file to source
  #-----------------------------------------------------------------------------
  upload_data_url <- paste0(domain_url, source_response$links$bytes)
  upload_data_response <- parse_or_stop(
    httr::POST(
      upload_data_url,
      body = data_for_upload,
      httr::add_headers("Content-Type" = "text/csv"),
      auth
    ),
    expected = 200L,
    task = "upload data to source"
  )
  message("Uploaded data to draft of ", four_by_four)

  #-----------------------------------------------------------------------------
  # PUT to the apply revision endpoint to apply revision (publish)
  #-----------------------------------------------------------------------------
  apply_revision_url <- paste0(domain_url, open_revision_response$links$apply)
  body <- list(
    resource = list(
      id = open_revision_response$resource$revision_seq
    )
  )

  max_attempts <- 5L
  applied <- FALSE
  for (attempt in seq_len(max_attempts)) {
    # Wait before every attempt, the first one included, backing off
    # exponentially: 3, 9, 27, 81 and 243 seconds.
    Sys.sleep(3^attempt)
    message("Applying revision: Attempt ", attempt, " of ", max_attempts, "...")
    apply_revision_response <- httr::PUT(
      apply_revision_url,
      body = body,
      encode = "json",
      json_header,
      auth
    )
    if (httr::status_code(apply_revision_response) == 200L) {
      message("Revision applied. Socrata is processing the update")
      applied <- TRUE
      break
    }
    if (attempt < max_attempts) {
      message("Revision failed to apply. Trying again...")
    }
  }

  if (!applied) {
    # Still return the last response so the caller can inspect it, but do not
    # let the failure pass unnoticed.
    warning(
      "Revision on ", four_by_four, " was not applied after ", max_attempts,
      " attempts (last HTTP status ",
      httr::status_code(apply_revision_response), ").",
      call. = FALSE
    )
  }

  apply_revision_response
}
# ------------------------------------------------------------------------------ | |
# Test it | |
# ------------------------------------------------------------------------------ | |
# Sys.setenv( | |
# SOCRATA_EMAIL = "foo@bar.gov", | |
# SOCRATA_PASSWORD = "1234" | |
# ) | |
# Inputs for a manual run. `appeals` has to exist in the session already.
data <- appeals # your dataframe or path to a .csv here
four_by_four <- "9vyf-janf" # four by four for your dataset to update
domain_url <- "https://sharefulton.fultoncountyga.gov" # your domain

# Credentials are read from the environment (see the Sys.setenv() call above).
email <- Sys.getenv("SOCRATA_EMAIL")
password <- Sys.getenv("SOCRATA_PASSWORD")

# action_type is left at its default.
out <- push_socrata_2(
  data,
  four_by_four,
  domain_url,
  email = email,
  password = password
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment