Skip to content

Instantly share code, notes, and snippets.

@matt-sandgren
Created March 18, 2020 19:33
Show Gist options
  • Save matt-sandgren/2434da156b98c2e743d606fe9e098f4f to your computer and use it in GitHub Desktop.
Save matt-sandgren/2434da156b98c2e743d606fe9e098f4f to your computer and use it in GitHub Desktop.
Data Management API write function
# ------------------------------------------------------------------------------
# Helper function to create the file to POST to the source
#
# Either a data frame or a path to a file may be passed. Accepting a path gives
# callers a fallback for environments where writing a temporary file might
# fail (for example a server account with no write permissions).
# ------------------------------------------------------------------------------
#' Create the upload payload for a Socrata source
#'
#' @param data A data frame/tibble, or a single path to an existing file.
#' @param ... Forwarded to `data.table::fwrite()` when `data` is a data frame;
#'   ignored when `data` is a path.
#' @return The object returned by `httr::upload_file()`. For a path it points
#'   at that file; for a data frame it points at a temporary .csv file holding
#'   the data.
create_data_upload <- function(data, ...) {
  stopifnot(
    is.data.frame(data) || (is.character(data) && length(data) == 1L)
  )

  # A path is uploaded as-is.
  if (is.character(data)) {
    return(httr::upload_file(data))
  }

  # The first argument of tempfile() is only a name *prefix*; the extension
  # has to be supplied through `fileext` for the file to really end in ".csv".
  # The file cannot be removed here because it is read later, when the request
  # is performed; it lives in the session's temporary directory.
  temp_file <- tempfile("socrata_temp", fileext = ".csv")
  data.table::fwrite(data, temp_file, ...)
  httr::upload_file(temp_file, type = "text/csv")
}
# This should be pretty easy to write proper unit tests for
# create_data_upload(appeals)
# create_data_upload("//isiloncifs/StrategicPlanning/DataManagement/TransformLoad/COSCAppeals/Data/COSCAppeals.csv")
# ------------------------------------------------------------------------------
# This can, and probably should, be broken up into some smaller functions.
#
# action_type is case sensitive, so for backward compatibility with
# write.socrata() some kind of translation layer would be needed.
#
#
#
# ------------------------------------------------------------------------------
#' Write data to socrata
#'
#' Runs four requests in sequence: opens a revision on the dataset, creates an
#' upload source on that revision, uploads the data to the source, and applies
#' the revision. Applying is retried with an increasing delay between tries.
#'
#' @param data A dataframe, tibble, or path to a .csv
#' @param four_by_four The four by four of the dataset to be updated. Found at
#'   the end of every assets url, looks something like 'jk5h-abcd'
#' @param domain_url The base domain of your site. For us at fulton county, it's
#'   https://sharefulton.fultoncountyga.gov
#' @param action_type What to do with the dataset? Either replace to delete all
#'   rows and write, update to upsert/add rows, or delete to just delete all rows
#'   (I think)
#' @param email Your socrata email address. Alternatively, an API Key
#' @param password Your socrata password. Alternatively, an API key secret
#' @return The httr response of the last "apply revision" request. A warning
#'   is raised when no attempt came back with HTTP 200.
push_socrata_2 <- function(
    data,
    four_by_four,
    domain_url,
    action_type = c("replace", "update", "delete"),
    email,
    password) {
  stopifnot(
    is.data.frame(data) || (is.character(data) && length(data) == 1L),
    is.character(four_by_four),
    length(four_by_four) == 1L,
    nchar(four_by_four) == 9L,
    is.character(domain_url),
    length(domain_url) == 1L
  )
  action_type <- match.arg(action_type)

  # Links are appended with a leading "/", so drop any trailing slash to avoid
  # building URLs that contain "//".
  domain_url <- sub("/+$", "", domain_url)

  revision_url_string <- "/api/publishing/v1/revision"
  source_type <- "upload" # I think it's safe to hard code this?
  source_parse <- "true" # But maybe they should be arguments

  # Name reported to Socrata for the uploaded file: the file's own name when a
  # path was given, otherwise a name derived from the dataset id.
  file_name <- if (is.character(data)) {
    basename(data)
  } else {
    paste0(four_by_four, ".csv")
  }

  auth <- httr::authenticate(email, password, type = "basic")
  json_header <- httr::add_headers("Content-Type" = "application/json")

  # Stop unless `response` has exactly the expected status, otherwise return
  # its parsed content. stop_for_status() only raises for codes >= 300, so a
  # different non-error code is rejected explicitly.
  parse_or_stop <- function(response, expected, task) {
    httr::stop_for_status(response, task)
    actual <- httr::status_code(response)
    if (actual != expected) {
      stop(
        "Failed to ", task, ": expected HTTP ", expected,
        " but received ", actual, ".",
        call. = FALSE
      )
    }
    httr::content(response)
  }

  # Build an absolute URL from a relative link found in a parsed response.
  link_url <- function(parsed, name, task) {
    link <- parsed$links[[name]]
    if (is.null(link)) {
      stop(
        "Cannot ", task, ": the response contains no '", name, "' link.",
        call. = FALSE
      )
    }
    paste0(domain_url, link)
  }

  #-----------------------------------------------------------------------------
  # POST to revision endpoint to create new revision
  #-----------------------------------------------------------------------------
  open_revision_endpoint <- paste0(
    domain_url, revision_url_string, "/", four_by_four
  )
  open_revision_response <- parse_or_stop(
    httr::POST(
      open_revision_endpoint,
      body = list(action = list(type = action_type)),
      encode = "json",
      json_header,
      auth
    ),
    expected = 201L,
    task = "open revision"
  )
  message("Opened new revision on ", four_by_four)

  #-----------------------------------------------------------------------------
  # POST to create a source, indicating where the data will come from
  #-----------------------------------------------------------------------------
  source_json <- list(
    source_type = list(
      type = source_type,
      filename = file_name
    ),
    parse_options = list(
      parse_source = source_parse
    )
  )
  source_response <- parse_or_stop(
    httr::POST(
      link_url(open_revision_response, "create_source", "create source"),
      body = source_json,
      encode = "json",
      json_header,
      auth
    ),
    expected = 201L,
    task = "create source"
  )
  message("Created source for ", four_by_four)

  #-----------------------------------------------------------------------------
  # POST to upload the file to source
  #-----------------------------------------------------------------------------
  upload_data_url <- link_url(source_response, "bytes", "upload data to source")
  data_for_upload <- create_data_upload(data)
  upload_data_response <- parse_or_stop(
    httr::POST(
      upload_data_url,
      body = data_for_upload,
      httr::add_headers("Content-Type" = "text/csv"),
      auth
    ),
    expected = 200L,
    task = "upload data to source"
  )
  message("Uploaded data to draft of ", four_by_four)

  #-----------------------------------------------------------------------------
  # PUT to the apply revision endpoint to apply revision (publish)
  #-----------------------------------------------------------------------------
  apply_revision_url <- link_url(
    open_revision_response, "apply", "apply revision"
  )
  revision_number <- open_revision_response$resource$revision_seq
  body <- list(
    resource = list(
      id = revision_number
    )
  )

  max_attempts <- 5L
  applied <- FALSE
  apply_revision_response <- NULL
  for (attempt in seq_len(max_attempts)) {
    # Wait before every try (3, 9, 27, ... seconds), including the first one.
    Sys.sleep(3^attempt)
    message("Applying revision: Attempt ", attempt, " of ", max_attempts, "...")
    apply_revision_response <- httr::PUT(
      apply_revision_url,
      body = body,
      encode = "json",
      json_header,
      auth
    )
    if (httr::status_code(apply_revision_response) == 200L) {
      message("Revision applied. Socrata is processing the update")
      applied <- TRUE
      break
    }
    if (attempt < max_attempts) {
      message("Revision failed to apply. Trying again...")
    } else {
      message("Revision failed to apply.")
    }
  }

  if (!applied) {
    warning(
      "Revision was not applied after ", max_attempts,
      " attempts; last HTTP status was ",
      httr::status_code(apply_revision_response), ".",
      call. = FALSE
    )
  }
  return(apply_revision_response)
}
# ------------------------------------------------------------------------------
# Test it
# ------------------------------------------------------------------------------
# Sys.setenv(
# SOCRATA_EMAIL = "foo@bar.gov",
# SOCRATA_PASSWORD = "1234"
# )
# Inputs for a manual end-to-end run; replace them with your own values.
data <- appeals # a dataframe, or a path to a .csv
four_by_four <- "9vyf-janf" # four by four of the dataset to update
domain_url <- "https://sharefulton.fultoncountyga.gov" # your domain

# Credentials are read from the environment rather than written in the script.
email <- Sys.getenv("SOCRATA_EMAIL")
password <- Sys.getenv("SOCRATA_PASSWORD")

# action_type is left at its default.
out <- push_socrata_2(
  data, four_by_four, domain_url,
  email = email, password = password
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment