# daltare/csv_query_gist.R

## csv_query_gist.R
# This script shows how to filter and/or download a resource from the CA Open
# Data Portal (https://data.ca.gov/) as a csv file using a persistent link
# (i.e., a link that doesn't change based on the resource's filename)


# load packages -----------------------------------------------------------
library(tidyverse)


# setup -------------------------------------------------------------------

## base url ----
## (the URL to download a csv resource always starts with this prefix:
## https://data.ca.gov/datastore/dump/)
url_base <- "https://data.ca.gov/datastore/dump/"

## get resource ID ----
## (the resource ID is the last part of the resource's URL. For example, the
## full URL for the "2021 CEDEN Water Chemistry Data" resource is:
# https://data.ca.gov/dataset/surface-water-chemistry-results/resource/dde19a95-504b-48d7-8f3e-8af3d484009f
## so the ID for that resource is: dde19a95-504b-48d7-8f3e-8af3d484009f)
resource_id <- "dde19a95-504b-48d7-8f3e-8af3d484009f"


# download full file ------------------------------------------------------

## build URL to download data ----
## (base URL + resource ID + the query string requesting csv output; the
## assembled string is passed through URLencode() before use)
url_full_dataset <- URLencode(
    paste0(url_base, resource_id, '?format=csv&bom=true')
)

## read csv data ----
## (read_csv() downloads and parses the file directly from the URL)
dataset_full <- read_csv(file = url_full_dataset)


# download filtered dataset -----------------------------------------------

## enter statement to filter data ----
## (use this format: "Field 1":"Value 1", "Field 2":"Value 2" - i.e., the
## contents of a JSON object, without the surrounding braces)
filter_statement <- '"Program":"American Rivers Restoration", "Analyte":"Salinity, Total"'

## percent-encode the filter ----
## (the filter is encoded on its own, with reserved = TRUE, so that characters
## which have a special meaning in a query string - e.g., "+", "&", "=", "#" -
## are sent as part of the field value instead of being read as URL syntax
## (for example, a value like "Nitrate + Nitrite as N" needs its "+" encoded).
## repeated = TRUE makes URLencode() always encode the text, including a
## literal "%"; by default it returns its input unchanged when the text looks
## like it is already percent-encoded)
filter_encoded <- URLencode(paste0('{', filter_statement, '}'),
                            reserved = TRUE,
                            repeated = TRUE)

## build URL to download data ----
## (only the filter needs encoding - the base URL, the resource ID, and the
## fixed query-string pieces are plain URL-safe text)
url_filtered_dataset <- paste0(url_base,
                               resource_id,
                               '?filters=',
                               filter_encoded,
                               '&format=csv&bom=true')

## read csv data ----
dataset_filtered <- read_csv(url_filtered_dataset)

## !!!! NOTE !!!!
## you can also paste this URL directly into a browser to manually download a
## csv containing the filtered dataset - the URL is:
## https://data.ca.gov/datastore/dump/dde19a95-504b-48d7-8f3e-8af3d484009f?filters=%7B%22Program%22%3A%22American%20Rivers%20Restoration%22%2C%20%22Analyte%22%3A%22Salinity%2C%20Total%22%7D&format=csv&bom=true
	# This script shows how to filter and/or download a resource from the CA Open
	# Data Portal (https://data.ca.gov/) as a csv file using a persistent link
	# (i.e., a link that doesn't change based on the resource's filename)



	# load packages -----------------------------------------------------------
	library(tidyverse)



	# setup -------------------------------------------------------------------

	## base url ----
	## (the URL to download a csv resource always starts with this prefix:
	## https://data.ca.gov/datastore/dump/)
	url_base <- "https://data.ca.gov/datastore/dump/"

	## get resource ID ----
	## (the resource ID is the last part of the resource's URL. For example, the
	## full URL for the "2021 CEDEN Water Chemistry Data" resource is:
	# https://data.ca.gov/dataset/surface-water-chemistry-results/resource/dde19a95-504b-48d7-8f3e-8af3d484009f
	## so the ID for that resource is: dde19a95-504b-48d7-8f3e-8af3d484009f)
	resource_id <- "dde19a95-504b-48d7-8f3e-8af3d484009f"



	# download full file ------------------------------------------------------

	## build URL to download data ----
	## (base URL + resource ID + the query string requesting csv output; the
	## assembled string is passed through URLencode() before use)
	url_full_dataset <- URLencode(
	    paste0(url_base, resource_id, '?format=csv&bom=true')
	)

	## read csv data ----
	## (read_csv() downloads and parses the file directly from the URL)
	dataset_full <- read_csv(file = url_full_dataset)



	# download filtered dataset -----------------------------------------------

	## enter statement to filter data ----
	## (use this format: "Field 1":"Value 1", "Field 2":"Value 2" - i.e., the
	## contents of a JSON object, without the surrounding braces)
	filter_statement <- '"Program":"American Rivers Restoration", "Analyte":"Salinity, Total"'

	## percent-encode the filter ----
	## (the filter is encoded on its own, with reserved = TRUE, so that characters
	## which have a special meaning in a query string - e.g., "+", "&", "=", "#" -
	## are sent as part of the field value instead of being read as URL syntax
	## (for example, a value like "Nitrate + Nitrite as N" needs its "+" encoded).
	## repeated = TRUE makes URLencode() always encode the text, including a
	## literal "%"; by default it returns its input unchanged when the text looks
	## like it is already percent-encoded)
	filter_encoded <- URLencode(paste0('{', filter_statement, '}'),
	                            reserved = TRUE,
	                            repeated = TRUE)

	## build URL to download data ----
	## (only the filter needs encoding - the base URL, the resource ID, and the
	## fixed query-string pieces are plain URL-safe text)
	url_filtered_dataset <- paste0(url_base,
	                               resource_id,
	                               '?filters=',
	                               filter_encoded,
	                               '&format=csv&bom=true')

	## read csv data ----
	dataset_filtered <- read_csv(url_filtered_dataset)

	## !!!! NOTE !!!!
	## you can also paste this URL directly into a browser to manually download a
	## csv containing the filtered dataset - the URL is:
	## https://data.ca.gov/datastore/dump/dde19a95-504b-48d7-8f3e-8af3d484009f?filters=%7B%22Program%22%3A%22American%20Rivers%20Restoration%22%2C%20%22Analyte%22%3A%22Salinity%2C%20Total%22%7D&format=csv&bom=true