Last active
August 11, 2022 17:00
-
-
Save daltare/a934669c8933c3ebd955f18dfb198456 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This script shows how to filter and/or download a resource from the CA Open
# Data Portal (https://data.ca.gov/) as a csv file using a persistent link
# (i.e., a link that doesn't change based on the resource's filename)

# load packages -----------------------------------------------------------
# (read_csv() comes from readr, which is attached as part of the tidyverse)
library(tidyverse)
# setup -------------------------------------------------------------------
## base url ----
## (the URL to download a csv resource starts with:
## https://data.ca.gov/datastore/dump/)
## The trailing slash is required: the resource ID is appended directly to it.
url_base <- "https://data.ca.gov/datastore/dump/"

## get resource ID ----
## (you can find the ID associated with a resource from the last part of the
## resource's URL. For example, the full URL for the "2021 CEDEN Water
## Chemistry Data" resource is:
## https://data.ca.gov/dataset/surface-water-chemistry-results/resource/dde19a95-504b-48d7-8f3e-8af3d484009f
## So, the ID for this resource is: dde19a95-504b-48d7-8f3e-8af3d484009f)
resource_id <- "dde19a95-504b-48d7-8f3e-8af3d484009f"
# download full file ------------------------------------------------------
## build URL to download data ----
## (base URL + resource ID + a query string requesting csv output.
## NOTE(review): `bom=true` presumably asks the portal to prepend a byte-order
## mark to the file - confirm against the portal's datastore documentation.)
url_full_dataset <- paste0(url_base, resource_id, "?format=csv&bom=true") %>%
  URLencode()

## read csv data ----
## (read_csv() is given the URL itself, so this step needs network access; no
## filter is applied, so the whole resource is requested)
dataset_full <- read_csv(url_full_dataset)
# download filtered dataset -----------------------------------------------
## enter statement to filter data ----
## (use this format: "Field 1":"Value 1", "Field 2":"Value 2")
filter_statement <- '"Program":"American Rivers Restoration", "Analyte":"Salinity, Total"'

## build URL to download data ----
## (the statement is wrapped in braces to form the JSON object passed as the
## `filters` query parameter, and ONLY that value is percent-encoded:
##   - reserved = TRUE also escapes characters such as & # + = which, if they
##     appear in a field value, would otherwise be read as query-string syntax
##   - repeated = TRUE makes URLencode() encode even when the text already
##     contains something that looks like a %XX escape; by default it would
##     return such input unchanged)
filters_encoded <- URLencode(paste0("{", filter_statement, "}"),
                             reserved = TRUE,
                             repeated = TRUE)
url_filtered_dataset <- paste0(url_base,
                               resource_id,
                               "?filters=",
                               filters_encoded,
                               "&format=csv&bom=true")

## read csv data ----
## (read_csv() is given the URL itself, so this step needs network access)
dataset_filtered <- read_csv(url_filtered_dataset)

## !!!! NOTE !!!!
## you can also paste this URL directly into a browser to manually download a
## csv containing the filtered dataset - the URL is:
## https://data.ca.gov/datastore/dump/dde19a95-504b-48d7-8f3e-8af3d484009f?filters=%7B%22Program%22%3A%22American%20Rivers%20Restoration%22%2C%20%22Analyte%22%3A%22Salinity%2C%20Total%22%7D&format=csv&bom=true
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment