Skip to content

Instantly share code, notes, and snippets.

@rajadain
Created November 13, 2023 15:42
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rajadain/e68b14ebd95cbe052d5132d2901c054e to your computer and use it in GitHub Desktop.
Save rajadain/e68b14ebd95cbe052d5132d2901c054e to your computer and use it in GitHub Desktop.
EPR Workflow for Model My Watershed
#!/usr/bin/env bash
# -e aborts on the first failing command; -x traces every command to stderr.
# NOTE: the trace includes the curl calls that carry the Authorization header,
# so MMW_API_KEY ends up in the log output. Drop -x before sharing logs.
set -ex
###
# Basic settings
#
# Every value below can be overridden by setting a variable of the same name
# in the environment before running the script; otherwise the default is used.
###
# Model My Watershed URL to hit
MMW_API_URL="${MMW_API_URL:-http://localhost:8000/api}"
# API Key for a user
MMW_API_KEY="${MMW_API_KEY:-my-api-key}"
# Built JAR of this repository: https://github.com/jpolchlo/mmw-etl
MMW_ETL_JAR="${MMW_ETL_JAR:-target/scala-2.12/mmw-etl-assembly-0.0.1.jar}"
# Bucket that stores MMW layers
S3_BUCKET="${S3_BUCKET:-s3://my-bucket-name}"
# Reference layer to use to align new layers with
REF_LAYER="${REF_LAYER:-nlcd-2019-30m-epsg5070-512-uint8raw}"
# Path to folder containing input data
IN_PATH="${IN_PATH:-scratch/example_lu_data}"
###
# Helper for mmw_run_gwlfe: polls /jobs/<uuid>/ once a second for as long as
# the job status is "started", then prints the last job response on stdout.
#
# Globals:
#   MMW_API_URL (read), MMW_API_KEY (read)
# Arguments:
#   $1 - job uuid
#   $2 - status known so far; polling only happens while this is "started"
#   $3 - response known so far (optional); printed unchanged if no poll is made
# Returns:
#   0 once the status is no longer "started", 1 if a poll request fails
###
function _mmw_wait_for_job () {
  local job_uuid=$1
  local job_status=$2
  local job_response=${3:-}

  while [[ "${job_status}" == "started" ]]; do
    sleep 1
    job_response=$(curl -X GET \
      -H "Authorization: Token ${MMW_API_KEY}" \
      "${MMW_API_URL}/jobs/${job_uuid}/") || {
      echo "Unable to poll job ${job_uuid} at ${MMW_API_URL}/jobs/${job_uuid}/" >&2
      return 1
    }
    job_status=$(jq -r .status <<<"${job_response}")
  done

  printf '%s\n' "${job_response}"
}

###
# Function that takes an input to use with /modeling/gwlf-e/prepare endpoint,
# and returns the output of GWLF-E for that input. To see documentation on how
# that input should be shaped, reference: https://modelmywatershed.org/api/docs/
#
# Steps: POST the input to /modeling/gwlf-e/prepare/, wait for that job, POST
# its uuid to /modeling/gwlf-e/run/, wait for that job, print its `.result`.
#
# Globals:
#   MMW_API_URL (read), MMW_API_KEY (read)
# Arguments:
#   $1 - JSON request body for /modeling/gwlf-e/prepare/
# Outputs:
#   stdout - the `.result` of the finished run job, and nothing else
#   stderr - all diagnostics, so that callers capturing stdout with $(...)
#            still get to see them
# Exits:
#   Calls `exit 1` when a request cannot be made, a job cannot be started, or
#   a job ends in any status other than "complete". Inside $(...) this ends
#   the substitution with status 1. Failures are checked explicitly because
#   Bash clears -e inside command substitutions when not in POSIX mode.
###
function mmw_run_gwlfe () {
  local request_data=$1
  local prepare_response prepare_uuid prepare_status
  local run_request run_response run_uuid run_status run_result

  # The body is fed on stdin rather than as an argument: a large GeoJSON may
  # exceed the operating system's limit on the size of a single argument.
  prepare_response=$(curl -X POST \
    -H "Content-Type: application/json" \
    -H "Authorization: Token ${MMW_API_KEY}" \
    --data-binary @- \
    "${MMW_API_URL}/modeling/gwlf-e/prepare/" <<<"${request_data}") || {
    echo "Unable to reach ${MMW_API_URL}/modeling/gwlf-e/prepare/" >&2
    exit 1
  }

  prepare_uuid=$(jq -r .job_uuid <<<"${prepare_response}")
  if [[ -z "${prepare_uuid}" || "${prepare_uuid}" == "null" ]]; then
    echo "Unable to start prepare job: ${prepare_response}" >&2
    exit 1
  fi
  prepare_status=$(jq -r .status <<<"${prepare_response}")

  # While the job status is "started", poll once a second
  prepare_response=$(_mmw_wait_for_job \
    "${prepare_uuid}" "${prepare_status}" "${prepare_response}") || exit 1
  prepare_status=$(jq -r .status <<<"${prepare_response}")

  # If the job status is not "complete", report error and stop
  if [[ "${prepare_status}" != "complete" ]]; then
    # Pretty-print the response if it is JSON, otherwise show it verbatim
    echo "GWLF-E PREPARE job ${prepare_uuid} failed with error: $(jq . <<<"${prepare_response}" 2>/dev/null || printf '%s' "${prepare_response}")" >&2
    exit 1
  fi

  # Otherwise, post a request to /modeling/gwlf-e/run/ with the prepare job uuid
  run_request="{\"job_uuid\": \"${prepare_uuid}\"}"
  run_response=$(curl -X POST \
    -H "Content-Type: application/json" \
    -H "Authorization: Token ${MMW_API_KEY}" \
    -d "${run_request}" \
    "${MMW_API_URL}/modeling/gwlf-e/run/") || {
    echo "Unable to reach ${MMW_API_URL}/modeling/gwlf-e/run/" >&2
    exit 1
  }

  # Without this check a rejected request would go on to poll /jobs/null/
  run_uuid=$(jq -r .job_uuid <<<"${run_response}")
  if [[ -z "${run_uuid}" || "${run_uuid}" == "null" ]]; then
    echo "Unable to start run job: ${run_response}" >&2
    exit 1
  fi

  # The run job is treated as "started" and is always polled at least once
  run_response=$(_mmw_wait_for_job "${run_uuid}" "started") || exit 1
  run_status=$(jq -r .status <<<"${run_response}")

  # If the job status is not "complete", report error and stop
  if [[ "${run_status}" != "complete" ]]; then
    echo "GWLF-E RUN job ${run_uuid} failed with error: $(jq . <<<"${run_response}" 2>/dev/null || printf '%s' "${run_response}")" >&2
    exit 1
  fi

  # Fetch the GWLF-E result and report it. Quoted, so whitespace inside JSON
  # string values is kept and nothing is subject to glob expansion.
  run_result=$(jq -r .result <<<"${run_response}")
  printf '%s\n' "${run_result}"
}
###
# For every .tif file in the specified input data folder:
#   1. read the .geojson shape that shares its base name,
#   2. run GWLF-E for that shape and save the results to a _baseline.json,
#   3. ingest the .tif file as a new layer,
#   4. run GWLF-E for that shape with that layer as the land layer, and save
#      the results to a _modified.json.
# All paths are quoted so that an input folder or file name containing
# whitespace works.
###
for filename in "${IN_PATH}"/*.tif; do
  # A glob with no match is left as the literal pattern; skip it
  [[ -e "${filename}" ]] || continue

  # Extract name of the .tif file without extension, use that for everything
  IN_LABEL="${filename##*/}"
  IN_LABEL="${IN_LABEL%.tif}"
  # The source .tif file
  SOURCE_IMAGE="${IN_PATH}/${IN_LABEL}.tif"
  # The source .geojson file
  SOURCE_SHAPE="${IN_PATH}/${IN_LABEL}.geojson"
  # The name of the output layer to ingest to, and use with MMW
  OUT_LAYER="20231109-tt-${IN_LABEL}"
  # Path of the baseline output
  OUT_GWLFE_BASELINE="${IN_PATH}/${IN_LABEL}_gwlfe_baseline.json"
  # Path of the modified output
  OUT_GWLFE_MODIFIED="${IN_PATH}/${IN_LABEL}_gwlfe_modified.json"

  # Stop with a clear message rather than a bare redirection error
  if [[ ! -r "${SOURCE_SHAPE}" ]]; then
    echo "Cannot read shape ${SOURCE_SHAPE} for ${SOURCE_IMAGE}" >&2
    exit 1
  fi
  # Read the shape once; both requests below embed it
  SOURCE_SHAPE_JSON=$(<"${SOURCE_SHAPE}")

  # Run source shape against vanilla GWLF-E and save a baseline
  MMW_GWLFE_BASELINE_INPUT="{\"area_of_interest\": ${SOURCE_SHAPE_JSON}}"
  MMW_GWLFE_BASELINE_OUTPUT=$(mmw_run_gwlfe "${MMW_GWLFE_BASELINE_INPUT}")
  jq -S . <<<"${MMW_GWLFE_BASELINE_OUTPUT}" > "${OUT_GWLFE_BASELINE}"

  # Delete any existing layer with the same name
  aws s3 rm "${S3_BUCKET}/_attributes/metadata__${OUT_LAYER}__0.json"
  aws s3 rm --recursive "${S3_BUCKET}/${OUT_LAYER}"

  # Ingest given .tif into MMW layer
  spark-submit "${MMW_ETL_JAR}" \
    --source-image "${SOURCE_IMAGE}" \
    --reference-catalog "${S3_BUCKET}" \
    --reference-layer "${REF_LAYER}" \
    --output-catalog "${S3_BUCKET}" \
    --output-layer "${OUT_LAYER}"

  # Run source shape with this new ingested layer and save modified output.
  # Same JSON document as before, written on a single line.
  printf -v MMW_GWLFE_MODIFIED_INPUT \
    '{"area_of_interest": %s, "layer_overrides": {"__LAND__": "%s"}}' \
    "${SOURCE_SHAPE_JSON}" "${OUT_LAYER}"
  MMW_GWLFE_MODIFIED_OUTPUT=$(mmw_run_gwlfe "${MMW_GWLFE_MODIFIED_INPUT}")
  jq -S . <<<"${MMW_GWLFE_MODIFIED_OUTPUT}" > "${OUT_GWLFE_MODIFIED}"
done
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment