Skip to content

Instantly share code, notes, and snippets.

@robbibt
Last active September 26, 2022 06:47
Show Gist options
  • Save robbibt/1939b55898d84dff0bd90266fbd222cb to your computer and use it in GitHub Desktop.
Save robbibt/1939b55898d84dff0bd90266fbd222cb to your computer and use it in GitHub Desktop.
DE Africa Argo template workflow and example run
---
# Working-data volume for the coastlines workflow. The process, merge and
# cleanup templates of the `deafrica-coastlines` WorkflowTemplate all mount
# this claim at /code/data.
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: argo-coastlines-pvc
  namespace: processing
  annotations:
    # NOTE(review): this annotation is the legacy way of selecting a storage
    # class; `spec.storageClassName` is the current field. Confirm before
    # switching, because a bound claim's spec cannot be edited in place.
    volume.beta.kubernetes.io/storage-class: efs-sc
spec:
  # Many pods mount the claim at the same time.
  accessModes:
    - ReadWriteMany
  resources:
    requests:
      # A request is required by the API, but EFS does not enforce storage
      # limits, so this number does not cap usage.
      storage: 50Gi
---
# otps-data volume
# NOTE: after the claim is created, copy the data in manually from S3
# (location: s3://deafrica-otps-data).
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: otps-data-pvc
  namespace: processing
  annotations:
    volume.beta.kubernetes.io/storage-class: efs-sc
spec:
  accessModes:
    - ReadWriteMany
  resources:
    requests:
      # A request is required by the API, but EFS does not enforce storage
      # limits, so this number does not cap usage.
      storage: 1Mi
---
# tide-model volume
# NOTE: after the claim is created, copy the data in manually from S3
# (location: s3://deafrica-data-staging-af/coastlines/tide-model/).
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: tide-model-pvc
  namespace: processing
  annotations:
    volume.beta.kubernetes.io/storage-class: efs-sc
spec:
  accessModes:
    - ReadWriteMany
  resources:
    requests:
      # A request is required by the API, but EFS does not enforce storage
      # limits, so this number does not cap usage.
      storage: 5Gi
---
# Reusable Argo WorkflowTemplate for the DE Africa Coastlines run.
#
# DAG (entrypoint `coastlines`):
#   generate-ids    -> prints the JSON list of tile ids to process
#   process-id      -> one pod per id: raster step, then vector step
#   merge           -> continental merge + upload of the results to S3
#   cleanup-storage -> optional wipe of the shared working volume
apiVersion: argoproj.io/v1alpha1
kind: WorkflowTemplate
metadata:
  name: deafrica-coastlines
  namespace: processing
spec:
  entrypoint: coastlines
  podGC:
    strategy: OnPodSuccess
  # Set "tolerations" on the WORKFLOW manifest to override this property!
  tolerations:
    - key: "dedicated"
      operator: "Equal"
      value: "coastlines-processing"
      effect: "NoSchedule"
  affinity:
    nodeAffinity:
      requiredDuringSchedulingIgnoredDuringExecution:
        nodeSelectorTerms:
          - matchExpressions:
              - key: nodegroup
                operator: In
                values:
                  - coastlines-r5-8xl
  workflowMetadata:
    labels:
      app: deafrica-coastlines
  arguments:
    parameters:
      # All parameter values are quoted so that every YAML parser hands Argo
      # a string, never an integer.
      - name: image-tag
        value: "0.4.2"
      - name: result-version
        value: "v0.4.0"
      - name: subset
        value: "value should be 'all' or a list, like '[1,3,5,7,113]'"
      - name: result-bucket
        value: deafrica-data-dev-af
      - name: start-year
        value: "2000"
      - name: end-year
        value: "2021"
      - name: baseline-year
        value: "2021"
      - name: config
        value: configs/deafrica_coastlines_config.yaml
      - name: tiles-uri
        value: https://deafrica-input-datasets.s3.af-south-1.amazonaws.com/deafrica-coastlines/32km_coastal_grid_deafrica.geojson
      - name: cleanup
        value: "true"
      - name: safe-to-retry
        value: "true"
      - name: overwrite_raster
        value: "--no-overwrite"
      - name: overwrite_vector
        value: "--no-overwrite"
      # Processing Pod Resource Allocation Configurations
      - name: cpu-limit
        value: "9"
      - name: mem-limit
        value: 120Gi
  templates:
    # Entry point: wires the four stages together.
    - name: coastlines
      dag:
        tasks:
          - name: generate-ids
            template: generate
            arguments:
              parameters:
                - name: tiles-uri
                  value: "{{workflow.parameters.tiles-uri}}"
                - name: subset
                  value: "{{workflow.parameters.subset}}"
          # Fan out: one `process` pod per id printed by generate-ids.
          - name: process-id
            depends: generate-ids.Succeeded
            template: process
            withParam: "{{tasks.generate-ids.outputs.result}}"
            arguments:
              parameters:
                - name: id
                  value: "{{item}}"
                - name: config
                  value: "{{workflow.parameters.config}}"
                - name: result-version
                  value: "{{workflow.parameters.result-version}}"
                - name: start-year
                  value: "{{workflow.parameters.start-year}}"
                - name: end-year
                  value: "{{workflow.parameters.end-year}}"
                - name: baseline-year
                  value: "{{workflow.parameters.baseline-year}}"
                - name: safe-to-retry
                  value: "{{workflow.parameters.safe-to-retry}}"
                - name: overwrite_raster
                  value: "{{workflow.parameters.overwrite_raster}}"
                - name: overwrite_vector
                  value: "{{workflow.parameters.overwrite_vector}}"
                - name: cpu-limit
                  value: "{{workflow.parameters.cpu-limit}}"
                - name: mem-limit
                  value: "{{workflow.parameters.mem-limit}}"
          # Runs as soon as at least one tile succeeded.
          - name: merge
            depends: "process-id.AnySucceeded"
            template: merge-continental
            arguments:
              parameters:
                - name: result-version
                  value: "{{workflow.parameters.result-version}}"
                - name: result-bucket
                  value: "{{workflow.parameters.result-bucket}}"
                - name: baseline-year
                  value: "{{workflow.parameters.baseline-year}}"
          # Only runs after a successful merge and when cleanup == true; the
          # merge script therefore has to fail loudly (see merge-continental).
          - name: cleanup-storage
            depends: "merge"
            when: "{{workflow.parameters.cleanup}} == true"
            template: cleanup

    # Downloads the tile grid and prints, as a JSON list of strings, the ids
    # to process: every id when subset is "all", otherwise only listed ids.
    - name: generate
      inputs:
        parameters:
          - name: tiles-uri
          - name: subset
      script:
        image: 543785577597.dkr.ecr.af-south-1.amazonaws.com/digitalearthafrica/deafrica-coastlines:{{workflow.parameters.image-tag}}
        command: [python]
        env:
          # The subset is handed over as data instead of being pasted into
          # the Python source, so its quoting cannot break (or inject into)
          # the script.
          - name: SUBSET
            value: "{{inputs.parameters.subset}}"
        source: |
          import json
          import os
          import sys

          import requests

          url = "{{inputs.parameters.tiles-uri}}"
          response = requests.get(url)
          # Fail with a clear HTTP error rather than a JSON decoding error.
          response.raise_for_status()
          data = response.json()

          # Accept the value with or without one pair of wrapping quotes,
          # e.g. all, 'all', [1,3,5] or '[1,3,5]'.
          subset = os.environ["SUBSET"].strip()
          if len(subset) >= 2 and subset[0] == subset[-1] and subset[0] in "'\"":
              subset = subset[1:-1].strip()

          process_all = subset == "all"
          wanted = set() if process_all else set(json.loads(subset))

          ids = []
          for feature in data["features"]:
              this_id = int(feature["properties"]["id"])
              # Skip ids that are not part of the requested subset.
              if process_all or this_id in wanted:
                  ids.append(str(this_id))

          json.dump(ids, sys.stdout)

    # Runs the raster step and, if it succeeds, the vector step for one id.
    - name: process
      metadata:
        labels:
          app: "argo-coastlines-process-{{workflow.name}}"
      serviceAccountName: data-processing-staging
      inputs:
        parameters:
          - name: id
          - name: config
          - name: result-version
          - name: start-year
          - name: end-year
          - name: baseline-year
          - name: safe-to-retry
          - name: overwrite_raster
          - name: overwrite_vector
          - name: cpu-limit
          - name: mem-limit
      retryStrategy:
        retryPolicy: "Always"
        limit: "1"
        # Retry on system-level errors, or when the last exit code is greater
        # than 1 and the input parameter "safe-to-retry" is true.
        expression: 'lastRetry.status == "Error" or (lastRetry.status == "Failed" and asInt(lastRetry.exitCode) > 1 and {{inputs.parameters.safe-to-retry}} == true)'
      # Requests and limits are both set from cpu-limit / mem-limit.
      podSpecPatch: '{"containers":[{"name":"main", "resources":{"requests":{"cpu": "{{inputs.parameters.cpu-limit}}", "memory": "{{inputs.parameters.mem-limit}}"}, "limits":{"cpu": "{{inputs.parameters.cpu-limit}}", "memory": "{{inputs.parameters.mem-limit}}" }}}]}'
      container:
        image: 543785577597.dkr.ecr.af-south-1.amazonaws.com/digitalearthafrica/deafrica-coastlines:{{workflow.parameters.image-tag}}
        imagePullPolicy: IfNotPresent
        command: [bash, -c]
        # The folded scalar below becomes ONE shell line. The ";" after the
        # diagnostic `ls` is required: without it the raster command and its
        # flags would be passed to `ls` as arguments.
        args:
          - >-
            ls -la /var/share/fes2014/ ;
            deafricacoastlines-raster
            --config_path {{inputs.parameters.config}}
            --study_area {{inputs.parameters.id}}
            --raster_version {{inputs.parameters.result-version}}
            --start_year {{inputs.parameters.start-year}}
            --end_year {{inputs.parameters.end-year}}
            {{inputs.parameters.overwrite_raster}}
            && deafricacoastlines-vector
            --config_path {{inputs.parameters.config}}
            --study_area {{inputs.parameters.id}}
            --raster_version {{inputs.parameters.result-version}}
            --start_year {{inputs.parameters.start-year}}
            --end_year {{inputs.parameters.end-year}}
            --baseline_year {{inputs.parameters.baseline-year}}
            {{inputs.parameters.overwrite_vector}}
        env:
          - name: DB_USERNAME
            valueFrom:
              secretKeyRef:
                key: postgres-username
                name: odc-reader
          - name: DB_PASSWORD
            valueFrom:
              secretKeyRef:
                key: postgres-password
                name: odc-reader
          - name: DB_DATABASE
            value: odc
          - name: DB_HOSTNAME
            value: db-reader
          - name: AWS_S3_ENDPOINT
            value: s3.af-south-1.amazonaws.com
        volumeMounts:
          - name: argo-coastlines
            mountPath: /code/data
          - name: otps-data
            mountPath: /var/share/TPX08_atlas_compact/
          - name: tide-model
            mountPath: /var/share/fes2014/
      volumes:
        - name: argo-coastlines
          persistentVolumeClaim:
            claimName: argo-coastlines-pvc
        - name: otps-data
          persistentVolumeClaim:
            claimName: otps-data-pvc
            readOnly: true
        - name: tide-model
          persistentVolumeClaim:
            claimName: tide-model-pvc
            readOnly: true

    # Merges the per-tile outputs and uploads the resulting .gpkg and
    # .shp.zip files to the result bucket.
    - name: merge-continental
      metadata:
        labels:
          app: "argo-coastlines-merge-{{workflow.name}}"
      serviceAccountName: data-processing-staging
      inputs:
        parameters:
          - name: result-version
          - name: result-bucket
          - name: baseline-year
      container:
        image: 543785577597.dkr.ecr.af-south-1.amazonaws.com/digitalearthafrica/deafrica-coastlines:{{workflow.parameters.image-tag}}
        imagePullPolicy: IfNotPresent
        resources:
          requests:
            memory: 20Gi
            cpu: 1.0
          limits:
            cpu: 4.0
            memory: 50Gi
        volumeMounts:
          - name: argo-coastlines
            mountPath: /code/data
        command: [bash, -c]
        args:
          - |
            # Abort on the first failing command. Without this the script's
            # exit status is that of the final `echo`, so a failed merge or
            # upload would be reported as success and cleanup-storage could
            # delete the data.
            set -euo pipefail

            deafricacoastlines-continental \
              --vector_version {{inputs.parameters.result-version}} \
              --shorelines True \
              --hotspots True \
              --baseline_year {{inputs.parameters.baseline-year}}

            out_dir="/code/data/processed/{{inputs.parameters.result-version}}"
            dest="s3://{{inputs.parameters.result-bucket}}/coastlines/{{inputs.parameters.result-version}}"

            gpkg_file=$(find "$out_dir/" -name "*.gpkg" -printf '%f\n')
            shp_file=$(find "$out_dir/" -name "*.shp.zip" -printf '%f\n')
            if [ -z "$gpkg_file" ] || [ -z "$shp_file" ]; then
              echo "ERROR: expected a .gpkg and a .shp.zip file under $out_dir" >&2
              exit 1
            fi

            aws s3 cp "$out_dir/$gpkg_file" "$dest/$gpkg_file"
            aws s3 cp "$out_dir/$shp_file" "$dest/$shp_file"
            echo "Files uploaded to https://{{inputs.parameters.result-bucket}}.s3.af-south-1.amazonaws.com/coastlines/{{inputs.parameters.result-version}}/$gpkg_file and https://{{inputs.parameters.result-bucket}}.s3.af-south-1.amazonaws.com/coastlines/{{inputs.parameters.result-version}}/$shp_file"
      volumes:
        - name: argo-coastlines
          persistentVolumeClaim:
            claimName: argo-coastlines-pvc

    # Empties the shared working volume, then lists what is left.
    - name: cleanup
      container:
        # NOTE(review): the tag is unpinned; consider pinning a version.
        image: busybox:latest
        imagePullPolicy: IfNotPresent
        command: [sh, -c]
        # The `*` glob does not match dot-files, so hidden entries survive.
        args: ["rm -rf /code/data/* ; ls -la /code/data/ ; echo DONE!"]
        volumeMounts:
          - name: argo-coastlines
            mountPath: /code/data
      volumes:
        - name: argo-coastlines
          persistentVolumeClaim:
            claimName: argo-coastlines-pvc
---
# Example run of the `deafrica-coastlines` WorkflowTemplate on a fixed subset
# of tile ids, with the storage cleanup step disabled.
apiVersion: argoproj.io/v1alpha1
kind: Workflow
metadata:
  name: coastlines-test
  namespace: processing
  annotations:
    kustomize.toolkit.fluxcd.io/reconcile: disabled
spec:
  entrypoint: coastlines-run
  podGC:
    strategy: OnPodSuccess
  # Example limiting the parallelism. This can be increased too.
  parallelism: 50
  tolerations:
    - key: "dedicated"
      operator: "Equal"
      value: "coastlines-processing"
      effect: "NoSchedule"
  affinity:
    nodeAffinity:
      requiredDuringSchedulingIgnoredDuringExecution:
        nodeSelectorTerms:
          - matchExpressions:
              - key: nodegroup
                operator: In
                values:
                  - coastlines-r5-8xl
  workflowMetadata:
    labels:
      app: deafrica-coastlines
  arguments:
    parameters:
      # All parameter values are quoted so that every YAML parser hands Argo
      # a string, never an integer.
      - name: image-tag
        value: "0.4.2"
      - name: result-version
        value: "v0.4.2_test"
      # The value deliberately keeps its inner single quotes, matching the
      # format described by the template's `subset` default.
      - name: subset
        value: "'[39, 45, 46, 135, 136, 137, 138, 139, 400, 402, 404, 407, 409, 434, 435, 439, 442, 443, 450, 451, 455, 462, 464, 467, 468, 473, 474, 478, 479, 483, 484, 485, 488, 489, 490, 491, 492, 495, 504, 505, 508, 593, 594, 595, 596, 610, 611, 612, 613, 614, 703, 704, 705, 706, 707, 709, 710, 711, 712, 713, 714, 715, 716, 717, 718, 720, 838, 840, 843, 845, 847, 849, 850, 853, 855, 856, 858, 919, 920, 923, 928, 929, 942, 958, 959, 965, 966, 1079, 1080, 1088, 1089, 1100, 1101, 1102, 1106, 1107, 1108, 1111, 1112, 1113, 1121, 1147, 1168, 1169, 1170, 1198, 1199, 1200, 1201, 1358, 1359, 1376, 1377, 1397, 1398, 1399, 1416, 1417, 1418, 1419, 1420, 1426, 1436, 1437, 1446, 1447, 1453, 1454, 1461, 1473, 469, 470, 475, 476, 477, 486, 487, 1332, 1333, 1334, 1348, 1349, 1350, 1351, 1366, 1367, 1388, 1409, 1410]'"
      - name: result-bucket
        value: deafrica-data-dev-af
      - name: start-year
        value: "2000"
      - name: end-year
        value: "2021"
      - name: baseline-year
        value: "2021"
      - name: config
        value: configs/deafrica_coastlines_config.yaml
      - name: tiles-uri
        value: https://deafrica-input-datasets.s3.af-south-1.amazonaws.com/deafrica-coastlines/32km_coastal_grid_deafrica.geojson
      - name: cleanup
        value: "false"
      - name: safe-to-retry
        value: "true"
      - name: overwrite_raster
        value: "--no-overwrite"
      - name: overwrite_vector
        value: "--no-overwrite"
      - name: cpu-limit
        value: "9"
      - name: mem-limit
        value: 120Gi
  templates:
    # Single step that delegates to the template's `coastlines` entry point.
    - name: coastlines-run
      steps:
        - - name: coastlines-task
            templateRef:
              name: deafrica-coastlines
              template: coastlines
# General-purpose m5.16xlarge node group, tainted for "statistician" workloads.
module "m5_16xl_eks_nodes" {
  source = "../../../modules/node_group"

  # Values used for tags
  owner       = local.owner
  environment = local.environment
  namespace   = local.namespace

  # Cluster wiring
  cluster_id          = local.cluster_id
  cluster_version     = local.cluster_version
  ami_image_id        = local.ami_image_id
  node_security_group = local.node_security_group

  # Node group identity
  node_group_name     = "general-purpose-m5-16xl"
  node_instance_types = ["m5.16xlarge"] # instance types with the same CPU and RAM, e.g. r5.large, r5a.large
  node_size           = "m5_16L"        # e.g. "L", "2XL", "r5_L", "mixed_L" etc

  # Node taint configuration
  enable_taint = true
  node_purpose = "statistician"

  # Group size and spot price ceiling
  min_nodes      = 0
  desired_nodes  = 0
  max_nodes      = 100
  max_spot_price = "1.50"

  # Extra bootstrap configuration
  extra_userdata       = local.extra_userdata
  extra_kubelet_args   = local.extra_kubelet_args
  extra_bootstrap_args = local.extra_bootstrap_args
}
# Memory-optimised r5/r5d.16xlarge node group, tainted for "statistician" workloads.
module "r5_16xl_eks_nodes" {
  source = "../../../modules/node_group"

  # Values used for tags
  owner       = local.owner
  environment = local.environment
  namespace   = local.namespace

  # Cluster wiring
  cluster_id          = local.cluster_id
  cluster_version     = local.cluster_version
  ami_image_id        = local.ami_image_id
  node_security_group = local.node_security_group

  # Node group identity
  node_group_name     = "memory-optimised-r5-16xl"
  node_instance_types = ["r5.16xlarge", "r5d.16xlarge"] # instance types with the same CPU and RAM, e.g. r5.large, r5a.large
  node_size           = "r5_16L"                        # e.g. "L", "2XL", "r5_L", "mixed_L" etc

  # Node taint configuration
  enable_taint = true
  node_purpose = "statistician"

  # Group size and spot price ceiling
  min_nodes      = 0
  desired_nodes  = 0
  max_nodes      = 200
  max_spot_price = "1.50"

  # Extra bootstrap configuration
  extra_userdata       = local.extra_userdata
  extra_kubelet_args   = local.extra_kubelet_args
  extra_bootstrap_args = local.extra_bootstrap_args
}
# Memory-optimised r5/r5d.12xlarge node group, tainted for "statistician" workloads.
module "r5_12xl_eks_nodes" {
  source = "../../../modules/node_group"

  # Values used for tags
  owner       = local.owner
  environment = local.environment
  namespace   = local.namespace

  # Cluster wiring
  cluster_id          = local.cluster_id
  cluster_version     = local.cluster_version
  ami_image_id        = local.ami_image_id
  node_security_group = local.node_security_group

  # Node group identity
  node_group_name     = "memory-optimised-r5-12xl"
  node_instance_types = ["r5.12xlarge", "r5d.12xlarge"] # instance types with the same CPU and RAM, e.g. r5.large, r5a.large
  node_size           = "r5_12L"                        # e.g. "L", "2XL", "r5_L", "mixed_L" etc

  # Node taint configuration
  enable_taint = true
  node_purpose = "statistician"

  # Group size and spot price ceiling
  min_nodes      = 0
  desired_nodes  = 0
  max_nodes      = 100
  max_spot_price = "1.00"

  # Extra bootstrap configuration
  extra_userdata       = local.extra_userdata
  extra_kubelet_args   = local.extra_kubelet_args
  extra_bootstrap_args = local.extra_bootstrap_args
}
# Separate node group for Alchemist: compute-optimised c5/c5d.4xlarge nodes.
module "c5_4xl_alchemist_nodes" {
  source = "../../../modules/node_group"

  # Values used for tags
  owner       = local.owner
  environment = local.environment
  namespace   = local.namespace

  # Cluster wiring
  cluster_id          = local.cluster_id
  cluster_version     = local.cluster_version
  ami_image_id        = local.ami_image_id
  node_security_group = local.node_security_group

  # Node group identity
  node_group_name     = "compute-optimised-alchemist-c5-4xl"
  node_instance_types = ["c5.4xlarge", "c5d.4xlarge"] # instance types with the same CPU and RAM, e.g. r5.large, r5a.large
  node_size           = "c5_4L"                       # e.g. "L", "2XL", "r5_L", "mixed_L" etc

  # Group size and spot price ceiling
  min_nodes      = 0
  desired_nodes  = 0
  max_nodes      = 150
  max_spot_price = "0.50"

  # Extra bootstrap configuration
  extra_userdata       = local.extra_userdata
  extra_kubelet_args   = local.extra_kubelet_args
  extra_bootstrap_args = local.extra_bootstrap_args

  # Node taint configuration - optional
  enable_taint = true
  node_purpose = "alchemist-processing"
}
# Crop-mask processing node group: r5/r5d.16xlarge nodes with a 50 GB gp3 root volume.
module "r5_16xl_crop_mask_eks_nodes" {
  source = "../../../modules/node_group"

  # Values used for tags
  owner       = local.owner
  environment = local.environment
  namespace   = local.namespace

  # Cluster wiring
  cluster_id          = local.cluster_id
  cluster_version     = local.cluster_version
  ami_image_id        = local.ami_image_id
  node_security_group = local.node_security_group

  # Node group identity
  node_group_name     = "crop-mask-r5-16xl"             # affinity.nodeAffinity...values
  node_instance_types = ["r5.16xlarge", "r5d.16xlarge"] # instance types with the same CPU and RAM, e.g. r5.large, r5a.large
  node_size           = "r5_16L"                        # e.g. "L", "2XL", "r5_L", "mixed_L" etc

  # Root volume settings
  node_root_volume_mappings = {
    volume_type           = "gp3"
    volume_size           = 50
    iops                  = 3000
    throughput            = 125
    encrypted             = true
    delete_on_termination = true
  }

  # Group size and spot price ceiling
  min_nodes      = 0
  desired_nodes  = 0
  max_nodes      = 300
  max_spot_price = "2.00"

  # Extra bootstrap configuration
  extra_userdata       = local.extra_userdata
  extra_kubelet_args   = local.extra_kubelet_args
  extra_bootstrap_args = local.extra_bootstrap_args

  # Node taint configuration
  enable_taint = true
  node_purpose = "crop-mask-processing" # tolerations.value
}
# Web services node group: on-demand r5.2xlarge nodes, tainted for "web".
module "web_services_nodes" {
  source = "../../../modules/node_group"

  # Values used for tags
  owner       = local.owner
  environment = local.environment
  namespace   = local.namespace

  # Cluster wiring
  cluster_id          = local.cluster_id
  cluster_version     = local.cluster_version
  ami_image_id        = local.ami_image_id
  node_security_group = local.node_security_group

  # Node group identity
  node_group_name     = "web-services-nodes"
  node_instance_types = ["r5.2xlarge"] # instance types with the same CPU and RAM, e.g. r5.large, r5a.large
  node_size           = "r5_2XL"       # e.g. "L", "2XL", "r5_L", "mixed_L" etc

  # Group size
  min_nodes     = 1
  desired_nodes = 1
  max_nodes     = 5

  # Node taint configuration
  enable_taint = true
  node_purpose = "web"

  # Capacity type: on-demand for everything above the base capacity
  node_type                                = "ondemand"
  on_demand_percentage_above_base_capacity = 100

  # Extra bootstrap configuration
  extra_userdata       = local.extra_userdata
  extra_kubelet_args   = local.extra_kubelet_args
  extra_bootstrap_args = local.extra_bootstrap_args
}
# Argo processing node group: r5d/r5.4xlarge nodes with a 30 GB gp3 root volume.
module "r5_4xl_argo_eks_nodes" {
  source = "../../../modules/node_group"

  # Values used for tags
  owner       = local.owner
  environment = local.environment
  namespace   = local.namespace

  # Cluster wiring
  cluster_id          = local.cluster_id
  cluster_version     = local.cluster_version
  ami_image_id        = local.ami_image_id
  node_security_group = local.node_security_group

  # Node group identity
  node_group_name     = "argo-r5-4xl"                 # affinity.nodeAffinity...values
  node_instance_types = ["r5d.4xlarge", "r5.4xlarge"] # instance types with the same CPU and RAM, e.g. r5.large, r5a.large
  node_size           = "r5_4L"                       # e.g. "L", "2XL", "r5_L", "mixed_L" etc

  # Root volume settings
  node_root_volume_mappings = {
    volume_type           = "gp3"
    volume_size           = 30
    iops                  = 3000
    throughput            = 125
    encrypted             = true
    delete_on_termination = true
  }

  # Group size and spot price ceiling
  min_nodes      = 0
  desired_nodes  = 0
  max_nodes      = 100
  max_spot_price = "0.35"

  # Extra bootstrap configuration
  extra_userdata       = local.extra_userdata
  extra_kubelet_args   = local.extra_kubelet_args
  extra_bootstrap_args = local.extra_bootstrap_args

  # Node taint configuration. Pods need a toleration for this purpose.
  enable_taint = true
  node_purpose = "argo-processing"
}
# Coastlines processing node group: r5d/r5.8xlarge nodes. The coastlines Argo
# manifests use this group's name in their nodeAffinity values and this
# node_purpose as their toleration value.
module "r5_8xl_coastlines_nodes" {
  source = "../../../modules/node_group"

  # Values used for tags
  owner       = local.owner
  environment = local.environment
  namespace   = local.namespace

  # Cluster wiring
  cluster_id          = local.cluster_id
  cluster_version     = local.cluster_version
  ami_image_id        = local.ami_image_id
  node_security_group = local.node_security_group

  # Node group identity
  node_group_name     = "coastlines-r5-8xl"           # affinity.nodeAffinity...values
  node_instance_types = ["r5d.8xlarge", "r5.8xlarge"] # instance types with the same CPU and RAM, e.g. r5.large, r5a.large
  node_size           = "r5_8L"                       # e.g. "L", "2XL", "r5_L", "mixed_L" etc

  # Group size and spot price ceiling
  min_nodes      = 0
  desired_nodes  = 0
  max_nodes      = 150
  max_spot_price = "0.65"

  # Extra bootstrap configuration
  extra_userdata       = local.extra_userdata
  extra_kubelet_args   = local.extra_kubelet_args
  extra_bootstrap_args = local.extra_bootstrap_args

  # Node taint configuration. Pods need a toleration for this purpose.
  enable_taint = true
  node_purpose = "coastlines-processing"
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment