Last active
March 6, 2024 12:15
-
-
Save cartershanklin/3c3eb6ec830be01a1f07aaaa0ea71ce0 to your computer and use it in GitHub Desktop.
OCI Data Flow Tutorial Example 1 using the OCI CLI
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
# Copyright (c) 2020, Oracle and/or its affiliates. All rights reserved.
#
# This script executes Example 1 of the Data Flow Tutorial
# https://docs.cloud.oracle.com/en-us/iaas/data-flow/data-flow-tutorial/tutorial/dfs_tut_etl_java.htm#etl_with_java
#
# For more help with specific Data Flow commands, see:
#   oci data-flow -h
#
# Requirements for running this script:
# - OCI CLI v2.9.1 or later (you can check this by running oci --version)
# - jq (https://stedolan.github.io/jq/) for JSON querying and manipulation of CLI output. This may be a useful utility in general
#   and may help cater to scenarios which can't be wholly addressed by the --query option in the CLI
# Environment variables need to be set:
# - COMPARTMENT_ID - Your compartment OCID where you want the Data Flow Application and Run created.
# - OUTPUT_PATH - The OCI object store path for ETLed data. The bucket must exist; the script will not create it.

# Strict mode: abort on command failure, on use of unset variables, and on
# failure of any stage of a pipeline (not just the last one).
set -euo pipefail

# Validate required environment variables up front. ${VAR:-} keeps 'set -u'
# from aborting with an unhelpful message when the variable is unset, so the
# user sees our explanation instead. Diagnostics go to stderr.
if [[ -z "${COMPARTMENT_ID:-}" ]]; then
  echo "COMPARTMENT_ID must be defined in your environment" >&2
  exit 1
fi
if [[ -z "${OUTPUT_PATH:-}" ]]; then
  echo "OUTPUT_PATH must be defined in your environment" >&2
  exit 1
fi
echo "Creating the Data Flow Application"
{
  # Create the Data Flow Application and capture the CLI's JSON response.
  # All variable expansions are quoted so OCIDs/paths with unusual characters
  # cannot be word-split or glob-expanded.
  result=$(
    oci data-flow application create \
      --compartment-id "$COMPARTMENT_ID" \
      --display-name 'Data Flow Tutorial App 1 CLI' \
      --driver-shape VM.Standard2.1 \
      --executor-shape VM.Standard2.1 \
      --num-executors 1 \
      --spark-version 2.4.4 \
      --file-uri oci://oow_2019_dataflow_lab@bigdatadatasciencelarge/usercontent/oow-lab-2019-java-etl-1.0-SNAPSHOT.jar \
      --language Java \
      --class-name convert.Convert \
      --arguments '${input} ${output}' \
      --parameters "input=oci://oow_2019_dataflow_lab@bigdatadatasciencelarge/usercontent/kaggle_berlin_airbnb_listings_summary.csv output=\"$OUTPUT_PATH\""
  )
  # Extract the new Application's OCID. A here-string feeds jq directly,
  # avoiding 'echo $result' which would word-split and glob-expand the JSON.
  APPLICATION_ID=$(jq -r '.data.id' <<<"$result")
} || {
  echo "Could not create the Data Flow Application or get its ID." >&2
  printf '%s\n' "$result" >&2
  exit 1
}
echo "Running the Data Flow Application (ID = $APPLICATION_ID)"
{
  # Launch a Run of the Application created above; capture the JSON response.
  result=$(
    oci data-flow run create \
      --compartment-id "$COMPARTMENT_ID" \
      --application-id "$APPLICATION_ID" \
      --display-name 'Data Flow Tutorial App 1 CLI'
  )
  # Extract the Run's OCID for the status-polling loop below.
  RUN_ID=$(jq -r '.data.id' <<<"$result")
} || {
  echo "Could not run the Data Flow application" >&2
  printf '%s\n' "$result" >&2
  exit 1
}
# Poll the Run until it reaches a terminal state (SUCCEEDED / FAILED / other).
STATUS=1        # 0 once the Run has succeeded; loop sentinel
INTERVAL=15     # seconds to wait between status probes
PROBE_COUNT=0   # give up after 100 probes (~25 minutes)
while (( STATUS != 0 )); do
  echo "Waiting $INTERVAL seconds for the Data Flow Application to Complete"
  sleep "$INTERVAL"
  {
    result=$(oci data-flow run get --run-id "$RUN_ID")
    STATE=$(jq -r '.data."lifecycle-state"' <<<"$result")
    echo "Run state of run ID $RUN_ID is $STATE"
    # Arithmetic expansion replaces the deprecated external 'expr'.
    PROBE_COUNT=$((PROBE_COUNT + 1))
    if (( PROBE_COUNT > 100 )); then
      echo "Run is taking too long, exiting" >&2
      exit 1
    fi
    # [[ ]] with quoted $STATE is safe even if the probe returned an empty
    # or multi-word state, where the old '[ $STATE = ... -o ... ]' would be
    # a syntax error; '-o' is also deprecated in POSIX test.
    if [[ "$STATE" == "ACCEPTED" || "$STATE" == "IN_PROGRESS" ]]; then
      echo "Run is in progress, waiting"
    elif [[ "$STATE" == "SUCCEEDED" ]]; then
      echo "Run is finished"
      STATUS=0
    elif [[ "$STATE" == "FAILED" ]]; then
      echo "Run failed, more information:" >&2
      jq -r '.data."lifecycle-details"' <<<"$result" >&2
      exit 1
    else
      echo "Unexpected state, stopping" >&2
      exit 1
    fi
  } || {
    echo "Could not get status of the Data Flow run" >&2
    printf '%s\n' "$result" >&2
    exit 1
  }
done
# Fetch the Spark driver's stdout log for the Run and stream it to stdout
# ('--file -' writes to the terminal instead of a local file).
echo "Output of the Data Flow Run follows:"
oci data-flow run get-log --run-id "$RUN_ID" --name spark_application_stdout.log.gz --file -
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment