Created
April 5, 2021 05:02
-
-
Save lordlinus/9f8f4e4041ea764751113f57ca62e075 to your computer and use it in GitHub Desktop.
Bash script to deploy Databricks Cluster and other dependencies
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
#
# Configuration for deploying an Azure Databricks workspace and cluster.
# Every setting is exported, so it is also visible to child processes.

set -o errexit  # exit as soon as a command fails
set -o nounset  # expanding an unset variable is an error
set -o pipefail # a pipeline fails when any of its stages fails

# Service principal credentials. A non-empty value already present in the
# environment takes precedence; the placeholder below is only a fallback and
# has to be replaced (or overridden) before the login can succeed. Prefer
# passing ARM_CLIENT_SECRET through the environment over storing it here.
# NOTE: there must be no whitespace after '=' -- "VAR= value" exports VAR as
# an empty string and then tries to export "value" as a second name.
export ARM_SUBSCRIPTION_ID="${ARM_SUBSCRIPTION_ID:-XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX}"
export ARM_TENANT_ID="${ARM_TENANT_ID:-XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX}"
export ARM_CLIENT_ID="${ARM_CLIENT_ID:-XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX}"
export ARM_CLIENT_SECRET="${ARM_CLIENT_SECRET:-XXXXXXXXXXXXX}"

export MANAGEMENT_RESOURCE_ENDPOINT="https://management.core.windows.net/" # This is Fixed value (DO NOT CHANGE)
export AZURE_DATABRICKS_APP_ID="2ff814a6-3304-4ab8-85cb-cd0e6f879c1d"      # This is Fixed value (DO NOT CHANGE)

# Azure placement.
export RESOURCE_GROUP="rg-test-01"
export LOCATION="southeastasia"

# Databricks workspace and cluster settings.
export DATABRICKS_WORKSPACE="TestWorkspace"
export DATABRICKS_CLUSTER_NAME="test-cluster-01"
export DATABRICKS_SPARK_VERSION="7.3.x-scala2.12"
export DATABRICKS_NODE_TYPE="Standard_D3_v2"
export DATABRICKS_NUM_WORKERS=3 # Needs to be a number
export DATABRICKS_SPARK_CONF='{"spark.speculation":"true","spark.databricks.delta.preview.enabled":"true"}' # Needs to be valid JSON
export DATABRICKS_AUTO_TERMINATE_MINUTES=60 # Needs to be a number
# Log in with the service principal, then select the target subscription.
# All expansions are quoted so that credentials containing whitespace or glob
# characters reach az as a single argument.
echo "Logging in using Azure service priciple"
az login --service-principal \
  --username "$ARM_CLIENT_ID" \
  --password "$ARM_CLIENT_SECRET" \
  --tenant "$ARM_TENANT_ID"
az account set --subscription "$ARM_SUBSCRIPTION_ID"
# Create the resource group when it does not exist yet.
# NOTE: the list of valid locations is available from
#   az account list-locations | jq '.[].name'
# The result is captured in its own assignment so that a failing az call
# aborts the script under errexit; inside [[ $(...) ]] the failure would be
# ignored and treated as "group exists".
resource_group_exists=$(az group exists --name "$RESOURCE_GROUP")
if [[ "$resource_group_exists" == "false" ]]; then
  echo "Resource Group does not exists, so creating.."
  az group create --name "$RESOURCE_GROUP" --location "$LOCATION"
fi
# Let az install missing extensions without prompting.
az config set extension.use_dynamic_install=yes_without_prompt

# Create the Databricks workspace through the az "databricks" extension when
# no workspace with this name exists in the resource group.
# The extension is installed automatically the first time an
# "az databricks workspace" command runs.
# Ref: https://docs.microsoft.com/en-us/cli/azure/ext/databricks/databricks?view=azure-cli-latest
#
# The workspace name is passed to jq with --arg and compared exactly, so it is
# never interpreted as a pattern and JSON quoting cannot break the comparison.
matching_workspaces=$(
  az databricks workspace list --resource-group "$RESOURCE_GROUP" --output json \
    | jq --arg name "$DATABRICKS_WORKSPACE" '[.[] | select(.name == $name)] | length'
)
if [[ "$matching_workspaces" -eq 0 ]]; then
  echo "Databricks workspace does not exists, so creating.."
  az databricks workspace create \
    --location "$LOCATION" \
    --name "$DATABRICKS_WORKSPACE" \
    --sku trial \
    --resource-group "$RESOURCE_GROUP" \
    --enable-no-public-ip \
    --tags environment=demo level=level3
fi
# Look up the workspace resource id within the resource group, e.g.
# /subscriptions/(subscription_id)/resourceGroups/(rg)/providers/Microsoft.Databricks/workspaces/(databricks_workspace)
wsId=$(
  az resource show \
    --resource-type Microsoft.Databricks/workspaces \
    --resource-group "$RESOURCE_GROUP" \
    --name "$DATABRICKS_WORKSPACE" \
    --query id \
    --output tsv
)
echo "Workspce ID: $wsId"

# Look up the workspace URL, e.g. adb-xxxxxxxxxxxxxxxx.x.azuredatabricks.net
workspaceUrl=$(
  az resource show \
    --resource-type Microsoft.Databricks/workspaces \
    --resource-group "$RESOURCE_GROUP" \
    --name "$DATABRICKS_WORKSPACE" \
    --query properties.workspaceUrl \
    --output tsv
)
echo "Workspce URL: $workspaceUrl"
# Request an Azure AD access token for the Azure Databricks application.
token_response=$(az account get-access-token --resource "$AZURE_DATABRICKS_APP_ID")

# Extract the accessToken value. "jq -e" exits non-zero when the field is
# missing or null, which stops the script (errexit is enabled at the top)
# instead of continuing with the literal string "null".
token=$(jq -er '.accessToken' <<<"$token_response")
# The token is a bearer credential, so its value is deliberately not printed.
echo "Token: acquired (value not printed)"

# Get the Azure Management Resource endpoint token
# https://docs.microsoft.com/en-us/azure/databricks/dev-tools/api/latest/aad/service-prin-aad-token#--get-the-azure-management-resource-endpoint-token
# --data-urlencode keeps the form body valid when the client secret contains
# characters such as '&', '+' or '='. --fail turns an HTTP error status into a
# non-zero exit code.
# NOTE(review): the secret is still visible in curl's argument list while the
# request runs.
az_mgmt_resource_endpoint=$(
  curl --fail --silent --show-error -X GET \
    -H 'Content-Type: application/x-www-form-urlencoded' \
    --data-urlencode 'grant_type=client_credentials' \
    --data-urlencode "client_id=${ARM_CLIENT_ID}" \
    --data-urlencode "resource=${MANAGEMENT_RESOURCE_ENDPOINT}" \
    --data-urlencode "client_secret=${ARM_CLIENT_SECRET}" \
    "https://login.microsoftonline.com/${ARM_TENANT_ID}/oauth2/token"
)

# Extract the access_token value (same null handling as above).
mgmt_access_token=$(jq -er '.access_token' <<<"$az_mgmt_resource_endpoint")
echo "Management Access Token: acquired (value not printed)"
# Headers sent with every Databricks REST call in this section: the AAD token
# for the Databricks application, the management endpoint token and the
# workspace resource id.
databricks_api_headers=(
  -H "Authorization: Bearer $token"
  -H "X-Databricks-Azure-SP-Management-Token: $mgmt_access_token"
  -H "X-Databricks-Azure-Workspace-Resource-Id: $wsId"
)

# Create a PAT token valid for 5 min (300 sec).
# --fail makes curl exit non-zero on an HTTP error status.
pat_token_response=$(
  curl --fail --silent --show-error -X POST \
    "${databricks_api_headers[@]}" \
    -d '{"lifetime_seconds": 300,"comment": "this is an example token"}' \
    "https://${workspaceUrl}/api/2.0/token/create"
)

# Print the PAT token. "jq -e" fails when token_value is missing or null.
# NOTE: this writes a (short-lived) credential to stdout.
pat_token=$(jq -er '.token_value' <<<"$pat_token_response")
printf '%s\n' "$pat_token"

# List PAT tokens (OPTIONAL)
curl --fail --silent --show-error -X GET \
  "${databricks_api_headers[@]}" \
  "https://${workspaceUrl}/api/2.0/token/list"

# List current clusters (OPTIONAL); the result could be used to decide the
# next command, e.g. create, restart, terminate etc.
curl --fail --silent --show-error -X GET \
  "${databricks_api_headers[@]}" \
  "https://${workspaceUrl}/api/2.0/clusters/list"
#######################################
# Builds the cluster definition sent to the clusters/create endpoint.
# Globals (read):
#   DATABRICKS_CLUSTER_NAME, DATABRICKS_SPARK_VERSION, DATABRICKS_NODE_TYPE,
#   DATABRICKS_NUM_WORKERS (number), DATABRICKS_AUTO_TERMINATE_MINUTES (number),
#   DATABRICKS_SPARK_CONF (JSON text)
# Outputs:
#   One line of compact JSON on stdout.
# Returns:
#   jq's exit status; non-zero when a numeric setting is not a number or the
#   Spark configuration is not valid JSON.
#######################################
build_cluster_config() {
  # Values are handed to jq with --arg, so jq does all the JSON escaping;
  # tonumber/fromjson then convert the numeric and JSON-typed settings.
  jq -n -c \
    --arg cn "$DATABRICKS_CLUSTER_NAME" \
    --arg sv "$DATABRICKS_SPARK_VERSION" \
    --arg nt "$DATABRICKS_NODE_TYPE" \
    --arg nw "$DATABRICKS_NUM_WORKERS" \
    --arg sc "$DATABRICKS_SPARK_CONF" \
    --arg at "$DATABRICKS_AUTO_TERMINATE_MINUTES" \
    '{cluster_name: $cn,
      spark_version: $sv,
      node_type_id: $nt,
      num_workers: ($nw|tonumber),
      autotermination_minutes: ($at|tonumber),
      spark_conf: ($sc|fromjson)}'
}

# Create Cluster config from variables
JSON_STRING=$(build_cluster_config)
# Create a new cluster from the JSON definition in JSON_STRING.
# Reference: https://docs.microsoft.com/en-us/azure/databricks/dev-tools/api/latest/
# The payload is quoted so it reaches curl as one argument even when it
# contains spaces (e.g. in the cluster name). --fail makes curl exit non-zero
# on an HTTP error status.
cluster_id_response=$(
  curl --fail --silent --show-error -X POST \
    -H "Authorization: Bearer $token" \
    -H "X-Databricks-Azure-SP-Management-Token: $mgmt_access_token" \
    -H "X-Databricks-Azure-Workspace-Resource-Id: $wsId" \
    -d "$JSON_STRING" \
    "https://${workspaceUrl}/api/2.0/clusters/create"
)

# Print cluster_id. "jq -e" fails when the response carries no cluster_id.
cluster_id=$(jq -er '.cluster_id' <<<"$cluster_id_response")
echo "Cluster id: $cluster_id"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment