Skip to content

Instantly share code, notes, and snippets.

@lordlinus
Created April 5, 2021 05:02
Show Gist options
  • Save lordlinus/9f8f4e4041ea764751113f57ca62e075 to your computer and use it in GitHub Desktop.
Save lordlinus/9f8f4e4041ea764751113f57ca62e075 to your computer and use it in GitHub Desktop.
Bash script to deploy Databricks Cluster and other dependencies
#!/usr/bin/env bash
# Deployment settings for the Azure Databricks workspace and cluster that the
# rest of this script creates. Edit the values below before running.

# Strict mode: stop on the first failing command, treat unset variables as
# errors, and make a pipeline fail if any stage of it fails.
set -o errexit
set -o nounset
set -o pipefail

# --- Service principal credentials (replace the placeholders) ---------------
# There must be NO whitespace around '=': `export NAME= value` exports an empty
# NAME and then tries to export `value` as a variable name, which aborts the
# script under errexit.
# NOTE(review): real secrets should not be committed in this file; consider
# supplying them from the environment or a secret store instead.
export ARM_SUBSCRIPTION_ID="XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX"
export ARM_TENANT_ID="XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX"
export ARM_CLIENT_ID="XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX"
export ARM_CLIENT_SECRET="XXXXXXXXXXXXX"

# --- Fixed values (DO NOT CHANGE) -------------------------------------------
export MANAGEMENT_RESOURCE_ENDPOINT="https://management.core.windows.net/"
export AZURE_DATABRICKS_APP_ID="2ff814a6-3304-4ab8-85cb-cd0e6f879c1d"

# --- Azure placement ---------------------------------------------------------
export RESOURCE_GROUP="rg-test-01"
export LOCATION="southeastasia"

# --- Databricks workspace and cluster ---------------------------------------
export DATABRICKS_WORKSPACE="TestWorkspace"
export DATABRICKS_CLUSTER_NAME="test-cluster-01"
export DATABRICKS_SPARK_VERSION="7.3.x-scala2.12"
export DATABRICKS_NODE_TYPE="Standard_D3_v2"
export DATABRICKS_NUM_WORKERS=3 # Must be a number
# Must be valid JSON (it is parsed with jq's `fromjson` later in the script).
export DATABRICKS_SPARK_CONF='{"spark.speculation":"true","spark.databricks.delta.preview.enabled":"true"}'
export DATABRICKS_AUTO_TERMINATE_MINUTES=60 # Must be a number
# Log in with the service principal, then select the target subscription.
# Every expansion is quoted so a value containing spaces or shell
# metacharacters (likely for a client secret) reaches `az` as one argument.
# NOTE(review): passing the secret via --password puts it on the command line,
# where it may be visible in process listings.
echo "Logging in using Azure service priciple"
az login --service-principal \
  --username "$ARM_CLIENT_ID" \
  --password "$ARM_CLIENT_SECRET" \
  --tenant "$ARM_TENANT_ID"
az account set --subscription "$ARM_SUBSCRIPTION_ID"
# Create the resource group only when it does not exist yet; the output of
# `az group exists` is compared against the literal string "false".
# NOTE: valid locations can be listed with
#   az account list-locations | jq '.[].name'
if [[ "$(az group exists --resource-group "$RESOURCE_GROUP")" == "false" ]]; then
  echo "Resource Group does not exists, so creating.."
  az group create --name "$RESOURCE_GROUP" --location "$LOCATION"
fi
# Allow the Azure CLI to install extensions without prompting; the databricks
# extension is installed automatically the first time an
# `az databricks workspace` command runs.
# Ref: https://docs.microsoft.com/en-us/cli/azure/ext/databricks/databricks?view=azure-cli-latest
az config set extension.use_dynamic_install=yes_without_prompt

# Create the Databricks workspace only when it is NOT already present in the
# resource group.
# The names are fetched as plain text, one per line (--query/--output tsv), so
# they can be compared exactly. Piping through `jq .[].name` would emit
# JSON-quoted names that never equal the bare workspace name.
# Under errexit a failing `az ... list` aborts the script here rather than
# being mistaken for "workspace missing".
existing_workspaces=$(az databricks workspace list \
  --resource-group "$RESOURCE_GROUP" \
  --query "[].name" \
  --output tsv)

# -F: fixed string, -x: whole-line match, -q: quiet (exit status only).
if ! grep -Fxq -- "$DATABRICKS_WORKSPACE" <<< "$existing_workspaces"; then
  echo "Databricks workspace does not exists, so creating.."
  az databricks workspace create \
    --location "$LOCATION" \
    --name "$DATABRICKS_WORKSPACE" \
    --sku trial \
    --resource-group "$RESOURCE_GROUP" \
    --enable-no-public-ip \
    --tags environment=demo level=level3
fi
# Look up the workspace's ARM resource id, e.g.
# /subscriptions/(subscription_id)/resourceGroups/(rg)/providers/Microsoft.Databricks/workspaces/(databricks_workspace)
# Both lookups quote their arguments the same way.
wsId=$(az resource show \
  --resource-type Microsoft.Databricks/workspaces \
  --resource-group "$RESOURCE_GROUP" \
  --name "$DATABRICKS_WORKSPACE" \
  --query id \
  --output tsv)
echo "Workspce ID: $wsId"

# Look up the workspace URL, e.g. adb-xxxxxxxxxxxxxxxx.x.azuredatabricks.net
workspaceUrl=$(az resource show \
  --resource-type Microsoft.Databricks/workspaces \
  --resource-group "$RESOURCE_GROUP" \
  --name "$DATABRICKS_WORKSPACE" \
  --query properties.workspaceUrl \
  --output tsv)
echo "Workspce URL: $workspaceUrl"
# Request an Azure AD access token for the Azure Databricks application.
# NOTE(review): this section prints access tokens to stdout; avoid running it
# where the output is stored in shared logs.
token_response=$(az account get-access-token --resource "$AZURE_DATABRICKS_APP_ID")
echo "$token_response"

# Extract the accessToken field from the JSON response.
token=$(jq -r .accessToken <<< "$token_response")
echo "Token: $token"

# Get the Azure Management Resource endpoint token.
# https://docs.microsoft.com/en-us/azure/databricks/dev-tools/api/latest/aad/service-prin-aad-token#--get-the-azure-management-resource-endpoint-token
# Each form field is passed with --data-urlencode so that characters such as
# '&', '+', '=' or spaces inside the client secret cannot corrupt the body.
az_mgmt_resource_endpoint=$(curl -X GET \
  -H 'Content-Type: application/x-www-form-urlencoded' \
  --data-urlencode 'grant_type=client_credentials' \
  --data-urlencode "client_id=$ARM_CLIENT_ID" \
  --data-urlencode "resource=$MANAGEMENT_RESOURCE_ENDPOINT" \
  --data-urlencode "client_secret=$ARM_CLIENT_SECRET" \
  "https://login.microsoftonline.com/$ARM_TENANT_ID/oauth2/token")

# Extract the access_token field; jq prints "null" when the field is absent,
# which happens when the response is an error document instead of a token.
mgmt_access_token=$(jq -r .access_token <<< "$az_mgmt_resource_endpoint")
if [[ -z "$mgmt_access_token" || "$mgmt_access_token" == "null" ]]; then
  printf 'Failed to get management access token: %s\n' "$az_mgmt_resource_endpoint" >&2
  exit 1
fi
echo "Management Access Token: $mgmt_access_token"
# Create a Databricks personal access token (PAT) valid for 5 minutes
# (300 seconds), authenticating with the AAD token plus the management token
# and workspace resource id headers.
pat_token_response=$(curl -X POST \
  -H "Authorization: Bearer $token" \
  -H "X-Databricks-Azure-SP-Management-Token: $mgmt_access_token" \
  -H "X-Databricks-Azure-Workspace-Resource-Id: $wsId" \
  -d '{"lifetime_seconds": 300,"comment": "this is an example token"}' \
  "https://$workspaceUrl/api/2.0/token/create")

# Extract the token value; jq prints "null" when the response has no
# token_value field, i.e. the request did not produce a token.
pat_token=$(jq -r .token_value <<< "$pat_token_response")
if [[ -z "$pat_token" || "$pat_token" == "null" ]]; then
  printf 'Failed to create PAT token: %s\n' "$pat_token_response" >&2
  exit 1
fi

# Print PAT token.
# NOTE(review): this writes a credential to stdout.
printf '%s\n' "$pat_token"
# Issue an authenticated GET against the workspace's 2.0 REST API and write
# the response body to stdout.
# Globals:   token, mgmt_access_token, wsId, workspaceUrl (read)
# Arguments: $1 - API path below /api/2.0/, e.g. "clusters/list"
# Returns:   curl's exit status. HTTP error statuses are not treated as
#            failures, so these optional listings stay best-effort.
databricks_get() {
  local api_path=$1
  curl -X GET \
    -H "Authorization: Bearer $token" \
    -H "X-Databricks-Azure-SP-Management-Token: $mgmt_access_token" \
    -H "X-Databricks-Azure-Workspace-Resource-Id: $wsId" \
    "https://${workspaceUrl}/api/2.0/${api_path}"
}

# List PAT tokens (OPTIONAL)
databricks_get token/list

# List current clusters (OPTIONAL); the result could be used to decide the
# next command, e.g. create, restart, terminate.
databricks_get clusters/list
# Build the cluster-creation payload from the DATABRICKS_* settings.
# All values are handed to jq as strings; the two numeric settings are
# converted with `tonumber` and the Spark configuration is parsed with
# `fromjson`, so the result is a single compact JSON object.
cluster_filter='{
  cluster_name: $name,
  spark_version: $runtime,
  node_type_id: $node,
  num_workers: ($workers | tonumber),
  autotermination_minutes: ($idle | tonumber),
  spark_conf: ($conf | fromjson)
}'
JSON_STRING=$(
  jq --null-input --compact-output \
    --arg name "$DATABRICKS_CLUSTER_NAME" \
    --arg runtime "$DATABRICKS_SPARK_VERSION" \
    --arg node "$DATABRICKS_NODE_TYPE" \
    --arg workers "$DATABRICKS_NUM_WORKERS" \
    --arg conf "$DATABRICKS_SPARK_CONF" \
    --arg idle "$DATABRICKS_AUTO_TERMINATE_MINUTES" \
    "$cluster_filter"
)
# Create a new cluster from the JSON payload held in JSON_STRING.
# Reference: https://docs.microsoft.com/en-us/azure/databricks/dev-tools/api/latest/
# The payload MUST be quoted: unquoted, any space inside it (for example in the
# cluster name or a spark_conf value) would split it into several curl
# arguments and send a truncated body.
cluster_id_response=$(curl -X POST \
  -H "Authorization: Bearer $token" \
  -H "X-Databricks-Azure-SP-Management-Token: $mgmt_access_token" \
  -H "X-Databricks-Azure-Workspace-Resource-Id: $wsId" \
  -d "$JSON_STRING" \
  "https://$workspaceUrl/api/2.0/clusters/create")

# Extract the cluster id; jq prints "null" when the response has no cluster_id
# field, in which case the raw response is reported on stderr and the script
# fails instead of printing "Cluster id: null".
cluster_id=$(jq -r .cluster_id <<< "$cluster_id_response")
if [[ -z "$cluster_id" || "$cluster_id" == "null" ]]; then
  printf 'Cluster creation failed: %s\n' "$cluster_id_response" >&2
  exit 1
fi
echo "Cluster id: $cluster_id"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment