Skip to content

Instantly share code, notes, and snippets.

@mndrake
Last active May 22, 2020 19:05
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mndrake/7cf020ec8349239fa11c4a19dde375fa to your computer and use it in GitHub Desktop.
Save mndrake/7cf020ec8349239fa11c4a19dde375fa to your computer and use it in GitHub Desktop.
Databricks cluster creation and config for Databricks Connect
#!python
import functools
import json
import os
import requests
import urllib
import uuid
import configparser
# TODO: CURRENTLY ONLY WORKS FOR AWS, NEED TO ADD ADDITIONAL PARSING FOR AZURE
# Assumes that the Databricks CLI is installed and configured

# Name of the section in ~/.databrickscfg to read the host and token from.
DATABRICKS_PROFILE = 'DEFAULT'

# Request body sent to the `2.0/clusters/create` endpoint.
CLUSTER_DEFINITION = {
    "cluster_name": "dc-demo",
    # uuid.getnode() is derived from this machine's hardware address, so the
    # token is normally the same on every run from the same machine.
    "idempotency_token": str(uuid.getnode()),
    "spark_version": "6.4.x-cpu-ml-scala2.11",
    "driver_node_type_id": "i3.xlarge",
    "node_type_id": "i3.xlarge",
    "spark_conf": {},
    "autoscale": {
        "min_workers": 2,
        "max_workers": 8
    },
    "aws_attributes": {
        "first_on_demand": 1,
        "availability": "SPOT_WITH_FALLBACK",
        "zone_id": "us-west-2c",
        "spot_bid_price_percent": 100,
        "ebs_volume_count": 0
    },
    "autotermination_minutes": 60
}

# retrieve databricks config for the profile
cli_config_path = os.path.expanduser('~/.databrickscfg')
cli_config = configparser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed, so an empty result means the config is missing.
if not cli_config.read(cli_config_path):
    raise FileNotFoundError(
        f'Databricks CLI config not found or unreadable: {cli_config_path}'
    )

# set databricks constants for the profile
try:
    profile_config = cli_config[DATABRICKS_PROFILE]
    DATABRICKS_HOST = profile_config['host']
    DATABRICKS_TOKEN = profile_config['token']
except KeyError as err:
    # Re-raise the same exception type, but say which file/profile was
    # searched instead of surfacing only the bare missing key.
    raise KeyError(
        f'{err.args[0]!r} not found for profile {DATABRICKS_PROFILE!r} '
        f'in {cli_config_path}'
    ) from err
def api_request(route, body=None):
    """
    Databricks API request wrapper.
    doc page: https://docs.databricks.com/dev-tools/api/latest/index.html

    Sends a GET request when ``body`` is None, otherwise a POST request with
    ``body`` serialized as JSON. The request is authorized with
    ``DATABRICKS_TOKEN`` as a bearer token and sent to ``DATABRICKS_HOST``.

    :param route: API route relative to ``/api``, e.g. ``'2.0/clusters/create'``.
    :param body: optional JSON-serializable request payload.
    :return: the response body decoded from JSON.
    """
    # Function-scope imports: a bare `import urllib` does not guarantee that
    # the `urllib.parse` submodule is loaded, and URL paths must always be
    # joined with forward slashes (os.path.join uses backslashes on Windows).
    import posixpath
    import urllib.parse

    url = urllib.parse.urljoin(DATABRICKS_HOST, posixpath.join('api', route))
    headers = {'Authorization': f'Bearer {DATABRICKS_TOKEN}'}
    if body is None:
        response = requests.get(url, headers=headers)
    else:
        response = requests.post(url, headers=headers, json=body)
    return response.json()
# create cluster
cluster_info = api_request('2.0/clusters/create', CLUSTER_DEFINITION)

# A response without `cluster_id` means the cluster was not created
# (presumably an error payload from the API -- confirm against the API docs).
# Fail here with the full response instead of a bare KeyError below, and
# before the databricks-connect config file is overwritten.
if 'cluster_id' not in cluster_info:
    raise RuntimeError(f'cluster creation failed: {cluster_info}')

# create cluster config
cluster_config = {
    "host": DATABRICKS_HOST,
    "token": DATABRICKS_TOKEN,
    "cluster_id": cluster_info['cluster_id'],
    "port": "15001"
}

# update databricks-connect with cluster config
with open(os.path.expanduser('~/.databricks-connect'), 'w') as f:
    json.dump(cluster_config, f)

print('-- cluster info --\n', cluster_info)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment