@burnsie7
Created March 12, 2021 21:52
databricks_install_agent_7
#!/bin/bash
echo "Running on the driver? $DB_IS_DRIVER"
echo "Driver ip: $DB_DRIVER_IP"
cat <<EOF >> /tmp/start_datadog.sh
#!/bin/bash
echo "Running script"
if [ \$DB_IS_DRIVER ]; then
  echo "On the driver. Installing Datadog ..."

  # install the Datadog agent
  DD_AGENT_MAJOR_VERSION=7 DD_API_KEY=<YOUR_API_KEY> DD_SITE="datadoghq.com" bash -c "\$(curl -L https://s3.amazonaws.com/dd-agent/scripts/install_script.sh)"

  # WAITING UNTIL THE AGENT CONFIG FILE EXISTS BEFORE MODIFYING IT
  while [ -z "\$datadoginstalled" ]; do
    if [ -e "/etc/datadog-agent/datadog.yaml" ]; then
      datadoginstalled=TRUE
    fi
    sleep 2
  done
  echo "Agent is installed"
  # WAITING UNTIL MASTER PARAMS ARE LOADED, THEN GRABBING THE DRIVER PORT
  while [ -z "\$gotparams" ]; do
    if [ -e "/tmp/master-params" ]; then
      DB_DRIVER_PORT=\$(cat /tmp/master-params | cut -d' ' -f2)
      gotparams=TRUE
    fi
    sleep 2
  done

  # GRABBING THE DRIVER IP TO USE AS THE CLUSTER NAME
  current=\$(hostname -I | xargs)

  # ENABLING DOGSTATSD AND LOG COLLECTION IN THE MAIN AGENT CONFIG
  echo "use_dogstatsd: true
# bind on all interfaces so it's accessible from executors
bind_host: 0.0.0.0
dogstatsd_non_local_traffic: true
dogstatsd_stats_enable: false
logs_enabled: true" >> /etc/datadog-agent/datadog.yaml
  # WRITING SPARK CONFIG FILE FOR STREAMING SPARK METRICS
  echo "init_config:
instances:
    - resourcemanager_uri: http://\$DB_DRIVER_IP:\$DB_DRIVER_PORT
      spark_cluster_mode: spark_standalone_mode
      cluster_name: \$current
logs:
    - type: file
      path: /databricks/driver/logs/*.log
      source: databricks
      service: databricks
      log_processing_rules:
        - type: multi_line
          name: new_log_start_with_date
          pattern: \d{2,4}[\-\/]\d{2,4}[\-\/]\d{2,4}.*
    - type: file
      path: /databricks/driver/logs/stderr
      source: databricks
      service: databricks
      log_processing_rules:
        - type: multi_line
          name: new_log_start_with_date
          pattern: \d{2,4}[\-\/](0?[1-9]|1[012])[\-\/](0?[1-9]|[12][0-9]|3[01])
    - type: file
      path: /databricks/driver/logs/stdout
      source: databricks
      service: databricks
      log_processing_rules:
        - type: multi_line
          name: new_log_start_with_date
          pattern: \d{2,4}[\-\/](0?[1-9]|1[012])[\-\/](0?[1-9]|[12][0-9]|3[01])" > /etc/datadog-agent/conf.d/spark.d/conf.yaml
  # RESTARTING AGENT TO PICK UP THE NEW CONFIGURATION
  sudo service datadog-agent restart
fi
EOF
## LAUNCHING THE GENERATED SCRIPT IN THE BACKGROUND
if [ \$DB_IS_DRIVER ]; then
  chmod a+x /tmp/start_datadog.sh
  /tmp/start_datadog.sh >> /tmp/datadog_start.log 2>&1 & disown
fi
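
Because the generated datadog.yaml binds DogStatsD to 0.0.0.0 with dogstatsd_non_local_traffic enabled, executors can ship custom metrics to the driver's Agent over UDP on port 8125 (the DogStatsD default). A minimal sketch of sending one such metric from a shell, assuming the sender already knows the driver's address; the DRIVER_IP variable and the metric name below are illustrative and not part of this script:

# send a custom DogStatsD counter to the driver's Agent (DRIVER_IP is a placeholder you must set)
echo -n "databricks.job.records_processed:1|c|#cluster:my_cluster" | nc -u -w1 "$DRIVER_IP" 8125

The datagram uses the standard DogStatsD wire format (metric:value|type|#tags); in application code you would typically point a DogStatsD client library at the driver IP instead of using nc.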