Last active
April 29, 2024 11:29
-
-
Save louwersj/63a32107cc709951b21bccb9ba22d034 to your computer and use it in GitHub Desktop.
Script to deploy spark nodes in OCI. Deploy them in the same subnet, ensure a rule is in place for communication between nodes on TCP port 7077. Ensure the master node is called 'master' and the worker nodes are named worker-node-n. The same script can be used for both masters and workers. First deploy a master node; all worker nodes will then join the master automatically.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
# GPL License Disclaimer | |
# This script is free software: you can redistribute it and/or modify | |
# it under the terms of the GNU General Public License as published by | |
# the Free Software Foundation, either version 3 of the License, or | |
# (at your option) any later version. | |
# | |
# This script is distributed in the hope that it will be useful, | |
# but WITHOUT ANY WARRANTY; without even the implied warranty of | |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
# GNU General Public License for more details. | |
# | |
# You should have received a copy of the GNU General Public License | |
# along with this script. If not, see <https://www.gnu.org/licenses/>. | |
get_domain() {
    # Strip the leftmost host label from an FQDN and print the remainder.
    # A name without any dot is printed unchanged (mirrors the sed behaviour
    # of only substituting when the pattern matches).
    # Arguments: $1 - fully qualified domain name
    local fqdn="$1"
    echo "${fqdn#*.}"
}
get_hostname_prefix() {
    # Print the first (leftmost) label of a hostname, i.e. everything
    # before the first dot; a dotless name is printed as-is.
    # Arguments: $1 - hostname, optionally fully qualified
    local full_name="$1"
    echo "${full_name%%.*}"
}
get_role() {
    # Classify a node by hostname: any name starting with "master" is the
    # master; every other name (e.g. worker-node-n) is a worker.
    # Arguments: $1 - hostname
    local node_name="$1"
    case "$node_name" in
        master*) echo "master" ;;
        *)       echo "worker" ;;
    esac
}
set_ipv6() {
    # Disable IPv6 system-wide and on a specific NIC so the Spark daemons
    # bind to IPv4 addresses only. Requires sudo.
    # Arguments: $1 - interface name (optional, defaults to ens3 as used
    #                 on OCI shapes)
    local iface="${1:-ens3}"
    sudo sysctl -w net.ipv6.conf.all.disable_ipv6=1
    sudo sysctl -w net.ipv6.conf.default.disable_ipv6=1
    sudo sysctl -w "net.ipv6.conf.${iface}.disable_ipv6=1"
}
set_open_fwports() {
    # Permanently open the given TCP ports in firewalld's public zone,
    # then reload the firewall once so the rules take effect.
    # Arguments: one or more port numbers
    local ports=("$@")
    local port
    for port in "${ports[@]}"; do
        # quoted to avoid word-splitting/globbing of the port value
        sudo firewall-cmd --zone=public --add-port="${port}/tcp" --permanent
    done
    sudo firewall-cmd --reload
}
get_ipv4_address() {
    # Print the IPv4 address(es) configured on a network interface.
    # Arguments: $1 - interface name (optional, defaults to ens3 as used
    #                 on OCI shapes)
    # Outputs:   dotted-quad address(es) on stdout; empty if none found
    local interface="${1:-ens3}"
    local ipv4_address
    ipv4_address=$(ip -4 addr show "$interface" | grep -oP '(?<=inet\s)\d+(\.\d+){3}')
    echo "$ipv4_address"
}
update_system() {
    # Apply OS package updates plus Ksplice (Oracle Linux live-patching)
    # userspace and kernel updates. Requires sudo and the ksplice tooling,
    # which is present on OCI Oracle Linux images.
    # NOTE(review): no error checking — a failed update is silently ignored
    # and the script continues; confirm this best-effort behaviour is intended.
    sudo yum update -y
    sudo ksplice -y user upgrade
    sudo ksplice -y kernel upgrade
}
# print basic information to screen
hostname=$(hostname -f)
domain_result=$(get_domain "$hostname")
echo "Domain and subdomains: $domain_result"
prefix_result=$(get_hostname_prefix "$hostname")
echo "Hostname prefix: $prefix_result"
role_result=$(get_role "$hostname")
echo "Node role: $role_result"

# Disable IPv6 so Spark binds to IPv4 only
set_ipv6

# Define variables
SPARK_VERSION="3.5.1"                     # Change this to the desired Spark version
HADOOP_VERSION="3"                        # Change this to the compatible Hadoop version
JAVA_HOME="/usr/lib/jvm/java-11-openjdk"  # Change this to the desired Java version
DOWNLOAD_URL="https://dlcdn.apache.org/spark/spark-$SPARK_VERSION/spark-$SPARK_VERSION-bin-hadoop$HADOOP_VERSION.tgz"
SPARK_SERVICE_FILE="/etc/systemd/system/spark.service"
SPARK_CONF_DIR="/opt/spark/conf"                         # Spark configuration directory
SPARK_DEFAULTS_CONF="$SPARK_CONF_DIR/spark-defaults.conf" # Spark defaults configuration file

# Get IPv4 address from interface ens3
IPv4_ADDRESS=$(get_ipv4_address)
echo "interface ens3: $IPv4_ADDRESS"

# Build the master's FQDN: the deployment convention is that the master
# host is always named 'master' inside the same domain
master_fqdn_address="master.$domain_result"
echo "Master node FQDN: $master_fqdn_address"

# Resolve MASTER_IPv4_ADDRESS based on master_fqdn_address
MASTER_IPv4_ADDRESS=$(dig +short "$master_fqdn_address" | grep -oP '\d+(\.\d+){3}')
# fixed: this label previously said "FQDN" while printing the IPv4 address
echo "Master node IPv4: $MASTER_IPv4_ADDRESS"
# Ports to open
PORTS=(7077 8080 8081 8980 8981 2304 18080 18480 7337 7222 5181 8032 5660 5692)

# Role flags: a master node also runs a worker, so both flags are raised
# on the master; workers raise only workernode.
masternode=false
workernode=false
UPDATE=true # set to false to skip OS/kernel updates

case "$(get_role "$hostname")" in
    master)
        masternode=true
        workernode=true
        ;;
    worker)
        masternode=false
        workernode=true
        ;;
esac

# Perform updates if UPDATE is true
if [[ "$UPDATE" == true ]]; then
    update_system
fi

sudo yum install -y java-11-openjdk-devel

# Open firewall ports
set_open_fwports "${PORTS[@]}"
# Create directories
sudo mkdir -p /opt/spark

# Download and extract Spark; abort if the download fails so we never
# unpack a partial or empty archive (the script runs without set -e).
if ! sudo wget "$DOWNLOAD_URL" -O "/tmp/spark-$SPARK_VERSION.tgz"; then
    echo "Failed to download Spark from $DOWNLOAD_URL" >&2
    exit 1
fi
sudo tar -xzf "/tmp/spark-$SPARK_VERSION.tgz" -C /opt/spark --strip-components=1
sudo rm "/tmp/spark-$SPARK_VERSION.tgz"

# Set ownership to opc user and group
sudo chown -R opc:opc /opt/spark

# Build the service start command: the master starts the master daemon,
# and every node (master included) starts a worker that joins the master
# on 7077; the worker daemon itself listens on 7078.
start_command=""
if [[ "$masternode" == "true" ]]; then
    start_command+="/opt/spark/sbin/start-master.sh"
fi
if [[ "$workernode" == "true" ]]; then
    if [[ "$masternode" == "true" ]]; then
        start_command+=" && "
    fi
    # start-worker.sh replaces the deprecated start-slave.sh name (Spark 3.x)
    start_command+="/opt/spark/sbin/start-worker.sh spark://$MASTER_IPv4_ADDRESS:7077 -p 7078"
fi
# Write the systemd unit. The heredoc delimiter is intentionally unquoted
# so $start_command and $IPv4_ADDRESS are expanded NOW, at install time,
# and baked into the unit file.
sudo tee "$SPARK_SERVICE_FILE" > /dev/null <<EOF
[Unit]
Description=Apache Spark
After=network.target

[Service]
Type=forking
ExecStart=/bin/bash -c '$start_command'
ExecStop=/opt/spark/sbin/stop-all.sh
User=opc
Group=opc
Restart=on-failure
Environment="SPARK_MASTER_IP=$IPv4_ADDRESS"

[Install]
WantedBy=multi-user.target
EOF

# Reload systemd to load the new service unit file
sudo systemctl daemon-reload

# Enable Spark service to start on boot
sudo systemctl enable spark.service
# Set up environment variables | |
echo "export SPARK_HOME=/opt/spark" >> ~/.bashrc | |
echo "export PATH=\$PATH:/opt/spark/bin" >> ~/.bashrc | |
echo "export JAVA_HOME=$JAVA_HOME" >> ~/.bashrc | |
echo "export SPARK_LOCAL_IP=$IPv4_ADDRESS" >> ~/.bashrc # Set the IPv4 address here | |
source ~/.bashrc | |
# Set IPv4 in Spark configuration | |
echo "spark.master spark://$IPv4_ADDRESS:7077" >> $SPARK_DEFAULTS_CONF | |
# Enable the dynamic resource allocation in a explicit way | |
echo "spark.dynamicAllocation.enabled true" >> $SPARK_DEFAULTS_CONF | |
# Set the rest interface on the master | |
if [[ "$masternode" == "true" ]]; then | |
echo "spark.master.rest.enabled true" >> $SPARK_DEFAULTS_CONF | |
fi | |
# Start Spark service | |
sudo systemctl start spark.service | |
# Output Spark version for verification | |
/opt/spark/bin/spark-submit --version | |
echo "Apache Spark $SPARK_VERSION has been successfully installed, started, and configured as a service." |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment