Skip to content

Instantly share code, notes, and snippets.

@paraita
Last active July 12, 2018 12:19
Show Gist options
  • Save paraita/4a48c78cc9e6afeaa6eb6640c36e6434 to your computer and use it in GitHub Desktop.
Save paraita/4a48c78cc9e6afeaa6eb6640c36e6434 to your computer and use it in GitHub Desktop.
Azure bench deployment script for the 600 nodes on Azure with 8.2.0
#!/bin/bash
# Trace every command as it executes so the provisioning log shows each step.
# NOTE(review): the trace also prints expanded values such as the SAS key.
set -o xtrace
# Port and account name injected into the PAMR-over-SSH Java properties.
SSH_PORT=22
SSH_USERNAME='hpcpeps'
#######################################
# Posts a diagnostic message to the "debug" Azure Storage queue.
# Globals:   STORAGEACCOUNT (read), SASKEY (read)
# Arguments: $1 - message text; sent base64-encoded, with a trailing newline
# Returns:   exit status of curl
#######################################
debug() {
  local encoded payload
  # "$1" is quoted so runs of whitespace and glob characters ('*', '?') reach
  # the queue verbatim instead of being split or expanded by the shell.
  encoded=$(printf '%s\n' "$1" | base64 -w 0)
  payload="<QueueMessage><MessageText>${encoded}</MessageText></QueueMessage>"
  curl -X POST -d "$payload" \
    "https://${STORAGEACCOUNT}.queue.core.windows.net/debug/messages?${SASKEY}"
}
# Downloads and installs the ProActive Node as a systemd service.
# First step: prepare /opt/proactive and install the command-line tools used
# by the rest of the script, retrying the installation once after 10 seconds.
mkdir -p /opt/proactive
# Stop here rather than unpack the JRE and node files into whatever directory
# the script happened to be started from.
cd /opt/proactive || {
  >&2 echo "Fatal error: Unable to enter /opt/proactive"
  halt
  exit 1
}
# Use apt-get when it is installed, otherwise fall back to yum.
if command -v apt-get >/dev/null 2>&1; then
  apt-get update
  if ! apt-get install -y wget curl jq vim tree; then
    sleep 10
    apt-get update
    if ! apt-get install -y wget curl jq vim tree; then
      >&2 echo "Fatal error: Unable to run apt-get"
      halt
      # halt only requests the shutdown and returns, so end the script
      # explicitly instead of running the remaining steps.
      exit 1
    fi
  fi
else
  # EPEL is enabled first; presumably it provides jq on these images.
  yum install epel-release -y
  if ! yum -y install wget curl jq vim tree; then
    sleep 10
    if ! yum -y install wget curl jq vim tree; then
      >&2 echo "Fatal error: Unable to run yum"
      halt
      # See above: do not keep going while the shutdown is pending.
      exit 1
    fi
  fi
fi
# Fetch the JRE archive into the current directory (skipped when the file is
# already present), unpack it, and expose it under /opt/proactive/java, the
# path the generated service unit runs java from.
wget --no-clobber https://s3.amazonaws.com/ci-materials/Latest_jre/jre-8u131-linux-x64.tar.gz
tar zxf jre-8u131-linux-x64.tar.gz
# -f/-n replace an existing link in place. A plain `ln -s` on a second run
# would fail, or create a nested link inside the JRE directory the old link
# points to.
ln -sfn /opt/proactive/jre1.8.0_131 /opt/proactive/java
# Decode the base64 JSON configuration from the waagent CustomData file and
# export each setting for the rest of the script (and any child process).
# Every expansion is quoted so values are passed to jq/sed exactly as decoded,
# without word splitting or glob expansion.
JSONCONFIG=$(base64 -d /var/lib/waagent/CustomData)
printf '%s\n' "$JSONCONFIG"

# Resource-manager address with a leading http:// or https:// removed.
# HOST and RMURL are derived from the same '.rmurl' key and hold the same value.
HOST=$(printf '%s\n' "$JSONCONFIG" | jq -r '.rmurl' | sed 's/http[s]\?:\?\/\{2\}//g')
RMURL=$HOST

CREDVALUE=$(printf '%s\n' "$JSONCONFIG" | jq -r '.credentials')
NODESOURCENAME=$(printf '%s\n' "$JSONCONFIG" | jq -r '.nodesourcename')
STORAGEACCOUNT=$(printf '%s\n' "$JSONCONFIG" | jq -r '.storageaccount')
SASKEY=$(printf '%s\n' "$JSONCONFIG" | jq -r '.saskey')
# `// empty` makes an absent or null URL come out as an empty string. Plain
# `jq -r` would print the literal word "null", which the emptiness test on
# USERCUSTOMSCRIPTURL would treat as a real URL.
USERCUSTOMSCRIPTURL=$(printf '%s\n' "$JSONCONFIG" | jq -r '.usercustomscripturl // empty')
# NOTE(review): the key is spelled 'exernalstorageaccount'. It is kept as-is
# because the producer of CustomData is not visible here; confirm before
# renaming.
EXT_STORACC_RAW=$(printf '%s\n' "$JSONCONFIG" | jq -r '.exernalstorageaccount')
# Account name: everything before the first ':'.
EXT_STOR_ACC=$(printf '%s\n' "$EXT_STORACC_RAW" | sed 's/\:.*$//')
# SAS token: what follows the ':?' separator. This presumably means the raw
# value looks like '<account>:?<sas>'; confirm against the caller.
EXT_STOR_ACC_SAS=$(printf '%s\n' "$EXT_STORACC_RAW" | sed 's/^.*\:?//')

export HOST RMURL CREDVALUE NODESOURCENAME STORAGEACCOUNT SASKEY
export USERCUSTOMSCRIPTURL EXT_STORACC_RAW EXT_STOR_ACC EXT_STOR_ACC_SAS
debug "Azure scaleset script $HOSTNAME INFO: CustomData read properly (as I can write this message)"
# Optionally download and run a user-supplied script before the node is set up.
# The URL comes from `jq -r`, which prints the literal word "null" for an
# absent key, so that value is treated the same as an empty one.
if [ -n "$USERCUSTOMSCRIPTURL" ] && [ "$USERCUSTOMSCRIPTURL" != "null" ]; then
  curl -X GET "$USERCUSTOMSCRIPTURL" > user_custom_script.sh
  chmod +x user_custom_script.sh
  ./user_custom_script.sh
  # Save the status immediately: any later command, including the `[` test
  # itself, overwrites $? and the log would always report 0.
  custom_script_status=$?
  if [ "$custom_script_status" -ne 0 ]; then
    debug "Azure scaleset script $HOSTNAME FATAL: User custom script exited with error: $custom_script_status"
    halt
    # halt only requests the shutdown and returns; stop the script explicitly.
    exit 1
  fi
fi
# JVM properties for the PNP protocol. Defined for reference; the PAMR set
# below is the one selected in PROPERTIES.
PNP_PROPERTIES='-Dproactive.net.nolocal=true -Dproactive.communication.protocol=pnp'

# Host part of HOST (text before the first ':').
PAMRHOST=$(echo $HOST | cut -d: -f1)

# JVM properties for the PAMR protocol tunnelled over SSH, assembled one
# option at a time into a single space-separated line.
PAMR_PROPERTIES="-Dproactive.net.nolocal=false"
PAMR_PROPERTIES+=" -Dproactive.communication.protocol=pamr"
PAMR_PROPERTIES+=" -Dproactive.pamr.router.address=${RMURL}"
PAMR_PROPERTIES+=" -Dproactive.pamrssh.address=localhost"
PAMR_PROPERTIES+=" -Dproactive.pamrssh.connect_timeout=300000"
PAMR_PROPERTIES+=" -Dproactive.pamr.socketfactory=ssh"
PAMR_PROPERTIES+=" -Dproactive.pamrssh.port=${SSH_PORT}"
PAMR_PROPERTIES+=" -Dproactive.pamrssh.username=${SSH_USERNAME}"
PAMR_PROPERTIES+=" -Dproactive.pamrssh.key_directory=/opt/proactive/.ssh"
PAMR_PROPERTIES+=" -Dproactive.process.builder.cleanup.time.seconds=120"
PAMR_PROPERTIES+=" -Dpa.rm.node.source.ping.frequency=75000"

# Property set handed to the node JVM.
PROPERTIES="${PAMR_PROPERTIES}"
debug "Azure scaleset script $HOSTNAME INFO: $PROPERTIES"
# Getting node.jar
#wget --timestamping --tries=10 --timeout=10 --waitretry=10 http://$HOST/rest/node.jar
# Base URL of the blob container that holds the node files.
nodefiles_url="https://${EXT_STOR_ACC}.blob.core.windows.net/nodefiles"
# --fail makes curl exit non-zero on an HTTP error instead of saving the
# server's error body as if it were the requested file. A missing node.jar or
# private key is fatal here, before the nodeconfig queue message is consumed.
if ! curl --fail -X GET "${nodefiles_url}/node820.jar?${EXT_STOR_ACC_SAS}" > node.jar; then
  debug "Azure scaleset script $HOSTNAME FATAL: Unable to download node.jar"
  halt
  # halt only requests the shutdown and returns; stop the script explicitly.
  exit 1
fi
# Getting the key
mkdir -p /opt/proactive/.ssh
if ! curl --fail -X GET "${nodefiles_url}/azure_rsa?${EXT_STOR_ACC_SAS}" > /opt/proactive/.ssh/id_rsa; then
  debug "Azure scaleset script $HOSTNAME FATAL: Unable to download the SSH private key"
  halt
  exit 1
fi
# The public key is downloaded as before, but a failure is only reported.
# NOTE(review): confirm whether the node actually needs it.
if ! curl --fail -X GET "${nodefiles_url}/azure_rsa.pub?${EXT_STOR_ACC_SAS}" > /opt/proactive/.ssh/id_rsa.pub; then
  debug "Azure scaleset script $HOSTNAME WARNING: Unable to download the SSH public key"
fi
# Account that owns /opt/proactive, created with fixed numeric IDs.
declare -r pepsuser=hpcpeps
declare -r pepsgroup=peps
declare -i pepsuid=8298
declare -i pepsgid=16000

groupadd -g "$pepsgid" "$pepsgroup"
useradd -u "$pepsuid" -g "$pepsgid" "$pepsuser"

# Hand the whole install tree to that account, then restrict the key pair to
# its owner.
#chown -R activeeon /opt/proactive
chown -R "${pepsuser}:${pepsgroup}" /opt/proactive
chmod 600 /opt/proactive/.ssh/id_rsa /opt/proactive/.ssh/id_rsa.pub
# Read one message from the 'nodeconfig' Azure queue to get this host's node
# base name and instance count. The request asks for a visibility timeout of
# 300. Original note: deletion is performed on the node-source side once nodes
# are properly registered; this script also issues a DELETE for the message
# before starting the service.
msg=$(curl "https://${STORAGEACCOUNT}.queue.core.windows.net/nodeconfig/messages?visibilitytimeout=300&${SASKEY}")
# Expansions are quoted so the XML and JSON reach grep/jq unmodified.
JSONMSG=$(printf '%s\n' "$msg" | grep -oP '<MessageText>\K[^<]+' | base64 -d)
msgId=$(printf '%s\n' "$msg" | grep -oP '<MessageId>\K[^<]+')
popReceipt=$(printf '%s\n' "$msg" | grep -oP '<PopReceipt>\K[^<]+')
# `// empty` maps an absent or null field to an empty string. Plain `jq -r`
# would print the word "null", which would slip past the check below.
NODEBASENAME=$(printf '%s\n' "$JSONMSG" | jq -r '.nodebasename // empty')
NODEINSTANCES=$(printf '%s\n' "$JSONMSG" | jq -r '.nodeinstances // empty')
if [ -z "$NODEBASENAME" ]; then
  debug "Azure scaleset script $HOSTNAME FATAL: Unable to retrieve node configuration from 'nodeconfig' queue"
  halt
  # halt only requests the shutdown and returns. Without this exit the script
  # would go on to register and start a node that has no name.
  exit 1
fi
# Record this host in the 'nodesperhost' Azure table: one entity keyed by the
# hostname (PartitionKey) and node base name (RowKey), with the node count.
NOW=$(date)
debug "$HOSTNAME INFO: Start filling the Table on $NOW"
ENTITY="{'PartitionKey':'$HOSTNAME','RowKey':'$NODEBASENAME', 'NodesCount':'$NODEINSTANCES'}"
# NOTE(review): without --fail, curl reports success on an HTTP error, so only
# transport-level failures reach the branch below. Confirm whether an HTTP
# error should be fatal as well.
if ! curl -H "Content-Type: application/json" -d "$ENTITY" -X POST "https://$STORAGEACCOUNT.table.core.windows.net/nodesperhost?$SASKEY"; then
  debug "$HOSTNAME FATAL: Unable to register the host into 'nodesperhost' table"
  halt
  # halt only requests the shutdown and returns. Exit so the script does not
  # go on to delete the queue message and start the service.
  exit 1
fi
NOW=$(date)
debug "$HOSTNAME INFO: Terminated to fill the Table on $NOW"
# Write the systemd unit that launches the ProActive node. The shell variables
# inside the here-document are expanded now, so the unit file contains their
# current values.
unit_file=/etc/systemd/system/proactive-node.service
cat <<UNIT > "${unit_file}"
[Unit]
After=sshd.service
[Service]
WorkingDirectory=/opt/proactive
ExecStart=/opt/proactive/java/bin/java -jar /opt/proactive/node.jar ${PROPERTIES} -v ${CREDVALUE} -w ${NODEINSTANCES} -r pamr://0/ -n ${NODEBASENAME} -s ${NODESOURCENAME}
User=hpcpeps
[Install]
WantedBy=default.target
UNIT
chmod 664 "${unit_file}"

# Register the new unit with systemd and enable it; it is started later.
systemctl daemon-reload
systemctl enable proactive-node.service

# Raise the inotify watch limit to support a large number of nodes.
sysctl fs.inotify.max_user_watches=524288
# Until here, if the script fails, another host could reuse this nodeconfig
# message. Once the message is deleted and the service started, a later
# failure leaves this node configuration unusable by any other host.
#
# The pop receipt is URL-encoded before it goes into the query string.
# Assumption to confirm: receipts can contain characters such as '+', '/' or
# '=', which would be altered or misread if sent raw. jq was installed earlier
# in this script; if the encoding step fails, the raw value is used as before.
encodedPopReceipt=$(jq -rn --arg v "$popReceipt" '$v | @uri') || encodedPopReceipt=$popReceipt
if ! curl -X DELETE "https://${STORAGEACCOUNT}.queue.core.windows.net/nodeconfig/messages/${msgId}?popreceipt=${encodedPopReceipt}&${SASKEY}"; then
  debug "$HOSTNAME FATAL: Unable to delete nodeconfig from the queue"
  halt
  # halt only requests the shutdown and returns. Exit so the service is not
  # started on a host that is about to go down.
  exit 1
fi
# Debug message posted on the debug queue
IP=$(hostname -i)
debug "Azure scaleset script $HOSTNAME INFO: Service is ready to start: $IP , $NODEBASENAME , $NODEINSTANCES"
# Let's start the service
systemctl start proactive-node.service
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment