Skip to content

Instantly share code, notes, and snippets.

@mattjj
Created September 11, 2012 15:01
Show Gist options
  • Save mattjj/3699457 to your computer and use it in GitHub Desktop.
Save mattjj/3699457 to your computer and use it in GitHub Desktop.
Starter/stopper script for IPython clusters
#!/bin/bash
# Print a one-line synopsis of the command line to stdout.
# The program name is the basename of $0.
usage()
{
  printf 'usage: %s [--reuse]\n' "${0##*/}"
}
# Command-line handling. Accepted forms:
#   (no argument)   start a fresh cluster
#   --reuse         sets reuse="--reuse", which is later handed to ipcontroller
#   -h | --help     print the synopsis and exit 0
# Anything else is rejected with exit status 2 instead of being silently
# treated as --reuse.
reuse=""
if [ $# -gt 1 ]
then
  echo "${0##*/}: too many arguments" >&2
  usage >&2
  exit 2
fi
case "${1:-}" in
  "")
    ;;
  --reuse)
    reuse="--reuse"
    ;;
  -h|--help)
    usage
    exit 0
    ;;
  *)
    echo "${0##*/}: unknown option: $1" >&2
    usage >&2
    exit 2
    ;;
esac
##############################
# Parameters (CHECK THESE) #
##############################
# The host file lists one machine per line as "<hostname> <number of engines>":
# hostname1 4
# hostname2 8
# ...
# hostnamen nengines
# ${HOME} is spelled out because a "~" inside quotes is never expanded by the
# shell and would be taken as a literal directory name.
hostnamefile="${HOME}/.ipcluster-hostnames"
enginelocalscratch='/scratch/' # handed to the remote engines as --work-dir
ipcontroller="ipcontroller" # path to local ipcontroller, or just "ipcontroller" to use env
ipcluster="ipcluster" # path to remote ipcluster, or just "ipcluster" to use remote env
######################
# Cluster Starting #
######################
# Starts ipcontroller on this machine, then starts the engines on every host
# listed in the host file. Sets hostnames, mylogfile and controllerpid, which
# the shutdown code further down relies on.

# A "~" inside a quoted string is not expanded by the shell, so resolve a
# leading tilde by hand before reading the host file.
hostfile_path="${hostnamefile/#\~/${HOME}}"
if [ ! -r "${hostfile_path}" ]
then
  echo "${0##*/}: cannot read host file ${hostfile_path}" >&2
  exit 1
fi
hostnames=$(<"${hostfile_path}")

# IPython profile whose security directory holds the engine connection file.
# Uses the value of $profile when it is already set, otherwise "default".
profile="${profile:-default}"
security_dir="${HOME}/.config/ipython/profile_${profile}/security"

# Address the controller listens on: the second IPv4 entry printed by
# `ip address` (the first one is usually the loopback interface).
ip=$(ip address | grep 'inet[^6]' | sed -n '2p' | awk '{print $2;}' | sed 's/\/.*$//')

# mktemp replaces the deprecated, Debian-only `tempfile`.
mylogfile=$(mktemp "${TMPDIR:-/tmp}/ipcontroller-log.XXXXXX") || exit 1

### start controller on this machine
# Build the argument list in an array so that an unset --reuse does not turn
# into an empty-string argument.
controller_args=(--ip="${ip}" --log-to-file=False)
if [ -n "${reuse}" ]
then
  controller_args+=("${reuse}")
fi
# Redirect stdout first, then point stderr at it, so both land in the log file
# that is followed at the end of the script.
nohup "${ipcontroller}" "${controller_args[@]}" >"${mylogfile}" 2>&1 &
controllerpid=$!
echo -e "\n\t*** started controller locally, listening on ip ${ip} ***\t\n"
# Pause before the engines are launched (presumably so the controller has
# written its connection file by then).
sleep 2

### launch engines
while read -r hostname nprocs
do
  # Ignore blank lines and "#" comments in the host file.
  case "${hostname}" in
    ''|'#'*) continue ;;
  esac
  if [ -z "${reuse}" ]
  then
    # Without --reuse each engine host needs the connection file from this
    # controller. The remote "~" is left for the remote side to expand.
    if scp -q "${security_dir}/ipcontroller-engine.json" \
      "${hostname}:~/.config/ipython/profile_${profile}/security/" </dev/null
    then
      echo "copied security file to ${hostname}"
    else
      echo "${0##*/}: failed to copy security file to ${hostname}" >&2
    fi
  fi
  # -n keeps ssh from swallowing the rest of the host list on stdin; -f sends
  # ssh to the background once the remote command has been started.
  # NOTE(review): "2>&1 > /dev/null" only discards the remote stdout; stderr
  # stays attached to the ssh session. Left unchanged in case that output is
  # wanted locally; use "> /dev/null 2>&1" to silence the engines completely.
  ssh -fn "${hostname}" "nohup ${ipcluster} engines start --n=${nprocs} --log-to-file=False --work-dir=${enginelocalscratch} 2>&1 > /dev/null"
  echo "engines started on ${hostname}"
done <<< "${hostnames}"
######################
# Cluster Stopping #
######################
### set up exit strategy
# SIGINT handler: stop the engines on every host, then the local controller.
# Globals read: hostnames (lines of "<host> <nengines>"), controllerpid, USER.
# ${USER} is expanded locally, so the remote pkill targets the same user name
# as the one running this script.
ctrl_c()
{
  local host _nprocs
  echo -e "\n\t*** Caught SIGINT, killing babies... ***\t\n"
  while read -r host _nprocs
  do
    # Ignore blank lines and "#" comments in the host list.
    case "${host}" in
      ''|'#'*) continue ;;
    esac
    # -n is essential: without it ssh reads the loop's stdin and consumes the
    # remaining host lines, so only the first host would be cleaned up.
    ssh -n "${host}" "pkill -f -u ${USER} 'ipcluster'"
  done <<< "${hostnames}"
  if [ -n "${controllerpid:-}" ]
  then
    kill "${controllerpid}"
  fi
  echo -e "\n\t*** ALL DONE ***\t\n"
}
# Run ctrl_c when the user interrupts the script with Ctrl-C.
trap ctrl_c SIGINT
### follow the controller log
echo -e "\n\t*** joining ipcontroller logfile... ***\t\n"
# Quoted, and guarded with --, so an unusual temp path is passed as one operand.
tail -f -- "${mylogfile}"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment