Created
September 11, 2012 15:01
-
-
Save mattjj/3699457 to your computer and use it in GitHub Desktop.
Starter/stopper script for IPython clusters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash

#######################################
# Print a one-line usage summary for this script.
# Arguments: none
# Outputs:   "usage: <script basename> [--reuse]" on stdout
#######################################
usage()
{
    # ${0##*/} strips any leading directory from the invoked script path.
    echo "usage: ${0##*/} [--reuse]"
}
# Command-line handling. At most one argument is accepted:
#   --help    print the usage line and exit successfully
#   --reuse   remember the flag in ${reuse}; it is later forwarded to the
#             controller and suppresses copying the security file
# Anything else (extra or unknown arguments) is a usage error, so a typo no
# longer silently switches on --reuse.
reuse=""
if [[ $# -gt 1 ]]
then
    usage >&2
    exit 2
fi
case "${1-}" in
    "")
        ;;
    --reuse)
        reuse="--reuse"
        ;;
    --help)
        usage
        exit 0
        ;;
    *)
        echo "${0##*/}: unknown argument: $1" >&2
        usage >&2
        exit 2
        ;;
esac
##############################
#  Parameters (CHECK THESE)  #
##############################

# The host file lists one engine host per line, followed by the number of
# engines to start on it:
#   hostname1 4
#   hostname2 8
#   ...
#   hostnamen nengines
# ${HOME} is spelled out because "~" is not expanded inside quotes.
hostnamefile="${HOME}/.ipcluster-hostnames"
enginelocalscratch='/scratch/'   # handed to the remote engines as --work-dir
# IPython profile whose security directory holds ipcontroller-engine.json;
# an exported $profile still wins over the default.
profile="${profile:-default}"
ipcontroller="ipcontroller" # path to local ipcontroller, or just "ipcontroller" to use env
ipcluster="ipcluster" # path to remote ipcluster, or just "ipcluster" to use remote env
######################
#  Cluster Starting  #
######################

# Load the whole host list up front. A leading "~" in the configured path is
# expanded by hand, because tilde expansion does not happen inside quotes or
# on the result of a variable expansion.
hostfile_path="${hostnamefile/#\~/${HOME}}"
if [[ ! -r "${hostfile_path}" ]]
then
    echo "${0##*/}: cannot read host file: ${hostfile_path}" >&2
    exit 1
fi
hostnames=$(<"${hostfile_path}")

# Address for the controller to listen on: the second IPv4 "inet" entry that
# `ip address` prints, with the /prefix-length removed.
# NOTE(review): presumably the first entry is loopback -- confirm on hosts
# with unusual interface ordering.
ip=$(ip address | awk '/inet[^6]/ && ++seen == 2 { sub(/\/.*$/, "", $2); print $2 }')
if [[ -z "${ip}" ]]
then
    echo "${0##*/}: could not determine a local IPv4 address" >&2
    exit 1
fi

# mktemp replaces the deprecated, Debian-specific `tempfile`.
mylogfile=$(mktemp) || {
    echo "${0##*/}: could not create a log file" >&2
    exit 1
}

### start controller on this machine
# - stdout is redirected first and stderr then duplicated onto it, so both
#   streams end up in the log file.
# - ${reuse:+...} drops the argument entirely when reuse is empty instead of
#   passing an empty string to the controller.
nohup "${ipcontroller}" --ip="${ip}" ${reuse:+"${reuse}"} --log-to-file=False >"${mylogfile}" 2>&1 &
controllerpid=$!
echo -e "\n\t*** started controller locally, listening on ip ${ip} ***\t\n"
# Short pause before the engines are launched.
sleep 2
### launch engines
# For every "hostname nengines" line of the host list: unless --reuse was
# given, copy the controller's engine connection file to the host, then start
# the requested number of engines there over ssh.
engine_profile="${profile:-default}"   # fall back when no profile is configured
security_subdir=".config/ipython/profile_${engine_profile}/security"
while read -r hostname nprocs
do
    # Ignore blank lines and "#" comment lines in the host list.
    if [[ -z "${hostname}" || "${hostname}" == \#* ]]
    then
        continue
    fi
    if [[ -z "${reuse}" ]]
    then
        # The local path is built from ${HOME}, since "~" is not expanded
        # inside quotes; the "~" after the colon is left for the remote side.
        # stdin comes from /dev/null so scp cannot eat the host list.
        if scp -q "${HOME}/${security_subdir}/ipcontroller-engine.json" \
               "${hostname}:~/${security_subdir}/" </dev/null
        then
            echo "copied security file to ${hostname}"
        else
            echo "failed to copy security file to ${hostname}; skipping host" >&2
            continue
        fi
    fi
    # -f backgrounds ssh, -n detaches it from stdin (the host list).
    # On the remote side stdout is redirected before stderr is duplicated
    # onto it, so both streams are discarded.
    ssh -fn "${hostname}" "nohup ${ipcluster} engines start --n=${nprocs} --log-to-file=False --work-dir=${enginelocalscratch} > /dev/null 2>&1"
    echo "engines started on ${hostname}"
done <<< "${hostnames}"   # quoted so the line structure survives in every bash version
######################
#  Cluster Stopping  #
######################

### set up exit strategy
#######################################
# SIGINT handler: stop the engines on every listed host, then the local
# controller.
# Globals:   hostnames (read), controllerpid (read), USER (read)
# Arguments: none
# Outputs:   progress banners on stdout
#######################################
ctrl_c()
{
    local host _nprocs
    echo -e "\n\t*** Caught SIGINT, killing babies... ***\t\n"
    while read -r host _nprocs
    do
        [[ -n "${host}" ]] || continue
        # -n keeps ssh away from stdin; without it the first ssh swallows the
        # rest of the host list and only one host gets cleaned up.
        ssh -n "${host}" "pkill -f -u ${USER} 'ipcluster'"
    done <<< "${hostnames}"
    if [[ -n "${controllerpid-}" ]]
    then
        kill "${controllerpid}"
    fi
    echo -e "\n\t*** ALL DONE ***\t\n"
}
trap ctrl_c SIGINT
### follow the controller log
# tail -f stays in the foreground, so the script keeps running here and
# streams the controller log file.
echo -e "\n\t*** joining ipcontroller logfile... ***\t\n"
tail -f -- "${mylogfile}"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment