This is an hourly monitoring bash script. It checks the following and sends an email when it finds an issue:
- The connection to each EC2 instance, so that an unreachable host is found within one hour.
- Disk space usage, so that a disk-space warning arrives within one hour.
- The status of log rotation and of the log files, to monitor log rotation and the rotated log files.
- Update the variable `mail_receiver` in the bash file `diskmonoitor`, and create a cron job for it like
35 * * * * ~/diskmonoitor
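- Add the following line to `~/.profile`. This assumes that neither `~/.bash_profile` nor `~/.bash_login` exists (a bash login shell reads `~/.profile` only when both of those files are absent).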
source $HOME/aws/prepare-aws-keys.sh
The content of `diskmonoitor` is:
#!/bin/bash
# set -x

# Address that receives every mail sent by this script.
mail_receiver=bzu@gmail.com

# Agent socket used by ssh/ssh-add below; the path is built from HOME and
# HOSTNAME.
SSH_AUTH_SOCK="${HOME}/.ssh/ssh-agent.${HOSTNAME}.sock"
export SSH_AUTH_SOCK

# Fewer than 3 non-empty lines from `ssh-add -l` is treated as "keys are not
# loaded": report what the agent says and stop.
if (( $(ssh-add -l | sed '/^$/d' | wc -l) < 3 )); then
  echo -e "$(ssh-add -l) \nTry: ssh-add -l" \
    | mail -s "==SSH key is not ready" "${mail_receiver}"
  exit 1
fi

# Command text that ec2_checking writes to the remote shell's stdin.
command='echo && hostname && df -h && sort -k2 /var/lib/logrotate.status |grep mongo && ls -htl /log'

# Login users handed to ec2_checking: ec2_u for the checked host, hop_u for
# the host the connection goes through.
ec2_u=ec2-user
hop_u=ubuntu
#######################################
# Probe one EC2 host through a jump host and mail a report when needed.
#
# A mail is sent when the remote command fails, when `df -h` on the host
# shows a line whose fifth column is 80 or more, or - as a periodic status
# report - when the check runs on weekday 3 (date +%u) during hour 17.
#
# Globals:
#   mail_receiver (read) - address every mail is sent to
# Arguments:
#   $1 - command text written to the remote shell's stdin
#   $2 - user on the jump host
#   $3 - address of the jump host
#   $4 - user on the checked host
#   $5 - address of the checked host
#   $6 - label appended to the mail subject
# Returns:
#   Status of the last command executed (the mail pipeline when a mail is
#   sent, 0 when nothing had to be reported).
#######################################
ec2_checking() {
  local remote_cmd=$1 jump_user=$2 jump_host=$3 target_user=$4 target_host=$5
  local label=$6
  # Declared apart from the assignments so `local` cannot mask an exit status.
  local status warnings weekday hour

  # An array keeps every word intact without relying on word splitting.
  # The trailing '2>/dev/null' is intentionally a plain argument: it is part
  # of the command line handed to the jump host, not a local redirection.
  local -a connect=(
    ssh "${jump_user}@${jump_host}"
    ssh -o StrictHostKeyChecking=no "${target_user}@${target_host}"
    '2>/dev/null'
  )

  # 1) Reachability / remote command status.
  if ! status=$(printf '%s\n' "${remote_cmd}" | "${connect[@]}"); then
    # printf '%s' keeps remote output verbatim (no backslash interpretation).
    printf 'Find: %s.\n Try: echo " %s " | %s\n' \
      "${status}" "${remote_cmd}" "${connect[*]}" \
      | mail -s "Mongodb issue from ${label}" "${mail_receiver}"
    return
  fi

  # 2) Disk usage: keep the df lines whose fifth column is numerically >= 80
  #    (the header's non-numeric fifth column evaluates to 0).
  warnings=$(printf '%s\n' 'df -h' | "${connect[@]}" | awk '0+$5 >= 80 {print}')
  if [[ -n ${warnings} ]]; then
    printf 'some disk usage is over 80%%:\n%s \n' "${warnings}" \
      | mail -s "mongodb issue from ${label}" "${mail_receiver}"
    return
  fi

  # 3) Status report. %k is used for the hour (not %H) so values such as
  #    08/09 never reach the arithmetic test with a leading zero.
  weekday=$(date +%u)
  hour=$(date +%k)
  if [[ ${weekday} -eq 3 && ${hour} -eq 17 ]]; then
    printf '%s \n' "${status}" \
      | mail -s "mongodb status from ${label}" "${mail_receiver}"
  fi
}
# Addresses used for this group: the host the connection goes through
# (e_ore_hop) and the hosts that get checked behind it.
e_ore_hop=34.213.181.50
e_ore_arbiter_2b=172.31.30.184
e_ore_2b=172.31.25.35
e_ore_2a=172.31.45.79
e_ore_pri_2a=172.31.2.95

# "product - extender - Oregon"
# Run the check once per host, in the listed order.
oregon_targets=(
  "${e_ore_arbiter_2b}"
  "${e_ore_2b}"
  "${e_ore_2a}"
  "${e_ore_pri_2a}"
)
for host in "${oregon_targets[@]}"; do
  ec2_checking "$command" "${hop_u}" "${e_ore_hop}" "${ec2_u}" "${host}" \
    "product - Oregon"
done
The content of `prepare-aws-keys.sh` is:
#!/bin/bash
# Start a fresh ssh-agent bound to a fixed per-host socket and load the keys
# into it.
# The setup notes in this document `source` this file from ~/.profile, so it
# must not call `exit` or change shell options.

# Socket path the agent is bound to; built from HOME and HOSTNAME.
sockfile="${HOME}/.ssh/ssh-agent.${HOSTNAME}.sock"

# Stop every ssh-agent process owned by this user. Errors (for example when
# none is running) are deliberately discarded.
pkill -u "${LOGNAME}" ssh-agent &>/dev/null

# Remove a socket file left over from a previous agent before binding again.
if [ -e "${sockfile}" ]; then /bin/rm -f -- "${sockfile}"; fi

# Start the agent on that socket and import the environment it prints.
eval "$(ssh-agent -a "${sockfile}")" &>/dev/null

# Load the keys, answering passphrase prompts through the expect wrapper.
# NOTE(review): response-to.sh as shown in this document does not read its
# argument - confirm whether load-pem.sh is really used here.
~/aws/response-to.sh ~/aws/load-pem.sh &>/dev/null
The contents of `response-to.sh` and `load-pem.sh` are like:
$ cat ~/aws/{response-to.sh,load-pem.sh}
#!/usr/bin/expect -f
# Add each private key with ssh-add, typing the passphrase when ssh-add asks
# for it and then waiting for the "Identity added" confirmation.
# NOTE(review): the passphrases are hard-coded in this file (shown here as
# the placeholder PASSWORD); restrict the file's permissions accordingly.

# dev key
spawn ssh-add /home/bzu/aws/dev/dev.pem
expect -exact "Enter passphrase for /home/bzu/aws/dev/dev.pem: "
send -- "PASSWORD\r"
expect -exact "\r
Identity added: /home/bzu/aws/dev/dev.pem (/home/bzu/aws/dev/dev.pem)\r"

# product-A key
spawn ssh-add /home/bzu/aws/product-A/devops@prod.A-enc.pem
expect -exact "Enter passphrase for /home/bzu/aws/product-A/devops@prod.A-enc.pem: "
send -- "PASSWORD\r"
expect -exact "\r
Identity added: /home/bzu/aws/product-A/devops@prod.A-enc.pem (/home/bzu/aws/product-A/devops@prod.A-enc.pem)\r"

# product-B key
spawn ssh-add /home/bzu/aws/product-B/devops@prod.B-enc.pem
expect -exact "Enter passphrase for /home/bzu/aws/product-B/devops@prod.B-enc.pem: "
send -- "PASSWORD\r"
# The confirmation must repeat the spawned path in both places, like the
# other keys do; with a different path `expect -exact` never matches and
# only returns after its timeout.
expect -exact "\r
Identity added: /home/bzu/aws/product-B/devops@prod.B-enc.pem (/home/bzu/aws/product-B/devops@prod.B-enc.pem)\r"

# id_rsa: no passphrase is sent for this key, only the confirmation is awaited.
spawn ssh-add /home/bzu/.ssh/id_rsa
expect -exact "\r
Identity added: /home/bzu/.ssh/id_rsa (/home/bzu/.ssh/id_rsa)\r"
#!/bin/bash
# Add the private keys to the ssh-agent, one ssh-add call per key, in the
# order listed.
for key_file in \
  ~/aws/dev/cloudmanager_dev.pem \
  ~/aws/product-A/devops@prod.A-enc.pem \
  ~/aws/product-B/devops@prod.B-enc.pem \
  ~/.ssh/id_rsa; do
  ssh-add "${key_file}"
done
[refer](http://hints.macworld.com/article.php?story=20081217161612647)
By default, crond mails the job owner when the job fails (exits with 1 or any other non-zero status) or when the cron job produces some output.
By default, the mail-to address is `$USER@$HOSTNAME`, which is wrong here. Fix it by adding an entry to `/etc/aliases` and then running the command `newaliases`, like:
$ sudo vim /etc/aliases
$ sudo newaliases
$ cat /etc/aliases
bzu: bzu@gmail.com
$ echo test | mail -s "testing alias for cron email" bzu
`mailx` can be used to check the mail in the failed cases.
Related log rotation configuration:
$ cat /etc/logrotate.d/mongod
/log/mongod.log {
    maxsize 400M
    rotate 7
    missingok
    delaycompress
    dateext
    dateformat -%Y-%m-%d_%s
    ifempty
    postrotate
        /bin/kill -SIGUSR1 $(cat /data/mongod.lock)
    endscript
}