Last active
June 21, 2019 16:07
-
-
Save fljdin/421ea23cff3e12848ba381e178acf2ed to your computer and use it in GitHub Desktop.
Perform health check on clusterware and associated databases
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
#
# comment : perform health check on clusterware and associated databases
# deploy  : chmod +x /usr/local/bin/check_cluster
# usage   : check_cluster [-h] [-l [cluster,asm,listener,scan,db,all]]
# Script version stamp, exported to the environment of child processes.
export version="20170426.0930"
# Print an informational line: "[INFO]<TAB>message".
# Arguments: $1 - message text (printed verbatim through %s)
cif() { printf '[INFO]\t%s\n' "$1"; }
# Print a section title in bold blue, prefixed with "----<TAB>".
# Arguments: $1 - title text
ttl() { printf '\033[1;34m----\t%s\033[0m\n' "$1"; }
# Print a success line: a bold green "[OK]" tag, a tab, then the message.
# Arguments: $1 - message text
cok() { printf '\033[1;32m[OK]\033[0m\t%s\n' "$1"; }
# Print a failure line: "[KO]<TAB>message", entirely in bold red.
# Arguments: $1 - message text
cko() { printf '\033[1;31m[KO]\t%s\033[0m\n' "$1"; }
# Print a warning line: "[WARN]<TAB>message", entirely in bold yellow.
# Arguments: $1 - message text
cwa() { printf '\033[1;33m[WARN]\t%s\033[0m\n' "$1"; }
# Print the last five lines of a file in bold magenta.
# Arguments: $1 - path of the file to read
# The command substitution is quoted so the text keeps its line structure;
# left unquoted, printf would receive one argument per word and print each
# word on its own line.
log() { printf '\033[1;35m%s\033[0m\n' "$(tail -n 5 -- "$1")"; }
# Queue of remediation hints; each element is "[target]\tcommand" with a
# literal backslash-t that is expanded when the hint is printed.
declare -a tips
# Append one remediation hint to the tips queue.
# Globals:   tips (written)
# Arguments: $1 - where to run it (e.g. root@node)
#            $2 - command or action to perform
add_tips() { tips+=("[$1]\t$2"); }
# Print every queued remediation hint, then terminate the script.
# Globals:   tips (read)
# Outputs:   blank line, title, one line per hint, closing title, blank line
# Returns:   to the caller only when the queue is empty; otherwise the
#            shell exits (bare "exit", i.e. with the status of the last echo)
show_tips() {
  local tip
  [[ ${#tips[@]} -eq 0 ]] && return
  echo
  ttl "Please apply these commands on following nodes one by one:"
  for tip in "${tips[@]}"; do
    # %b expands the "\t" separator stored in each hint; passing the hint as
    # an argument rather than as the format keeps any "%" in it intact.
    printf '%b\n' "$tip"
  done
  ttl "End of script"
  echo
  exit
}
# List log file destinations on every node, then terminate the script.
# usage: check_cluster -l [cluster,asm,listener,scan,db,all]
# Globals:   HOME, BASE, NODES, SSH, SRVCTL (read); sourcing oraenv may set
#            ORACLE_BASE and related variables in this shell
# Arguments: $1 - log family to list; defaults to "all" when empty or unset
# Outputs:   per node, a title followed by the remote "ls -1" listing
# Returns:   never; exits 1 on an unknown family, otherwise ends with "exit"
show_logs() {
  # ":-" supplies the default; "${1:all}" would be a substring expansion and
  # leave the type empty when no argument is given.
  local type=${1:-all}
  local db node
  local -a globs=()

  case "$type" in
    cluster | asm | listener | scan | db | all) ;;
    *)
      # Without this guard an unknown type selects no pattern and the remote
      # command degenerates into a bare "ls -1".
      printf 'unknown log type: %s\n' "$type" >&2
      printf '%s\n' "usage: check_cluster -l [cluster,asm,listener,scan,db,all]" >&2
      exit 1
      ;;
  esac

  if [[ "$type" == "cluster" || "$type" == "all" ]]; then
    globs+=("$HOME/log/*/alert*.log" "$HOME/log/*/crsd/crsd.log" "$HOME/log/*/ohasd/ohasd.log")
  fi
  if [[ "$type" == "scan" || "$type" == "all" ]]; then
    globs+=("$HOME/log/diag/tnslsnr/*/*/trace/*.log")
  fi
  if [[ "$type" == "asm" || "$type" == "all" ]]; then
    globs+=("$BASE/diag/asm/*/*/trace/alert_*.log")
  fi
  if [[ "$type" == "listener" || "$type" == "all" ]]; then
    globs+=("$BASE/diag/tnslsnr/*/*/trace/listener.log")
  fi
  if [[ "$type" == "db" || "$type" == "all" ]]; then
    # One word per database name printed by "srvctl config".
    for db in $($SRVCTL config); do
      # oraenv takes the SID on stdin; ORACLE_BASE is read right after it.
      . oraenv -s <<< "$db" > /dev/null
      globs+=("$ORACLE_BASE/diag/rdbms/*/$db*/trace/alert_$db*.log")
    done
  fi

  for node in $NODES; do
    ttl "Log files on $node"
    # The patterns travel as one quoted word so they are not expanded
    # locally. $SSH is a multi-word command line and must stay unquoted.
    $SSH $node ls -1 "${globs[*]}"
  done
  exit
}
# Print the one-line usage message on stdout and terminate the script.
show_help() {
  printf '%s\n' "usage: check_cluster -l [cluster,asm,listener,scan,db,all]"
  exit
}
# Set environment variables.
# NOTE(review): HOME is overwritten with the second field of the +ASM entry
# in /etc/oratab, so every later "$HOME" means that directory, not the
# user's home. The pattern is quoted (no filename globbing) and anchored so
# a comment line that merely mentions +ASM is not picked up; only the first
# matching entry is used.
HOME=$(grep -m 1 '^[+]ASM' /etc/oratab | cut -d: -f2)
BASE=$(grep '^ORACLE_BASE' "$HOME/crs/install/crsconfig_params" | cut -d= -f2 | tr ',' ' ')
# Comma-separated in the parameter file; stored space-separated for "for" loops.
NODES=$(grep '^NODE_NAME_LIST' "$HOME/crs/install/crsconfig_params" | cut -d= -f2 | tr ',' ' ')
# Name of the last node found with a ready cluster stack (empty: none yet).
lastup=""
# Set global commands.
CRSCTL="$HOME/bin/crsctl"
SRVCTL="$HOME/bin/srvctl"
AVAGENT="/usr/local/avamar/ora_rac/etc/avagent.d"
# Multi-word command line: expand it unquoted ($SSH) where it is used.
SSH="sudo -u grid ssh -o ConnectTimeout=1 -T"
# Handle the optional command-line switch; any other first argument (or
# none) falls through to the health checks below.
case "$1" in
  -l) show_logs "$2" ;; # $2 quoted: passed through as exactly one argument
  -h) show_help ;;
esac
# - Cluster
# Run "crsctl check crs" on every node over SSH and remember, in lastup, a
# node whose answer carries neither CRS-4535 nor CRS-4639.
ttl "1. Check cluster availibility"
for node in $NODES; do
  # $SSH holds a multi-word command line, hence the unquoted expansion.
  if result=$($SSH $node $CRSCTL check crs); then
    if [[ "$result" =~ CRS-4535 ]]; then
      cko "Cluster is not ready on $node"
      add_tips "root@anyserver" "$CRSCTL start cluster -n $node"
    elif [[ "$result" =~ CRS-4639 ]]; then
      cko "Cluster is down on $node"
      add_tips "root@$node" "$CRSCTL start crs # (and wait few minutes)"
    else
      cok "Cluster is ready on $node"
      lastup=$node
    fi
  else
    # The remote command itself failed (non-zero status from $SSH).
    cko "Cluster is down on $node"
    add_tips "$node" "Boot the server"
  fi
done
if [[ -z "$lastup" ]]; then
  # show_tips finishes with a bare "exit" (status 0) whenever tips are
  # queued; the subshell confines that exit so the failure status below is
  # actually returned to the caller.
  (show_tips)
  exit 1
fi
#######################################
# Report the state of every cluster resource of one type.
# Globals:   SSH, CRSCTL, NODES, lastup (read)
# Arguments: $1 - resource type used in the crsctl filter
#            $2 - optional advice printed under a degraded or failed resource
# Outputs:   one cok/cwa/cko line per resource; a start command is queued
#            through add_tips for every node missing from the state
#######################################
check_multiple_resources() {
  # All working variables are local so the caller's node, result, i, ...
  # are left untouched.
  local result record name state status node nodeno
  local -a records
  # The query runs on $lastup. On the remote side empty lines become "|" and
  # newlines become spaces, yielding one "|"-separated line of records.
  result=$($SSH $lastup <<DATA
$CRSCTL status resource -w "((TYPE = $1) AND (TARGET_SERVER != ''))" | sed -e 's/^$/|/g' | tr '\n' ' '
DATA
)
  IFS='|' read -ra records <<< "$result"
  for record in "${records[@]}"; do
    [[ "$record" == " " ]] && continue
    # $record is deliberately unquoted: word splitting squeezes blanks so
    # that cut can rely on single spaces.
    name=$(echo $record | cut -d= -f2 | cut -d' ' -f1) # first value of the record
    state=$(echo $record | rev | cut -d= -f1 | rev)    # text after the last "="
    if [[ ! "$state" =~ ONLINE ]]; then
      status=2 # not online anywhere
    elif [[ "$state" =~ OFFLINE ]]; then
      status=1 # online somewhere, offline elsewhere
    else
      cok "$name is $state"
      continue
    fi
    # Attribute each OFFLINE, in order, to the nodes not named in the state.
    nodeno=1
    for node in $NODES; do
      if [[ ! "$state" =~ "$node" ]]; then
        state=$(echo $state | sed -e "s/OFFLINE/OFFLINE on $node/$nodeno")
        add_tips "root@anyserver" "$CRSCTL start resource $name -n $node"
        nodeno=$((nodeno + 1))
      fi
    done
    if [[ $status -eq 1 ]]; then
      cwa "$name is $state"
      [[ "$2" != "" ]] && cwa "=> $2"
    elif [[ $status -eq 2 ]]; then
      cko "$name is $state"
      [[ "$2" != "" ]] && cko "=> $2"
    fi
  done
}
#######################################
# Check on how many nodes "$AVAGENT status" succeeds; exactly one is OK.
# Globals:   AVAGENT, NODES, SSH (read)
# Outputs:   a single cok/cko summary line; a start or stop hint is queued
#            through add_tips when the count is not one
# Returns:   early, without output, when $AVAGENT is not a local file
#######################################
check_avagent() {
  local node rc first
  local online=0
  local message="avagent.d"
  # Quoted so that an empty AVAGENT fails the test instead of passing it.
  test -f "$AVAGENT" || return
  for node in $NODES; do
    # Only the exit status matters; the remote stdout is discarded.
    $SSH $node $AVAGENT status > /dev/null
    rc=$?
    if [[ $rc -eq 0 ]]; then
      message="$message is ONLINE on $node"
      online=$((online + 1))
    else
      message="$message is OFFLINE on $node"
    fi
  done
  if [[ $online -eq 1 ]]; then
    cok "$message"
  elif [[ $online -eq 0 ]]; then
    cko "$message"
    # Suggest starting the agent on the first node of the list.
    read -r first _ <<< "$NODES"
    add_tips "root@$first" "$AVAGENT start"
  else
    cko "$message"
    add_tips "root@others" "$AVAGENT stop"
  fi
}
#######################################
# Check the "oracleasm status" answer of every node.
# Globals:   NODES, SSH, PATH (read)
# Outputs:   a single cok/cko summary line; configure/start hints are queued
#            through add_tips for each node reported OFFLINE
# Returns:   early, without output, when /etc/init.d/oracleasm is not
#            present on the local host
#######################################
check_asmlib() {
  local node output rc
  local total=0 healthy=0
  local message="asmlib"
  test -f /etc/init.d/oracleasm || return
  for node in $NODES; do
    total=$((total + 1))
    # $PATH is expanded locally before the command is sent to the node.
    output=$($SSH $node PATH=$PATH:/usr/sbin oracleasm status)
    rc=$?
    if [[ $rc -ne 0 ]]; then
      # A failing status command is reported as ABSENT and is not counted
      # against the overall result.
      message="$message is ABSENT on $node"
      healthy=$((healthy + 1))
    elif [[ "$output" =~ "no" ]]; then
      # Any "no" in the status output marks the node as offline.
      message="$message is OFFLINE on $node"
      add_tips "root@$node" "oracleasm configure -i # (with autostart)"
      add_tips "root@$node" "/etc/init.d/oracleasm start"
    else
      message="$message is ONLINE on $node"
      healthy=$((healthy + 1))
    fi
  done
  # Compare against the real node count instead of assuming two nodes.
  if [[ $healthy -eq $total ]]; then
    cok "$message"
  else
    cko "$message"
  fi
}
# - ASM and Diskgroups
echo
ttl "2. Check ASM and diskgroups availibility"
check_asmlib
check_multiple_resources "ora.asm.type"
check_multiple_resources "ora.diskgroup.type" "Please Contact dba team."

# - VIP and SCAN
echo
ttl "3. Check VIP and SCAN addresses"
check_multiple_resources "ora.cluster_vip_net1.type"
check_multiple_resources "ora.scan_vip.type"
check_multiple_resources "ora.scan_listener.type"

# - Instances
echo
ttl "4. Check instance availibility"
ttl "Everything must be OK - Warning means Degraded Mode"
check_multiple_resources "ora.listener.type"
check_multiple_resources "ora.database.type"
check_avagent

# Finally list the remediation commands queued by the checks, if any.
show_tips
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment