jab416171/monitorFreeSpace.sh

## monitorFreeSpace.sh
#!/bin/sh
#
# (c) 2010 Western Digital Technologies, Inc. All rights reserved.
#
# monitorFreeSpace.sh
#  Note: this is called by cron
#
#
PATH=/sbin:/bin:/usr/bin:/usr/sbin:/usr/local/bin:/usr/local/sbin

. /usr/local/sbin/share-param.sh
. /etc/nas/alert-param.sh
. /etc/system.conf
. /etc/wdcomp.d/wd-nas/wd-nas.conf 2> /dev/null

# Alert is raised above MAX_USAGE_THRESH and cleared only at or below
# MIN_USAGE_THRESH, so readings in between keep the current state.
MAX_USAGE_THRESH=95
MIN_USAGE_THRESH=93

# check DataVolume percent used
percentUsed=$(getDataVolumePercentUsed.sh)
echo "% used=${percentUsed} MAX=${MAX_USAGE_THRESH}"

# Classify the reading once. An empty or non-numeric value is "unknown" so it
# never reaches the integer comparisons (which would only print test errors).
usageLevel=unknown
if [ "${percentUsed}" -eq "${percentUsed}" ] 2>/dev/null; then
	if [ "${percentUsed}" -gt "${MAX_USAGE_THRESH}" ]; then
		usageLevel=high
	elif [ "${percentUsed}" -le "${MIN_USAGE_THRESH}" ]; then
		usageLevel=low
	else
		usageLevel=hold
	fi
fi

# /tmp/tst_freespace forces the alert path regardless of the reading
if [ -f /tmp/tst_freespace ]; then
	usageLevel=high
fi

# Without a status-file path neither branch can track state; do nothing.
if [ -z "${FREESPACE_STATUS_FILE}" ]; then
	usageLevel=noconfig
fi

case "${usageLevel}" in
	high)
		if [ ! -f "${FREESPACE_STATUS_FILE}" ]; then
			sendAlert.sh "${diskNearCapacity}"
		fi
		# Checked again on purpose: the state file is only created (and the
		# update counter bumped) if it is still absent after the alert.
		if [ ! -f "${FREESPACE_STATUS_FILE}" ]; then
			touch "${FREESPACE_STATUS_FILE}"
			incUpdateCount.pm system_state
		fi
		;;
	low)
		if [ -f "${FREESPACE_STATUS_FILE}" ]; then
			rm -f "${FREESPACE_STATUS_FILE}"
			incUpdateCount.pm system_state
		fi
		;;
	unknown)
		echo "monitorFreeSpace: unusable percent-used value \"${percentUsed}\"" >&2
		;;
	noconfig)
		echo "monitorFreeSpace: FREESPACE_STATUS_FILE is not set" >&2
		;;
esac


## monitorio.sh
#!/bin/bash
#
# (c) 2013 Western Digital Technologies, Inc. All rights reserved.
#
# monitorio - Monitor disk activity, and put system into standby.  Also, monitor to trigger file tally process
##
PATH=/sbin:/bin:/usr/bin:/usr/sbin:/usr/local/bin:/usr/local/sbin
. /lib/lsb/init-functions
source /etc/priority.conf
source /etc/system.conf
source /usr/local/sbin/drive_helper.sh
source /etc/wdcomp.d/wd-nas/wd-nas.conf 2> /dev/null
[ -f /usr/local/sbin/ledConfig.sh ] && . /usr/local/sbin/ledConfig.sh

# Paths used by the monitor loop and the tally helper
MIN_SINCE_DISK_ACCESS="/tmp/minutes_since_disk_access"
TALLY_PIDFILE="/var/run/tally.pid"
TALLY_DAEMON="/usr/local/bin/tally"
TALLY_PIPE="/var/local/nas_file_tally/tallyd.pipe"
MEDIACRAWLER_REWALK="/tmp/mediacrawler_rewalk"

# Put this shell into its own cgroup and write its cpu.shares value
# (original intent per author: limit monitorio to 20% of the CPU)
MONITORIO_CPU_SHARE="20"
CGROUP_MONITORIO="/sys/fs/cgroup/monitorio"
mkdir -p "${CGROUP_MONITORIO}"
printf '%s\n' "${MONITORIO_CPU_SHARE}" > "${CGROUP_MONITORIO}/cpu.shares"
printf '%s\n' "$$" > "${CGROUP_MONITORIO}/tasks"

# File holding the df "used" figure recorded at the last trigger
total_df_file="${WD_NAS_VAR_DIR}/total_df"

# trigger tally (or share size) when df result changes by TALLY_TRIGGER_THRESH_KB
TALLY_TRIGGER_THRESH_KB="1000000"

# Start the tally daemon (if not already running) and stream a recursive
# listing of /shares into $TALLY_PIPE.
#
# Globals read: TALLY_PIPE, TALLY_PIDFILE, TALLY_DAEMON, monitorio_nice
# Output format (written to $TALLY_PIPE, records terminated by "~~~~"):
#   #4:<size>:<directory>/<name>~~~~   one per listed entry
#   #0:0:/tmp/TALLYEND.DONE~~~~        end marker
file_tally() {
        if [ ! -p "$TALLY_PIPE" ]; then
                mkfifo "$TALLY_PIPE"
        fi
        start-stop-daemon --start --quiet --oknodo --nicelevel $monitorio_nice --pidfile $TALLY_PIDFILE --make-pidfile --background --exec $TALLY_DAEMON --
        ls -s1NRA --block-size=1 /shares | awk '
        {
                if ($1 ~ /^[0-9]+$/) {
                        # Entry line: "<size> <name>". Strip the size column to get
                        # the name; searching for $2 inside $0 would match digits of
                        # the size itself (e.g. "12 1") and lose leading spaces.
                        name = $0;
                        sub(/^[ \t]*[0-9]+ /, "", name);
                        printf("#4:%s:%s/%s~~~~",$1,current_dir,name);
                }
                else {
                        if ($1 != "total") {
                                # Directory header "<path>:" - drop the trailing colon
                                current_dir = (substr($0,1,length($0)-1));
                        }
                }
        }
        END {
                printf("#0:0:/tmp/TALLYEND.DONE~~~~");
        }
        ' > "$TALLY_PIPE"
}

# Block until the /tmp/ready marker file exists, logging a message and
# sleeping 5 seconds between checks.
wait_system_ready() {
    until [ -f "/tmp/ready" ]
    do
        logger -s "$0: waiting for system to become ready.."
        sleep 5
    done
}

# Scratch files: combined listing, internal shares only, external shares only
tmp_share_size="/tmp/share_size"
tmp_internal_share_size="/tmp/internal_share_size"
tmp_external_share_size="/tmp/external_share_size"

# Run getShareSize.sh for every non-hidden directory directly under /shares,
# store the output as the internal listing, then rebuild the combined listing
# (internal followed by external).
calculate_share_size() {
        find /shares -maxdepth 1 -mindepth 1 -type d ! -name ".*" -print0 \
                | xargs -0 -I {} getShareSize.sh {} > "${tmp_internal_share_size}"
        cat "${tmp_internal_share_size}" "${tmp_external_share_size}" > "${tmp_share_size}"
}

# Run getShareSize.sh for every symlink directly under /shares, store the
# output as the external listing, then rebuild the combined listing
# (internal followed by external).
calculate_external_share_size() {
        find /shares -maxdepth 1 -mindepth 1 -type l -print0 \
                | xargs -0 -I {} getShareSize.sh {} > "${tmp_external_share_size}"
        cat "${tmp_internal_share_size}" "${tmp_external_share_size}" > "${tmp_share_size}"
}

# Decide whether DataVolume usage moved enough to warrant a tally / share-size
# refresh.
#
# Globals read: total_df_file, TALLY_TRIGGER_THRESH_KB
# Output: prints "trigger" when no previous figure is stored, or when the df
#         "used" column differs from the stored figure by at least the
#         threshold; prints an empty line otherwise.
# Side effect: on "trigger" the current figure is written to $total_df_file.
checkDataTrigger() {
	local used_kb last_kb result

	# One df snapshot, first matching line only. Several matching lines used
	# to produce "triggertrigger" and a multi-line state file.
	used_kb=$(df | awk '/\/DataVolume/ { print $3; exit }')

	result="trigger"
	if [ -f "${total_df_file}" ]; then
		last_kb=""
		# read may return non-zero on a file without trailing newline; the
		# value is still assigned, so the status is ignored on purpose
		IFS= read -r last_kb < "${total_df_file}"
		result=""
		if [ -n "${used_kb}" ]; then
			result=$(awk -v cur="${used_kb}" -v prev="${last_kb}" -v thresh="${TALLY_TRIGGER_THRESH_KB}" \
				'BEGIN { x = cur - prev; abs_x = (x >= 0) ? x : -x; if (abs_x >= thresh) printf("trigger") }')
		fi
	fi
	if [ "$result" == "trigger" ]; then
		printf '%s\n' "${used_kb}" > "${total_df_file}"
	fi
	echo "$result"
}

# Make sure the directory that will hold the share-size cache exists
mkdir -p `dirname ${SHARE_SIZE_CACHE}`
# Integer-typed globals used by the standby bookkeeping below
declare -i sleepcount
declare -i rootdisk_thresh
declare -i enterStandbyTime=0
# Start from a clean state: no standby marker, no pending media rewalk marker
rm -f /tmp/standby
rm -f ${MEDIACRAWLER_REWALK}
# Load standby settings (resetSleepCount reads standby_time / standby_enable;
# presumably defined by this file - confirm)
source /etc/standby.conf

# Reset the idle-minute counter and refresh the standby thresholds.
#
# Globals written: sleepcount, rootdisk_thresh, and either standby_time and
# standby_enable directly (emergency run level) or whatever /etc/standby.conf
# sets when sourced (any other run level).
resetSleepCount() {
	local run_level

	sleepcount=0
	run_level=$(getRunLevel.pl)

	if [ "${run_level}" != "emergency" ]; then
		# normal operation: take the thresholds from the config file
		source /etc/standby.conf
		rootdisk_thresh=$(expr $standby_time - 1)
	else
		# emergency run level: go to standby as early as possible (1 minute)
		standby_time=1
		rootdisk_thresh=1
		standby_enable="enabled"
	fi
}

# Root device: value of the root= parameter on the kernel command line
currentRootDevice=$(awk -F= 'BEGIN { RS = " " } $1 == "root" { print $2 }' /proc/cmdline)
# Bare device names, as they appear in column 3 of /proc/diskstats
rootDisk=$(basename ${currentRootDevice})
dataVolumeDisk=$(basename ${dataVolumeDevice})
# Drives to probe and spin down (from the sourced internalDrives helper)
drivelist=($(internalDrives))

# Publish "0 minutes since last disk access" at startup
echo "0" > ${MIN_SINCE_DISK_ACCESS}

# wait for system to become ready
wait_system_ready

# run file tally at startup (in the background)
if [ ! -f "$TALLY_DAEMON" ]; then
	logger "Tally daemon not installed, exiting tally function"

	## if tally not present, fall back to computing share sizes directly and
	## point the share-size cache at the combined listing
	calculate_share_size
	calculate_external_share_size
	# -f: the cache entry may not exist yet; a missing file is not an error
	rm -f ${SHARE_SIZE_CACHE}
	ln -s ${tmp_share_size} ${SHARE_SIZE_CACHE}
else
	file_tally &
fi

# "debug" as first argument enables kernel block_dump logging and clears dmesg
if [ "$1" == "debug" ]; then
        echo "1" > /proc/sys/vm/block_dump
        dmesg -c > /dev/null
fi

# Main loop. Outer loop: wait until at least one drive is awake, handle the
# wake-up, then enter the inner loop. Inner loop: once a minute compare
# /proc/diskstats counters, count idle minutes, and put the drives into
# standby when the idle count reaches standby_time.
while :; do

    # Probe drives in order; stop at the first one that does not report
    # "standby". standby_test ends up 0 only if every probed drive is in standby.
    # NOTE(review): if drivelist is empty standby_test is never set and the
    # test below errors out into the else branch - confirm this is acceptable.
    for i in ${drivelist[@]}; do
            hdparm -C $i | grep -q "standby"
            standby_test=$?
            [ "$standby_test" -eq "1" ] && break
    done

    if [ "$standby_test" -eq "0" ]; then
        # all drives still in standby: poll again in 5 seconds
        sleep 5
        continue
    else
        # /tmp/standby is created below when this script spins the drives
        # down, so finding it here means the drives have just woken up.
        if [ -f /tmp/standby ]; then
	    standby_since=`stat --format %z /tmp/standby`
            rm -f /tmp/standby
            # Cancel blue color and turn on green if applicable
            ledCtrl.sh LED_EV_DISK_STBY LED_STAT_OK
            ### This will allow individual components to register for wakupevents
            run-parts /etc/nas/wakeup.d
            ###
            touch ${MEDIACRAWLER_REWALK}
            currentTime=`date +%s`
            timeInStandby=`expr $currentTime - $enterStandbyTime`
            echo "exit standby after $timeInStandby (since $standby_since)"
            logger "exit standby after $timeInStandby (since $standby_since)"
            if [ "$1" == "debug" ]; then
                    dmesg -c
            fi
        fi

		resetSleepCount

        echo $sleepcount > ${MIN_SINCE_DISK_ACCESS}
        trigger_tally=0
        # Baseline counters from /proc/diskstats (column 3 is the device name;
        # columns 6 and 10 are presumably sectors read / sectors written per
        # the kernel iostats layout - confirm)
        iow_root=`awk -v disk="${rootDisk}" '{if ($3==disk) print $10}' /proc/diskstats`
        ior_datavol=`awk -v disk="${dataVolumeDisk}" '{if ($3==disk) print $6}' /proc/diskstats`
        iow_datavol=`awk -v disk="${dataVolumeDisk}" '{if ($3==disk) print $10}' /proc/diskstats`
        if [ "$1" == "debug" ]; then
                echo "Init          ior_datavol=$ior_datavol ior_datavol2=$ior_datavol2"
                echo "              iow_datavol=$iow_datavol iow_datavol2=$iow_datavol2"
                echo "              iow_root=$iow_root       iow_root2=$iow_root2"
                dmesg -c
        fi

        while :; do
            # Wait for 60 seconds
            sleep 60
            # Re-sample the same counters for comparison with the previous minute
            iow_root2=`awk -v disk="${rootDisk}" '{if ($3==disk) print $10}' /proc/diskstats`
            ior_datavol2=`awk -v disk="${dataVolumeDisk}" '{if ($3==disk) print $6}' /proc/diskstats`
            iow_datavol2=`awk -v disk="${dataVolumeDisk}" '{if ($3==disk) print $10}' /proc/diskstats`

            # check for file tally sync
            # (only when the data volume was written to AND df usage moved by
            # at least the trigger threshold)
            if [ "$iow_datavol" -ne "$iow_datavol2" ] && [ "`checkDataTrigger`" == "trigger" ]; then
				incUpdateCount.pm data_volume_write
				monitorFreeSpace.sh

				if [ -f $TALLY_DAEMON ]; then
					# also run tally if installed
					pidofproc -p $TALLY_PIDFILE $TALLY_DAEMON >/dev/null

					# non-zero status: start a new tally run (runs in the foreground here)
					if [ $? -ne 0 ]; then
							file_tally
					fi
					createBackupTally.sh

				else
					## if tally not present, then call calculate_share_size
					calculate_share_size
				fi
            fi

			# calculate size of external shares.  Note that this must be done outside of "checkDataTrigger" so that it is done more often.
			calculate_external_share_size

            # use data volume writes until near sleep threshold, then check all disk writes
            # NOTE(review): the next line stores the literal string "sleepcount"
            # (no "$"), and old_sleepcount is not read anywhere in this block.
            old_sleepcount=sleepcount
            if [ $((sleepcount)) -eq $((rootdisk_thresh)) ] && [ "$iow_root" -eq "$iow_root2" ]; then
                sleepcount=$((sleepcount+1))
            elif  [ $((sleepcount)) -lt $((rootdisk_thresh)) ] && [ "$ior_datavol" -eq "$ior_datavol2" ] && [ "$iow_datavol" -eq "$iow_datavol2" ]; then
                sleepcount=$((sleepcount+1))
            else
                # activity seen (or counter past the threshold): start counting again
                resetSleepCount
            fi
            echo $sleepcount > ${MIN_SINCE_DISK_ACCESS}
            if [ "$1" == "debug" ]; then

                [ "$sleepcount" != "0" ] &&  echo "sleepcount: $sleepcount"
                [ "$sleepcount" == "0" ] && echo "Disk activity:"
                echo "... ior_datavol=$ior_datavol      ior_datavol2=$ior_datavol2"
                echo "... iow_datavol=$iow_datavol      iow_datavol2=$iow_datavol2"
                echo "... iow_root=$iow_root    iow_root2=$iow_root2"
                # dmesg -c
            fi
            # this minute's samples become the baseline for the next minute
            ior_datavol=$ior_datavol2
            iow_datavol=$iow_datavol2
            iow_root=$iow_root2

            # Enter standby only if enabled, the idle count hit standby_time,
            # and no SMART test is reported as in progress
            smartTestStatus=`getSmartTestStatus.sh | awk '{print $1}'`
            if [ "$standby_enable" == "enabled" ] && [ "$sleepcount" -eq "$standby_time" ] && [ "$smartTestStatus" != "inprogress" ]; then
                touch /tmp/standby
                enterStandbyTime=`date +%s`
                echo "Enter standby"
                if [ "$1" == "debug" ]; then
                        echo "`date`: Enter standby "
                        dmesg -c > /dev/null
                fi
                for i in ${drivelist[@]}; do
                        hdparm -y $i >/dev/null
                done

                # turn on solid blue if applicable
                ledCtrl.sh LED_EV_DISK_STBY LED_STAT_IN_PROG
                sleep 5
                # leave the inner loop; the outer loop now polls for wake-up
                break
            fi
        done
    fi
done

## monitorSmartStatus.sh
#!/bin/sh
#
# (c) 2010 Western Digital Technologies, Inc. All rights reserved.
#
# monitorSmartStatus.sh
#  Note: this is called by cron
#
#
PATH=/sbin:/bin:/usr/bin:/usr/sbin:/usr/local/bin:/usr/local/sbin

. /etc/system.conf
. /usr/local/sbin/share-param.sh
. /etc/nas/alert-param.sh
. /usr/local/sbin/drive_helper.sh
[ -f /usr/local/sbin/ledConfig.sh ] && . /usr/local/sbin/ledConfig.sh

# Skip the check while the /tmp/standby marker exists
if [ -f /tmp/standby ]; then exit 0; fi

# Nothing to check on a system with no internal drives
case "${DVC_DRIVE_COUNT}" in
	0) exit 0 ;;
esac

# Ask every internal drive for its SMART health; any drive whose report does
# not contain "PASSED" marks the whole check as failed.
atLeastOneDriveFailed=FALSE
driveList=( $(internalDrives) )
for drive in "${driveList[@]}"; do
    if ! smartctl -d ata -H ${drive} | grep -q PASSED; then
        atLeastOneDriveFailed=TRUE
    fi
done

# /tmp/tst_smart forces the failure path; /tmp/smart_fail records that the
# alert has already been raised so it is sent only once per failure episode.
if [ "$atLeastOneDriveFailed" != "TRUE" ] && [ ! -f /tmp/tst_smart ]; then
	rm -f /tmp/smart_fail
else
	if [ ! -f /tmp/smart_fail ]; then
		sendAlert.sh "${driveSmartFail}"
		ledCtrl.sh LED_EV_DISK_SMART LED_STAT_ERR
		incUpdateCount.pm system_state
	fi
	touch /tmp/smart_fail
fi

## monitorTemperature.sh
#!/bin/sh
#
# (c) 2012 Western Digital Technologies, Inc. All rights reserved.
#
# monitorTemperature.sh
# Note: This is called by init-script monitorTemperature
#
# This script is responsible to monitor temperature of internal drives
# and take actions if temperature is not normal
#
# It takes following actions depending on temperature of internal drives
# if temperature of any drive > TF
#       - change led to RED
#       - send shutdown alert
#       - change run-level to emergency
#       - exit
#
# if temperature of any drive between T2 & TF
#       - send shutdown-warning alert
#       - start a shutdown-warning timer of 1 HR
#       - if timer expires change run-level to emergency
#       - exit
#
# if temperature of any drive between T1 & T2
#       - send high-temperature warning alert
#       - exit
#
# To restart all services & get back to normal
#   if temperature of all drives <= T2 - Hysterisis
#       - send normal temperature alert
#       - change led to GREEN
#       - change run-level to application
#       - exit
#

## --- Includes
PATH=/sbin:/bin:/usr/bin:/usr/sbin:/usr/local/bin:/usr/local/sbin

source /usr/local/sbin/share-param.sh
source /etc/system.conf
source /etc/nas/alert-param.sh         # ( for alerts )
source /usr/local/sbin/drive_helper.sh # ( for internalDrives() )
source /usr/local/sbin/wdStatus.sh     # ( for $WDST_XXX status codes )
source /etc/wdcomp.d/wd-nas/temperature-monitor.conf
[ -f /usr/local/sbin/ledConfig.sh ] && . /usr/local/sbin/ledConfig.sh

## --- Constants


## Acronyms
## Short aliases for the state codes. The STATE_* values are not defined in
## this file (presumably they come from one of the sourced config files -
## confirm). They are used below as array indices and in integer comparisons.
NM=${STATE_NORMAL}
WR=${STATE_WARNING}
SW=${STATE_SHUTDOWN_WARNING}
SI=${STATE_SHUTDOWN_IMMEDIATE}
UK=${STATE_UNKNOWN}

## Internal Constants
## NB: TURN OFF BEFORE CHECKING-IN
DEBUG=0 ## for debugging

## For Testing
##      - set TEST=1
##      - set DEBUG=1
##      - disable the infinite "for MONITOR_TIMER" loop
##      - enter different temperatures on input
##      - to test with infinite "for MONITOR_TIMER" loop set different values for TEMP_TX
TEST=0

## Logger facility
FAC=local2

## Table of allowed actions based on last & curr state
## Each eval defines an array named ACTION<last_state>; it is indexed by the
## current state to obtain the name of the handler function to run.
## last | curr --       NM            WR               SW                SI              UK
##   |
eval ACTION${NM}="( act_noop      act_warning      act_start_timer   act_emergency   act_noop        )"
eval ACTION${WR}="( act_normal    act_noop         act_start_timer   act_emergency   act_noop        )"
eval ACTION${SW}="( act_normal    act_hysterisis   act_check_timer   act_emergency   act_check_timer )"
eval ACTION${SI}="( act_restart   act_hysterisis   act_cooldown      act_cooldown    act_cooldown    )"
eval ACTION${UK}="( act_UK_2_NM   act_warning      act_start_timer   act_emergency   act_noop        )" ## should never be called as UK state is never saved


## --- Global Variables

## drive_list: drives to poll (filled in by the main script)
## last_state: state read back from the state file at the start of a run
## curr_state / curr_temp: set by determineCurrentState, read by the handlers
drive_list=
last_state=
curr_state=
curr_temp=


## --- Functions

## Read one drive's temperature from its SMART attribute table.
##
## Input:
##   $1 - drive device node (e.g. /dev/sda)
##
## Output:
##   stdout  - the temperature, only when it is a plain non-negative integer
##   returns - ${WDST_OK} on success, ${WDST_NOTFOUND} otherwise
##
## Side effect: the raw value (even when unusable) is written to ${SMART_STATE}.
##
## E.g. getDriveTemperature "/dev/sda"
getDriveTemperature()
{
    local dev="${1}"
    local celsius

    ## column 10 of the Temperature_Celsius row of "smartctl -A"
    celsius=$(smartctl -d ata -A "${dev}" | awk '$2 == "Temperature_Celsius" { print $10 }')
    echo "${celsius}" > "${SMART_STATE}"

    ## anything other than digits counts as "not found"
    if [[ ! "${celsius}" =~ ^[0-9]+$ ]]; then
        logger -p ${FAC}.err "$0: Non-numeric drive temperature \"${celsius}\" obtained"
        return ${WDST_NOTFOUND}
    fi

    if (( DEBUG != 0 )); then
        logger -p ${FAC}.debug "$0: Drive ${dev} temperature is ${celsius}"
    fi

    echo "${celsius}"
    return ${WDST_OK}
}


## Get the current temperature state across all drives
##
## Input (globals):
##   drive_list                  - drives to poll (e.g. /dev/sda /dev/sdb)
##   TEMP_T1, TEMP_T2, TEMP_TF   - temperature thresholds
##   NM, WR, SW, SI, UK          - state codes
##
## Output (globals):
##   curr_state - combined state over all drives, per the TRANSITION table
##   curr_temp  - temperature associated with that state
##
## Returns ${WDST_OK}.
##
## A drive whose temperature cannot be read (helper fails, or the value is
## empty / non-numeric) is treated as temperature 0, i.e. state UK.
determineCurrentState()
{
    local drive
    local temp
    local rc
    local state
    local drive_temp
    local drive_state
    local prev_state
    local transition

    ## allowed temperature states transitions across all drives
    ## NB: "TRANSITION" is treated as a 2-D array
    ##  prev | next ---       NM    WR    SW    SI    UK
    ##    |
    eval TRANSITION${NM}="( ${NM} ${WR} ${SW} ${SI} ${UK} )"
    eval TRANSITION${WR}="( ${WR} ${WR} ${SW} ${SI} ${WR} )"
    eval TRANSITION${SW}="( ${SW} ${SW} ${SW} ${SI} ${SW} )"
    eval TRANSITION${SI}="( ${SI} ${SI} ${SI} ${SI} ${SI} )"
    eval TRANSITION${UK}="( ${UK} ${WR} ${SW} ${SI} ${UK} )"

    ## debug
    if [ ${DEBUG} -ne 0 ]; then
        logger -p ${FAC}.debug "$0: Getting current temperature state"
    fi

    ## init
    drive_state=${NM}
    drive_temp=0

    ## loop through the drive list & finalize temperature state using
    ## TRANSITION table
    ## (left unquoted so a space-separated string in drive_list also works)
    for drive in ${drive_list[@]}
    do
        ## save prev state & temp
        prev_state=${drive_state}

        ## get the current drive temperature; capture the status right away,
        ## before any other command overwrites $?
        temp=$(getDriveTemperature "${drive}")
        rc=$?

        ## TEST ONLY
        if [ ${TEST} -ne 0 ]; then
            echo -n "Enter temperature: "
            read temp
            logger -p ${FAC}.debug "$0: INPUT temperature is ${temp}"
            rc=${WDST_OK}
        fi

        ## reset temperature to 0 if the read failed or the value is not a
        ## plain integer (keeps the integer tests below from erroring out)
        if [ ${rc} -ne ${WDST_OK} ] || ! [[ "${temp}" =~ ^[0-9]+$ ]]; then
            temp=0
        fi

        ## determine the temperature state of this drive
        if [ ${temp} -eq 0 ]; then
            state=${UK}
        elif [ ${temp} -le ${TEMP_T1} ]; then
            state=${NM}
        elif [ ${temp} -le ${TEMP_T2} ]; then
            state=${WR}
        elif [ ${temp} -le ${TEMP_TF} ]; then
            state=${SW}
        else
            state=${SI}
        fi

        ## get the actual drive state using the TRANSITION table
        transition=TRANSITION${prev_state}[${state}]
        drive_state=${!transition}

        ## update drive temperature if state changes or temperature increases
        if [ ${drive_temp} -eq 0 ] || [ ${drive_state} -ne ${prev_state} ] || [ ${drive_temp} -lt ${temp} ]; then
            drive_temp=${temp}
        fi

        ## optimization: break the loop if current state is SI (shutdown immediate)
        if [ ${drive_state} -eq ${SI} ]; then break; fi
    done

    ## debug
    if [ ${DEBUG} -ne 0 ]; then
        logger -p ${FAC}.debug "$0: Current Temperature - ${drive_temp}, Current State - ${drive_state}"
    fi

    ## pass to global variables
    curr_state=${drive_state}
    curr_temp=${drive_temp}

    return ${WDST_OK}
}


## --- Action Handlers

## Handler: over the maximum threshold. Records the SI state, raises the
## over-temperature flag, signals the error LED, sends the immediate-shutdown
## alert, stops the shutdown timer, notifies the system and switches to the
## emergency run level. Returns ${WDST_OK}.
act_emergency()
{
    ## NB: this handler only runs with curr_state == ${SI}

    if (( DEBUG != 0 )); then
        logger -p ${FAC}.debug "$0: Action Shutdown, Temperature - ${curr_temp}"
    fi

    ## persist the state and mark the over-temperature condition
    printf '%s\n' "${SI}" > "${TEMP_STATE}"
    touch "${OVER_TEMP_FLAG}"

    ## thermal LED event -> error status, then the alert
    ledCtrl.sh LED_EV_THERMO LED_STAT_ERR
    sendAlert.sh "${thermalShutdownImmediate}"

    ## a timer value of 0 means "not running"
    printf '%s\n' 0 > "${TEMP_SHUTDOWN_TIMER}"

    ## tell the rest of the system, log, and stop services
    incUpdateCount.pm ${THERMAL_STATE_NFY_ID}
    logger -p ${FAC}.emerg "$0: Current temperature(${curr_temp}) is over max-threshold, stopping all services"
    changeRunLevel.pl --level=emergency

    return ${WDST_OK}
}

## Handler: temperature back to normal after an emergency. Clears the
## over-temperature flag, sets the thermal LED event to OK, sends the
## normal-temperature alert, records the NM state, notifies the system and
## switches to the "app" run level. Returns ${WDST_OK}.
act_restart()
{
    if (( DEBUG != 0 )); then
        logger -p ${FAC}.debug "$0: Action Restart, Temperature - ${curr_temp}"
    fi

    ## drop the over-temperature marker
    rm -f "${OVER_TEMP_FLAG}"

    ## LED and alert first, state file afterwards
    ledCtrl.sh LED_EV_THERMO LED_STAT_OK
    sendAlert.sh "${temperatureNormal}"
    printf '%s\n' "${NM}" > "${TEMP_STATE}"

    ## tell the rest of the system, log, and bring services back
    incUpdateCount.pm ${THERMAL_STATE_NFY_ID}
    logger -p ${FAC}.notice "$0: Temperature of all drives(${curr_temp}) is now normal, restarting all services"
    changeRunLevel.pl --level=app

    return ${WDST_OK}
}

## Handler: already shut down and still hot. Only logs a notice; no state
## change. Returns ${WDST_OK}.
act_cooldown()
{
    if (( DEBUG != 0 )); then
        logger -p ${FAC}.debug "$0: Action Cooldown, Temperature - ${curr_temp}"
    fi

    logger -p ${FAC}.notice "$0: Current temperature(${curr_temp}) is still hot, maintaining shutdown"

    return ${WDST_OK}
}

## Handler: nothing to do for this state pair (apart from an optional debug
## log line). Returns ${WDST_OK}.
act_noop()
{
    if (( DEBUG != 0 )); then
        logger -p ${FAC}.debug "$0: Action noop, Temperature - ${curr_temp}"
    fi

    return ${WDST_OK}
}

## Handler: entering shutdown-warning. Records the SW state, sets the thermal
## LED event to warning, sends the pending-shutdown alert, stores the current
## UTC epoch time as the timer start, notifies the system and logs.
## Returns ${WDST_OK}.
act_start_timer()
{
    ## NB: this handler only runs with curr_state == ${SW}

    if (( DEBUG != 0 )); then
        logger -p ${FAC}.debug "$0: Action Shutdown-Warning, Starting Timer, Temperature - ${curr_temp}"
    fi

    ## persist the state
    printf '%s\n' "${SW}" > "${TEMP_STATE}"

    ## LED and alert
    ledCtrl.sh LED_EV_THERMO LED_STAT_WARN
    sendAlert.sh "${thermalShutdownPending}"

    ## timer start = now (seconds since the epoch, UTC)
    date -u +%s > "${TEMP_SHUTDOWN_TIMER}"

    ## tell the rest of the system and log
    incUpdateCount.pm ${THERMAL_STATE_NFY_ID}
    logger -p ${FAC}.crit "$0: Over-Temperature condition(${curr_temp}), Shutdown-Warning, Timer started"

    return ${WDST_OK}
}

## Handler: still in shutdown-warning. Compares the stored timer start with
## the current time:
##   - stored start is 0            -> returns ${WDST_FAILED}
##   - elapsed <= ${MAX_SW_TIME}    -> returns ${WDST_OK}, nothing else happens
##   - otherwise (timer expired)    -> stops the timer, sets the thermal LED
##     event to OK, forces curr_state to ${SI} and runs the handler for
##     (last_state, SI), returning that handler's status
act_check_timer()
{
    ## NB: this handler only runs with curr_state == ${SW}

    if (( DEBUG != 0 )); then
        logger -p ${FAC}.debug "$0: Action Shutdown-Warning, Checking Timer, Temperature - ${curr_temp}"
    fi

    ## timer start as stored by act_start_timer; 0 means "not running"
    start_time=( $(cat "${TEMP_SHUTDOWN_TIMER}") )
    [ ${start_time} -eq 0 ] && return ${WDST_FAILED}

    curr_time=( $(date -u +%s) )

    ## still within the allowed window: state stays SW
    if [ $(( ${curr_time} - ${start_time} )) -le ${MAX_SW_TIME} ]; then
        if (( DEBUG != 0 )); then
            logger -p ${FAC}.debug "$0: Timer has not expired"
        fi

        return ${WDST_OK}
    fi

    ## --- Timer has expired

    logger -p ${FAC}.notice "$0: Over-Temperature condition(${curr_temp}), Timer expired"

    ## stop the timer and reset the thermal LED event
    printf '%s\n' 0 > "${TEMP_SHUTDOWN_TIMER}"
    ledCtrl.sh LED_EV_THERMO LED_STAT_OK

    ## escalate: dispatch the (last_state -> SI) handler from the ACTION table
    curr_state=${SI}
    action_hdlr=ACTION${last_state}[${curr_state}]
    ${!action_hdlr}

    return $?
}

## Handler: entering the high-temperature warning state. Records the WR
## state, sends the high-temperature alert, notifies the system and logs a
## warning. Returns ${WDST_OK}.
act_warning()
{
    if (( DEBUG != 0 )); then
        logger -p ${FAC}.debug "$0: Action Normal -> Warning, Temperature - ${curr_temp}"
    fi

    ## persist the state, then alert
    printf '%s\n' "${WR}" > "${TEMP_STATE}"
    sendAlert.sh "${systemTemperatureHigh}"

    ## tell the rest of the system and log
    incUpdateCount.pm ${THERMAL_STATE_NFY_ID}
    logger -p ${FAC}.warning "$0: High-Temperature(${curr_temp}) condition detected"

    return ${WDST_OK}
}

## Handler: back to normal from a warning state. Stops the shutdown timer,
## sets the thermal LED event to OK, sends the normal-temperature alert,
## records the NM state, notifies the system and logs. Returns ${WDST_OK}.
act_normal()
{
    if (( DEBUG != 0 )); then
        logger -p ${FAC}.debug "$0: Action Shutdown-Warning -> Normal, Temperature - ${curr_temp}"
    fi

    ## a timer value of 0 means "not running"
    printf '%s\n' 0 > "${TEMP_SHUTDOWN_TIMER}"

    ## LED and alert
    ledCtrl.sh LED_EV_THERMO LED_STAT_OK
    sendAlert.sh "${temperatureNormal}"

    ## NB: the state file is written only after the actions above are done
    printf '%s\n' "${NM}" > "${TEMP_STATE}"

    ## tell the rest of the system and log
    incUpdateCount.pm ${THERMAL_STATE_NFY_ID}
    logger -p ${FAC}.notice "$0: Temperature of all drives(${curr_temp}) is now normal"

    return ${WDST_OK}
}

## Handler: temperature dropped into the warning band while in a shutdown
## state. If it is still above (TEMP_T2 - HYSTERISIS) the last state is kept;
## otherwise the state becomes NM. The handler for (last_state, resulting
## state) is then run and its status returned.
act_hysterisis()
{
    if (( DEBUG != 0 )); then
        logger -p ${FAC}.debug "$0: Action Hysterisis, Temperature - ${curr_temp}"
    fi

    ## default to normal; stay in the last state while inside the band
    curr_state=${NM}
    if [ ${curr_temp} -gt $(( ${TEMP_T2} - ${HYSTERISIS} )) ]; then
        curr_state=${last_state}
    fi

    ## dispatch through the ACTION table
    action_hdlr=ACTION${last_state}[${curr_state}]
    ${!action_hdlr}

    return $?
}

## Handler: unknown -> normal. Only records the NM state; no notification is
## sent because the previous state was unknown. Returns ${WDST_OK}.
act_UK_2_NM()
{
    if (( DEBUG != 0 )); then
        logger -p ${FAC}.debug "$0: Action Unknown -> Normal, Temperature - ${curr_temp}"
    fi

    printf '%s\n' "${NM}" > "${TEMP_STATE}"

    return ${WDST_OK}
}


## --- Main script
## Main: initialise the saved state and timer files, then loop forever:
## read the last saved state, determine the current state from the drives,
## and run the handler that the ACTION table gives for that state pair.
{
    ## exit if system has no internal drives
    if [ "${DVC_DRIVE_COUNT}" == "0" ]; then
        exit 0
    fi

    ## get list of drives
    drive_list=( `internalDrives` )

    ## exit if no drives are found
    ## (${drive_list} is the first array element)
    if [ -z "${drive_list}" ]; then
        exit 0
    fi

    logger -p ${FAC}.info "$0: Starting Temperature Monitor"

    ## init temp state to normal if not over temperature
    ## (only when no state file exists yet; the over-temperature flag, if
    ## present, makes the initial state SI instead)
    if [ ! -f "${TEMP_STATE}" ]; then
        echo "${NM}" > "${TEMP_STATE}"
        if [ -f "${OVER_TEMP_FLAG}" ]; then
            echo "${SI}" > "${TEMP_STATE}"
        fi
    fi

    ## init shutdown timer if not in shutdown-warning state
    ## NOTE(review): an empty state file would make the integer test below
    ## fail with a test error - confirm whether that can happen.
    last_state=( `cat ${TEMP_STATE}` )
    if [ ${last_state} -ne ${SW} ]; then
        echo 0 > "${TEMP_SHUTDOWN_TIMER}"
    fi

    ## loop every MONITOR_TIMER seconds
    ## NB: disable loop for if TEST=1
    ## The sleep runs inside the loop's step expression, so it is executed
    ## after every iteration, including those ended early with "continue".
    for (( ; ; `sleep ${MONITOR_TIMER}`)); do
        ## debug
        if [ ${DEBUG} -ne 0 ]; then
            logger -p ${FAC}.debug "$0: Starting Temperature Monitor Run"
        fi

        ## get the last saved temperature state
        ## NB: This state was saved in an earlier run of this script
        ## NOTE(review): $? below follows an array assignment, so it likely
        ## does not reflect cat's exit status - confirm. The empty / numeric /
        ## range checks still catch a missing or corrupt state file.
        last_state=( `cat ${TEMP_STATE}` )
        if [ $? -ne ${WDST_OK} ] || [ -z "${last_state}" ] || ! [[ "${last_state}" =~ ^[0-9]+$ ]] || [ ${last_state} -ge ${N_STATES} ]; then
            last_state=${NM}

            ## initialize the last state file
            echo "${NM}" > "${TEMP_STATE}"
        fi

        ## skip run if in standby & last state is normal
        if [ -f "${STANDBY_STATE}" ] && [ ${last_state} -eq ${NM} ]; then
            ## debug
            if [ ${DEBUG} -ne 0 ]; then
                logger -p ${FAC}.debug "$0: Skipping run as system is in standby"
            fi

            continue
        fi

        ## determine the current temperature state
        ## NB: This function shall set $curr_state & $curr_temp global vars
        determineCurrentState
        if [ $? -ne ${WDST_OK} ] || [ -z "${curr_state}" ]; then
            ## debug
            if [ ${DEBUG} -ne 0 ]; then
                logger -p ${FAC}.debug "$0: Failed to determine current state; continuing"
            fi

            continue
        fi

        ## execute the action based on last & current state
        ## NB: Cannot execute ${array${last_state}[${curr_state}]} directly
        ##     It must be saved in to a variable x & executed using ${!x}
        action_hdlr=ACTION${last_state}[${curr_state}]

        ## debug
        if [ ${DEBUG} -ne 0 ]; then
            logger -p ${FAC}.debug "$0: Last state=${last_state}; action=`echo ${!action_hdlr}`"
        fi

        ## run the handler function named by the table entry
        ${!action_hdlr}

        if [ $? -ne ${WDST_OK} ]; then
            logger -p ${FAC}.err "$0: Failed to execute the action `echo ${!action_hdlr}`; last state=${last_state}; curr temp=${curr_temp}"
            continue
        fi

        ## debug
        if [ ${DEBUG} -ne 0 ]; then
            logger -p ${FAC}.debug "$0: Finished Temperature Monitor Run"
        fi

        ## break if TEST
        if [ ${TEST} -ne 0 ]; then
            logger -p ${FAC}.debug "$0: Finished Test Run"
            break
        fi
    done ## for MONITOR_TIMER infinite loop

    logger -p ${FAC}.info "$0: Finished Temperature Monitor"

    exit 0
}
## --- End of Main script

## monitorVolume.sh
#!/bin/sh
#
# (c) 2010 Western Digital Technologies, Inc. All rights reserved.
#
# monitorVolume.sh
#  Note: this is called by cron
#
#
PATH=/sbin:/bin:/usr/bin:/usr/sbin:/usr/local/bin:/usr/local/sbin

. /usr/local/sbin/share-param.sh
. /etc/nas/alert-param.sh
. /etc/system.conf
. /etc/wdcomp.d/wd-nas/wd-nas.conf 2> /dev/null
[ -f /usr/local/sbin/ledConfig.sh ] && . /usr/local/sbin/ledConfig.sh

lockFile="/tmp/monitorVolume"

# Do nothing while in standby or while the factory-restore marker exists
if [ -f /tmp/standby ] || [ -f ${reformatDataVolume} ]; then exit 0; fi

# Do nothing on a system with no internal drives
case "${DVC_DRIVE_COUNT}" in
	0) exit 0 ;;
esac

# Single instance only: give up at once if the lock cannot be taken
if ! lockfile-create --retry 0 "${lockFile}" >/dev/null 2>&1; then
    exit 0
fi

# Keep the lock fresh in the background in case the script runs long
# (original note: longer than 5 minutes)
lockfile-touch ${lockFile} &
pid="$!"

# Stop the background lock refresher and release the lock
releaseVolumeLock() {
    kill "${pid}" >/dev/null 2>&1
    lockfile-remove ${lockFile} >/dev/null 2>&1
}

# The volume counts as healthy when df lists the data volume device and the
# /tmp/tst_volume test hook is absent. /tmp/volume_failed records that the
# alert was already sent.
if df | grep -q ${dataVolumeDevice} && [ ! -f /tmp/tst_volume ]; then
	rm -f /tmp/volume_failed
else
	if [ ! -f /tmp/volume_failed ]; then
		sendAlert.sh "${volumeFailure}"
		incUpdateCount.pm system_state
	fi
	ledCtrl.sh LED_EV_VOLUME LED_STAT_ERR
	touch /tmp/volume_failed

	releaseVolumeLock
	exit 0
fi


releaseVolumeLock
	#!/bin/sh
	#
	# � 2010 Western Digital Technologies, Inc. All rights reserved.
	#
	# monitorVolume.sh
	# Note: this is called by cron
	#
	#
	PATH=/sbin:/bin:/usr/bin:/usr/sbin:/usr/local/bin:/usr/local/sbin

	. /usr/local/sbin/share-param.sh
	. /etc/nas/alert-param.sh
	. /etc/system.conf
	. /etc/wdcomp.d/wd-nas/wd-nas.conf 2> /dev/null

	# Alert is raised above MAX_USAGE_THRESH and cleared only at or below
	# MIN_USAGE_THRESH, so readings in between keep the current state.
	MAX_USAGE_THRESH=95
	MIN_USAGE_THRESH=93

	# check DataVolume percent used
	percentUsed=$(getDataVolumePercentUsed.sh)
	echo "% used=${percentUsed} MAX=${MAX_USAGE_THRESH}"

	# Classify the reading once. An empty or non-numeric value is "unknown" so
	# it never reaches the integer comparisons. (This copy previously had the
	# OR operator written as escaped pipes, which the shell passed to "[" as
	# plain arguments.)
	usageLevel=unknown
	if [ "${percentUsed}" -eq "${percentUsed}" ] 2>/dev/null; then
		if [ "${percentUsed}" -gt "${MAX_USAGE_THRESH}" ]; then
			usageLevel=high
		elif [ "${percentUsed}" -le "${MIN_USAGE_THRESH}" ]; then
			usageLevel=low
		else
			usageLevel=hold
		fi
	fi

	# /tmp/tst_freespace forces the alert path regardless of the reading
	if [ -f /tmp/tst_freespace ]; then
		usageLevel=high
	fi

	# Without a status-file path neither branch can track state; do nothing.
	if [ -z "${FREESPACE_STATUS_FILE}" ]; then
		usageLevel=noconfig
	fi

	case "${usageLevel}" in
		high)
			if [ ! -f "${FREESPACE_STATUS_FILE}" ]; then
				sendAlert.sh "${diskNearCapacity}"
			fi
			# Checked again on purpose: the state file is only created if it
			# is still absent after the alert.
			if [ ! -f "${FREESPACE_STATUS_FILE}" ]; then
				touch "${FREESPACE_STATUS_FILE}"
				incUpdateCount.pm system_state
			fi
			;;
		low)
			if [ -f "${FREESPACE_STATUS_FILE}" ]; then
				rm -f "${FREESPACE_STATUS_FILE}"
				incUpdateCount.pm system_state
			fi
			;;
		unknown)
			echo "monitorFreeSpace: unusable percent-used value \"${percentUsed}\"" >&2
			;;
		noconfig)
			echo "monitorFreeSpace: FREESPACE_STATUS_FILE is not set" >&2
			;;
	esac
	#!/bin/bash
	#
	# (c) 2013 Western Digital Technologies, Inc. All rights reserved.
	#
	# monitorio - Monitor disk activity, and put system into standby. Also, monitor to trigger file tally process
	##
	PATH=/sbin:/bin:/usr/bin:/usr/sbin:/usr/local/bin:/usr/local/sbin
	. /lib/lsb/init-functions
	source /etc/priority.conf
	source /etc/system.conf
	source /usr/local/sbin/drive_helper.sh
	source /etc/wdcomp.d/wd-nas/wd-nas.conf 2> /dev/null
	[ -f /usr/local/sbin/ledConfig.sh ] && . /usr/local/sbin/ledConfig.sh

	# Paths used by the monitor loop and the tally helper
	MIN_SINCE_DISK_ACCESS=/tmp/minutes_since_disk_access
	TALLY_PIDFILE=/var/run/tally.pid
	TALLY_DAEMON=/usr/local/bin/tally
	TALLY_PIPE=/var/local/nas_file_tally/tallyd.pipe
	MEDIACRAWLER_REWALK=/tmp/mediacrawler_rewalk

	# Only give monitorio 20% of the CPU Max
	# (done by placing this shell's PID in its own cgroup and writing the
	# share value to that group's cpu.shares file)
	MONITORIO_CPU_SHARE=20
	CGROUP_MONITORIO=/sys/fs/cgroup/monitorio
	mkdir -p $CGROUP_MONITORIO
	echo $MONITORIO_CPU_SHARE > $CGROUP_MONITORIO/cpu.shares
	echo $$ > $CGROUP_MONITORIO/tasks

	# File holding the df "used" figure recorded at the last trigger
	total_df_file=$WD_NAS_VAR_DIR/total_df

	# trigger tally (or share size) when df result changes by TALLY_TRIGGER_THRESH_KB
	TALLY_TRIGGER_THRESH_KB=1000000

	# Start the tally daemon (if not already running) and stream a recursive
	# listing of /shares into $TALLY_PIPE.
	#
	# Globals read: TALLY_PIPE, TALLY_PIDFILE, TALLY_DAEMON, monitorio_nice
	# Output format (written to $TALLY_PIPE, records terminated by "~~~~"):
	#   #4:<size>:<directory>/<name>~~~~   one per listed entry
	#   #0:0:/tmp/TALLYEND.DONE~~~~        end marker
	file_tally() {
		if [ ! -p "$TALLY_PIPE" ]; then
			mkfifo "$TALLY_PIPE"
		fi
		start-stop-daemon --start --quiet --oknodo --nicelevel $monitorio_nice --pidfile $TALLY_PIDFILE --make-pidfile --background --exec $TALLY_DAEMON --
		# Real pipe into awk (this copy had it escaped, which handed "|" and
		# the awk program to ls as arguments).
		ls -s1NRA --block-size=1 /shares | awk '
		{
			if ($1 ~ /^[0-9]+$/) {
				# Entry line: "<size> <name>". Strip the size column to get
				# the name; searching for $2 inside $0 would match digits of
				# the size itself and lose leading spaces.
				name = $0;
				sub(/^[ \t]*[0-9]+ /, "", name);
				printf("#4:%s:%s/%s~~~~",$1,current_dir,name);
			}
			else {
				if ($1 != "total") {
					# Directory header "<path>:" - drop the trailing colon
					current_dir = (substr($0,1,length($0)-1));
				}
			}
		}
		END {
			printf("#0:0:/tmp/TALLYEND.DONE~~~~");
		}
		' > "$TALLY_PIPE"
	}

	# Block until the file /tmp/ready exists.
	# Polls every five seconds; each unsuccessful poll is reported through
	# logger (-s also echoes the message to stderr).
	wait_system_ready() {
		until [ -f "/tmp/ready" ]; do
			logger -s "$0: waiting for system to become ready.."
			sleep 5
		done
	}

	# Scratch files for share sizes: the combined list plus its two inputs
	# (shares that are directories under /shares, and shares that are symlinks).
	tmp_share_size=/tmp/share_size
	tmp_internal_share_size=/tmp/internal_share_size
	tmp_external_share_size=/tmp/external_share_size

	# Recompute the size of every internal share and rebuild the combined list.
	#
	# Internal shares are the non-hidden directories directly under /shares.
	# getShareSize.sh is run once per share and its output collected in
	# ${tmp_internal_share_size}; the combined file is then rewritten as
	# internal list followed by external list.
	calculate_share_size() {
		find /shares -maxdepth 1 -mindepth 1 -type d -not -name ".*" -print0 | xargs -0 -I {} getShareSize.sh {} > "${tmp_internal_share_size}"
		cat "$tmp_internal_share_size" "$tmp_external_share_size" > "${tmp_share_size}"
	}

	# Recompute the size of every external share and rebuild the combined list.
	#
	# External shares are the symbolic links directly under /shares.
	# getShareSize.sh is run once per link and its output collected in
	# ${tmp_external_share_size}; the combined file ${tmp_share_size} is then
	# rewritten as internal list followed by external list.
	calculate_external_share_size() {
		find /shares -maxdepth 1 -mindepth 1 -type l -print0 | xargs -0 -I {} getShareSize.sh {} > "${tmp_external_share_size}"
		cat "$tmp_internal_share_size" "$tmp_external_share_size" > "${tmp_share_size}"
	}

	# Decide whether data volume usage changed enough to warrant a new tally.
	#
	# Compares the "used" column (field 3) of the df line for /DataVolume with
	# the value saved in ${total_df_file}. Prints "trigger" when no value has
	# been saved yet or when the absolute difference is at least
	# ${TALLY_TRIGGER_THRESH_KB}; prints an empty line otherwise. On a trigger
	# the saved value is replaced with the current one.
	#
	# Globals read:    total_df_file, TALLY_TRIGGER_THRESH_KB
	# Globals written: result, total_df
	checkDataTrigger() {
		result="trigger"
		if [ -f "${total_df_file}" ]; then
			total_df=$(cat "${total_df_file}")
			result=$(df | grep /DataVolume | awk -v total_df="${total_df}" -v thresh="${TALLY_TRIGGER_THRESH_KB}" '{x=$3 - total_df; abs_x=(x >= 0) ? x : -x; if(abs_x >= thresh) printf("trigger")}')
		fi
		if [ "$result" == "trigger" ]; then
			df | grep /DataVolume | awk '{print $3}' > "${total_df_file}"
		fi
		echo "$result"
	}

	# Make sure the directory that will contain ${SHARE_SIZE_CACHE} exists.
	mkdir -p `dirname ${SHARE_SIZE_CACHE}`
	# Integer-typed variables used by resetSleepCount and the monitoring loop.
	declare -i sleepcount
	declare -i rootdisk_thresh
	declare -i enterStandbyTime=0
	# Remove the standby and media-crawler rewalk marker files if they exist.
	rm -f /tmp/standby
	rm -f ${MEDIACRAWLER_REWALK}
	# NOTE(review): presumably defines standby_time and standby_enable, which
	# resetSleepCount and the monitoring loop read - confirm.
	source /etc/standby.conf

	# Restart the idle-minute counter and refresh the standby settings.
	#
	# In the "emergency" run level the standby settings are forced to
	# one minute / enabled so the drives spin down as early as possible.
	# In any other run level /etc/standby.conf is re-read and the root-disk
	# threshold is set to one minute less than standby_time.
	#
	# Globals written: sleepcount, rootdisk_thresh, and (emergency only)
	#                  standby_time, standby_enable
	resetSleepCount() {
		sleepcount=0

		case "$(getRunLevel.pl)" in
			emergency)
				standby_time=1
				rootdisk_thresh=1
				standby_enable="enabled"
				;;
			*)
				source /etc/standby.conf
				rootdisk_thresh=$(expr $standby_time - 1)
				;;
		esac
	}

	# Value of the root= argument on the kernel command line. awk splits the
	# line into space-separated records and each record at "=".
	currentRootDevice=$(awk -F= 'BEGIN{RS=" "}{ if ($1=="root") print $2 }' /proc/cmdline)
	# Device names without their directory part, as listed in column 3 of
	# /proc/diskstats.
	rootDisk=$(basename "${currentRootDevice}")
	dataVolumeDisk=$(basename "${dataVolumeDevice}")
	# Word-split the helper's output into one array element per drive.
	drivelist=( $(internalDrives) )

	# Publish an idle time of zero minutes until the monitoring loop takes over.
	echo "0" > "${MIN_SINCE_DISK_ACCESS}"

	# wait for system to become ready
	wait_system_ready

	# run file tally at startup (in the background)
	# Without the tally daemon, fall back to computing share sizes directly and
	# make ${SHARE_SIZE_CACHE} a symlink to the combined size file.
	if [ ! -f $TALLY_DAEMON ]; then
	logger "Tally daemon not installed, exiting tally function"

	## if tally not present, then call calculate_share_size
	calculate_share_size
	calculate_external_share_size
	rm ${SHARE_SIZE_CACHE}
	ln -s ${tmp_share_size} ${SHARE_SIZE_CACHE}
	else
	file_tally &
	fi

	# Debug mode (first argument "debug"): write 1 to the kernel's block_dump
	# switch and clear the kernel message buffer.
	if [ "$1" == "debug" ]; then
	echo "1" > /proc/sys/vm/block_dump
	dmesg -c > /dev/null
	fi

	# Main monitoring loop (never terminates).
	#
	# Outer loop: while every internal drive reports "standby", poll every five
	# seconds. As soon as one drive is awake, handle a possible wake-up from a
	# standby this script initiated, then enter the inner loop.
	#
	# Inner loop: once a minute sample the I/O counters from /proc/diskstats,
	# refresh usage/tally information when the data volume was written to, and
	# count consecutive idle minutes in sleepcount. When sleepcount reaches
	# standby_time (and standby is enabled and no SMART test is in progress)
	# all drives are put into standby and control returns to the outer loop.
	while :; do

		# standby_test ends up 1 as soon as a drive is found that is not in
		# standby, and 0 when all drives are in standby.
		for i in "${drivelist[@]}"; do
			hdparm -C "$i" | grep -q "standby"
			standby_test=$?
			[ "$standby_test" -eq "1" ] && break
		done

		if [ "$standby_test" -eq "0" ]; then
			sleep 5
			continue
		else
			# /tmp/standby exists only if this script sent the drives to standby,
			# so finding it here means the drives have just woken up.
			if [ -f /tmp/standby ]; then
				standby_since=$(stat --format %z /tmp/standby)
				rm -f /tmp/standby
				# Cancel blue color and turn on green if applicable
				ledCtrl.sh LED_EV_DISK_STBY LED_STAT_OK
				### This will allow individual components to register for wakeup events
				run-parts /etc/nas/wakeup.d
				###
				touch "${MEDIACRAWLER_REWALK}"
				currentTime=$(date +%s)
				timeInStandby=$(( currentTime - enterStandbyTime ))
				echo "exit standby after $timeInStandby (since $standby_since)"
				logger "exit standby after $timeInStandby (since $standby_since)"
				if [ "$1" == "debug" ]; then
					dmesg -c
				fi
			fi

			resetSleepCount

			echo $sleepcount > "${MIN_SINCE_DISK_ACCESS}"
			trigger_tally=0
			# Baseline counters: /proc/diskstats field 6 and field 10 of the
			# line whose device name (field 3) matches.
			iow_root=$(awk -v disk="${rootDisk}" '{if ($3==disk) print $10}' /proc/diskstats)
			ior_datavol=$(awk -v disk="${dataVolumeDisk}" '{if ($3==disk) print $6}' /proc/diskstats)
			iow_datavol=$(awk -v disk="${dataVolumeDisk}" '{if ($3==disk) print $10}' /proc/diskstats)
			if [ "$1" == "debug" ]; then
				echo "Init ior_datavol=$ior_datavol ior_datavol2=$ior_datavol2"
				echo " iow_datavol=$iow_datavol iow_datavol2=$iow_datavol2"
				echo " iow_root=$iow_root iow_root2=$iow_root2"
				dmesg -c
			fi

			while :; do
				# Wait for 60 seconds
				sleep 60
				iow_root2=$(awk -v disk="${rootDisk}" '{if ($3==disk) print $10}' /proc/diskstats)
				ior_datavol2=$(awk -v disk="${dataVolumeDisk}" '{if ($3==disk) print $6}' /proc/diskstats)
				iow_datavol2=$(awk -v disk="${dataVolumeDisk}" '{if ($3==disk) print $10}' /proc/diskstats)

				# check for file tally sync
				if [ "$iow_datavol" -ne "$iow_datavol2" ] && [ "$(checkDataTrigger)" == "trigger" ]; then
					incUpdateCount.pm data_volume_write
					monitorFreeSpace.sh

					if [ -f "$TALLY_DAEMON" ]; then
						# also run tally if installed
						pidofproc -p "$TALLY_PIDFILE" "$TALLY_DAEMON" >/dev/null

						# a non-zero status means the daemon is not running
						if [ $? -ne 0 ]; then
							file_tally
						fi
						createBackupTally.sh

					else
						## if tally not present, then call calculate_share_size
						calculate_share_size
					fi
				fi

				# calculate size of external shares. Note that this must be done outside of "checkDataTrigger" so that it is done more often.
				calculate_external_share_size

				# use data volume writes until near sleep threshold, then check all disk writes
				# (old_sleepcount is not read anywhere in this loop)
				old_sleepcount=${sleepcount}
				if [ $((sleepcount)) -eq $((rootdisk_thresh)) ] && [ "$iow_root" -eq "$iow_root2" ]; then
					sleepcount=$((sleepcount+1))
				elif [ $((sleepcount)) -lt $((rootdisk_thresh)) ] && [ "$ior_datavol" -eq "$ior_datavol2" ] && [ "$iow_datavol" -eq "$iow_datavol2" ]; then
					sleepcount=$((sleepcount+1))
				else
					resetSleepCount
				fi
				echo $sleepcount > "${MIN_SINCE_DISK_ACCESS}"
				if [ "$1" == "debug" ]; then

					[ "$sleepcount" != "0" ] && echo "sleepcount: $sleepcount"
					[ "$sleepcount" == "0" ] && echo "Disk activity:"
					echo "... ior_datavol=$ior_datavol ior_datavol2=$ior_datavol2"
					echo "... iow_datavol=$iow_datavol iow_datavol2=$iow_datavol2"
					echo "... iow_root=$iow_root iow_root2=$iow_root2"
					# dmesg -c
				fi
				# this minute's samples become the baseline for the next one
				ior_datavol=$ior_datavol2
				iow_datavol=$iow_datavol2
				iow_root=$iow_root2

				smartTestStatus=$(getSmartTestStatus.sh | awk '{print $1}')
				if [ "$standby_enable" == "enabled" ] && [ "$sleepcount" -eq "$standby_time" ] && [ "$smartTestStatus" != "inprogress" ]; then
					touch /tmp/standby
					enterStandbyTime=$(date +%s)
					echo "Enter standby"
					if [ "$1" == "debug" ]; then
						echo "$(date): Enter standby "
						dmesg -c > /dev/null
					fi
					for i in "${drivelist[@]}"; do
						hdparm -y "$i" >/dev/null
					done

					# turn on solid blue if applicable
					ledCtrl.sh LED_EV_DISK_STBY LED_STAT_IN_PROG
					sleep 5
					break
				fi
			done
		fi
	done
	#!/bin/sh
	#
	# (c) 2010 Western Digital Technologies, Inc. All rights reserved.
	#
	# monitorSmartStatus.sh
	# Note: this is called by cron
	#
	#
	# Check the SMART health of every internal drive and raise or clear the
	# SMART failure condition. /tmp/smart_fail records that the condition is
	# currently raised so the alert is sent only once per failure episode.
	PATH=/sbin:/bin:/usr/bin:/usr/sbin:/usr/local/bin:/usr/local/sbin

	. /etc/system.conf
	. /usr/local/sbin/share-param.sh
	. /etc/nas/alert-param.sh
	. /usr/local/sbin/drive_helper.sh
	[ -f /usr/local/sbin/ledConfig.sh ] && . /usr/local/sbin/ledConfig.sh

	# exit if in standby
	if [ -f /tmp/standby ]; then
		exit 0;
	fi

	# exit if system with no internal drives
	if [ "${DVC_DRIVE_COUNT}" = "0" ]; then
		exit 0
	fi

	atLeastOneDriveFailed=FALSE

	# A drive counts as failed when the smartctl health report does not
	# contain the word PASSED (this includes smartctl producing no report).
	driveList=( $(internalDrives) )
	for drive in "${driveList[@]}"
	do
		if ! smartctl -d ata -H "${drive}" | grep -q PASSED; then
			atLeastOneDriveFailed=TRUE
		fi
	done

	# /tmp/tst_smart is a test hook that forces the failure path.
	if [ "$atLeastOneDriveFailed" = "TRUE" ] || [ -f /tmp/tst_smart ]; then
		if [ ! -f /tmp/smart_fail ]; then
			sendAlert.sh "${driveSmartFail}"
			ledCtrl.sh LED_EV_DISK_SMART LED_STAT_ERR
			incUpdateCount.pm system_state
		fi
		touch /tmp/smart_fail
	else
		rm -f /tmp/smart_fail
	fi
	#!/bin/sh
	#
	# (c) 2012 Western Digital Technologies, Inc. All rights reserved.
	#
	# monitorTemperature.sh
	# Note: This is called by init-script monitorTemperature
	#
	# This script is responsible for monitoring the temperature of internal drives
	# and taking action if the temperature is not normal
	#
	# It takes following actions depending on temperature of internal drives
	# if temperature of any drive > TF
	# - change led to RED
	# - send shutdown alert
	# - change run-level to emergency
	# - exit
	#
	# if temperature of any drive between T2 & TF
	# - send shutdown-warning alert
	# - start a shutdown-warning timer of 1 HR
	# - if timer expires change run-level to emergency
	# - exit
	#
	# if temperature of any drive between T1 & T2
	# - send high-temperature warning alert
	# - exit
	#
	# To restart all services & get back to normal
	# if temperature of all drives <= T2 - Hysterisis
	# - send normal temperature alert
	# - change led to GREEN
	# - change run-level to application
	# - exit
	#

	## --- Includes
	# Restrict command lookup to the standard system and /usr/local directories.
	PATH=/sbin:/bin:/usr/bin:/usr/sbin:/usr/local/bin:/usr/local/sbin

	# NOTE(review): the STATE_*, TEMP_*, HYSTERISIS, MAX_SW_TIME, MONITOR_TIMER
	# and N_STATES names used below are not defined in this script - presumably
	# they come from temperature-monitor.conf; confirm.
	source /usr/local/sbin/share-param.sh
	source /etc/system.conf
	source /etc/nas/alert-param.sh # ( for alerts )
	source /usr/local/sbin/drive_helper.sh # ( for internalDrives() )
	source /usr/local/sbin/wdStatus.sh # ( for $WDST_XXX status codes )
	source /etc/wdcomp.d/wd-nas/temperature-monitor.conf
	# ledConfig.sh is optional and sourced only when present.
	[ -f /usr/local/sbin/ledConfig.sh ] && . /usr/local/sbin/ledConfig.sh

	## --- Constants


	## Acronyms
	## Short aliases for the state codes; they are used as array indices and as
	## suffixes of the ACTION<n> / TRANSITION<n> array names.
	NM=${STATE_NORMAL}
	WR=${STATE_WARNING}
	SW=${STATE_SHUTDOWN_WARNING}
	SI=${STATE_SHUTDOWN_IMMEDIATE}
	UK=${STATE_UNKNOWN}

	## Internal Constants
	## NB: TURN OFF BEFORE CHECKING-IN
	DEBUG=0 ## for debugging

	## For Testing
	## - set TEST=1
	## - set DEBUG=1
	## - disable the infinite "for MONITOR_TIMER" loop
	## - enter different temperatures on input
	## - to test with infinite "for MONITOR_TIMER" loop set different values for TEMP_TX
	TEST=0

	## Logger facility
	FAC=local2

	## Table of allowed actions based on last & curr state
	## One array per last state (ACTION<last>); the element at index <curr>
	## names the handler function to run.
	## last | curr -- NM WR SW SI UK
	##      |
	eval ACTION${NM}="( act_noop act_warning act_start_timer act_emergency act_noop )"
	eval ACTION${WR}="( act_normal act_noop act_start_timer act_emergency act_noop )"
	eval ACTION${SW}="( act_normal act_hysterisis act_check_timer act_emergency act_check_timer )"
	eval ACTION${SI}="( act_restart act_hysterisis act_cooldown act_cooldown act_cooldown )"
	eval ACTION${UK}="( act_UK_2_NM act_warning act_start_timer act_emergency act_noop )" ## should never be called as UK state is never saved


	## --- Global Variables

	## drive_list - array of internal drives, filled in by the main script
	## last_state - state read back from ${TEMP_STATE}
	## curr_state - state computed by determineCurrentState
	## curr_temp  - temperature computed by determineCurrentState
	drive_list=
	last_state=
	curr_state=
	curr_temp=


	## --- Functions

	## Get the drive temperature
	##
	## Input:
	##  $1 - drive device node (e.g. /dev/sda)
	##
	## Output:
	##  on success - temperature on stdout, returns ${WDST_OK}
	##  on failure - nothing on stdout, returns ${WDST_NOTFOUND}
	##
	## Side effect:
	##  the value read (possibly empty) is written to ${SMART_STATE}
	##
	## E.g. getDriveTemperature "/dev/sda"
	getDriveTemperature()
	{
		local drive
		local temp

		## pass arguments
		drive=${1}

		## get the drive temperature using smart: field 10 of the attribute
		## line whose name (field 2) is Temperature_Celsius
		temp=$(smartctl -d ata -A "${drive}" |
			awk '{if ($2 == "Temperature_Celsius") print $10}')
		echo "${temp}" > "${SMART_STATE}"

		## return not found if number is not returned
		if ! [[ "${temp}" =~ ^[0-9]+$ ]]; then
			logger -p ${FAC}.err "$0: Non-numeric drive temperature \"${temp}\" obtained"
			return ${WDST_NOTFOUND}
		fi

		## debug
		if [ ${DEBUG} -ne 0 ]; then
			logger -p ${FAC}.debug "$0: Drive ${drive} temperature is ${temp}"
		fi

		## output the temperature
		echo "${temp}"
		return ${WDST_OK}
	}


	## Get the current temperature state
	##
	## Input (globals):
	##  drive_list - drives to inspect (e.g. /dev/sda /dev/sdb)
	##  TEMP_T1, TEMP_T2, TEMP_TF - temperature thresholds
	##
	## Output (globals):
	##  curr_state - combined temperature state (NM, WR, SW, SI or UK)
	##  curr_temp  - temperature associated with that state
	##
	## Returns ${WDST_OK}.
	##
	## A drive whose temperature cannot be read counts as 0, i.e. state UK.
	## The per-drive states are folded into one state with the TRANSITION
	## table below.
	determineCurrentState()
	{
		local drive
		local temp
		local temp_rc
		local state
		local drive_temp
		local drive_state
		local prev_state
		local transition

		## allowed temperature states transitions across all drives
		## NB: "TRANSITION" is treated as a 2-D array
		## prev | next --- NM WR SW SI UK
		##      |
		eval TRANSITION${NM}="( ${NM} ${WR} ${SW} ${SI} ${UK} )"
		eval TRANSITION${WR}="( ${WR} ${WR} ${SW} ${SI} ${WR} )"
		eval TRANSITION${SW}="( ${SW} ${SW} ${SW} ${SI} ${SW} )"
		eval TRANSITION${SI}="( ${SI} ${SI} ${SI} ${SI} ${SI} )"
		eval TRANSITION${UK}="( ${UK} ${WR} ${SW} ${SI} ${UK} )"

		## debug
		if [ ${DEBUG} -ne 0 ]; then
			logger -p ${FAC}.debug "$0: Getting current temperature state"
		fi

		## init
		drive_state=${NM}
		drive_temp=0

		## loop through the drive list & finalize temperature state using
		## TRANSITION table
		## NB: expansion left unquoted so a space-separated string also works
		for drive in ${drive_list[@]}
		do
			## save prev state & temp
			prev_state=${drive_state}

			## get the current drive temperature; keep its status now, before
			## any other command overwrites $?
			temp=$(getDriveTemperature "${drive}")
			temp_rc=$?

			## TEST ONLY
			if [ ${TEST} -ne 0 ]; then
				echo -n "Enter temperature: "
				read -r temp
				temp_rc=${WDST_OK}
				logger -p ${FAC}.debug "$0: INPUT temperature is ${temp}"
			fi

			## reset temperature to 0 if not defined
			if [ ${temp_rc} -ne ${WDST_OK} ] || [ -z "${temp}" ]; then
				temp=0
			fi

			## NB: ${temp} is integer value

			## determine the temperature state of this drive
			if [ ${temp} -eq 0 ]; then
				state=${UK}
			elif [ ${temp} -le ${TEMP_T1} ]; then
				state=${NM}
			elif [ ${temp} -gt ${TEMP_T1} ] && [ ${temp} -le ${TEMP_T2} ]; then
				state=${WR}
			elif [ ${temp} -gt ${TEMP_T2} ] && [ ${temp} -le ${TEMP_TF} ]; then
				state=${SW}
			elif [ ${temp} -gt ${TEMP_TF} ]; then
				state=${SI}
			fi

			## get the actual drive state using the TRANSITION table
			## (indirect expansion of "TRANSITION<prev>[<state>]")
			transition=TRANSITION${prev_state}[${state}]
			drive_state=${!transition}

			## update drive temperature if state changes or temperature increases
			if [ ${drive_temp} -eq 0 ] || [ ${drive_state} -ne ${prev_state} ] || [ ${drive_temp} -lt ${temp} ]; then
				drive_temp=${temp}
			fi

			## optimization: break the loop if current state is SI (shutdown immediate)
			if [ ${drive_state} -eq ${SI} ]; then break; fi
		done

		## debug
		if [ ${DEBUG} -ne 0 ]; then
			logger -p ${FAC}.debug "$0: Current Temperature - ${drive_temp}, Current State - ${drive_state}"
		fi

		## pass to global variables
		curr_state=${drive_state}
		curr_temp=${drive_temp}

		return ${WDST_OK}
	}


	## --- Action Handlers

	## Handler: temperature is above the maximum threshold.
	## Records state SI, raises the over-temperature flag, sets the thermal LED
	## to error, sends the immediate-shutdown alert, stops the shutdown timer,
	## notifies the system and switches to the emergency run level.
	## Returns ${WDST_OK}.
	act_emergency()
	{
		## NB: curr_state is always ${SI} in this action

		[ ${DEBUG} -ne 0 ] && logger -p ${FAC}.debug "$0: Action Shutdown, Temperature - ${curr_temp}"

		## persist the new state and mark the over-temperature condition
		echo "${SI}" > "${TEMP_STATE}"
		touch "${OVER_TEMP_FLAG}"

		## red led + alert to the user
		ledCtrl.sh LED_EV_THERMO LED_STAT_ERR
		sendAlert.sh "${thermalShutdownImmediate}"

		## a value of 0 means the shutdown timer is not running
		echo 0 > "${TEMP_SHUTDOWN_TIMER}"

		## tell the rest of the system that the thermal state changed
		incUpdateCount.pm ${THERMAL_STATE_NFY_ID}

		logger -p ${FAC}.emerg "$0: Current temperature(${curr_temp}) is over max-threshold, stopping all services"

		## stop services by entering the emergency run level
		changeRunLevel.pl --level=emergency

		return ${WDST_OK}
	}

	## Handler: temperature is back to normal after an emergency shutdown.
	## Clears the over-temperature flag, sets the thermal LED to OK, sends the
	## normal-temperature alert, records state NM, notifies the system and
	## switches back to the application run level.
	## Returns ${WDST_OK}.
	act_restart()
	{
		[ ${DEBUG} -ne 0 ] && logger -p ${FAC}.debug "$0: Action Restart, Temperature - ${curr_temp}"

		## the over-temperature condition is over
		rm -f "${OVER_TEMP_FLAG}"

		## led back to green (only if system health is good) + alert
		ledCtrl.sh LED_EV_THERMO LED_STAT_OK
		sendAlert.sh "${temperatureNormal}"

		## persist the new state
		echo "${NM}" > "${TEMP_STATE}"

		## tell the rest of the system that the thermal state changed
		incUpdateCount.pm ${THERMAL_STATE_NFY_ID}

		logger -p ${FAC}.notice "$0: Temperature of all drives(${curr_temp}) is now normal, restarting all services"

		## restart services by entering the application run level
		changeRunLevel.pl --level=app

		return ${WDST_OK}
	}

	## Handler: still too hot after an emergency shutdown.
	## Only logs that the shutdown is being maintained; no state is changed.
	## Returns ${WDST_OK}.
	act_cooldown()
	{
		[ ${DEBUG} -ne 0 ] && logger -p ${FAC}.debug "$0: Action Cooldown, Temperature - ${curr_temp}"

		logger -p ${FAC}.notice "$0: Current temperature(${curr_temp}) is still hot, maintaining shutdown"

		return ${WDST_OK}
	}

	## Handler: nothing to do for this state change.
	## Emits a debug log line when DEBUG is set; otherwise has no effect.
	## Returns ${WDST_OK}.
	act_noop()
	{
		[ ${DEBUG} -ne 0 ] && logger -p ${FAC}.debug "$0: Action noop, Temperature - ${curr_temp}"

		return ${WDST_OK}
	}

	## Handler: entering the shutdown-warning state.
	## Records state SW, sets the thermal LED to warning, sends the
	## pending-shutdown alert, stores the current UTC epoch time as the start
	## of the shutdown timer and notifies the system.
	## Returns ${WDST_OK}.
	act_start_timer()
	{
		## NB: curr_state is always ${SW} in this action

		[ ${DEBUG} -ne 0 ] && logger -p ${FAC}.debug "$0: Action Shutdown-Warning, Starting Timer, Temperature - ${curr_temp}"

		## persist the new state
		echo "${SW}" > "${TEMP_STATE}"

		## blinking yellow led + alert to the user
		ledCtrl.sh LED_EV_THERMO LED_STAT_WARN
		sendAlert.sh "${thermalShutdownPending}"

		## the timer file holds the moment the warning period began
		date -u +%s > "${TEMP_SHUTDOWN_TIMER}"

		## tell the rest of the system that the thermal state changed
		incUpdateCount.pm ${THERMAL_STATE_NFY_ID}

		logger -p ${FAC}.crit "$0: Over-Temperature condition(${curr_temp}), Shutdown-Warning, Timer started"

		return ${WDST_OK}
	}

	## Handler: still in the shutdown-warning state; check the shutdown timer.
	##
	## While less than ${MAX_SW_TIME} seconds have passed since the timer was
	## started nothing happens and the state stays SW. Once the timer has
	## expired the timer is stopped, curr_state is forced to SI and the
	## handler for last_state -> SI is executed.
	##
	## Returns:
	##  ${WDST_OK}     - timer still running
	##  ${WDST_FAILED} - timer file is missing, empty, non-numeric or 0
	##  otherwise      - status of the escalation handler
	act_check_timer()
	{
		## NB: curr_state is always ${SW} in this action

		local start_time
		local curr_time

		## debug
		if [ ${DEBUG} -ne 0 ]; then
			logger -p ${FAC}.debug "$0: Action Shutdown-Warning, Checking Timer, Temperature - ${curr_temp}"
		fi

		## get start timer, should never be 0
		## An unreadable or malformed timer must not reach the arithmetic
		## below, so it is rejected here as well.
		start_time=$(cat "${TEMP_SHUTDOWN_TIMER}" 2>/dev/null)
		if ! [[ "${start_time}" =~ ^[0-9]+$ ]] || [ "${start_time}" -eq 0 ]; then
			return ${WDST_FAILED}
		fi

		## get current time
		curr_time=$(date -u +%s)

		## no action if timer has not expired
		## NB: Temperature state shall remain SW
		if [ $(( ${curr_time} - ${start_time} )) -le ${MAX_SW_TIME} ]; then
			## debug
			if [ ${DEBUG} -ne 0 ]; then
				logger -p ${FAC}.debug "$0: Timer has not expired"
			fi

			return ${WDST_OK}
		fi

		## --- Timer has expired

		## log notice
		logger -p ${FAC}.notice "$0: Over-Temperature condition(${curr_temp}), Timer expired"

		## stop the timer
		echo 0 > "${TEMP_SHUTDOWN_TIMER}"

		## reset the thermal led event
		ledCtrl.sh LED_EV_THERMO LED_STAT_OK

		## modify state to SI
		curr_state=${SI}

		## initiate last_state->SI action

		## execute the action based on last & current state
		action_hdlr=ACTION${last_state}[${curr_state}]
		${!action_hdlr}

		return $?
	}

	## Handler: entering the high-temperature warning state.
	## Records state WR, sends the high-temperature alert and notifies the
	## system. Returns ${WDST_OK}.
	act_warning()
	{
		[ ${DEBUG} -ne 0 ] && logger -p ${FAC}.debug "$0: Action Normal -> Warning, Temperature - ${curr_temp}"

		## persist the new state
		echo "${WR}" > "${TEMP_STATE}"

		## alert the user
		sendAlert.sh "${systemTemperatureHigh}"

		## tell the rest of the system that the thermal state changed
		incUpdateCount.pm ${THERMAL_STATE_NFY_ID}

		logger -p ${FAC}.warning "$0: High-Temperature(${curr_temp}) condition detected"

		return ${WDST_OK}
	}

	## Handler: temperature is back to normal (no emergency shutdown involved).
	## Stops the shutdown timer, sets the thermal LED to OK, sends the
	## normal-temperature alert, records state NM and notifies the system.
	## Returns ${WDST_OK}.
	act_normal()
	{
		[ ${DEBUG} -ne 0 ] && logger -p ${FAC}.debug "$0: Action Shutdown-Warning -> Normal, Temperature - ${curr_temp}"

		## a value of 0 means the shutdown timer is not running
		echo 0 > "${TEMP_SHUTDOWN_TIMER}"

		## clear the thermal led event + alert the user
		ledCtrl.sh LED_EV_THERMO LED_STAT_OK
		sendAlert.sh "${temperatureNormal}"

		## NB: the state file is written only "after" the steps above, so the
		## saved state changes to normal last
		echo "${NM}" > "${TEMP_STATE}"

		## tell the rest of the system that the thermal state changed
		incUpdateCount.pm ${THERMAL_STATE_NFY_ID}

		logger -p ${FAC}.notice "$0: Temperature of all drives(${curr_temp}) is now normal"

		return ${WDST_OK}
	}

	## Handler: temperature dropped into the warning band from SW or SI.
	## While the temperature is still above (TEMP_T2 - HYSTERISIS) the previous
	## state is kept; otherwise the state becomes NM. The handler registered
	## for last_state -> resulting state is then run and its status returned.
	act_hysterisis()
	{
		local release_point=$(( ${TEMP_T2} - ${HYSTERISIS} ))

		[ ${DEBUG} -ne 0 ] && logger -p ${FAC}.debug "$0: Action Hysterisis, Temperature - ${curr_temp}"

		## assume normal, then fall back to the last state while still inside
		## the hysterisis band
		curr_state=${NM}
		if [ ${curr_temp} -gt ${release_point} ]; then
			curr_state=${last_state}
		fi

		## look up and run the handler for last state -> current state
		action_hdlr=ACTION${last_state}[${curr_state}]
		${!action_hdlr}

		return $?
	}

	## Handler: last state was unknown and the temperature is now normal.
	## Only records state NM; no alert or notification is sent because the
	## previous state was unknown. Returns ${WDST_OK}.
	act_UK_2_NM()
	{
		[ ${DEBUG} -ne 0 ] && logger -p ${FAC}.debug "$0: Action Unknown -> Normal, Temperature - ${curr_temp}"

		## persist the new state
		echo "${NM}" > "${TEMP_STATE}"

		return ${WDST_OK}
	}


	## --- Main script
	## Initializes the saved temperature state and the shutdown timer, then
	## loops forever: every ${MONITOR_TIMER} seconds the saved state is read
	## back, the current state is determined from the drives, and the handler
	## found in the ACTION table for (last state, current state) is executed.
	{
		## exit if system has no internal drives
		if [ "${DVC_DRIVE_COUNT}" == "0" ]; then
			exit 0
		fi

		## get list of drives
		drive_list=( $(internalDrives) )

		## exit if no drives are found
		if [ -z "${drive_list}" ]; then
			exit 0
		fi

		logger -p ${FAC}.info "$0: Starting Temperature Monitor"

		## init temp state to normal if not over temperature
		if [ ! -f "${TEMP_STATE}" ]; then
			echo "${NM}" > "${TEMP_STATE}"
			if [ -f "${OVER_TEMP_FLAG}" ]; then
				echo "${SI}" > "${TEMP_STATE}"
			fi
		fi

		## init shutdown timer if not in shutdown-warning state
		last_state=( $(cat "${TEMP_STATE}") )
		if [ ${last_state} -ne ${SW} ]; then
			echo 0 > "${TEMP_SHUTDOWN_TIMER}"
		fi

		## loop every MONITOR_TIMER seconds
		## NB: disable loop for if TEST=1
		## The sleep sits in the loop's update expression so that it also runs
		## after every "continue" below.
		for (( ; ; `sleep ${MONITOR_TIMER}`)); do
			## debug
			if [ ${DEBUG} -ne 0 ]; then
				logger -p ${FAC}.debug "$0: Starting Temperature Monitor Run"
			fi

			## get the last saved temperature state
			## NB: This state was saved in an earlier run of this script
			## Fall back to normal when the file is unreadable, empty,
			## non-numeric or out of range.
			last_state=( $(cat "${TEMP_STATE}") )
			if [ $? -ne ${WDST_OK} ] || [ -z "${last_state}" ] || ! [[ "${last_state}" =~ ^[0-9]+$ ]] || [ ${last_state} -ge ${N_STATES} ]; then
				last_state=${NM}

				## initialize the last state file
				echo "${NM}" > "${TEMP_STATE}"
			fi

			## skip run if in standby & last state is normal
			if [ -f "${STANDBY_STATE}" ] && [ ${last_state} -eq ${NM} ]; then
				## debug
				if [ ${DEBUG} -ne 0 ]; then
					logger -p ${FAC}.debug "$0: Skipping run as system is in standby"
				fi

				continue
			fi

			## determine the current temperature state
			## NB: This function shall set $curr_state & $curr_temp global vars
			determineCurrentState
			if [ $? -ne ${WDST_OK} ] || [ -z "${curr_state}" ]; then
				## debug
				if [ ${DEBUG} -ne 0 ]; then
					logger -p ${FAC}.debug "$0: Failed to determine current state; continuing"
				fi

				continue
			fi

			## execute the action based on last & current state
			## NB: Cannot execute ${array${last_state}[${curr_state}]} directly
			## It must be saved in to a variable x & executed using ${!x}
			action_hdlr=ACTION${last_state}[${curr_state}]

			## debug
			if [ ${DEBUG} -ne 0 ]; then
				logger -p ${FAC}.debug "$0: Last state=${last_state}; action=`echo ${!action_hdlr}`"
			fi

			${!action_hdlr}

			if [ $? -ne ${WDST_OK} ]; then
				logger -p ${FAC}.err "$0: Failed to execute the action `echo ${!action_hdlr}`; last state=${last_state}; curr temp=${curr_temp}"
				continue
			fi

			## debug
			if [ ${DEBUG} -ne 0 ]; then
				logger -p ${FAC}.debug "$0: Finished Temperature Monitor Run"
			fi

			## break if TEST
			if [ ${TEST} -ne 0 ]; then
				logger -p ${FAC}.debug "$0: Finished Test Run"
				break
			fi
		done ## for MONITOR_TIMER infinite loop

		logger -p ${FAC}.info "$0: Finished Temperature Monitor"

		exit 0
	}
	## --- End of Main script