Skip to content

Instantly share code, notes, and snippets.

@dmccombs
Forked from jinnko/collectd-smartmon.sh
Last active December 2, 2015 10:10
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save dmccombs/54a9ad2f650c9a5a64ac to your computer and use it in GitHub Desktop.
Save dmccombs/54a9ad2f650c9a5a64ac to your computer and use it in GitHub Desktop.
#!/bin/dash
###
# ABOUT : collectd monitoring script for smartmontools (using smartctl)
# AUTHOR : Samuel B. <samuel_._behan_(at)_dob_._sk> (c) 2012
# LICENSE: GNU GPL v3
# SOURCE: http://devel.dob.sk/collectd-scripts/
#
# This script monitors SMART pre-fail attributes of disk drives using smartmon tools.
# Generates output suitable for Exec plugin of collectd.
###
# Requirements:
# * smartmontools installed (and smartctl binary)
# * User & group for collector:collector
# groupadd collector
# useradd -d /var/lib/collector -g collector -l -m -s /bin/sh collector
# * sudo entry for the smartctl binary (i.e. for the system account created above):
# collector ALL=(root) NOPASSWD:/usr/sbin/smartctl *
# * Configuration for collectd.conf
# LoadPlugin exec
# <Plugin exec>
# Exec "collector:collector" "/usr/local/bin/collectd-smartmon" "sda" "sdb"
# </Plugin>
###
# Parameters:
# <disk>[:<driver>,<id> ] ...
###
# Typical usage:
# /etc/collect/smartmon.sh "sda:megaraid,4" "sdb"
#
# Will monitor disk 4 of the megaraid adapter mapped as /dev/sda, and additionally
# the normal disk /dev/sdb. See the smartctl manual for more info about adapter driver names.
###
# Typical output:
# PUTVAL <host>/smartmon-sda4/gauge-raw_read_error_rate interval=300 N:30320489
# PUTVAL <host>/smartmon-sda4/gauge-spin_up_time interval=300 N:0
# PUTVAL <host>/smartmon-sda4/gauge-reallocated_sector_count interval=300 N:472
# PUTVAL <host>/smartmon-sda4/gauge-end_to_end_error interval=300 N:0
# PUTVAL <host>/smartmon-sda4/gauge-reported_uncorrect interval=300 N:1140
# PUTVAL <host>/smartmon-sda4/gauge-command_timeout interval=300 N:85900918876
# PUTVAL <host>/smartmon-sda4/temperature-airflow interval=300 N:31
# PUTVAL <host>/smartmon-sda4/temperature-temperature interval=300 N:31
# PUTVAL <host>/smartmon-sda4/gauge-offline_uncorrectable interval=300 N:5
# PUTVAL <host>/smartmon-sdb/gauge-raw_read_error_rate interval=300 N:0
# PUTVAL <host>/smartmon-sdb/gauge-spin_up_time interval=300 N:4352
# ...
#
# Monitoring additional attributes:
# To monitor additional SMART attributes reported by smartctl, add another PUTVAL
# line that echoes the matching SMART_<Attribute-Name> variable, where
# <Attribute-Name> is the attribute name printed by `smartctl -A` (with any '-'
# replaced by '_'). It's nothing complicated ;)
#
# History:
# 2012-04-17 v0.1.0 - public release
# 2012-09-03 v0.1.1 - fixed dash replacement (thx to R.Buehl)
# 2013-08-28 v0.2.0 - Fix sudo command.
# Use dash as it's lower overhead.
# Improve docs.
# Add a few metrics to output.
# Re-order & standardise output lines for easier review & updating.
# 2014-12-09 v0.2.1 - Fix basename calls.
###
# Validate the command line before entering the collection loop.
# Every argument has the form <disk>[:<driver>,<id>]; only the <disk> part
# (everything before the last ':') is checked here, and it must name an
# existing node under /dev. Any problem is reported on stderr and the script
# exits with status 1.

# Script name for messages; parameter expansion avoids forking basename and
# the unquoted $0 that would break on a path containing spaces.
prog=${0##*/}

# "$*" (rather than $#) so that a call with only empty-string arguments is
# treated the same as a call with no arguments at all.
if [ -z "$*" ]; then
  echo "Usage: $prog <disk> <disk>..." >&2
  exit 1
fi

for disk in "$@"; do
  disk=${disk%:*}
  # An empty disk name must be rejected explicitly: "/dev/" itself exists,
  # so the -e test alone would accept it.
  if [ -z "$disk" ] || ! [ -e "/dev/$disk" ]; then
    echo "$prog: disk /dev/$disk not found !" >&2
    exit 1
  fi
done
# Host component of every PUTVAL identifier. The fully-qualified name from
# `hostname -f` is used whenever it is available. If that prints nothing
# (for example because the name cannot be resolved), fall back to the
# COLLECTD_HOSTNAME environment variable, then to the short hostname, and
# finally to "localhost", so the identifier never starts with an empty host.
HOST=$(hostname -f)
[ -n "$HOST" ] || HOST=${COLLECTD_HOSTNAME:-$(hostname 2>/dev/null)}
[ -n "$HOST" ] || HOST=localhost

# Seconds to sleep between polling rounds; the same number is sent as the
# interval= option of each PUTVAL line.
INTERVAL=300
# putval TYPE_INSTANCE VALUE
#   Print one collectd PUTVAL line for the disk currently being reported.
#   Nothing is printed when VALUE is empty, i.e. when no SMART_* variable was
#   set for that attribute on this disk.
#   Reads HOST, INTERVAL and plugin_instance from the calling scope.
putval() {
  if [ -n "$2" ]; then
    printf 'PUTVAL %s/smartmon-%s/%s interval=%s N:%s\n' \
      "$HOST" "$plugin_instance" "$1" "$INTERVAL" "$2"
  fi
}

# report_disk SPEC
#   SPEC is one command-line argument of the form <disk>[:<driver>[,<id>]].
#   Runs `smartctl -A` on /dev/<disk> (adding `-d <driver>` when a driver is
#   given), turns every attribute row into a SMART_<Attribute_Name> shell
#   variable holding the raw value, and prints PUTVAL lines for the
#   attributes listed below.
#
#   The function body is a subshell "( ... )" on purpose: the SMART_*
#   variables created by eval disappear when it returns, so a disk that lacks
#   an attribute can never report the value left behind by the previous disk
#   or the previous polling round.
report_disk() (
  spec=$1
  dsk=${spec%:*}
  driver=${spec#*:}
  id=
  if [ "$spec" = "$driver" ]; then
    # No ':' in the argument, so no driver was requested.
    driver=
  else
    # Only a driver spec that really contains ",<id>" contributes an id
    # suffix to the plugin instance (e.g. "megaraid,4" -> "4").
    case $driver in
      *,*) id=${driver#*,} ;;
    esac
  fi
  plugin_instance=$dsk$id

  # awk keeps only attribute rows (third column is the 0x.... flag), accepts
  # only attribute names made of [A-Za-z0-9_-] and purely numeric raw values,
  # so the text handed to eval can only be lines of SMART_<name>=<digits>.
  eval "$(
    if [ -n "$driver" ]; then
      sudo /usr/sbin/smartctl -d "$driver" -A "/dev/$dsk"
    else
      sudo /usr/sbin/smartctl -A "/dev/$dsk"
    fi | awk '$3 ~ /^0x/ && $2 ~ /^[a-zA-Z0-9_-]+$/ && $10 ~ /^[0-9]+$/ {
      gsub(/-/, "_", $2)
      print "SMART_" $2 "=" $10
    }' 2>/dev/null
  )"

  putval gauge-command_timeout "${SMART_Command_Timeout-}"
  putval gauge-current_pending_sector "${SMART_Current_Pending_Sector-}"
  putval gauge-end_to_end_error "${SMART_End_to_End_Error-}"
  putval gauge-hardware_ecc_recovered "${SMART_Hardware_ECC_Recovered-}"
  putval gauge-offline_uncorrectable "${SMART_Offline_Uncorrectable-}"
  putval gauge-raw_read_error_rate "${SMART_Raw_Read_Error_Rate-}"
  putval gauge-reallocated_sector_count "${SMART_Reallocated_Sector_Ct-}"
  putval gauge-reported_uncorrect "${SMART_Reported_Uncorrect-}"
  putval gauge-spin_up_time "${SMART_Spin_Up_Time-}"
  putval temperature-airflow "${SMART_Airflow_Temperature_Cel-}"
  putval temperature-temperature "${SMART_Temperature_Celsius-}"
  # The type was previously misspelled "guage".
  putval gauge-media_wearout_indicator "${SMART_Media_Wearout_Indicator-}"
)

# Poll every disk named on the command line, then sleep for INTERVAL seconds,
# forever.
while true; do
  for disk in "$@"; do
    report_disk "$disk"
  done
  # A failed or interrupted sleep must not end the loop.
  sleep "$INTERVAL" || true
done
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment