Last active
June 23, 2022 16:52
-
-
Save Grissess/1d8d2cd915b79c018ac7ac5d07a64d3b to your computer and use it in GitHub Desktop.
A little dialog wrapper for smartctl
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
# check_disk: a little dialog wrapper for smartctl.
#
# Environment overrides: SMARTCTL, DIALOG, JQ and BLOCKDEV name the external
# tools to run; each defaults to the plain command name.

smartctl="${SMARTCTL:-smartctl}"
dialog="${DIALOG:-dialog}"
jq="${JQ:-jq}"
# Debian workaround: look in the sbin directories too when locating blockdev.
# The PATH change is confined to the command substitution's subshell.
blockdev="${BLOCKDEV:-$(PATH="$PATH:/sbin:/usr/sbin"; command -v blockdev)}"

# Bits of the smartctl exit status tested by this script.
sce_parse_bit=1
sce_open_bit=2
sce_cmd_bit=4
sce_fail_bit=8
sce_badattr_bit=16
sce_histfail_bit=32
sce_errlog_bit=64
sce_stfail_bit=128

poll_interval=10 # seconds between self-test status polls

# Scratch file handed to dialog --textbox. Created with mktemp so the name is
# not predictable, and removed again when the script exits.
temp_file="$(mktemp "${TMPDIR:-/tmp}/smartout.XXXXXX")" || {
  printf 'check_disk: cannot create a temporary file\n' >&2
  exit 1
}
trap 'rm -f -- "$temp_file"' EXIT

# Exit status compared against after dialogs that were given --extra-button.
dle_extra=3
# Run $dialog with the common check_disk options followed by the caller's
# arguments.
# Arguments: any dialog options / widget specification.
# Outputs:   stdout and stderr of dialog are exchanged, so whatever dialog
#            writes to stderr ends up on this function's stdout (callers
#            capture the selection with $(ui ...)) and vice versa.
# Returns:   dialog's exit status.
ui() {
  # $dialog is deliberately unquoted so DIALOG may carry extra words.
  # fd 3 is a temporary used only to swap fd 1 and fd 2; it is closed again.
  $dialog --backtitle check_disk --colors "$@" 3>&1 1>&2 2>&3 3>&-
}
# Shared implementation of the coloured ui wrappers below: run ui with a
# one-line DIALOGRC that sets screen_color to the given colour.
# Arguments: $1 - colour name; remaining arguments are passed to ui.
# Returns:   the status of ui.
_tinted_ui() {
  local tint="$1"
  shift
  # DIALOGRC points at a process substitution, so no rc file is written.
  DIALOGRC=<(printf '%s\n' "screen_color=($tint,$tint,ON)") ui "$@"
}

# ui on a red screen (errors).
errui() {
  _tinted_ui RED "$@"
}

# ui on a yellow screen (warnings).
warnui() {
  _tinted_ui YELLOW "$@"
}

# ui on a magenta screen (dangerous operations).
dangui() {
  _tinted_ui MAGENTA "$@"
}

# ui on a green screen (tests and normal menus).
testui() {
  _tinted_ui GREEN "$@"
}

# ui on a cyan screen (progress displays).
progrui() {
  _tinted_ui CYAN "$@"
}
# Show the same dialog several times in a row, alternating the screen colour
# between red (odd rounds) and magenta (even rounds).
# Arguments: $1 - number of rounds
#            $2 - delay passed to sleep after each round
#            remaining arguments are passed to ui unchanged.
flash() {
  local times="$1"
  local wait="$2"
  shift 2

  # The counter is local so the caller's `i` is not clobbered.
  local i color
  for (( i = 1; i <= times; i++ )); do
    color="RED"
    if (( i % 2 == 0 )); then color="MAGENTA"; fi
    DIALOGRC=<(printf '%s\n' "screen_color=($color,$color,ON)") ui "$@"
    sleep "$wait"
  done
}
exec_test() { | |
local dev="$1" | |
local test="$2" | |
local esttime="$3" | |
while true; do | |
local out | |
out="$($smartctl $dev -t "$test" 2>&1)" | |
local ret="$?" | |
if (( ret & sce_cmd_bit )); then | |
local choice | |
if ! choice=$(warnui --title "$dev, $test test: Possibly Ongoing Test" --no-cancel --menu \ | |
"Failed to submit the command to begin the test (code $ret).\n\n"\ | |
"Ouput: $out\n\n"\ | |
"This could be due to an ongoing test. Would you like to:" \ | |
0 0 10 \ | |
abort "Abort the current test and try again?" \ | |
status "Monitor the status of the ongoing test?" \ | |
cancel "Cancel this test and return to the menu?" \ | |
); then | |
choice="cancel" | |
fi | |
case "$choice" in | |
abort) | |
if ! $smartctl $dev -X; then | |
warnui --title "$dev, $test test: Abort Failed" --msgbox "Abort failed! (The test may not be cancellable.)" 0 0 | |
fi | |
continue | |
;; | |
status) | |
break # from the while | |
;; | |
cancel) | |
return 3 | |
;; | |
esac | |
elif (( ret )); then | |
errui --title "$dev, $test test: Failed to Start" --msgbox "Failed to start test of $dev (exit code $ret)" 0 0 | |
return 2 | |
fi | |
break | |
done | |
local start="$(date +"%s")" | |
while true; do | |
local stat="$($jq -c '.ata_smart_data.self_test.status' <<< "$($smartctl $dev -j -c)")" | |
case "$($jq '.passed' <<< "$stat")" in | |
true) | |
testui --title "$dev, $test test: Passed" --timeout 5 --msgbox "$test test of $dev passed!" 0 0 | |
return 0 | |
;; | |
false) | |
errui --title "$dev, $test test: Failed" --timeout 5 --msgbox "$test test of $dev failed!" 0 0 | |
return 1; | |
;; | |
null) ;; | |
*) | |
echo "unexpected stat: $stat" | |
exit 1 | |
;; | |
esac | |
local rempct="$($jq '.remaining_percent' <<< "$stat")" | |
local string="$($jq '.string' <<< "$stat")" | |
local now="$(date +"%s")" | |
string="$string\n$(( (now - start) / 60 )) min $(( (now - start) % 60 )) sec elapsed / $esttime min estimated" | |
if [ "$rempct" = "null" ]; then | |
warnui --title "$dev, $test test: Progress Unknown" --infobox "$string" 10 -1 | |
else | |
progrui --title "$dev, $test test: Ongoing" --gauge "$string" 10 -1 $(( 100 - rempct )) < /dev/null | |
fi | |
sleep "$poll_interval" | |
done | |
} | |
# Display a block of text in a scrollable dialog textbox.
# Arguments: $1 - the text to show
#            remaining arguments are extra dialog options (e.g. --title ...).
# Returns:   the status of the ui call (1 if no scratch file could be made).
show_info() {
  local info="$1"
  shift 1

  # --textbox needs a file; use a private one rather than a shared fixed path.
  local scratch rc
  scratch="$(mktemp "${TMPDIR:-/tmp}/smartinfo.XXXXXX")" || return 1
  printf '%s\n' "$info" > "$scratch"
  ui --hline "HOME | END | PGUP | PGDN | Arrow keys | / to search" "$@" --exit-label Continue --textbox "$scratch" -1 -1
  rc="$?"
  rm -f -- "$scratch"
  return "$rc"
}
# Interpret a smartctl exit status and tell the user what it means.
# Globals:   sce_*_bit, dle_extra (read). The dialog titles use `dev`, which
#            is not a parameter: it is taken from the caller's scope.
# Arguments: $1 - smartctl exit status
#            $2 - smartctl output (shown in the Details view)
#            $3 - non-empty: ask whether to abort tests (yes/no dialog);
#                 empty: just report (message box)
#            $4 - non-empty: the device was opened successfully earlier
# Returns:   0 when there is nothing to report or the user chose to go on,
#            1 when the device could not be opened or the dialog ended with a
#            non-zero status other than the Details button.
classify_smartctl_error() {
  local stat="$1"
  local out="$2"
  local continue="$3"
  local before="$4"
  # Loop variables are local so callers' `i` / `msg` are left alone.
  local msg i

  if (( stat & (sce_parse_bit | sce_open_bit) )); then
    local oncemsg=""
    if [ -n "$before" ]; then
      oncemsg=$'\n\n'"This device worked before! Check your hardware (e.g. dock),"$'\n'"and perhaps reset or reconnect the device."
    fi
    msg="$(printf '\\Z1smartctl could not open device (exit code %d)%s\\Zn\n\n%s' "$stat" "$oncemsg" "$out")"
    errui --title "$dev: Open Failed" --msgbox "$msg" 25 75
    return 1
  fi

  # One message and one severity (1 = caution, 2 = warning) per set bit.
  local -a failmsg=() faillvl=()
  if (( stat & sce_fail_bit )); then faillvl+=( 2 ); failmsg+=( "- Bit 3: Device reports it is failing NOW" ); fi
  if (( stat & sce_badattr_bit )); then faillvl+=( 2 ); failmsg+=( "- Bit 4: Device has an attribute below threshold (indicating imminent failure)" ); fi
  if (( stat & sce_histfail_bit )); then faillvl+=( 1 ); failmsg+=( "- Bit 5: Device has detected an attribute previously below threshold (indicating failure is likely)" ); fi
  if (( stat & sce_errlog_bit )); then faillvl+=( 2 ); failmsg+=( "- Bit 6: The error log is not empty" ); fi
  if (( stat & sce_stfail_bit )); then faillvl+=( 2 ); failmsg+=( "- Bit 7: Device failed to self test" ); fi

  local failmax=0
  for i in "${faillvl[@]}"; do
    if (( i > failmax )); then failmax=$i; fi
  done

  local failcmd="" failcolor=0 failtitle="" failbutton=""
  case "$failmax" in
    0) ;;
    1) failcmd="warnui"; failcolor=3; failtitle="Caution"; failbutton="ok" ;;
    *) failcmd="errui"; failcolor=1; failtitle="Warning"; failbutton="cancel" ;;
  esac

  if [ -n "$failcmd" ]; then
    local msgs="" answer
    for msg in "${failmsg[@]}"; do msgs="${msgs}$msg\n\n"; done
    while true; do
      # Bug with labels when using extra with yes/no?
      # The buttons are relabelled: the ok button reads "No" (status 0, keep
      # going) and the cancel button reads "Yes" (non-zero, abort).
      if [ -n "$continue" ]; then
        $failcmd --title "$dev: $failtitle" --extra-button --extra-label "Details" \
          --cancel-label "Yes" --ok-label "No" --default-button "$failbutton" \
          --yesno "\Z${failcolor}This device reports failures:\n\n${msgs}\ZnDo you want to abort tests?" 25 75
        answer="$?"
      else
        $failcmd --title "$dev: $failtitle" --extra-button --extra-label "Details" --default-button extra \
          --msgbox "This device reports failures:\n\n${msgs}" 25 75
        answer="$?"
      fi
      if (( answer == dle_extra )); then
        # Details: show the raw smartctl output, then ask again.
        show_info "$out"
        continue
      elif (( answer )); then
        return 1
      fi
      break
    done
  fi
  return 0
}
# Interactive self-test loop for one device: check SMART health, let the user
# pick tests, run them in order, then offer the selection again.
# Globals:   smartctl, jq, dle_extra (read)
# Arguments: $1 - device node
# Returns:   1 when classify_smartctl_error reports the device should not be
#            tested, 0 when the user leaves the checklist.
run_tests() {
  local dev="$1"
  local once=""
  local out stat # Predeclared, otherwise local eats the $? from the substitution
  local -A testtimes
  local -a tests
  local timedata testkind tname selection kind res aborted

  while true; do
    out="$($smartctl "$dev" -a 2>&1)"
    stat="$?"
    if ! classify_smartctl_error "$stat" "$out" "1" "$once"; then
      return 1
    fi
    once="1"

    # The device's own duration estimates, keyed by the name `smartctl -t`
    # takes ("extended" in the JSON is the "long" test).
    testtimes=()
    timedata="$($jq -c '.ata_smart_data.self_test.polling_minutes' <<< "$($smartctl "$dev" -j -c)")"
    for testkind in short extended conveyance; do
      tname="$testkind"
      if [ "$tname" = "extended" ]; then tname="long"; fi
      testtimes["$tname"]="$($jq -r ".$testkind" <<< "$timedata")"
    done

    # Captured in a scalar first so $? is unambiguously the dialog's status.
    selection="$(testui --title "$dev" --extra-button --extra-label "Details" --cancel-label "Back" \
      --checklist "Select tests to perform. (Times given are estimates by the target device only.)" 0 0 0 \
      short "Short (${testtimes[short]} min)" on \
      long "Long / Extended (${testtimes[long]} min)" off \
      conveyance "Conveyance (${testtimes[conveyance]} min)" off)"
    stat="$?"
    if (( stat == dle_extra )); then
      show_info "$out" --title "$dev"
      continue
    elif (( stat )); then
      return 0
    fi
    read -r -a tests <<< "$selection"

    aborted=""
    for kind in "${tests[@]}"; do
      exec_test "$dev" "$kind" "${testtimes[$kind]}"
      res="$?"
      if (( res )); then
        case "$res" in
          1)
            errui --title "$dev Self-Test Failed" --msgbox "$dev failed a self-test; no further tests will be attempted." 0 0
            ;;
          *)
            warnui --title "$dev Self-Test Cancelled" --msgbox "A test for $dev was cancelled or failed for an internal error (code $res)." 0 0
            ;;
        esac
        aborted="1"
        break
      fi
    done
    if [ -z "$aborted" ]; then
      testui --title "$dev: All Tests Run" --msgbox "All scheduled tests for $dev were successfully run." 0 0
    fi
  done
}
# Warning text shown before zeroing a disk. \Z... sequences are dialog colour
# and attribute escapes, \n is a line break inside dialog; the trailing
# backslashes only continue the shell string and add nothing to it.
zero_warning="\Zb\ZuEVEN IF CANCELED,\Zn zeroing a disk is an essentially \Zb\ZuIRRECOVERABLE OPERATION.\Zn\n\n\
The partition table and filesystem administrative structure, usually at the beginning of the disk, will be the first to be destroyed. Recovering even an aborted zero usually requires forensic methods.\n\n\
\Z1Do \Zb\ZuNOT\Zn\Z1 proceed unless you are sure you want to erase \Zb\ZuALL\Zn\Z1 data on this device. This cannot be undone."
# Warn about zeroing a disk, ask for confirmation twice, and only then hand
# the device to really_zero_disk.
# Globals:   zero_warning (read)
# Arguments: $1 - device node
try_zero_disk() {
  # Take the device from the argument instead of relying on the caller
  # happening to have a variable called `dev` in scope.
  local dev="$1"

  flash 5 0.2 --title "ZERO $dev" --infobox "$zero_warning" 25 75
  if dangui --title "ZERO $dev" --yes-label "Erase Disk" --no-label "Cancel" --default-button no \
    --yesno "$zero_warning" 25 75; then
    if dangui --title "ZERO $dev: Confirm" --yes-label "Yes, Erase Disk" --no-label "Cancel" --default-button no \
      --yesno "$zero_warning\n\n\Z5\Zb\ZuAre you really sure?" 25 75; then
      really_zero_disk "$dev"
    fi
  fi
}
# Print the second field of every /proc/meminfo line matching a pattern.
# Arguments: $1 - grep pattern (e.g. MemTotal)
# Outputs:   one value per matching line on stdout.
mem_info() {
  grep -- "$1" /proc/meminfo | while read -r _ val _; do
    printf '%s\n' "$val"
  done
}
# Overwrite a whole device with zeros using dd, showing progress dialogs, then
# sync. No confirmation is asked here; callers must have done that already.
# Globals:   blockdev (read)
# Arguments: $1 - device node to erase
really_zero_disk() {
  local dev="$1"
  local total totalhr=""
  total="$($blockdev --getsize64 "$dev")"
  if [ -n "$total" ]; then totalhr="$(numfmt --to=iec --format='%.1f' <<< "$total")"; fi

  # dd's stderr is merged into the coprocess pipe read below.
  coproc dd if=/dev/zero of="$dev" bs=4M 2>&1
  # Copy these now: bash unsets the COPROC variables once dd has exited.
  local dd_pid="$COPROC_PID" dd_out="${COPROC[0]}"

  # Signal dd every 0.25 s (SIGUSR1, to make it report) until it is gone.
  # Sleeping first gives dd time to start before the first signal arrives.
  local progmon
  while sleep 0.25 && kill -USR1 "$dd_pid" 2> /dev/null; do :; done &
  progmon="$!"

  local bytes rest hr pc
  while read -r bytes rest; do
    if [ -z "$bytes" ]; then break; fi
    # Only lines that start with a plain number are handled. This skips the
    # "N+M records in/out" lines and any error text dd prints.
    if ! [[ $bytes =~ ^[0-9]+$ ]]; then continue; fi
    hr="$(numfmt --to=iec --format='%.1f' <<< "$bytes")"
    if ! [[ $total =~ ^[0-9]+$ ]] || (( total == 0 )); then
      dangui --title "$dev: Zeroing" --infobox "$bytes bytes ($hr) written, progress unknown" 10 -1
    else
      pc=$(( 100 * bytes / total ))
      dangui --title "$dev: Zeroing" --gauge "$bytes/$total bytes ($hr/$totalhr) written" 10 -1 "$pc" < /dev/null
    fi
  done <&"$dd_out"
  kill "$progmon" 2> /dev/null
  wait "$progmon" 2> /dev/null

  local -a spinner=( "." ".." "..." )
  local tick=0 suffix memtot membuf
  memtot="$(mem_info MemTotal)"
  # COPROC_PID goes away when dd does, so this loop only runs while dd is
  # still alive after its output loop ended.
  while [ -n "${COPROC_PID:-}" ] && kill -0 "$COPROC_PID" 2> /dev/null; do
    suffix="${spinner[tick % 3]}"
    tick=$(( tick + 1 ))
    membuf="$(mem_info Buffers)"
    pc=0
    if [[ $memtot =~ ^[0-9]+$ && $membuf =~ ^[0-9]+$ ]] && (( memtot > 0 )); then
      pc=$(( 100 - 100 * membuf / memtot ))
    fi
    dangui --title "$dev: Waiting for Buffers" --gauge "Waiting for buffers to clear (this may take a while)$suffix\n\n$membuf/$memtot buffers/total memory" 10 -1 "$pc" < /dev/null
    sleep 1
  done
  wait "$dd_pid" 2> /dev/null

  # Animate a "Syncing..." box in the background while sync runs.
  tick=0
  while true; do
    suffix="${spinner[tick % 3]}"
    tick=$(( tick + 1 ))
    dangui --title "$dev: Syncing" --infobox "Syncing (this may take a moment)$suffix" 10 -1
    sleep 1
  done &
  progmon="$!"
  sync
  kill "$progmon" 2> /dev/null
  wait "$progmon" 2> /dev/null

  dangui --title "$dev: Zeroed" --msgbox "$dev has been zeroed." 0 0
}
# Whole seconds of uptime at the previous check_dmesg call; empty before the
# first call.
last_dmesg_check=""

# List the devices named in "Buffer I/O error on dev ..." kernel messages.
# The first call scans the whole dmesg output; later calls only look at lines
# whose timestamp is at or after the previous call's uptime.
# Globals:   last_dmesg_check (read and written)
# Outputs:   sorted, de-duplicated device names, one per line.
check_dmesg() {
  local -a buffer=() problems=()
  local line uptime_now=""
  # Timestamp prefix "[   12.345678]": capture the whole seconds.
  local stamp_re='^\[ *([0-9]+)[.]'
  local io_re='Buffer I/O error on dev ([^,]*),'

  while IFS= read -r line; do
    if [ -n "$last_dmesg_check" ]; then
      # Lines without a readable timestamp cannot be dated and are skipped.
      [[ $line =~ $stamp_re ]] || continue
      # 10# keeps a zero-padded number from being read as octal.
      (( 10#${BASH_REMATCH[1]} >= last_dmesg_check )) || continue
    fi
    buffer+=( "$line" )
  done < <(dmesg)

  # /proc/uptime is read as "seconds.fraction idle"; keep the whole seconds.
  { read -r uptime_now _ < /proc/uptime; } 2> /dev/null
  last_dmesg_check="${uptime_now%%.*}"

  for line in "${buffer[@]}"; do
    if [[ $line =~ $io_re ]]; then
      problems+=( "${BASH_REMATCH[1]}" )
    fi
  done
  if (( ${#problems[@]} )); then
    printf '%s\n' "${problems[@]}" | sort -u
  fi
}
# Print the partition table of a device by feeding the "p" command to fdisk.
# Arguments: $1 - device node
# Outputs:   fdisk's stdout and stderr, merged, on stdout.
# Returns:   fdisk's exit status.
partitions() {
  local dev="$1"
  printf 'p\n' | fdisk "$dev" 2>&1
}
# Per-device action menu: test, SMART status, partition table, drive info,
# zero, erase. Loops until the user goes back.
# Globals:   smartctl, sce_cmd_bit, dle_extra (read)
# Arguments: $1 - device node
device_menu() {
  local dev="$1"
  local choice stat out

  # A failing partition-table read is taken as a sign of I/O trouble. The
  # listing itself is not wanted here, so it is discarded.
  if ! partitions "$dev" > /dev/null; then
    while true; do
      warnui --extra-button --extra-label "Dmesg" \
        --ok-label "Yes" --cancel-label "No" --default-button no \
        --title "$dev: Input/Output Errors" \
        --yesno "The device $dev appears to be experiencing I/O errors, which may make any further tests unreliable. More information may be available in the kernel debug message ('dmesg') logs.\n\nDo you want to continue?" \
        25 75
      stat="$?"
      if (( stat == dle_extra )); then
        show_info "$(dmesg --time-format iso)" --title "Dmesg"
        continue
      elif (( stat )); then
        return
      fi
      break
    done
  fi

  while true; do
    if ! choice="$(testui --title "$dev" --cancel-label "Back" \
      --menu "Choose an action for $dev:" 0 0 10 \
      "test" 'Test the disk' \
      "stat" 'Read the SMART status and details' \
      "part" 'Show the partition table' \
      "info" 'Show detailed drive information' \
      "zero" '\Z1\ZbZero the disk\Zn' \
      "erase" '\Z1\ZbSecure erase or sanitize the disk\Zn')"; then
      return
    fi
    case "$choice" in
      test)
        run_tests "$dev"
        ;;
      stat)
        out="$($smartctl "$dev" -a)"
        stat="$?"
        # classify_smartctl_error has no message for the command-failed bit.
        # A status made of that bit alone is handled like success so the
        # user still gets to see the output instead of nothing at all.
        if (( stat & ~sce_cmd_bit )); then
          classify_smartctl_error "$stat" "$out"
        else
          show_info "$out"
        fi
        ;;
      part)
        show_info "$(partitions "$dev")" --title "$dev: Partitions"
        ;;
      info)
        show_info "$(hdparm -I "$dev")" --title "$dev: Info"
        ;;
      zero)
        try_zero_disk "$dev"
        ;;
      erase)
        errui --title "TODO" --timeout 5 --msgbox "Not yet implemented" 0 0
        ;;
    esac
  done
}
# Build a map from device node to a dialog-formatted description (size or
# NOMEDIA, model, serial number, mounted / not-a-disk flags) for every entry
# lsblk reports.
# Globals:   jq, blockdev, smartctl (read)
# Outputs:   a `declare -A devices=...` statement on stdout (declare -p),
#            meant to be eval'd by the caller to recreate the map.
enum_devices() {
  local -A devices=()
  local line name devnode sz extra info model
  local -a words

  while IFS= read -r line; do
    # Nothing to describe (e.g. lsblk produced no devices at all).
    [ -n "$line" ] || continue
    name="$($jq -r .name <<< "$line")"
    devnode="/dev/$name"

    if ! sz="$($blockdev --getsize64 "$devnode")"; then
      extra='\Z1NOMEDIA\Zn'
    else
      extra='\Z5'"$(numfmt --to=iec --format='%.1f' <<< "$sz")"'\Zn'
    fi

    # Normalise whitespace in the model string: split into words, re-join
    # with single spaces. The file may not exist; then the model is empty.
    model="$(cat "/sys/class/block/$name/device/model" 2> /dev/null)"
    words=()
    read -r -a words <<< "$model"
    extra="$extra "'\Z4'"${words[*]}"'\Zn'

    if info="$($smartctl -i "$devnode")"; then
      extra="$extra "'\Z6'"SN:$(grep Serial <<< "$info" | tr -d ' ' | cut -d: -f2)"'\Zn'
    else
      extra="$extra "'\Z6'"SN:"'\Z1\Zb'"???"'\Zn'
    fi
    if [ "$($jq '[.children?[]?.mountpoint? != null] | any' <<< "$line")" = "true" ]; then
      extra="$extra "'\Z3\Zb(mounted)\Zn'
    fi
    if ! [ "$($jq -r '.type' <<< "$line")" = "disk" ]; then
      extra="$extra "'\Z1\Zb(not a disk)\Zn'
    fi
    devices["$devnode"]="$extra"
  done < <(lsblk -J | $jq -c '.blockdevices[]')

  declare -p devices
}
# Top-level loop: scan for devices, let the user pick one, open its menu.
# Globals:   CD_NO_ROOT (read) - when non-empty the root check is skipped.
# Exits with status 1 when not running as root and CD_NO_ROOT is empty.
main() {
  if [ -z "$CD_NO_ROOT" ] && (( EUID )); then
    # Assembled in one variable so the message is a single dialog argument.
    local notroot="This script is not running as root!\n\n"
    notroot+="Try using 'sudo' or equivalent.\n\n"
    notroot+="If you know what you're doing and can assure this script has sufficient privileges, you may set CD_NO_ROOT to a non-empty value."
    errui --title "check_disk: Not Root" --msgbox "$notroot" 0 0
    exit 1
  fi

  local -a args sorted
  local dev device
  while true; do
    ui --title "check_disk" --infobox "Scanning devices, please wait..." 10 75
    # enum_devices prints a `declare -A devices=...` statement; evaluating it
    # here recreates the map as a variable local to main.
    eval "$(enum_devices)"

    # Menu entries (tag, description) in sorted device order.
    mapfile -t sorted < <(for dev in "${!devices[@]}"; do printf '%s\n' "$dev"; done | sort)
    args=()
    for dev in "${sorted[@]}"; do
      args+=( "$dev" "${devices[$dev]}" )
    done

    if ! device="$(ui --title "check_disk" --no-cancel --menu \
      "Choose the device to analyze." \
      0 0 10 \
      "${args[@]}" \
      "refresh" '\Z2Refresh this list' \
      "exit" '\Z2Exit')"; then
      break
    fi
    case "$device" in
      refresh) continue ;;
      exit) break ;;
    esac
    device_menu "$device"
  done
}
# Entry point: hand the command-line arguments to main.
main "$@"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment