Skip to content

Instantly share code, notes, and snippets.

@neggles
Last active July 19, 2023 06:53
Show Gist options
  • Save neggles/0e763ee6e0decc0261d193416cc28dca to your computer and use it in GitHub Desktop.
Save neggles/0e763ee6e0decc0261d193416cc28dca to your computer and use it in GitHub Desktop.
HDD Testing Helper Script
#!/usr/bin/env bash
# SPDX-License-Identifier: GPL-2.0-only OR MIT
# bash profile script to make many-drive stress testing easier
# Copyright (c) 2018-2022 Andrew Powers Holmes
# Put this in your /etc/profile.d, or save it somewhere and run "source /path/to/file" to import the functions
# Auto-export everything defined from here on (variables and functions alike),
# so there is no need to export each helper by hand.
set -a
# Tunables shared by the helper functions:
#   dev_type  - device type handed to "smartctl -d"; "sat" suits UASP and
#               locally attached SATA drives.
#   block_sz  - badblocks block size in bytes; drives larger than 2TB need
#               4K or more.
#   block_cnt - number of blocks per I/O request (badblocks default is 64).
#               Larger values cost more RAM but may run faster.
#               How to work out the maximum value:
#               https://www.pantz.org/software/badblocks/badblocksusage.html
#               Remember to account for the number of drives you're testing!
dev_type=sat
block_sz=4096
block_cnt=64
# list-test-devs - print every SCSI/SATA disk (/dev/sdX) except the boot disk, one per line.
# Assumes no drives are present other than a boot drive and the drives to test!
# The boot disk is any /dev/sdX device holding a partition mounted at /boot or
# below it (e.g. /boot/efi).
# Outputs:   device paths on stdout; diagnostics on stderr
# Returns:   1 (and lists nothing) when no /dev/sdX boot disk can be identified,
#            so the system drive is never offered up for testing by accident
list-test-devs () {
  local boot_devs
  # Reduce each /boot* partition to its parent disk. sort -u collapses the
  # duplicates produced when /boot and /boot/efi live on the same disk.
  boot_devs=$(
    lsblk -lpno KNAME,MOUNTPOINT \
      | awk '($2 == "/boot" || index($2, "/boot/") == 1) && match($1, /^\/dev\/sd[a-z]+/) { print substr($1, RSTART, RLENGTH) }' \
      | sort -u
  )
  if [[ -z "${boot_devs}" ]]; then
    printf '%s\n' "list-test-devs: no /dev/sdX boot device found; not listing any drives" >&2
    return 1
  fi
  # One literal pattern per line of boot_devs (-F). -w matches whole words only,
  # so excluding /dev/sda does not also hide /dev/sdaa.
  lsblk -lpdnSo KNAME | grep -vwF -- "${boot_devs}"
}
# badblocks-ws - run badblocks in write mode (-w) with progress (-s) on one device.
# This is a DESTRUCTIVE test with four passes: everything on the device is overwritten.
# Globals:   block_sz (read)  - block size in bytes, passed to -b
#            block_cnt (read) - blocks per I/O request, passed to -c
# Arguments: extra badblocks options and the device path, forwarded verbatim
badblocks-ws () {
  # "$@" keeps every argument as a single word (no word splitting or globbing)
  badblocks -ws -b "${block_sz}" -c "${block_cnt}" "$@"
}
# smartctl shortcuts for individual devices; smartlog only pulls selftest log
# smartscan - list the devices smartctl can detect (smartctl --scan).
# Any arguments given to this function are ignored.
smartscan () {
  smartctl --scan
}
# smartconvey - start a conveyance self-test (smartctl -t conveyance) on one device.
# Globals:   dev_type (read) - smartctl device type passed to -d
# Arguments: extra smartctl options and the device path, forwarded verbatim
smartconvey () {
  # "$@" keeps every argument as a single word (no word splitting or globbing)
  smartctl -d "${dev_type}" -t conveyance "$@"
}
# smartshort - start a short self-test (smartctl -t short) on one device.
# Globals:   dev_type (read) - smartctl device type passed to -d
# Arguments: extra smartctl options and the device path, forwarded verbatim
smartshort () {
  # "$@" keeps every argument as a single word (no word splitting or globbing)
  smartctl -d "${dev_type}" -t short "$@"
}
# smartlong - start a long self-test (smartctl -t long) on one device.
# Globals:   dev_type (read) - smartctl device type passed to -d
# Arguments: extra smartctl options and the device path, forwarded verbatim
smartlong () {
  # "$@" keeps every argument as a single word (no word splitting or globbing)
  smartctl -d "${dev_type}" -t long "$@"
}
# smartabort - abort a running self-test (smartctl -X) on one device.
# Globals:   dev_type (read) - smartctl device type passed to -d
# Arguments: extra smartctl options and the device path, forwarded verbatim
smartabort () {
  # "$@" keeps every argument as a single word (no word splitting or globbing)
  smartctl -d "${dev_type}" -X "$@"
}
# smartlog - show only the self-test log (smartctl -l selftest) of one device.
# Globals:   dev_type (read) - smartctl device type passed to -d
# Arguments: extra smartctl options and the device path, forwarded verbatim
smartlog () {
  # "$@" keeps every argument as a single word (no word splitting or globbing)
  smartctl -d "${dev_type}" -l selftest "$@"
}
# smarthealth - show the SMART attribute table (smartctl -A) of one device.
# Globals:   dev_type (read) - smartctl device type passed to -d
# Arguments: extra smartctl options and the device path, forwarded verbatim
smarthealth () {
  # "$@" keeps every argument as a single word (no word splitting or globbing)
  smartctl -d "${dev_type}" -A "$@"
}
# smartinfo - show the full smartctl report (smartctl --all) for one device.
# Globals:   dev_type (read) - smartctl device type passed to -d
# Arguments: extra smartctl options and the device path, forwarded verbatim
smartinfo () {
  # "$@" keeps every argument as a single word (no word splitting or globbing)
  smartctl -d "${dev_type}" --all "$@"
}
# smartctl shortcuts for all non-boot devices; just pass the output of the above function to the shortcuts we created before
# smartconvey-all - start a conveyance self-test on every drive printed by list-test-devs.
# Globals:   dev_type (read) - smartctl device type passed to -d
# Arguments: extra smartctl options, placed before the device path
smartconvey-all () {
  # -n1: one smartctl run per device (xargs appends the device path last).
  # -r:  run nothing when the device list is empty, instead of one bare smartctl call.
  list-test-devs | xargs -r -n1 smartctl -d "${dev_type}" -t conveyance "$@"
}
# smartshort-all - start a short self-test on every drive printed by list-test-devs.
# Globals:   dev_type (read) - smartctl device type passed to -d
# Arguments: extra smartctl options, placed before the device path
smartshort-all () {
  # -n1: one smartctl run per device (xargs appends the device path last).
  # -r:  run nothing when the device list is empty, instead of one bare smartctl call.
  list-test-devs | xargs -r -n1 smartctl -d "${dev_type}" -t short "$@"
}
# smartlong-all - start a long self-test on every drive printed by list-test-devs.
# Globals:   dev_type (read) - smartctl device type passed to -d
# Arguments: extra smartctl options, placed before the device path
smartlong-all () {
  # -n1: one smartctl run per device (xargs appends the device path last).
  # -r:  run nothing when the device list is empty, instead of one bare smartctl call.
  list-test-devs | xargs -r -n1 smartctl -d "${dev_type}" -t long "$@"
}
# smartabort-all - abort the running self-test on every drive printed by list-test-devs.
# Globals:   dev_type (read) - smartctl device type passed to -d
# Arguments: extra smartctl options, placed before the device path
smartabort-all () {
  # -n1: one smartctl run per device (xargs appends the device path last).
  # -r:  run nothing when the device list is empty, instead of one bare smartctl call.
  list-test-devs | xargs -r -n1 smartctl -d "${dev_type}" -X "$@"
}
# smartlog-all - show the self-test log of every drive printed by list-test-devs.
# Globals:   dev_type (read) - smartctl device type passed to -d
# Arguments: extra smartctl options, placed before the device path
smartlog-all () {
  # -n1: one smartctl run per device (xargs appends the device path last).
  # -r:  run nothing when the device list is empty, instead of one bare smartctl call.
  list-test-devs | xargs -r -n1 smartctl -d "${dev_type}" -l selftest "$@"
}
# smarthealth-all - show the SMART attribute table of every drive printed by list-test-devs.
# Globals:   dev_type (read) - smartctl device type passed to -d
# Arguments: extra smartctl options, placed before the device path
smarthealth-all () {
  # -n1: one smartctl run per device (xargs appends the device path last).
  # -r:  run nothing when the device list is empty, instead of one bare smartctl call.
  list-test-devs | xargs -r -n1 smartctl -d "${dev_type}" -A "$@"
}
# smartinfo-all - show the full smartctl report for every drive printed by list-test-devs.
# Globals:   dev_type (read) - smartctl device type passed to -d
# Arguments: extra smartctl options, placed before the device path
smartinfo-all () {
  # -n1: one smartctl run per device (xargs appends the device path last).
  # -r:  run nothing when the device list is empty, instead of one bare smartctl call.
  list-test-devs | xargs -r -n1 smartctl -d "${dev_type}" --all "$@"
}
# smartlog-watch - poll the self-test log of every drive under test every 60s via watch.
# The boot disk (any /dev/sdX holding a partition mounted at /boot or below it)
# is worked out once, when the function is called; the list of drives is
# re-read on every refresh.
# Globals:   dev_type (read) - smartctl device type passed to -d
# Arguments: extra smartctl options, joined with spaces into the watched command
# Returns:   1 without starting watch when no /dev/sdX boot disk can be identified
smartlog-watch () {
  local boot_dev
  local exclude=""
  # Build one "-e <disk>" grep pattern per boot disk. sort -u collapses the
  # duplicates produced when /boot and /boot/efi live on the same disk.
  while IFS= read -r boot_dev; do
    exclude+=" -e ${boot_dev}"
  done < <(
    lsblk -lpno KNAME,MOUNTPOINT \
      | awk '($2 == "/boot" || index($2, "/boot/") == 1) && match($1, /^\/dev\/sd[a-z]+/) { print substr($1, RSTART, RLENGTH) }' \
      | sort -u
  )
  if [[ -z "${exclude}" ]]; then
    printf '%s\n' "smartlog-watch: no /dev/sdX boot device found; not watching any drives" >&2
    return 1
  fi
  # watch hands this string to a shell on every refresh. grep -vwF drops the
  # boot disk by whole-word literal match; xargs -r skips smartctl entirely
  # when no drives are left.
  watch -n 60 "lsblk -lpdnSo KNAME | grep -vwF${exclude} | xargs -r -n1 smartctl -d ${dev_type} -l selftest $*"
}
# TODO: Add a function to spawn a tmux session with a pane for every drive under test & execute badblocks-ws on it - maybe just use tmuxinator for now...
# turn export back off
set +o allexport
#!/usr/bin/env bash
# SPDX-License-Identifier: GPL-2.0-only OR MIT
# Substitute the aliases etc. from smarthelper.sh if you elected to use it.
# Step-by-step burn-in procedure for a single drive. These commands are meant
# to be run one at a time, waiting for each test to finish, not as one script.
# set as appropriate
dev_name="/dev/sdX"
dev_type="sat"
# badblocks block size, block count, and max bad sectors before giving up
block_sz=4096
block_cnt=64
max_bad=1
# how to check selftest results:
smartctl -d "${dev_type}" -l selftest "${dev_name}"
# --- ! actual test process begins here ! ---
# typically takes 5-10 min, nonblocking
smartctl -d "${dev_type}" -t conveyance "${dev_name}"
# this will take about 1.5 hours/TB usually, nonblocking
smartctl -d "${dev_type}" -t long "${dev_name}"
# DESTRUCTIVE: -w overwrites the whole drive. Run this in a tmux session; it
# takes several times as long as the long test, since write mode writes and
# then verifies four test patterns.
# The list of bad blocks goes to ./<device>_bad.txt (e.g. ./sdX_bad.txt) in the
# current directory rather than under /dev, which does not survive a reboot.
badblocks -b "${block_sz}" -o "${dev_name##*/}_bad.txt" -c "${block_cnt}" -e "${max_bad}" -ws "${dev_name}"
# !!! --------------- IMPORTANT! Don't skip these next steps. --------------- !!!
# !!! Your drive may only be one thermal cycle away from death at this point. !!!
# Assuming badblocks found no bad blocks, power off the drive and allow it to cool down to room temperature.
# After it's been nice and cool for a couple of hours, ideally overnight, start it up and do a short test, then a long test:
smartctl -d "${dev_type}" -t short "${dev_name}"
# wait for that one to finish first, of course
smartctl -d "${dev_type}" -t long "${dev_name}"
# Assuming those both passed, your drive is probably good.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment