Last active
April 20, 2022 16:03
-
-
Save amtseng/afa847828b3a38bff28bb8fdb506ee20 to your computer and use it in GitHub Desktop.
Simple command-line program to construct ASCII histograms
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Shell options:
#   -b           report terminated background jobs immediately
#   -e           exit as soon as a command fails
#   -E           let functions and subshells inherit any ERR trap
#   -o pipefail  a pipeline fails if any one of its stages fails
set -beEo pipefail

# Defaults applied when the matching command-line option is not supplied.
readonly DEFAULTNUMBINS=10   # number of bins (-n|--numbins)
readonly DEFAULTMAXWIDTH=75  # longest bar, in characters (-w|--maxwidth)
# Prints the usage message to stdout.
# Globals read: DEFAULTNUMBINS, DEFAULTMAXWIDTH (interpolated into the text).
show_help() {
  # The heredoc delimiter is unquoted on purpose: ${0##*/} (the script's
  # basename) and the default values must be expanded inside the text.
  cat << EOF
Usage: ${0##*/} [OPTIONS]
Takes a set of numbers and plots an ASCII histogram. The input must be fed in
through stdin, and must consist of one number on each line.
Options:
  -h|--help: show this help message and exit
  -n|--numbins: the number of equally sized bins to use; defaults to $DEFAULTNUMBINS;
                conflicts with binsize
  -s|--binsize: the size of the bin to use; conflicts with numbins
  -m|--minbin: the minimum edge of the first bin; defaults to the minimum
               element
  -w|--maxwidth: maximum length of the histogram bars in characters; defaults
                 to $DEFAULTMAXWIDTH
EOF
}
# Parses the command line into the globals numbins, binsize, minbin and
# maxwidth (each left empty when its option is absent).
# Arguments: the script's command-line arguments ("$@").
# Exits: 0 after printing help for -h|--help; 1 (with a message and the usage
#        text on stderr) for an unknown flag, an option without a value, any
#        positional argument, or when both numbins and binsize are given.
parse_args() {
  # Start from a clean slate so same-named environment variables cannot leak
  # into the histogram parameters.
  numbins=""
  binsize=""
  minbin=""
  maxwidth=""

  while (( $# > 0 )); do
    case "$1" in
      -h|--help)
        show_help
        exit 0
        ;;
      -n|--numbins|-s|--binsize|-m|--minbin|-w|--maxwidth)
        # Every one of these options consumes the following word; without
        # this check "shift 2" would fail and abort silently under set -e.
        if (( $# < 2 )); then
          echo "Missing value for option: $1" >&2
          show_help >&2
          exit 1
        fi
        case "$1" in
          -n|--numbins) numbins=$2 ;;
          -s|--binsize) binsize=$2 ;;
          -m|--minbin) minbin=$2 ;;
          -w|--maxwidth) maxwidth=$2 ;;
        esac
        shift 2
        ;;
      -*)
        echo "Unsupported flag error: $1" >&2
        show_help >&2
        exit 1
        ;;
      *)
        # The program takes its data from stdin only, so positional arguments
        # are always an error; rejecting them here avoids collecting them into
        # a string and re-evaluating user-supplied text with eval.
        echo "Unexpected argument: $1" >&2
        show_help >&2
        exit 1
        ;;
    esac
  done

  # numbins and binsize both determine the bin layout; only one may be given.
  if [[ -n "$binsize" && -n "$numbins" ]]; then
    echo "Options numbins and binsize cannot be combined" >&2
    show_help >&2
    exit 1
  fi
}

parse_args "$@"
# Pipe stdin to temporary file | |
tempfile=$(mktemp) | |
cat - | sort -n > $tempfile | |
maxval=$(tail -n 1 $tempfile) | |
if [[ -z $minbin ]] | |
then | |
minval=$(head -n 1 $tempfile) | |
else | |
minval=$minbin | |
fi | |
if [[ -z $maxwidth ]] | |
then | |
maxwidth=$DEFAULTMAXWIDTH | |
fi | |
if [[ -z $binsize ]] | |
then | |
# Figure out the bin size | |
if [[ -z $numbins ]] | |
then | |
numbins=$DEFAULTNUMBINS | |
fi | |
binsize=$(echo "(${maxval}-(${minval}))/${numbins}" | bc -l) | |
else | |
numbins=$(echo "(${maxval}-(${minval}))/${binsize}" | bc) | |
fi | |
cat $tempfile | awk -v binsize=$binsize -v numbins=$numbins -v minval=$minval -v maxwidth=$maxwidth ' | |
BEGIN{ | |
# First, figure out how many digits of precision we need to display | |
maxval = minval + (numbins * binsize); | |
minabs = minval > 0 ? minval : minval * -1; | |
maxabs = maxval > 0 ? maxval : maxval * -1; | |
bestabs = maxval > minval ? maxval : minval; | |
leftdigits = int(log(bestabs) / log(10)) + 1; | |
rightdigits = int(-1 * log(binsize) / log(10)) + 1; | |
rightdigits += 1; # Extra padding for precision | |
totallength = leftdigits + 1 + rightdigits; | |
} | |
{ | |
# Count how many entries are in each bin | |
binindex = int(($1 - minval) / binsize); | |
counts[binindex] += 1; | |
} | |
END{ | |
# Figure out the maximum count to determine the mapping from count to bar length | |
maxcount = 0; | |
for (bin in counts) { | |
if (counts[bin] > maxcount) { | |
maxcount = counts[bin]; | |
} | |
} | |
widthfactor = maxwidth / maxcount; | |
# For each bin, print out the starting point and the bar | |
for (i = 0; i <= numbins; i++) { | |
binstart = minval + (i * binsize); | |
if (minval < 0) { | |
if (binstart < 0) { | |
printf "-"; | |
} else { | |
printf " "; | |
} | |
} | |
binstart *= binstart < 0 ? -1 : 1; | |
printf "%*.*f<|", totallength, rightdigits, binstart; | |
for (j = 0; j < (widthfactor * counts[i]); j++) { | |
printf "*"; | |
} | |
printf "\n"; | |
} | |
} | |
' | |
rm $tempfile |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment