Skip to content

Instantly share code, notes, and snippets.

@wchargin
Last active March 14, 2023 23:26
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save wchargin/926a1bb7bb01313a83d2b8c4c5e9e1e1 to your computer and use it in GitHub Desktop.
Save wchargin/926a1bb7bb01313a83d2b8c4c5e9e1e1 to your computer and use it in GitHub Desktop.
unix filter for computing percentiles / quantiles
#!/bin/sh
# Print an error report to stderr and abort the script with status 1.
# Arguments:
#   $1    - primary message, printed after a "fatal: " prefix
#   $2... - optional follow-up lines (e.g. a usage hint), printed verbatim
# With no arguments a bare "fatal: " line is still printed.
die() {
  printf >&2 'fatal: %s\n' "${1-}"
  if [ "$#" -gt 1 ]; then
    shift
    # Remaining arguments go out one per line, without the prefix.
    printf >&2 '%s\n' "$@"
  fi
  exit 1
}
# Collapse the percentile arguments into one comma-separated string, ps.
# At least one percentile is required.
[ "$#" -gt 0 ] || die 'no percentiles provided' 'usage: pct P [P ...]'
ps="$1"
if [ "$#" -gt 1 ]; then
  # Append every remaining argument, each preceded by a comma.
  shift
  ps="${ps}$(printf ',%s' "$@")"
fi
# Create the scratch directory that will hold the sorted copy of stdin.
# If mktemp fails, the script stops with status 2.
if ! tmpdir="$(mktemp -d --suffix=.pct)"; then
  exit 2
fi
# Best-effort removal of the scratch file and its directory.
# Globals: tmpdir (read)
# Only the file "f" is deleted; rmdir then removes the directory if nothing
# else is left in it. Error output from both steps is discarded.
cleanup() {
  {
    rm -f "${tmpdir}/f"
    rmdir "${tmpdir}"
  } 2>/dev/null
}
# From here on, run cleanup whenever the script exits.
trap cleanup EXIT
# Stage stdin, sorted numerically, in the scratch file. A sort failure stops
# the script with status 2.
if ! sort -n >"${tmpdir}/f"; then
  exit 2
fi
# Two-pass report over the sorted values; the staged file is given twice.
#   pass 1: measure the widest integer and fractional parts of the values,
#           then (at end of file) map each requested percentile to a line.
#   pass 2: print "p<percentile> = <value>" for every selected line, in the
#           order the lines occur in the sorted file.
# NOTE: ARGIND and ENDFILE are gawk extensions, so "awk" must resolve to gawk.
awk -v psraw="${ps}" '
  # Magnitude of z, coerced to a number.
  function absval(z) {
    z += 0
    return (z < 0) ? -z : z
  }

  # Fractional part of z as text including its leading dot (".25"), or "" when
  # z is whole. The sign is dropped first: a negative remainder stringifies as
  # "-0.25", and cutting one character off that leaves "0.25", which used to
  # turn -5.25 into "-50.25" in the output.
  function dec(z) {
    z = absval(z) % 1
    if (z == 0) return ""
    return substr(z, 2)
  }

  # Number of characters dec(z) occupies (dot included).
  function ndec(z) {
    return length(dec(z))
  }

  # Integer part of z as text, truncated toward zero. The sign is taken from z
  # itself so that values strictly between -1 and 0 keep their minus sign.
  function intstr(z,    sign) {
    sign = (z + 0 < 0) ? "-" : ""
    return sign int(absval(z))
  }

  # Pass 1: track the widest integer part and fractional part among the values.
  ARGIND == 1 {
    if (length(intstr($0)) > vmaxint) vmaxint = length(intstr($0))
    if (ndec($0) > vmaxdec) vmaxdec = ndec($0)
    next
  }

  # End of the first pass: record the line count and turn each percentile p
  # into a 1-based line number (p = 0 selects line 1, p = 100 the last line).
  # Once a non-zero count is already recorded, this is the end of the second
  # pass and the program stops.
  ENDFILE {
    if (nlines) exit
    nlines = FNR
    split(psraw, ps, ",")
    for (i in ps) {
      p = ps[i]
      if (length(intstr(p)) > pmaxint) pmaxint = length(intstr(p))
      if (ndec(p) > pmaxdec) pmaxdec = ndec(p)
      ns[i] = 1 + int(p * (nlines - 1) / 100)
    }
  }

  # Render z with its integer part right-aligned in a field of maxint
  # characters (plus room for pfx, which is glued to the front of the number).
  # When padend is true the fractional part is padded on the right with spaces
  # up to maxdec characters. intpart and decpart are function-local.
  function fmt(z, pfx, padend, maxint, maxdec,    intpart, decpart) {
    intpart = sprintf("% *s", maxint + length(pfx), pfx intstr(z))
    decpart = sprintf("%s% *s", dec(z), padend ? maxdec - ndec(z) : 0, "")
    return intpart decpart
  }

  # Pass 2: emit one output line for every percentile that selected this line.
  # The percentile column is end-padded; the value column is not.
  {
    for (i in ns) {
      if (FNR == ns[i]) {
        printf "%s = %s\n",
          fmt(ps[i], " p", 1, pmaxint, pmaxdec),
          fmt($0, "", 0, vmaxint, vmaxdec)
      }
    }
  }
' "${tmpdir}/f" "${tmpdir}/f"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment