Skip to content

Instantly share code, notes, and snippets.

@CristianCantoro
Last active March 5, 2021 23:11
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save CristianCantoro/51fcb0b967ea0afd7b2e4e7afde76b64 to your computer and use it in GitHub Desktop.
Save CristianCantoro/51fcb0b967ea0afd7b2e4e7afde76b64 to your computer and use it in GitHub Desktop.
Compression efficiency comparison
#!/usr/bin/env bash
# Work out whether this file is being executed directly or sourced: when it is
# run as a script, $0 and the first BASH_SOURCE entry name the same file.
if [ "$0" = "${BASH_SOURCE[0]}" ]; then
  SOURCED=false
else
  SOURCED=true
fi

# Enable strict mode only for direct execution, so that sourcing this file
# does not change the options or the IFS of the calling shell.
if [ "$SOURCED" = false ]; then
  # errexit: stop on unhandled failures; nounset: unset variables are errors;
  # pipefail: a pipeline fails when any of its stages fails.
  set -o errexit -o nounset -o pipefail
  # Word-split on newlines and tabs only (not on spaces).
  IFS=$'\n\t'
fi
# Scratch space: create a private temporary directory and keep its resolved
# path, so every intermediate file lives in one place.
tmpdir="$(realpath "$(mktemp -d -t test.tmp.XXXXXXXXXX)")"

# Diagnostics go to stderr.
echo "tmpdir: $tmpdir" >&2

# EXIT handler: remove the scratch directory and everything inside it.
finish() {
  rm -rf "$tmpdir"
}

# Run the cleanup whenever the shell exits, on success or on failure.
trap finish EXIT
#######################################
# Write a string to a file a given number of times, one copy per line.
# Arguments:
#   $1 - the string to repeat
#   $2 - how many lines to write
#   $3 - path of the output file (overwritten)
# Returns:
#   0 on success. On any failure other than status 141 the whole shell is
#   terminated with the pipeline's own exit status.
#######################################
function repeat_string {
  local str="$1"
  local rep="$2"
  local out="$3"
  local status=0

  # `yes` never stops by itself: once `head` has read enough lines and closes
  # the pipe, `yes` is killed by SIGPIPE and, with pipefail enabled, the
  # pipeline reports 141 (128 + 13). That is the expected way for it to end.
  yes "$str" | head -n "$rep" > "$out" || status=$?

  # The status is saved before it is tested: evaluating `[[ ... ]]` overwrites
  # `$?`, so testing first and then running `exit $?` would always exit with 1
  # instead of the real failure status.
  if [[ $status -ne 0 && $status -ne 141 ]]; then
    exit "$status"
  fi
}
# Inputs: the two strings to compare are the first two positional arguments.
string1="$1"
string2="$2"

#######################################
# Sort two strings and store them, in sorted order, in the global array
# ordered_strings (one array element per output line of `sort`).
# Globals:
#   ordered_strings (written)
# Arguments:
#   $1, $2 - the strings to order
#######################################
function order_string_pair {
  # mapfile stores every line verbatim. An unquoted `($(sort ...))` would
  # instead word-split the output on IFS and glob-expand it, corrupting
  # strings that contain tabs or characters such as `*` and `?`.
  mapfile -t ordered_strings < <(printf '%s\n' "$1" "$2" | sort)
}

order_string_pair "$string1" "$string2"
stringA="${ordered_strings[0]}"
stringB="${ordered_strings[1]}"

# write debug messages to stderr
(>&2 echo "stringA: $stringA" )
(>&2 echo "stringB: $stringB" )
#######################################
# Print how many whole copies of a string fit in half a GiB
# (1024 * 1024 * 1024 / 2 bytes), rounded down.
# Arguments:
#   $1 - the string to measure
# Outputs:
#   the repetition count on stdout
# Returns:
#   0 on success, 1 when the string is empty (the count would be undefined)
#######################################
function half_gb_repeats {
  local str="$1"
  local half_gb=$(( 1024 * 1024 * 1024 / 2 ))
  local len

  # Measure the string in bytes with wc -c, so that the result does not
  # depend on the locale and the string is never evaluated as code: quotes
  # and backslashes in it are just data.
  len=$(printf '%s' "$str" | wc -c)
  if (( len == 0 )); then
    echo "error: cannot compute repetitions of an empty string" >&2
    return 1
  fi

  # round this number down: integer division already truncates
  echo $(( half_gb / len ))
}

# With errexit enabled, a failure of the helper aborts the script here.
repeatA=$(half_gb_repeats "$stringA")
repeatB=$(half_gb_repeats "$stringB")

# write debug messages to stderr
(>&2 echo "repeatA: $repeatA" )
(>&2 echo "repeatB: $repeatB" )
#######################################
# Handle the exit status of a failed pipeline: status 141 (128 + SIGPIPE,
# produced when a downstream command such as `head` closes the pipe early)
# is tolerated; any other status terminates the shell with that same status.
# Arguments:
#   $1 - the exit status of the failed pipeline
#######################################
function tolerate_sigpipe {
  # The status arrives as an argument because testing `$?` with `[[ ... ]]`
  # and then running `exit $?` would exit with the status of the test itself.
  local status="$1"
  if [[ $status -ne 141 ]]; then
    exit "$status"
  fi
}

# Generate one file per string, each holding about half a GiB of copies.
# The scratch files use fixed names, so strings that contain `/` or that are
# too long for a file name cannot break the paths.
repeat_string "$stringA" "$repeatA" "${tmpdir}/stringA.txt"
repeat_string "$stringB" "$repeatB" "${tmpdir}/stringB.txt"

# Count the per-CPU lines (cpu0, cpu1, ...) in /proc/stat to size the sort.
numprocs=$(grep -c -E 'cpu[0-9]+' /proc/stat)

# All lines of both files, in sorted order.
sort --parallel="$numprocs" \
  "${tmpdir}/stringA.txt" "${tmpdir}/stringB.txt" \
  > "${tmpdir}/sorted-words.txt" \
  || tolerate_sigpipe "$?"

# The same lines in random order.
shuf "${tmpdir}/sorted-words.txt" > "${tmpdir}/unsorted-words.txt" \
  || tolerate_sigpipe "$?"

# Strip the newlines and keep exactly the first 1 GiB of each variant. The
# results are written to the current working directory. `head` stops reading
# early, which is what can produce status 141 upstream.
tr -d $'\n' < "${tmpdir}/sorted-words.txt" \
  | head -c1G > 'sorted.txt' \
  || tolerate_sigpipe "$?"

tr -d $'\n' < "${tmpdir}/unsorted-words.txt" \
  | head -c1G > 'unsorted.txt' \
  || tolerate_sigpipe "$?"

exit 0
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment