Compression efficiency comparison
#!/usr/bin/env bash

# Record whether this file is being sourced or executed directly: when it is
# executed, $0 and the first BASH_SOURCE entry name the same file.
if [[ "$0" == "${BASH_SOURCE[0]}" ]]; then
  SOURCED=false
else
  SOURCED=true
fi

# Enable strict mode only when executed, so that sourcing this file does not
# change the shell options or IFS of the calling shell.
if ! $SOURCED; then
  set -euo pipefail
  IFS=$'\n\t'
fi

# Create a private scratch directory and store its resolved absolute path.
# mktemp and realpath are run as separate statements so that a failure of
# either one is detected instead of being hidden by the other.
tmpdir=$(mktemp -d -t test.tmp.XXXXXXXXXX) || {
  printf 'error: could not create a temporary directory\n' >&2
  exit 1
}
tmpdir=$(realpath "$tmpdir") || {
  printf 'error: could not resolve the temporary directory path\n' >&2
  exit 1
}
printf 'tmpdir: %s\n' "$tmpdir" >&2
# Remove the scratch directory named by the global variable tmpdir.
# Globals:   tmpdir (read)
# Arguments: none
# Returns:   0 when tmpdir is empty/unset, otherwise the status of rm.
finish() {
  # Do nothing when tmpdir is unset or empty, so rm is never handed an empty
  # path; "--" keeps a path beginning with "-" from being read as an option.
  if [[ -n "${tmpdir:-}" ]]; then
    rm -rf -- "$tmpdir"
  fi
}
# Run the cleanup on every exit path of the shell.
trap finish EXIT
# Write a string to a file a fixed number of times, one copy per line.
# Arguments: $1 - the string to repeat
#            $2 - number of lines to write
#            $3 - path of the output file (truncated/created)
# Returns:   0 on success; otherwise the exit status of the failed pipeline.
repeat_string() {
  local str="$1"
  local rep="$2"
  local out="$3"
  local status=0

  # yes prints the string forever; head stops it after $rep lines.
  # The status is captured immediately: testing $? and then reading $? again
  # would yield the status of the test, not of the pipeline.
  yes "$str" | head -n "$rep" > "$out" || status=$?

  # 141 is 128 + SIGPIPE: under pipefail that is how yes ends once head has
  # closed the pipe, so it is expected and not treated as an error.
  if (( status != 0 && status != 141 )); then
    return "$status"
  fi
  return 0
}
string1="$1"
string2="$2"

# Print the two arguments, one per line, in the order sort(1) puts them.
_order_pair() {
  printf '%s\n' "$1" "$2" | sort
}

# Capture multiline output as array in bash
# https://stackoverflow.com/a/13824135/2377454
# mapfile stores each output line as one array element without word
# splitting or glob expansion, so strings containing spaces, tabs or
# characters such as '*' are kept exactly as given.
mapfile -t ordered_strings < <(_order_pair "$string1" "$string2")

stringA="${ordered_strings[0]}"
stringB="${ordered_strings[1]}"

# write debug messages to stderr
printf 'stringA: %s\n' "$stringA" >&2
printf 'stringB: %s\n' "$stringB" >&2
# Print how many copies of a string fit into half a GiB, rounded down.
# The length is measured in bytes. The string is never pasted into another
# program's source text, so quotes and backslashes in it are harmless.
# Arguments: $1 - the string to measure
# Outputs:   the repeat count on stdout
# Returns:   0 on success, 1 when the string is empty
_repeat_count() {
  local LC_ALL=C  # make ${#str} count bytes rather than characters
  local -r half_gb=$(( 1024 * 1024 * 1024 / 2 ))
  local str="$1"
  local len=${#str}

  if (( len == 0 )); then
    printf 'error: cannot compute a repeat count for an empty string\n' >&2
    return 1
  fi

  # Integer division of positive numbers already rounds down.
  printf '%d\n' "$(( half_gb / len ))"
}

repeatA=$(_repeat_count "$stringA")
repeatB=$(_repeat_count "$stringB")

# write debug messages to stderr
printf 'repeatA: %s\n' "$repeatA" >&2
printf 'repeatB: %s\n' "$repeatB" >&2
# Exit the shell with status $1 unless it is 0 or 141.
# 141 is 128 + SIGPIPE, which a pipeline stage reports when a later stage
# (here: head) stops reading early; that is expected and not an error.
_exit_unless_ok_or_sigpipe() {
  local status="$1"
  if (( status != 0 && status != 141 )); then
    exit "$status"
  fi
}

# Fixed scratch file names: the input strings may contain '/' or other
# characters that are not usable inside a file name.
wordsA="${tmpdir}/a.txt"
wordsB="${tmpdir}/b.txt"
repeat_string "$stringA" "$repeatA" "$wordsA"
repeat_string "$stringB" "$repeatB" "$wordsB"

# Count the per-CPU lines (cpu0, cpu1, ...) of /proc/stat.
numprocs=$(grep -cE '^cpu[0-9]+' /proc/stat)

# Each status is captured right after its command; reading $? after testing
# it would give the status of the test instead.
status=0
sort --parallel="$numprocs" -- "$wordsA" "$wordsB" \
  > "${tmpdir}/sorted-words.txt" || status=$?
_exit_unless_ok_or_sigpipe "$status"

status=0
shuf < "${tmpdir}/sorted-words.txt" \
  > "${tmpdir}/unsorted-words.txt" || status=$?
_exit_unless_ok_or_sigpipe "$status"

# Strip the newlines and keep at most 1G of each variant.
status=0
tr -d '\n' < "${tmpdir}/sorted-words.txt" \
  | head -c1G > 'sorted.txt' || status=$?
_exit_unless_ok_or_sigpipe "$status"

status=0
tr -d '\n' < "${tmpdir}/unsorted-words.txt" \
  | head -c1G > 'unsorted.txt' || status=$?
_exit_unless_ok_or_sigpipe "$status"

exit 0
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment