Last active
March 5, 2021 23:11
-
-
Save CristianCantoro/51fcb0b967ea0afd7b2e4e7afde76b64 to your computer and use it in GitHub Desktop.
Compression efficiency comparison
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
#
# Usage: <script> STRING1 STRING2
#
# Writes two files to the current directory, 'sorted.txt' and 'unsorted.txt',
# each built from repeated copies of the two argument strings (grouped by
# string in the first, shuffled in the second).

# Detect whether this file is being executed or sourced: when executed
# directly, $0 is the script's own path and equals BASH_SOURCE[0].
if [[ "$0" == "${BASH_SOURCE[0]}" ]]; then
  SOURCED=false
else
  SOURCED=true
fi

# Strict mode is only switched on when executed, so that sourcing the file
# does not change the options or IFS of the caller's shell.
if [[ "$SOURCED" == false ]]; then
  set -euo pipefail
  IFS=$'\n\t'
fi

# Scratch directory for intermediate files. mktemp and realpath are run as
# separate assignments so that a failure of either one is reported through
# its own exit status instead of being hidden inside a nested substitution.
tmpdir=$(mktemp -d -t test.tmp.XXXXXXXXXX)
tmpdir=$(realpath "$tmpdir")
printf 'tmpdir: %s\n' "$tmpdir" >&2
# finish: EXIT handler that removes the scratch directory named by $tmpdir.
# Globals:   tmpdir (read)
# Returns:   0 when tmpdir is unset or empty (nothing to clean up),
#            otherwise the status of rm.
finish() {
  # ${tmpdir:-} keeps the handler from aborting under `set -u` when the
  # script exits before tmpdir has been assigned.
  if [[ -n "${tmpdir:-}" ]]; then
    rm -rf -- "$tmpdir"
  fi
}
# Run the cleanup on every exit path of the script.
trap finish EXIT
# repeat_string STR REP OUT
# Writes REP lines, each containing STR, to the file OUT (truncating it).
# Arguments:
#   $1 - string to repeat
#   $2 - number of lines to write
#   $3 - path of the output file
# Returns:   0 on success.
# Exits:     terminates the shell with the pipeline's real exit status when
#            the pipeline fails for any reason other than SIGPIPE.
repeat_string() {
  local str="$1"
  local rep="$2"
  local out="$3"
  local status=0

  # Capture the status immediately: any later command (including a [[ ]]
  # test) overwrites $?, which would hide the real failure code.
  yes "$str" | head -n "$rep" > "$out" || status=$?

  # `yes` is terminated by SIGPIPE (128 + 13 = 141) once `head` has read
  # enough lines; under `pipefail` that becomes the pipeline status and is
  # the expected way for this pipeline to end.
  if (( status != 0 && status != 141 )); then
    exit "$status"
  fi
}
# order_pair A B
# Prints A and B, one per line, in the order sort(1) puts them.
order_pair() {
  printf '%s\n' "$1" "$2" | sort
}

string1="$1"
string2="$2"

# Read the sorted lines with mapfile rather than an unquoted $(...) array
# assignment: the latter word-splits the strings on IFS and expands glob
# characters such as '*', which corrupts arguments containing them.
mapfile -t ordered_strings < <(order_pair "$string1" "$string2")

stringA="${ordered_strings[0]}"
stringB="${ordered_strings[1]}"

# write debug messages to stderr
(>&2 echo "stringA: $stringA" )
(>&2 echo "stringB: $stringB" )
# repeat_count STR
# Prints how many copies of STR fit in half a GiB (536870912 bytes), rounded
# down. The length is measured in bytes with wc -c, and the division is
# plain shell integer arithmetic, so STR is never interpolated into another
# language's source code and no external interpreter is required.
# Arguments: $1 - the string to measure
# Outputs:   the repeat count on stdout
# Returns:   0 on success, 1 (with a message on stderr) when STR is empty,
#            since the division would be by zero.
repeat_count() {
  local -r half_gib=$(( 1024 * 1024 * 1024 / 2 ))
  local nbytes
  nbytes=$(printf '%s' "$1" | wc -c)
  if (( nbytes == 0 )); then
    printf 'repeat_count: cannot repeat an empty string\n' >&2
    return 1
  fi
  # Integer division already rounds down for positive operands.
  printf '%d\n' "$(( half_gib / nbytes ))"
}

# An unset or empty string is passed through as empty and rejected by
# repeat_count itself.
repeatA=$(repeat_count "${stringA:-}")
repeatB=$(repeat_count "${stringB:-}")

# write debug messages to stderr
(>&2 echo "repeatA: $repeatA" )
(>&2 echo "repeatB: $repeatB" )
# tolerate_sigpipe STATUS
# Called after a failed pipeline. Status 141 (128 + SIGPIPE) is accepted,
# because a reader such as `head` closing the pipe early is expected here.
# Any other status terminates the script with that same status. The status
# is passed in as an argument because $? is overwritten by every subsequent
# command, including the test that inspects it.
tolerate_sigpipe() {
  local status="$1"
  if (( status != 141 )); then
    exit "$status"
  fi
}

# Intermediate word lists. Fixed file names are used instead of names built
# from the user's strings, which would break if a string contained '/'.
words_a="${tmpdir}/wordsA.txt"
words_b="${tmpdir}/wordsB.txt"
sorted_words="${tmpdir}/sorted-words.txt"
unsorted_words="${tmpdir}/unsorted-words.txt"

repeat_string "$stringA" "$repeatA" "$words_a"
repeat_string "$stringB" "$repeatB" "$words_b"

# Count the per-CPU lines (cpu0, cpu1, ...) in /proc/stat to size sort's
# thread pool.
numprocs=$(grep -cE 'cpu[0-9]+' /proc/stat)

# All lines of both word lists, in sort(1) order.
sort --parallel="$numprocs" -- "$words_a" "$words_b" > "$sorted_words" \
  || tolerate_sigpipe "$?"

# The same lines in random order.
shuf < "$sorted_words" > "$unsorted_words" \
  || tolerate_sigpipe "$?"

# Strip the newlines and keep at most 1 GiB of each variant; the results are
# written to the current working directory.
tr -d '\n' < "$sorted_words" | head -c1G > 'sorted.txt' \
  || tolerate_sigpipe "$?"

tr -d '\n' < "$unsorted_words" | head -c1G > 'unsorted.txt' \
  || tolerate_sigpipe "$?"

exit 0
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment