Skip to content

Instantly share code, notes, and snippets.

@kojiromike
Created May 26, 2012 01:09
Show Gist options
  • Save kojiromike/2791583 to your computer and use it in GitHub Desktop.
Save kojiromike/2791583 to your computer and use it in GitHub Desktop.
#!/bin/bash
#######################################
# Print test data, one value per line.
# Most lines are random integers taken from $RANDOM. When a duplicate
# frequency is given, every freq-th line (never the first) is instead copied
# from the input file, so the generated file shares lines with it.
# Arguments:
#   $1 - number of lines to print
#   $2 - input file to copy lines from (optional; leave $2 and $3 empty to
#        generate an independent file)
#   $3 - copy a line from $2 every $3 iterations (optional; empty or 0
#        disables copying)
# Outputs:
#   Writes $1 lines to stdout.
#######################################
generate_lines() {
  local length=$1
  local input_file=${2-}
  local -i input_length=0 # The number of lines in the input file.
  local -i freq_dupes     # Copy a line from the input every freq_dupes iterations.
  local -i i line_no      # Kept local so the caller's variables are untouched.

  freq_dupes=$(( ${3:-length+1} ))
  if [[ -r $input_file ]]; then
    input_length=$(( $(wc -l < "$input_file") ))
  fi

  # With a zero frequency or nothing to copy from, the modulo arithmetic below
  # would divide by zero. A period longer than the loop means the copy branch
  # never fires, which is the same as "no duplicates".
  if (( freq_dupes == 0 || input_length == 0 )); then
    freq_dupes=$(( length + 1 ))
  fi

  for (( i = 0; i < length; i++ )); do
    if (( i && i % freq_dupes == 0 )); then
      # Map i onto a 1-based line number that wraps past the end of the file.
      # (i - 1) % n + 1 always lands in 1..n; a plain i % n is 0 at every
      # multiple of n, which matches no awk record and drops the line.
      line_no=$(( (i - 1) % input_length + 1 ))
      awk -v n="$line_no" 'NR == n { print; exit }' "$input_file"
    else
      echo "$RANDOM"
    fi
  done
}
#######################################
# Time one run of the comm-based script (./comm_method).
# Arguments:
#   $1 - first input file (default: a)
#   $2 - second input file (default: b)
# Outputs:
#   A banner on stderr, then whatever the timed script prints.
#######################################
time_comm() {
  echo 'Testing comm method.' >&2
  # Forward the caller's file names. They used to be hard-coded as "a b", so
  # any arguments passed in were silently ignored.
  time bash comm_method "${1:-a}" "${2:-b}"
}
#######################################
# Time one run of the grep-based script (./grep_method).
# Arguments:
#   $1 - first input file (default: a)
#   $2 - second input file (default: b)
# Outputs:
#   A banner on stderr, then whatever the timed script prints.
#######################################
time_grep() {
  echo 'Testing grep method.' >&2
  # Forward the caller's file names. They used to be hard-coded as "a b", so
  # any arguments passed in were silently ignored.
  time bash grep_method "${1:-a}" "${2:-b}"
}
#######################################
# Create a scratch directory, make it the working directory (via pushd), and
# symlink the two scripts under test into it.
# Globals:
#   PWD    (read; changes as a result of pushd)
#   TMPDIR (read; parent of the scratch directory, default /tmp)
# Outputs:
#   Progress (and pushd's directory stack) on stdout; errors on stderr.
# Returns:
#   Exits the shell with status 1 if any setup step fails.
#######################################
setup_tests() {
  echo "Setting up tests"
  local comm_path="$PWD/comm_method"
  local grep_path="$PWD/grep_method"
  local script work_dir

  # Fail early rather than timing "No such file" errors for every test size.
  for script in "$comm_path" "$grep_path"; do
    if [[ ! -r $script ]]; then
      echo "fail: cannot read $script" >&2
      exit 1
    fi
  done

  # An explicit XXXXXX template is accepted by both GNU and BSD mktemp; the
  # bare "-t prefix" form only works with BSD mktemp.
  if ! work_dir=$(mktemp -d "${TMPDIR:-/tmp}/${0##*/}.XXXXXX") \
    || ! pushd "$work_dir"; then
    echo "fail." >&2
    exit 1
  fi

  if ! ln -s "$comm_path" || ! ln -s "$grep_path"; then
    echo "fail." >&2
    exit 1
  fi
}
#######################################
# Build a pair of data files and time both implementations against them.
# Arguments:
#   $1 - number of lines in each generated file
#   $2 - how often file b repeats a line of file a (0 or empty means 10)
# Outputs:
#   Creates files a and b in the working directory and prints a banner plus
#   separators on stdout. The timed commands' stdout is discarded.
#######################################
generic_test() {
  local -i n_lines=$1
  local -i dup_every=$2

  # A period of zero is unusable as a modulus, so fall back to 10.
  if (( dup_every == 0 )); then
    dup_every=10
  fi

  printf 'Creating %s-line files with %s repetitions in the second file.\n' \
    "$n_lines" "$dup_every"
  generate_lines "$n_lines" > a
  generate_lines "$n_lines" a "$dup_every" > b

  printf '%s\n' '----------'
  time_comm a b > /dev/null
  printf '\n%s\n' '----------'
  time_grep a b > /dev/null
}
# Benchmark 10-line files with a random duplicate period in 0..9.
test_10() {
  local -i size=10
  generic_test "$size" "$(( RANDOM % size ))"
}
# Benchmark 100-line files with a random duplicate period in 0..99.
test_100() {
  local -i size=100
  generic_test "$size" "$(( RANDOM % size ))"
}
# Benchmark 1000-line files with a random duplicate period in 0..999.
test_1000() {
  local -i size=1000
  generic_test "$size" "$(( RANDOM % size ))"
}
# Benchmark 10000-line files with a random duplicate period in 0..9999.
test_10000() {
  local -i size=10000
  generic_test "$size" "$(( RANDOM % size ))"
}
# Benchmark 100000-line files; the duplicate period is RANDOM % 100000.
test_100000() {
  local -i size=100000
  generic_test "$size" "$(( RANDOM % size ))"
}
#######################################
# Set up the scratch directory, then run every benchmark size in ascending
# order, printing a blank-line-padded separator before each one.
#######################################
run() {
  local size

  setup_tests
  for size in 10 100 1000 10000 100000; do
    printf '\n%s\n\n' '=========='
    "test_${size}"
  done
}
# Entry point: set up the scratch directory and run every benchmark size.
run
#!/bin/bash
# comm-based implementation.
# Usage: comm_method FILE1 FILE2
#
# Prints every line of FILE1 (in sorted order) followed by a tab and a flag:
#   0 - the line also occurs in FILE2
#   1 - the line occurs only in FILE1
#
# `comm -2` hides the "only in FILE2" column, so lines common to both files
# arrive prefixed with a single tab and lines unique to FILE1 arrive bare.
# NOTE(review): a line of FILE1 that itself begins with a tab is therefore
# indistinguishable from a common line.
while IFS= read -r; do
  # -r keeps backslashes in the data intact; a plain `read` would treat them
  # as escape characters and drop them from the output.
  if [[ $REPLY = $'\t'* ]]; then
    printf '%s\t0\n' "${REPLY#?}"
  else
    printf '%s\t1\n' "${REPLY}"
  fi
done < <(comm -2 <(sort "$1") <(sort "$2"))
#!/bin/bash
# grep-based implementation.
# Usage: grep_method PATTERN_FILE [FILE...]
#
# Input lines that exactly equal some line of PATTERN_FILE are printed first,
# each with a tab and 1 appended; the remaining input lines follow, each with
# a tab and 0 appended.

# Append a tab and the flag given as $1 to every line read from stdin.
append_flag() {
  sed "s/\$/"$'\t'"$1/"
}

grep -x -F -f "$@" | append_flag 1
grep -v -x -F -f "$@" | append_flag 0
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment