Skip to content

Instantly share code, notes, and snippets.

@Niols
Last active April 1, 2019 08:19
Show Gist options
  • Save Niols/e709bc2b9886c569e7db1c009c7d09f8 to your computer and use it in GitHub Desktop.
Save Niols/e709bc2b9886c569e7db1c009c7d09f8 to your computer and use it in GitHub Desktop.
Script to run Morbig and Dash on a Software Heritage archive and compare the results.
#!/bin/sh
# Run Morbig and Dash on a Software Heritage archive and compare the results.
#
# Usage: <this script> --corpus DIR [--time]
#   --corpus DIR  corpus directory (mandatory); a relative DIR is resolved
#                 from the directory containing this script, because of the
#                 `cd` below
#   --time        prefix every checker invocation with `time`

# -e: stop on unhandled failure; -u: unset variables are errors;
# -C: noclobber, a plain `>` refuses to overwrite an existing file.
set -euC

# Work from the script's own directory; the *.list files are created there.
cd "$(dirname "$0")"

# Both tools are required. Say which one is missing instead of letting
# `set -e` end the script without any message.
for tool in morbig dash; do
  if ! command -v "$tool" >/dev/null; then
    printf 'Required command not found: %s\n' "$tool" >&2
    exit 1
  fi
done

while [ $# -gt 0 ]; do
  case $1 in
    --corpus)
      if [ $# -lt 2 ]; then
        printf 'Option --corpus requires an argument.\n' >&2
        exit 1
      fi
      shift; readonly CORPUS=$1
      ;;
    --time)
      readonly TIME=time
      ;;
    *)
      printf 'Unknown argument: %s\n' "$1" >&2
      exit 1
  esac
  shift
done

# Reject an empty corpus as well as a missing one: with CORPUS='' the
# "$CORPUS"/* globs used by the corpus functions would expand to /*.
if [ -z "${CORPUS-}" ]; then
  printf 'Corpus is mandatory.\n' >&2
  exit 1
fi

# TIME is always defined (possibly empty) so it can be expanded under `set -u`.
if [ -z "${TIME+x}" ]; then
  readonly TIME=
fi
# Rename every regular file below $CORPUS to <name>.sh.gz, then gunzip it,
# which leaves <name>.sh. Each entry directly under $CORPUS is processed by
# its own background job; the function returns once all jobs have finished.
# Globals: CORPUS (read), subcorpus (written)
rename_gunzip_corpus () {
  for subcorpus in "$CORPUS"/*; do
    {
      # POSIX only guarantees substitution of an argument that is exactly
      # `{}`; an embedded form such as `{}.sh.gz` is implementation-defined.
      # Hand the paths to a small sh loop instead, which also batches many
      # files per process. The final `:` keeps the status at zero so that a
      # single failed rename (already reported by mv) does not prevent the
      # gunzip pass.
      find "$subcorpus" -type f -exec sh -c '
        for file do
          mv "$file" "$file.sh.gz"
        done
        :
      ' sh '{}' '+'
      find "$subcorpus" -type f -exec gunzip '{}' '+'
    } &
  done
  wait
}
# Run a checker on every script whose path is read from standard input
# (one path per line).
# Arguments: $@ - the checker command and its options; $1 is also used to
#                 name the result files
# Globals:   TIME (read), script (written)
# For each script:
#   <script>.<$1>-output    receives the checker's stdout and stderr
#   <script>.<$1>-accepted  empty marker, created if the checker succeeded
#   <script>.<$1>-rejected  empty marker, created if the checker failed
# Scripts that already carry either marker are skipped, so an interrupted
# run can be resumed.
run_on () {
  while read -r script; do
    if [ -e "$script.$1-accepted" ] || [ -e "$script.$1-rejected" ]; then
      continue
    fi
    # $TIME is deliberately unquoted: when empty it must disappear instead of
    # becoming an empty command name.
    # </dev/null keeps the checker from consuming the list of paths this
    # loop is reading.
    # `>|` overrides noclobber: an output file left behind by an interrupted
    # run must not make the redirection fail, which would be recorded as a
    # rejection.
    if $TIME "$@" "$script" </dev/null >| "$script.$1-output" 2>&1; then
      > "$script.$1-accepted"
    else
      > "$script.$1-rejected"
    fi
  done
}
# Feed every *.sh file found below a directory to run_on.
# Usage:   run_on_dir DIR -- CHECKER [OPTION...]
# Globals: target (written)
# Exits the current (sub)shell with status 1 when the second argument is not
# the `--` separator.
run_on_dir () {
  target=$1
  shift
  if [ "$1" != '--' ]; then
    exit 1
  fi
  shift
  find "$target" -name '*.sh' | run_on "$@"
}
# Start one background run_on_dir job per entry directly under $CORPUS and
# block until every job has finished.
# Arguments: $@ - checker command and options, forwarded after a `--`
# Globals:   CORPUS (read)
run_on_whole_corpus () {
  for subdir in "$CORPUS"/*
  do
    run_on_dir "$subdir" -- "$@" &
  done

  wait
}
# Print the number of lines in file $1.
# The file is fed through stdin so that wc prints the number without the
# file name.
count () {
  <"$1" wc -l
}
# Write the sorted paths of all *.sh files below $CORPUS to all.list in the
# current directory, then print how many there are.
# Globals: CORPUS (read)
list_all () {
  find "$CORPUS" -name '*.sh' | sort > all.list
  count all.list
}
# List the scripts that carry a given result marker.
# Arguments: $1 - checker name (the callers in this file pass morbig or dash)
#            $2 - verdict (accepted or rejected)
# Globals:   CORPUS (read)
# Finds every <script>.sh.<$1>-<$2> marker below $CORPUS, strips the marker
# suffix to recover the script path, writes the sorted paths to
# <$1>-<$2>.list in the current directory and prints their number.
list_by_how () {
  # Remove exactly the ".<$1>-<$2>" suffix by length. Cutting on dots would
  # truncate any path containing another dot, such as "./corpus" or
  # "corpus.v1". The suffix is ${#1} + ${#2} characters plus "." and "-".
  find "$CORPUS" -name "*.sh.$1-$2" \
    | awk -v drop="$(( ${#1} + ${#2} + 2 ))" \
        '{ print substr($0, 1, length($0) - drop) }' \
    | sort \
    > "$1-$2.list"
  count "$1-$2.list"
}
# Intersect two "<checker>-<verdict>.list" files.
# Arguments: $1    - base name of the resulting list
#            $2 $3 - checker and verdict naming the first input list
#            $4 $5 - checker and verdict naming the second input list
# `comm -12` keeps only the lines present in both inputs. The result goes to
# <$1>.list and its line count is printed.
list_diff_by_how () {
  comm -12 "$2-$3.list" "$4-$5.list" > "$1.list"
  count "$1.list"
}
# Collect the scripts of all.list whose Morbig output contains
# 'Fatal error' into morbig-fatal-error.list, then print how many there are.
list_morbig_fatal () {
  while read -r candidate; do
    grep -q 'Fatal error' "$candidate".morbig-output || continue
    echo "$candidate"
  done < all.list > morbig-fatal-error.list

  count morbig-fatal-error.list
}
# Split morbig-only.list according to Dash's diagnostic.
#   morbig-only-w-u-keyword.list   scripts whose Dash output has a
#                                  '"X" unexpected (expecting "Y")' error
#   morbig-only-wo-u-keyword.list  the remaining scripts
# Prints both counts with a label.
list_morbig_only_wo_u_keyword () {
  while read -r candidate; do
    grep -q ': Syntax error: "[^"]*" unexpected (expecting "[^"]*")' "$candidate".dash-output \
      || continue
    echo "$candidate"
  done < morbig-only.list > morbig-only-w-u-keyword.list

  # Lines present only in the first list, i.e. the scripts not selected above.
  comm -23 morbig-only.list morbig-only-w-u-keyword.list \
    > morbig-only-wo-u-keyword.list

  printf 'Morbig only w/ unexp. keyword: '
  count morbig-only-w-u-keyword.list

  printf 'Morbig only w/o unexp. keyword: '
  count morbig-only-wo-u-keyword.list
}
# Split morbig-only-wo-u-keyword.list according to Dash's diagnostic.
#   morbig-only-wo-u-keyword-w-u-newline.list   scripts whose Dash output
#                                               has 'newline unexpected'
#   morbig-only-wo-u-keyword-wo-u-newline.list  the remaining scripts
# Prints both counts with a label.
list_morbig_only_wo_u_keyword_wo_u_newline () {
  while read -r candidate; do
    grep -q ': Syntax error: newline unexpected' "$candidate".dash-output \
      || continue
    echo "$candidate"
  done < morbig-only-wo-u-keyword.list \
       > morbig-only-wo-u-keyword-w-u-newline.list

  # Lines present only in the first list, i.e. the scripts not selected above.
  comm -23 morbig-only-wo-u-keyword.list \
           morbig-only-wo-u-keyword-w-u-newline.list \
    > morbig-only-wo-u-keyword-wo-u-newline.list

  printf 'Morbig only w/o unexp. keyword w/ unexp. newline: '
  count morbig-only-wo-u-keyword-w-u-newline.list

  printf 'Morbig only w/o unexp. keyword w/o unexp. newline: '
  count morbig-only-wo-u-keyword-wo-u-newline.list
}
# Split morbig-only-wo-u-keyword-wo-u-newline.list according to whether the
# script itself contains the text '200>'.
#   ...-wo-u-newline-w-200.list   scripts containing '200>'
#   ...-wo-u-newline-wo-200.list  the remaining scripts
# Prints both counts with a label.
list_morbig_only_wo_u_keyword_wo_u_newline_wo_200 () {
  while read -r candidate; do
    grep -q '200>' "$candidate" || continue
    echo "$candidate"
  done < morbig-only-wo-u-keyword-wo-u-newline.list \
       > morbig-only-wo-u-keyword-wo-u-newline-w-200.list

  # Lines present only in the first list, i.e. the scripts not selected above.
  comm -23 morbig-only-wo-u-keyword-wo-u-newline.list \
           morbig-only-wo-u-keyword-wo-u-newline-w-200.list \
    > morbig-only-wo-u-keyword-wo-u-newline-wo-200.list

  printf 'Morbig only w/o unexp. keyword w/o unexp. newline w/ 200>: '
  count morbig-only-wo-u-keyword-wo-u-newline-w-200.list

  printf 'Morbig only w/o unexp. keyword w/o unexp. newline w/o 200>: '
  count morbig-only-wo-u-keyword-wo-u-newline-wo-200.list
}
# Split dash-only.list according to Morbig's diagnostic.
#   dash-only-w-i-param.list   scripts whose Morbig output reports
#                              'Lexical error (Invalid variable parameter)'
#   dash-only-wo-i-param.list  the remaining scripts
# Prints both counts with a label.
list_dash_only_wo_i_param () {
  while read -r candidate; do
    grep -q ': Lexical error (Invalid variable parameter).' "$candidate".morbig-output \
      || continue
    echo "$candidate"
  done < dash-only.list > dash-only-w-i-param.list

  # Lines present only in the first list, i.e. the scripts not selected above.
  comm -23 dash-only.list dash-only-w-i-param.list \
    > dash-only-wo-i-param.list

  printf 'Dash only w/ inval. param.: '
  count dash-only-w-i-param.list

  printf 'Dash only w/o inval. param.: '
  count dash-only-wo-i-param.list
}
# Split dash-only-wo-i-param.list according to Morbig's diagnostic.
#   dash-only-wo-i-param-w-syntax-err.list   scripts whose Morbig output
#                                            matches ': Syntax error.'
#   dash-only-wo-i-param-wo-syntax-err.list  the remaining scripts
# Prints both counts with a label.
list_dash_only_wo_i_param_wo_syntax_err () {
  while read -r candidate; do
    grep -q ': Syntax error.' "$candidate".morbig-output || continue
    echo "$candidate"
  done < dash-only-wo-i-param.list \
       > dash-only-wo-i-param-w-syntax-err.list

  # Lines present only in the first list, i.e. the scripts not selected above.
  comm -23 dash-only-wo-i-param.list \
           dash-only-wo-i-param-w-syntax-err.list \
    > dash-only-wo-i-param-wo-syntax-err.list

  printf 'Dash only w/o inval. param. w/ syntax err.: '
  count dash-only-wo-i-param-w-syntax-err.list

  printf 'Dash only w/o inval. param. w/o syntax err.: '
  count dash-only-wo-i-param-wo-syntax-err.list
}
# Split dash-only-wo-i-param-wo-syntax-err.list according to whether the
# script itself contains '<<'.
#   ...-wo-syntax-err-w-here-doc.list   scripts containing '<<'
#   ...-wo-syntax-err-wo-here-doc.list  the remaining scripts
# Prints both counts with a label.
list_dash_only_wo_i_param_wo_syntax_err_wo_here_doc () {
  while read -r candidate; do
    grep -q '<<' "$candidate" || continue
    echo "$candidate"
  done < dash-only-wo-i-param-wo-syntax-err.list \
       > dash-only-wo-i-param-wo-syntax-err-w-here-doc.list

  # Lines present only in the first list, i.e. the scripts not selected above.
  comm -23 dash-only-wo-i-param-wo-syntax-err.list \
           dash-only-wo-i-param-wo-syntax-err-w-here-doc.list \
    > dash-only-wo-i-param-wo-syntax-err-wo-here-doc.list

  printf 'Dash only w/o inval. param. w/o syntax err. w/ here doc.: '
  count dash-only-wo-i-param-wo-syntax-err-w-here-doc.list

  printf 'Dash only w/o inval. param. w/o syntax err. w/o here doc.: '
  count dash-only-wo-i-param-wo-syntax-err-wo-here-doc.list
}
# Print the menu of available steps on standard output.
# The numbers are the choices understood by the interactive loop; `a` walks
# through them in increasing order.
print_help () {
  # Quoted delimiter: the text is printed literally, without expansion.
  # Entry 23 names the pattern as `200>`, which is what step 23 searches for
  # and what its report prints.
  cat <<'EOF'
0. Uncompress archive.
1. Gunzip all files.
2. Cleanup corpus.
3. Cleanup local list files.
4. Run Morbig on the whole corpus.
5. Run Dash on the whole corpus.
10. List all scripts.
11. List scripts handled by Morbig.
12. List scripts handled by Dash.
13. List scripts handled by Morbig and Dash differently.
21. List scripts accepted by Morbig only that (don't) have an unexpected keyword.
22. List those scripts that (don't) have an unexpected newline.
23. List those scripts that (don't) use 200>.
30. List scripts accepted by Dash only that (don't) have an invalid parameter.
31. List those scripts that (don't) have a syntax error.
32. List those scripts that (don't) have a here document.
40. List scripts on which Morbig fails fatally.
a. Run everything in this list. You'll get asked where to start.
h. Print this help.
EOF
}
# Report an unrecognised menu choice, then show the menu again.
# Globals: choice (read) - the text that is reported; the positional
#          arguments passed by callers are not consulted.
unknown_choice () {
  printf 'Unknown choice: %s\n' "$choice"

  print_help
}
# Interactive driver: show the menu, then read and execute choices forever.
# Every iteration reports how long the selected step took.
#
# Auto mode (choice `a`): asks once for a starting step number, then
# increments the number on every iteration, running the steps that exist and
# silently skipping the unused numbers below 50. Reaching 50 ends auto mode.
print_help
printf 'Enter your choice below:\n'

auto=false
while true; do
  if $auto; then
    if [ -z "$choice" ]; then
      printf '\nAuto mode: where to start?\n>>> '
      read -r choice
      # The starting point is incremented arithmetically afterwards, so it
      # has to be a plain non-negative number; ask again otherwise.
      case "$choice" in
        ''|*[!0-9]*)
          printf 'Auto mode needs a step number.\n'
          choice=
          continue
          ;;
      esac
    else
      choice=$((choice + 1))
      printf '\n>>> %d\n' "$choice"
    fi
  else
    printf '\n>>> '; read -r choice
  fi

  start=$(date +%s)

  case "$choice" in
    a|A)
      auto=true
      auto_start=$(date +%s)
      # An empty choice makes the next iteration ask where to start.
      choice=
      ;;

    h|H|'?')
      print_help
      ;;

    0)
      if [ -e "$CORPUS" ]; then
        printf 'Cannot create corpus: it already exists.\n'
        exit 1
      else
        mkdir "$CORPUS"
        printf 'Where is that archive?\n>>> '; read -r archive
        # Anchor a relative path to the current directory before entering
        # the corpus, otherwise tar would look for the archive inside it.
        case "$archive" in
          /*) ;;
          *) archive=$PWD/$archive ;;
        esac
        (cd "$CORPUS" && tar xf "$archive")
      fi
      ;;

    1)
      rename_gunzip_corpus
      ;;

    2)
      find "$CORPUS" -type f -not -name '*.sh' -delete
      ;;

    3)
      rm -f ./*.list
      ;;

    4)
      run_on_whole_corpus morbig --as none --disable-alias-expansion
      ;;

    5)
      run_on_whole_corpus dash -n
      ;;

    10)
      printf 'Total number of scripts: '
      list_all
      ;;

    11)
      printf 'Accepted by Morbig: '
      list_by_how morbig accepted
      printf 'Rejected by Morbig: '
      list_by_how morbig rejected
      ;;

    12)
      printf 'Accepted by Dash: '
      list_by_how dash accepted
      printf 'Rejected by Dash: '
      list_by_how dash rejected
      ;;

    13)
      printf 'Accepted by both: '
      list_diff_by_how both-accepted morbig accepted dash accepted
      printf 'Rejected by both: '
      list_diff_by_how both-rejected morbig rejected dash rejected
      printf 'Accepted by Morbig only: '
      list_diff_by_how morbig-only morbig accepted dash rejected
      printf 'Accepted by Dash only: '
      list_diff_by_how dash-only morbig rejected dash accepted
      ;;

    21)
      list_morbig_only_wo_u_keyword
      ;;

    22)
      list_morbig_only_wo_u_keyword_wo_u_newline
      ;;

    23)
      list_morbig_only_wo_u_keyword_wo_u_newline_wo_200
      ;;

    30)
      list_dash_only_wo_i_param
      ;;

    31)
      list_dash_only_wo_i_param_wo_syntax_err
      ;;

    32)
      list_dash_only_wo_i_param_wo_syntax_err_wo_here_doc
      ;;

    40)
      printf 'Where Morbig fails fatally: '
      list_morbig_fatal
      ;;

    # Any other one-character choice, or a two-character choice starting
    # with 1-4: these are the gaps auto mode steps over without a message.
    ?|1?|2?|3?|4?)
      if ! $auto; then
        unknown_choice "$choice"
      fi
      ;;

    50)
      if $auto; then
        printf 'End of auto mode.\n'
        auto=false
        auto_end=$(date +%s)
        printf 'Time taken in auto mode: %ds.\n' $((auto_end - auto_start))
      else
        unknown_choice "$choice"
      fi
      ;;

    *)
      unknown_choice "$choice"
      # Counting upwards from here can never reach 50, so leave auto mode
      # instead of looping forever.
      auto=false
      ;;
  esac

  end=$(date +%s)
  printf 'Time taken: %ds.\n' $((end - start))
done
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment