Last active
April 1, 2019 08:19
-
-
Save Niols/e709bc2b9886c569e7db1c009c7d09f8 to your computer and use it in GitHub Desktop.
Script to run Morbig and Dash on a Software Heritage archive and compare the results.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/sh
# Run Morbig and Dash on a corpus of shell scripts and compare the results.
#
# Usage: <this script> --corpus DIR [--time]
#   --corpus DIR   corpus directory (mandatory)
#   --time         prefix every tool invocation with `time`
#
# -e: stop on an unhandled failure; -u: unset variables are errors;
# -C: noclobber, i.e. a plain `>` refuses to overwrite an existing file.
set -euC

# Everything below runs from the directory that holds this script, so the
# *.list files land there and a relative --corpus is resolved from there.
cd "$(dirname "$0")"

# Both tools are required; say which one is missing instead of dying silently.
if ! command -v morbig >/dev/null; then
  printf 'Required tool not found: morbig\n' >&2
  exit 1
fi
if ! command -v dash >/dev/null; then
  printf 'Required tool not found: dash\n' >&2
  exit 1
fi

while [ $# -gt 0 ]; do
  case $1 in
    --corpus)
      if [ $# -lt 2 ]; then
        printf 'Option --corpus requires an argument.\n' >&2
        exit 1
      fi
      shift
      # Quoted: some shells run `readonly` as an ordinary command and would
      # word-split an unquoted value.
      readonly CORPUS="$1"
      ;;
    --time)
      readonly TIME=time
      ;;
    *)
      printf 'Unknown argument: %s\n' "$1" >&2
      exit 1
      ;;
  esac
  shift
done

if [ -z "${CORPUS+x}" ]; then
  printf 'Corpus is mandatory.\n' >&2
  exit 1
fi

# TIME is expanded unquoted in front of each tool run, so it must exist even
# when --time was not given. Note that ${TIME+x} is also true when TIME was
# inherited from the environment.
if [ -z "${TIME+x}" ]; then
  readonly TIME=
fi
# Append `.sh.gz` to every regular file of the corpus, then gunzip it, so that
# a file `<name>` ends up as `<name>.sh`.
# Each direct child of $CORPUS is handled by its own background job; the
# function returns once all of them have finished.
# Globals: CORPUS (read), subcorpus (written: loop variable).
rename_gunzip_corpus () {
  for subcorpus in "$CORPUS"/*; do
    # An unmatched glob stays literal; do not hand that bogus path to find.
    [ -e "$subcorpus" ] || continue
    {
      # POSIX only guarantees the substitution of a `{}` that is an argument
      # on its own, so the new name is built inside a small sh -c instead of
      # writing '{}'.sh.gz. Files already ending in .sh.gz are skipped so an
      # entry renamed earlier in this same traversal is not renamed again.
      find "$subcorpus" -type f ! -name '*.sh.gz' \
        -exec sh -c 'for f in "$@"; do mv -- "$f" "$f.sh.gz"; done' sh '{}' '+'
      find "$subcorpus" -type f -exec gunzip '{}' '+'
    } &
  done
  wait
}
# Run a tool on every script whose path is read from stdin (one per line).
# Arguments: $1     - the tool; its name is also used in the result file names
#            $2...  - extra arguments placed before the script path
# For each script this writes `<script>.<tool>-output` (stdout and stderr of
# the run) and creates an empty `<script>.<tool>-accepted` or
# `<script>.<tool>-rejected` marker depending on the exit status.
# Scripts that already have either marker are skipped.
# Globals: TIME (read) - empty, or a command to prefix the invocation with;
#          script (written: loop variable).
run_on () {
  while IFS= read -r script; do
    if [ -e "$script.$1-accepted" ] || [ -e "$script.$1-rejected" ]; then
      continue
    fi
    # $TIME is left unquoted on purpose: when empty it must disappear from
    # the command line. `>|` overrides noclobber so that an -output file left
    # by an interrupted run cannot make the redirection fail, which would
    # record the script as rejected without ever running the tool.
    if $TIME "$@" "$script" >| "$script.$1-output" 2>&1; then
      : > "$script.$1-accepted"
    else
      : > "$script.$1-rejected"
    fi
  done
}
# Run a tool on every `*.sh` file found under a directory.
# Usage: run_on_dir DIR -- TOOL [ARG...]
# The paths found are piped to run_on, which receives TOOL and its arguments.
# Exits the current shell with status 1 when the `--` separator is missing.
# Globals: target (written).
run_on_dir () {
  target=$1
  shift
  if [ "${1-}" != '--' ]; then
    printf 'run_on_dir: expected -- after the directory.\n' >&2
    exit 1
  fi
  shift
  find "$target" -name '*.sh' | run_on "$@"
}
# Run a tool on the whole corpus: one background run_on_dir job per direct
# child of $CORPUS, then wait for all of them.
# Arguments: the tool and its arguments, forwarded after a `--` separator.
# Globals: CORPUS (read), subcorpus (written: loop variable).
run_on_whole_corpus () {
  for subcorpus in "$CORPUS"/*; do
    # An unmatched glob stays literal; skip it rather than searching it.
    [ -e "$subcorpus" ] || continue
    run_on_dir "$subcorpus" -- "$@" &
  done
  wait
}
# Print the number of lines of file $1 as a bare integer.
# Returns non-zero (printing nothing) when the file cannot be read.
# Globals: count_lines (written).
count () {
  count_lines=$(wc -l < "$1") || return
  # Some wc implementations pad the number with blanks; arithmetic expansion
  # strips them so labels printed just before the count stay aligned.
  printf '%d\n' "$(($count_lines))"
}
# Write the sorted paths of all `*.sh` files under $CORPUS to all.list (in the
# current directory) and print how many there are.
# Globals: CORPUS (read).
list_all () {
  find "$CORPUS" -name '*.sh' \
    | sort \
    > all.list
  count all.list
}
# List the scripts that carry a given result marker.
# Arguments: $1 - tool name (e.g. morbig, dash)
#            $2 - outcome   (e.g. accepted, rejected)
# Finds every `*.sh.<tool>-<outcome>` file under $CORPUS, strips that marker
# suffix to recover the script path, writes the sorted paths to
# `<tool>-<outcome>.list` and prints how many there are.
# Both arguments are used verbatim in a find pattern and a sed expression, so
# they are expected to be plain words.
# Globals: CORPUS (read).
list_by_how () {
  # Remove exactly the trailing ".<tool>-<outcome>". Cutting on the first two
  # dot-separated fields would truncate any path holding another dot, such as
  # a corpus given as ./corpus or a script named a.v2.sh.
  find "$CORPUS" -name "*.sh.$1-$2" \
    | sed "s/\\.$1-$2\$//" \
    | sort \
    > "$1-$2.list"
  count "$1-$2.list"
}
# Intersect two `<tool>-<outcome>.list` files.
# Arguments: $1    - base name of the resulting list
#            $2 $3 - tool and outcome naming the first input list
#            $4 $5 - tool and outcome naming the second input list
# Writes the lines common to both inputs (comm -12) to `$1.list` and prints
# how many there are. comm needs both inputs sorted.
list_diff_by_how () {
  comm -12 "$2-$3.list" "$4-$5.list" > "$1.list"
  count "$1.list"
}
# From all.list, keep the scripts whose `<script>.morbig-output` contains
# "Fatal error"; write them to morbig-fatal-error.list and print the count.
# Globals: script (written: loop variable).
list_morbig_fatal () {
  # IFS= and printf keep each path byte-for-byte (no blank trimming, no
  # echo escape processing).
  while IFS= read -r script; do
    if grep -q 'Fatal error' "$script.morbig-output"; then
      printf '%s\n' "$script"
    fi
  done \
    < all.list \
    > morbig-fatal-error.list
  count morbig-fatal-error.list
}
# Split morbig-only.list by Dash's error message.
# Scripts whose `<script>.dash-output` reports an unexpected token together
# with the expected one go to morbig-only-w-u-keyword.list; the rest go to
# morbig-only-wo-u-keyword.list. Prints both counts with a label.
# Globals: script (written: loop variable).
list_morbig_only_wo_u_keyword () {
  # IFS= and printf keep each path byte-for-byte.
  while IFS= read -r script; do
    if grep -q ': Syntax error: "[^"]*" unexpected (expecting "[^"]*")' "$script.dash-output"; then
      printf '%s\n' "$script"
    fi
  done \
    < morbig-only.list \
    > morbig-only-w-u-keyword.list

  # The complement: lines of the input that were not selected above.
  comm -23 \
    morbig-only.list \
    morbig-only-w-u-keyword.list \
    > morbig-only-wo-u-keyword.list

  printf 'Morbig only w/ unexp. keyword: '
  count morbig-only-w-u-keyword.list
  printf 'Morbig only w/o unexp. keyword: '
  count morbig-only-wo-u-keyword.list
}
# Split morbig-only-wo-u-keyword.list by Dash's error message.
# Scripts whose `<script>.dash-output` reports an unexpected newline go to
# morbig-only-wo-u-keyword-w-u-newline.list; the rest go to
# morbig-only-wo-u-keyword-wo-u-newline.list. Prints both counts with a label.
# Globals: script (written: loop variable).
list_morbig_only_wo_u_keyword_wo_u_newline () {
  # IFS= and printf keep each path byte-for-byte.
  while IFS= read -r script; do
    if grep -q ': Syntax error: newline unexpected' "$script.dash-output"; then
      printf '%s\n' "$script"
    fi
  done \
    < morbig-only-wo-u-keyword.list \
    > morbig-only-wo-u-keyword-w-u-newline.list

  # The complement: lines of the input that were not selected above.
  comm -23 \
    morbig-only-wo-u-keyword.list \
    morbig-only-wo-u-keyword-w-u-newline.list \
    > morbig-only-wo-u-keyword-wo-u-newline.list

  printf 'Morbig only w/o unexp. keyword w/ unexp. newline: '
  count morbig-only-wo-u-keyword-w-u-newline.list
  printf 'Morbig only w/o unexp. keyword w/o unexp. newline: '
  count morbig-only-wo-u-keyword-wo-u-newline.list
}
# Split morbig-only-wo-u-keyword-wo-u-newline.list by script content.
# Scripts that themselves contain the text `200>` go to
# morbig-only-wo-u-keyword-wo-u-newline-w-200.list; the rest go to
# morbig-only-wo-u-keyword-wo-u-newline-wo-200.list. Prints both counts with
# a label.
# Globals: script (written: loop variable).
list_morbig_only_wo_u_keyword_wo_u_newline_wo_200 () {
  # IFS= and printf keep each path byte-for-byte.
  while IFS= read -r script; do
    # Here the script itself is searched, not a tool's output.
    if grep -q '200>' "$script"; then
      printf '%s\n' "$script"
    fi
  done \
    < morbig-only-wo-u-keyword-wo-u-newline.list \
    > morbig-only-wo-u-keyword-wo-u-newline-w-200.list

  # The complement: lines of the input that were not selected above.
  comm -23 \
    morbig-only-wo-u-keyword-wo-u-newline.list \
    morbig-only-wo-u-keyword-wo-u-newline-w-200.list \
    > morbig-only-wo-u-keyword-wo-u-newline-wo-200.list

  printf 'Morbig only w/o unexp. keyword w/o unexp. newline w/ 200>: '
  count morbig-only-wo-u-keyword-wo-u-newline-w-200.list
  printf 'Morbig only w/o unexp. keyword w/o unexp. newline w/o 200>: '
  count morbig-only-wo-u-keyword-wo-u-newline-wo-200.list
}
# Split dash-only.list by Morbig's error message.
# Scripts whose `<script>.morbig-output` reports an invalid variable parameter
# go to dash-only-w-i-param.list; the rest go to dash-only-wo-i-param.list.
# Prints both counts with a label.
# Globals: script (written: loop variable).
list_dash_only_wo_i_param () {
  # IFS= and printf keep each path byte-for-byte.
  while IFS= read -r script; do
    if grep -q ': Lexical error (Invalid variable parameter).' "$script.morbig-output"; then
      printf '%s\n' "$script"
    fi
  done \
    < dash-only.list \
    > dash-only-w-i-param.list

  # The complement: lines of the input that were not selected above.
  comm -23 \
    dash-only.list \
    dash-only-w-i-param.list \
    > dash-only-wo-i-param.list

  printf 'Dash only w/ inval. param.: '
  count dash-only-w-i-param.list
  printf 'Dash only w/o inval. param.: '
  count dash-only-wo-i-param.list
}
# Split dash-only-wo-i-param.list by Morbig's error message.
# Scripts whose `<script>.morbig-output` reports a syntax error go to
# dash-only-wo-i-param-w-syntax-err.list; the rest go to
# dash-only-wo-i-param-wo-syntax-err.list. Prints both counts with a label.
# Globals: script (written: loop variable).
list_dash_only_wo_i_param_wo_syntax_err () {
  # IFS= and printf keep each path byte-for-byte.
  while IFS= read -r script; do
    if grep -q ': Syntax error.' "$script.morbig-output"; then
      printf '%s\n' "$script"
    fi
  done \
    < dash-only-wo-i-param.list \
    > dash-only-wo-i-param-w-syntax-err.list

  # The complement: lines of the input that were not selected above.
  comm -23 \
    dash-only-wo-i-param.list \
    dash-only-wo-i-param-w-syntax-err.list \
    > dash-only-wo-i-param-wo-syntax-err.list

  printf 'Dash only w/o inval. param. w/ syntax err.: '
  count dash-only-wo-i-param-w-syntax-err.list
  printf 'Dash only w/o inval. param. w/o syntax err.: '
  count dash-only-wo-i-param-wo-syntax-err.list
}
# Split dash-only-wo-i-param-wo-syntax-err.list by script content.
# Scripts that themselves contain `<<` go to
# dash-only-wo-i-param-wo-syntax-err-w-here-doc.list; the rest go to
# dash-only-wo-i-param-wo-syntax-err-wo-here-doc.list. Prints both counts
# with a label.
# Globals: script (written: loop variable).
list_dash_only_wo_i_param_wo_syntax_err_wo_here_doc () {
  # IFS= and printf keep each path byte-for-byte.
  while IFS= read -r script; do
    # Here the script itself is searched, not a tool's output.
    if grep -q '<<' "$script"; then
      printf '%s\n' "$script"
    fi
  done \
    < dash-only-wo-i-param-wo-syntax-err.list \
    > dash-only-wo-i-param-wo-syntax-err-w-here-doc.list

  # The complement: lines of the input that were not selected above.
  comm -23 \
    dash-only-wo-i-param-wo-syntax-err.list \
    dash-only-wo-i-param-wo-syntax-err-w-here-doc.list \
    > dash-only-wo-i-param-wo-syntax-err-wo-here-doc.list

  printf 'Dash only w/o inval. param. w/o syntax err. w/ here doc.: '
  count dash-only-wo-i-param-wo-syntax-err-w-here-doc.list
  printf 'Dash only w/o inval. param. w/o syntax err. w/o here doc.: '
  count dash-only-wo-i-param-wo-syntax-err-wo-here-doc.list
}
# Print the menu of available steps. The numbers are the choices understood
# by the interactive loop; the quoted here-document is emitted verbatim.
print_help () {
  cat <<'EOF'
0. Uncompress archive.
1. Gunzip all files.
2. Cleanup corpus.
3. Cleanup local list files.
4. Run Morbig on the whole corpus.
5. Run Dash on the whole corpus.
10. List all scripts.
11. List scripts handled by Morbig.
12. List scripts handled by Dash.
13. List scripts handled by Morbig and Dash differently.
21. List scripts accepted by Morbig only that (don't) have an unexpected keyword.
22. List those scripts that (don't) have an unexpected newline.
23. List those scripts that (don't) use 200>
30. List scripts accepted by Dash only that (don't) have an invalid parameter.
31. List those scripts that (don't) have a syntax error.
32. List those scripts that (don't) have a here document.
40. List scripts on which Morbig fails fatally.
a. Run everything in this list. You'll get asked where to start.
h. Print this help.
EOF
}
# Report an unrecognised menu choice, then show the help again.
# Arguments: $1 - the offending choice; falls back to the global `choice`
#                 when called without an argument.
unknown_choice () {
  printf 'Unknown choice: %s\n' "${1-${choice-}}"
  print_help
}
# Interactive driver: show the menu, then repeatedly read a choice and run the
# matching step, printing how long each step took.
# In auto mode (choice `a`) the user gives a starting step; the choice is then
# incremented after every step, numbers without a step are skipped silently,
# and reaching 50 ends auto mode.
print_help
printf 'Enter your choice below:\n'

auto=false
choice=
while true; do
  if $auto; then
    if [ -z "$choice" ]; then
      printf '\nAuto mode: where to start?\n>>> '
      # End of input ends the session, with read's own status.
      read -r choice || exit
      # The start is incremented arithmetically below, so it must be a number.
      case $choice in
        ''|*[!0-9]*)
          printf 'Auto mode needs a step number.\n'
          choice=
          continue
          ;;
      esac
    else
      choice=$((choice + 1))
      printf '\n>>> %d\n' "$choice"
    fi
  else
    printf '\n>>> '
    read -r choice || exit
  fi

  start=$(date +%s)
  case "$choice" in
    a|A)
      auto=true
      auto_start=$(date +%s)
      choice=
      ;;
    h|H|'?')
      print_help
      ;;
    0)
      if [ -e "$CORPUS" ]; then
        printf 'Cannot create corpus: it already exists.\n'
        exit 1
      else
        mkdir "$CORPUS"
        printf 'Where is that archive?\n>>> '
        read -r archive || exit
        # tar runs from inside the corpus directory. Anchor a relative path
        # that exists from the current directory first; any other path is
        # still resolved from inside the corpus, as before.
        case $archive in
          /*) ;;
          *)
            if [ -e "$archive" ]; then
              archive=$PWD/$archive
            fi
            ;;
        esac
        (cd "$CORPUS" && tar xf "$archive")
      fi
      ;;
    1)
      rename_gunzip_corpus
      ;;
    2)
      # Remove everything that is not a script: outputs and result markers.
      find "$CORPUS" -type f ! -name '*.sh' -delete
      ;;
    3)
      rm -f ./*.list
      ;;
    4)
      run_on_whole_corpus morbig --as none --disable-alias-expansion
      ;;
    5)
      run_on_whole_corpus dash -n
      ;;
    10)
      printf 'Total number of scripts: '
      list_all
      ;;
    11)
      printf 'Accepted by Morbig: '
      list_by_how morbig accepted
      printf 'Rejected by Morbig: '
      list_by_how morbig rejected
      ;;
    12)
      printf 'Accepted by Dash: '
      list_by_how dash accepted
      printf 'Rejected by Dash: '
      list_by_how dash rejected
      ;;
    13)
      printf 'Accepted by both: '
      list_diff_by_how both-accepted morbig accepted dash accepted
      printf 'Rejected by both: '
      list_diff_by_how both-rejected morbig rejected dash rejected
      printf 'Accepted by Morbig only: '
      list_diff_by_how morbig-only morbig accepted dash rejected
      printf 'Accepted by Dash only: '
      list_diff_by_how dash-only morbig rejected dash accepted
      ;;
    21)
      list_morbig_only_wo_u_keyword
      ;;
    22)
      list_morbig_only_wo_u_keyword_wo_u_newline
      ;;
    23)
      list_morbig_only_wo_u_keyword_wo_u_newline_wo_200
      ;;
    30)
      list_dash_only_wo_i_param
      ;;
    31)
      list_dash_only_wo_i_param_wo_syntax_err
      ;;
    32)
      list_dash_only_wo_i_param_wo_syntax_err_wo_here_doc
      ;;
    40)
      printf 'Where Morbig fails fatally: '
      list_morbig_fatal
      ;;
    ?|1?|2?|3?|4?)
      # Any other one-character choice or number below 50: auto mode steps
      # over it quietly; typed by hand it is an error.
      if ! $auto; then
        unknown_choice "$choice"
      fi
      ;;
    50)
      if $auto; then
        printf 'End of auto mode.\n'
        auto=false
        auto_end=$(date +%s)
        printf 'Time taken in auto mode: %ds.\n' $((auto_end - auto_start))
      else
        unknown_choice "$choice"
      fi
      ;;
    *)
      unknown_choice "$choice"
      # An auto-mode start beyond the last step would otherwise keep counting
      # upwards forever; fall back to manual input.
      auto=false
      ;;
  esac
  end=$(date +%s)
  printf 'Time taken: %ds.\n' $((end - start))
done
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment