arzga/wer.sh

## wer.sh
#!/bin/bash

# Check for requirements:

# Abort early when a required external tool is missing.
# Diagnostics are written to stderr so they never mix with report output
# that a caller may be capturing from stdout.

if ! command -v sclite &> /dev/null; then
  {
    echo "Error: 'sclite' not found in path."
    echo
    echo "You need to install it for transcript substitution/insertion/deletion analysis."
    echo "See: https://github.com/usnistgov/SCTK"
  } >&2
  exit 1
fi

if ! command -v jq &> /dev/null; then
  {
    echo "Error: 'jq' not found in path."
    echo
    # jq is a JSON processor; it is used below to read the jsonl (.ac) files.
    echo "You need to install it for JSON parsing."
    echo "See: https://stedolan.github.io/jq/"
  } >&2
  exit 1
fi

# Parse optional arguments

# Defaults: REF is read from 'transcript', HYP from 'hypothesis'.
FIELD1=.transcript FIELD2=.hypothesis

# Consume leading options. The loop stops at the first argument that is not
# a known option, leaving the remaining positional parameters untouched.
while [ $# -gt 0 ]; do
  case "$1" in
    -tt)
      FIELD1=.transcript FIELD2=.transcript
      ;;
    -hh)
      FIELD1=.hypothesis FIELD2=.hypothesis
      ;;
    -th)
      FIELD1=.transcript FIELD2=.hypothesis
      ;;
    -h | --help)
      HELP=1
      ;;
    -m | --mode)
      # The mode value is the next argument; drop it together with the flag.
      OUT_MODE=$2
      shift
      ;;
    *)
      break
      ;;
  esac
  shift
done

# Display usage if requested so

# Print the usage text and exit successfully when no positional argument is
# left after option parsing, or when help was requested.
if (( $# == 0 )) || [[ $HELP == 1 ]]; then
  # One output line per argument; "" yields an empty line.
  printf '%s\n' \
    "Analyze word error rate (WER) and other sentence metrics between REF and HYP transcripts using 'sclite'." \
    "" \
    "usage: wer.sh [--help] [-th|-hh|-tt] [--mode <mode>]" \
    "              <ref.ac> [<hyp.ac>]" \
    "" \
    "If only <ref.ac> is given, both REF and HYP fields are picked from it." \
    "If both <ref.ac> and <hyp.ac> are given, REF field is picked from <ref.ac> and HYP from <hyp.ac>." \
    "" \
    "Select fields to compare:" \
    "" \
    "-th  Compare 'transcript' (REF) to 'hypothesis' (HYP) field (default)" \
    "-hh  Compare 'hypothesis' (REF) to 'hypothesis' (HYP) field" \
    "-tt  Compare 'transcript' (REF) to 'transcript' (HYP) field" \
    "" \
    "Select the 'sclite' report to display:" \
    "-m <mode>, --mode <mode>" \
    "     'wer' for word error rate (WER) (default)" \
    "     'sum' for ins/del/substitution summary" \
    "     'snt' for sentence diff report" \
    "     'dtl' for detailed report" \
    "     'prf' for detailed report, no word wrap" \
    "     'err' above with errors only"
  exit 0
fi

# Extract .ac file path(s) from arguments

# The first positional argument is the REF file. The HYP file is the second
# argument when it is present and non-empty; otherwise the REF file is reused.
AC_FILE1=$1
AC_FILE2=${2:-$1}

# Prepare a temp folder and clean it up upon exit with trap

# Remove the temporary working folder, if one was created.
# Globals: TEMP_FOLDER (read) - folder path from 'mktemp -d'; may be empty.
function finish {
  if [ -n "$TEMP_FOLDER" ]; then
    # Quoted so a path containing whitespace or glob characters is removed as
    # one argument; '--' keeps a leading '-' from being parsed as an option.
    rm -rf -- "$TEMP_FOLDER"
  fi
}

# Stop right away if no temp folder could be created.
TEMP_FOLDER=$(mktemp -d) || exit 1
# Run the cleanup whenever the shell exits.
trap finish EXIT

# Prepare 'sclite' compatible transcript (.trn) files from jsonl (.ac) files

# For every JSON record emit one line: "<field text> (sp1-<utterance>)".
# FIELD1/FIELD2 are spliced into the jq program as path expressions.
# All expansions are quoted so file paths or a temp folder containing
# whitespace or glob characters are passed through as single words.
jq -r '. | ['"$FIELD1"'," (sp1-",.utterance,")"] | join("")' "$AC_FILE1" > "$TEMP_FOLDER/ref.trn"
jq -r '. | ['"$FIELD2"'," (sp1-",.utterance,")"] | join("")' "$AC_FILE2" > "$TEMP_FOLDER/hyp.trn"

# Compare and analyze with 'sclite'

# Run 'sclite' on the generated .trn files and print the report selected by
# OUT_MODE:
#   empty or 'wer' - 'dtl' report filtered down to the "Percent Total Error" line
#   'err'          - 'prf' report filtered to matching "Eval:" lines plus the
#                    5 lines before each match
#   anything else  - passed to 'sclite -o' as given
# The .trn paths are quoted so a temp folder containing whitespace stays a
# single argument.
if [[ -z "$OUT_MODE" || $OUT_MODE == "wer" ]]; then
  >&2 echo "Displaying word error rate (WER). Use 'wer.sh --help' for more options."
  sclite -r "$TEMP_FOLDER/ref.trn" -h "$TEMP_FOLDER/hyp.trn" -i wsj -o dtl stdout \
    | grep "Percent Total Error"
elif [[ $OUT_MODE == "err" ]]; then
  sclite -r "$TEMP_FOLDER/ref.trn" -h "$TEMP_FOLDER/hyp.trn" -i wsj -o prf stdout \
    | grep -E -B 5 "^Eval:.*\S+.*$"
else
  # NOTE(review): OUT_MODE stays unquoted, as before, so a value holding
  # several space-separated report names still splits into separate
  # arguments - confirm whether callers rely on that before quoting it.
  # shellcheck disable=SC2086
  sclite -r "$TEMP_FOLDER/ref.trn" -h "$TEMP_FOLDER/hyp.trn" -i wsj -o $OUT_MODE stdout
fi
	#!/bin/bash

	# NOTE(review): this tab-indented section repeats the script that precedes
	# it in this file; running the file executes the analysis a second time.
	# Confirm whether the copy is meant to be kept.
	#
	# In this copy every pipe character had been backslash-escaped ("\|"),
	# which bash does not treat as a pipe or as the "||" operator, and the
	# 'err' regex had lost its '*' quantifiers. Both are restored below.

	# Check for requirements:

	if ! command -v sclite &> /dev/null; then
	  echo "Error: 'sclite' not found in path."
	  echo
	  echo "You need to install it for transcript substitution/insertion/deletion analysis."
	  echo "See: https://github.com/usnistgov/SCTK"
	  exit 1
	fi

	if ! command -v jq &> /dev/null; then
	  echo "Error: 'jq' not found in path."
	  echo
	  echo "You need to install it for jquery parsing."
	  echo "See: https://stedolan.github.io/jq/"
	  exit 1
	fi

	# Parse optional arguments

	# Defaults: REF is read from 'transcript', HYP from 'hypothesis'.
	FIELD1=.transcript
	FIELD2=.hypothesis

	# Stops at the first argument that is not a known option.
	while [ $# -gt 0 ]
	do
	  case "$1" in
	    -tt) FIELD1=.transcript
	         FIELD2=.transcript
	         ;;
	    -hh) FIELD1=.hypothesis
	         FIELD2=.hypothesis
	         ;;
	    -th) FIELD1=.transcript
	         FIELD2=.hypothesis
	         ;;
	    --help) HELP=1
	         ;;
	    -h) HELP=1
	         ;;
	    --mode) OUT_MODE=$2
	         shift
	         ;;
	    -m) OUT_MODE=$2
	         shift
	         ;;
	    *) break
	         ;;
	  esac
	  shift
	done

	# Display usage if requested so

	if [[ $# -eq 0 || $HELP == 1 ]]; then
	  echo "Analyze word error rate (WER) and other sentence metrics between REF and HYP transcripts using 'sclite'."
	  echo
	  echo "usage: wer.sh [--help] [-th|-hh|-tt] [--mode <mode>]"
	  echo " <ref.ac> [<hyp.ac>]"
	  echo
	  echo "If only <ref.ac> is given, both REF and HYP fields are picked from it."
	  echo "If both <ref.ac> and <hyp.ac> are given, REF field is picked from <ref.ac> and HYP from <hyp.ac>."
	  echo
	  echo "Select fields to compare:"
	  echo
	  echo "-th Compare 'transcript' (REF) to 'hypothesis' (HYP) field (default)"
	  echo "-hh Compare 'hypothesis' (REF) to 'hypothesis' (HYP) field"
	  echo "-tt Compare 'transcript' (REF) to 'transcript' (HYP) field"
	  echo
	  echo "Select the 'sclite' report to display:"
	  echo "-m <mode>, --mode <mode>"
	  echo " 'wer' for word error rate (WER) (default)"
	  echo " 'sum' for ins/del/substitution summary"
	  echo " 'snt' for sentence diff report"
	  echo " 'dtl' for detailed report"
	  echo " 'prf' for detailed report, no word wrap"
	  echo " 'err' above with errors only"
	  exit 0
	fi

	# Extract .ac file path(s) from arguments

	AC_FILE1=$1

	# With a single file, REF and HYP are both read from it.
	if [ -z "$2" ]; then
	  AC_FILE2=$1
	else
	  AC_FILE2=$2
	fi

	# Prepare a temp folder and clean it up upon exit with trap

	function finish {
	  if [ ! -z "$TEMP_FOLDER" ]; then
	    rm -rf $TEMP_FOLDER
	  fi
	}

	TEMP_FOLDER=$(mktemp -d) || exit 1
	trap finish EXIT

	# Prepare 'sclite' compatible transcript (.trn) files from jsonl (.ac) files

	# Each output line has the form "<field text> (sp1-<utterance>)".
	jq -r '. | ['$FIELD1'," (sp1-",.utterance,")"] | join("")' $AC_FILE1 > $TEMP_FOLDER/ref.trn
	jq -r '. | ['$FIELD2'," (sp1-",.utterance,")"] | join("")' $AC_FILE2 > $TEMP_FOLDER/hyp.trn

	# Compare and analyze with 'sclite'

	if [[ -z "$OUT_MODE" || $OUT_MODE == "wer" ]]; then
	  >&2 echo "Displaying word error rate (WER). Use 'wer.sh --help' for more options."
	  sclite -r $TEMP_FOLDER/ref.trn -h $TEMP_FOLDER/hyp.trn -i wsj -o dtl stdout|grep "Percent Total Error"
	elif [[ $OUT_MODE == "err" ]]; then
	  # Keep "Eval:" lines that contain a non-blank character after the
	  # label, plus the 5 lines preceding each of them.
	  sclite -r $TEMP_FOLDER/ref.trn -h $TEMP_FOLDER/hyp.trn -i wsj -o prf stdout|grep -E -B 5 "^Eval:.*\S+.*$"
	else
	  sclite -r $TEMP_FOLDER/ref.trn -h $TEMP_FOLDER/hyp.trn -i wsj -o $OUT_MODE stdout
	fi