Skip to content

Instantly share code, notes, and snippets.

@kadirmalak
Created April 3, 2019 17:32
Show Gist options
  • Save kadirmalak/c3465aaeeec5a233a36e7458a6b3eaad to your computer and use it in GitHub Desktop.
Save kadirmalak/c3465aaeeec5a233a36e7458a6b3eaad to your computer and use it in GitHub Desktop.
#!/usr/bin/env bash
#
# Scores an anomaly detector by how many of the actual anomalies land in the
# top K percent of rows when rows are ranked by "anomaly confidence".
#
# Usage:
#   <script> <normal-label> <anomaly-label> <K-percent> <csv>
#
# The CSV must have exactly three comma-separated columns on every row:
#   ACTUAL,PREDICTED,CONFIDENCE
# CONFIDENCE is the confidence of the PREDICTED label. For rows predicted as
# the normal label it is flipped (1 - confidence) so that every row carries a
# confidence of being an anomaly. Rows whose PREDICTED value matches neither
# label are left out of the ranking.
#
# Output (stdout): the report lines "top", "line count", "total A1 count", ...
# and finally "score: <anomalies in top K%> / <all anomalies>".
# Errors go to stderr and the script finishes with status 1.
#
# Note: `set -o pipefail` is deliberately not enabled; `sort | head` may end
# `sort` with SIGPIPE, which is expected here and not a failure.

# Prints the usage line to stderr.
usage() {
  printf '%s\n' "usage: $0 <normal-label> <anomaly-label> <K-percent> <actual__predicted__confidence.csv>" >&2
}

# Reads CSV rows on stdin and prints how many have first column equal to $1.
count_actual() {
  awk -F, -v label="$1" 'BEGIN { n = 0 } ($1 == label) { n++ } END { print n }'
}

# Reads CSV rows on stdin and prints them ranked by anomaly confidence,
# highest first.
#   $1 - normal label  (confidence is flipped to 1 - confidence)
#   $2 - anomaly label (confidence is kept)
# The confidence is printed in fixed notation: awk's default number-to-string
# conversion can yield exponent notation such as 1e-05, which `sort -n` would
# read as 1 and rank at the very top.
rank_by_anomaly_confidence() {
  LC_ALL=C awk -F, -v l0="$1" -v l1="$2" '
    ($2 == l1) { printf "%s,%s,%.10f\n", $1, $2, $3 }
    ($2 == l0) { printf "%s,%s,%.10f\n", $1, $2, 1 - $3 }
  ' | LC_ALL=C sort -t, -k3,3 -n -r
}

# Entry point. Uses `return` instead of `exit` so that the status of the
# final `main "$@"` line becomes the status of the script.
#   $1 - normal label
#   $2 - anomaly label
#   $3 - K, integer percent in 1..100
#   $4 - path to the CSV file
# Returns 0 on success, 1 on any validation error.
main() {
  if [[ "$#" -ne 4 ]]; then
    usage
    return 1
  fi

  local label_0=$1 label_1=$2 k=$3 file=$4
  # Integers 1..100, leading zeros tolerated. A regex is used because
  # `[ "$k" -lt 1 ]` merely errors (status 2) on non-integers, which an `if`
  # treats as "false" and lets the bad value through.
  local -r k_re='^0*([1-9][0-9]?|100)$'
  local first_cols line_count top_n a1_total a1_top score

  if [[ ! -f "$file" ]]; then
    printf '%s\n' "file not found: $file" >&2
    return 1
  fi

  if [[ ! "$k" =~ $k_re ]]; then
    printf '%s\n' "K must be between 1 and 100" >&2
    return 1
  fi

  # The first row decides the expected shape; an empty file yields an empty
  # value here and is rejected as well.
  first_cols=$(awk -F, '{ print NF; exit }' < "$file")
  if [[ "$first_cols" != 3 ]]; then
    printf '%s\n' "column count must be 3: (ACTUAL,PREDICTED,CONFIDENCE)" >&2
    return 1
  fi

  if ! awk -F, 'NF != 3 { exit 1 }' < "$file"; then
    printf '%s\n' "not all rows have 3 columns, corrupt csv..." >&2
    return 1
  fi

  printf '%s\n' "top: $k%"

  # Count records with awk rather than `wc -l`: it yields a bare number and
  # also counts a last row that has no trailing newline, matching what the
  # other awk passes see.
  line_count=$(awk 'END { print NR }' < "$file")
  printf '%s\n' "line count: $line_count"

  # 10# forces base 10 so a K such as "08" is not parsed as octal.
  top_n=$(( line_count * 10#$k / 100 ))
  printf '%s\n' "top: $top_n lines"

  a1_total=$(count_actual "$label_1" < "$file")
  printf '%s\n' "total A1 count: $a1_total"

  # - convert confidence to anomaly confidence
  # - sort by it, descending
  # - keep the top $top_n rows
  # - count the rows that are actual anomalies
  if (( top_n > 0 )); then
    a1_top=$(rank_by_anomaly_confidence "$label_0" "$label_1" < "$file" \
      | head -n "$top_n" \
      | count_actual "$label_1")
  else
    # Nothing to select; also avoids `head -n 0`, which some heads reject.
    a1_top=0
  fi
  printf '%s\n' "total A1 count in top $top_n after sort: $a1_top"

  if (( a1_top > 0 )); then
    # a1_top > 0 implies a1_total > 0, so the division is safe.
    score=$(awk -v a="$a1_top" -v b="$a1_total" 'BEGIN { print a / b }')
    printf '%s\n' "score: $score"
  else
    printf '%s\n' "score: 0"
    printf '%s\n' "notice: you may need to check your label names"
  fi
}

main "$@"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment