Created
April 3, 2019 17:32
-
-
Save kadirmalak/c3465aaeeec5a233a36e7458a6b3eaad to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
#
# Scores a classifier by how many actual anomalies land in the top K percent
# of rows when ranked by confidence in the anomaly label.
#
# Usage:
#   <script> <normal-label> <anomaly-label> <K-percent> <csv-file>
#
# The CSV must have exactly three comma-separated columns on every row:
#   ACTUAL,PREDICTED,CONFIDENCE
# Every row is treated as data (no header row is skipped).
#
# Steps:
#   1. For each row, express CONFIDENCE as confidence in the anomaly label:
#      kept as-is when PREDICTED is the anomaly label, replaced by
#      1 - CONFIDENCE when PREDICTED is the normal label. Rows whose
#      PREDICTED value matches neither label are left out of the ranking.
#   2. Sort by that value, descending.
#   3. Keep the first int(line_count * K / 100) rows.
#   4. score = (rows in that region whose ACTUAL is the anomaly label)
#              / (all rows whose ACTUAL is the anomaly label)
#
# Exit status: 0 on success, 1 on bad arguments or a malformed CSV.

set -euo pipefail

# Print a diagnostic on stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

if [[ "$#" -ne 4 ]]; then
  die "usage: $0 <normal-label> <anomaly-label> <K-percent> <actual__predicted__confidence.csv>"
fi

readonly LABEL_0=$1
readonly LABEL_1=$2
readonly K_ARG=$3 # percent
readonly FILE=$4

if [[ ! -f "$FILE" ]]; then
  die "file not found: $FILE"
fi

# Reject non-integers up front: a value such as "abc" or "12.5" would only
# make a numeric test error out, which an `if` treats as "false" and lets the
# bad value through. 10# forces base 10 so that "08" is not read as octal.
if [[ ! "$K_ARG" =~ ^0*[0-9]{1,3}$ ]] || ((10#$K_ARG < 1 || 10#$K_ARG > 100)); then
  die "K must be between 1 and 100"
fi
readonly K=$((10#$K_ARG))

# The file is always fed to awk through a redirect rather than as an operand,
# so a name containing '=' can never be mistaken for an awk assignment.
first_row_cols=$(awk -F, '{print NF; exit}' <"$FILE")
if [[ "${first_row_cols:-0}" -ne 3 ]]; then
  # An empty file yields no output at all and is reported here as well.
  die "column count must be 3: (ACTUAL,PREDICTED,CONFIDENCE)"
fi

if ! awk -F, 'NF != 3 {bad = 1; exit} END {exit bad}' <"$FILE"; then
  die "not all rows have 3 columns, corrupt csv..."
fi

echo "top: $K%"

# Count records with awk instead of `wc -l`: the result is a bare number and
# a final row without a trailing newline is still counted.
line_count=$(awk 'END {print NR}' <"$FILE")
echo "line count: $line_count"

head_count=$((line_count * K / 100))
echo "top: $head_count lines"

anomaly_total=$(
  awk -F, -v L1="$LABEL_1" '$1 == L1 {count++} END {print count + 0}' <"$FILE"
)
echo "total A1 count: $anomaly_total"

# - convert confidence to confidence in the anomaly label
# - sort by that confidence, descending
# - look at the top head_count rows
# - count rows whose actual label is the anomaly label
#
# The confidence is written with an explicit fixed-point format: plain string
# concatenation of a computed number goes through awk's CONVFMT (6 significant
# digits by default), which would collapse values very close to 0 or 1, and
# fixed-point also keeps `sort -n` correct for inputs in scientific notation.
# The top-N cut is done by the last awk rather than `head`, so no stage of the
# pipeline is terminated early by a closed pipe.
anomaly_in_top=$(
  LC_ALL=C awk -F, -v L0="$LABEL_0" -v L1="$LABEL_1" '
    $2 == L1 { printf "%s,%s,%.17f\n", $1, $2, $3 }
    $2 == L0 { printf "%s,%s,%.17f\n", $1, $2, 1 - $3 }
  ' <"$FILE" \
    | LC_ALL=C sort -t, -n -r -k3,3 \
    | awk -F, -v L1="$LABEL_1" -v N="$head_count" '
        NR <= N && $1 == L1 { count++ }
        END { print count + 0 }
      '
)
echo "total A1 count in top $head_count after sort: $anomaly_in_top"

if ((anomaly_in_top > 0)); then
  # anomaly_in_top > 0 implies anomaly_total > 0, so the division is safe.
  score=$(awk -v A="$anomaly_in_top" -v B="$anomaly_total" 'BEGIN {print A / B}')
  echo "score: $score"
else
  echo "score: 0"
  echo "notice: you may need to check your label names"
fi
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment