Skip to content

Instantly share code, notes, and snippets.

@lucasRolff
Forked from moisseev/rspamc_learn.sh
Created May 4, 2022 09:26
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save lucasRolff/fd9435400acbda1febaa09a97a51e93d to your computer and use it in GitHub Desktop.
Save lucasRolff/fd9435400acbda1febaa09a97a51e93d to your computer and use it in GitHub Desktop.
Train Rspamd using Dovecot folders ( * the script expunges folders *)
#!/bin/sh
# ---- Configuration ---------------------------------------------------------
# Dovecot account handed to every `doveadm ... -u` call in this script.
# NOTE(review): USER is commonly an exported login variable; if it is, child
# processes will see this value as well - confirm that is intended.
USER="spam@example.com"
# The three training mailboxes share one prefix; the learn loop strips this
# prefix from the mailbox name to obtain the category (ham / prob / spam).
FOLDER_PREFIX="train_"
FOLDER_HAM="${FOLDER_PREFIX}ham"
FOLDER_PROB="${FOLDER_PREFIX}prob"
FOLDER_SPAM="${FOLDER_PREFIX}spam"
DOVEADM="/usr/local/bin/doveadm"
RSPAMADM="/usr/local/bin/rspamadm"
# Expanded unquoted on purpose so that it splits into option + argument.
RSPAMC_CONNECT="-h localhost:11334" # rspamc (--connect|-h) option
# Sent to rspamc as a "settings" header when listing symbols.
SETTINGS='{"groups_enabled":["fuzzy","statistics"],"symbols_enabled":["LOCAL_NO_LOG_STAT"]}'
#-------------------------------
# Run-mode flags; empty means "off". They are switched on by -b / -s / -c.
MODE_BULK=
MODE_SCAN=
COLOR=
# Script name without its directory. Parameter expansion instead of
# `basename $0` so a path containing spaces or glob characters is handled.
PROGNAME=${0##*/}
PROGDESCR="$PROGNAME - learn Rspamd using Dovecot folders"
usage () {
  # Print the help text to stderr: the description line, the synopsis and
  # one line per supported flag. Reads PROGDESCR and PROGNAME.
  printf '%s\n' \
    "$PROGDESCR" \
    "usage: $PROGNAME [-b|-s|-c|-h]" \
    '-b bulk learning (do not scan, do not synchronize fuzzy after each learn)' \
    '-s scan messages, do not learn' \
    '-c colorize output (requires histring)' \
    '-h brief help' >&2
}
# Parse the command-line flags given as arguments.
#   -b  sets MODE_BULK=1
#   -s  sets MODE_SCAN=1
#   -c  sets COLOR=1
#   -h  prints usage and exits 0
#   any unknown flag prints usage and exits 1
# The -h and unknown-flag cases are separate arms so that no test on an
# unquoted "?" is needed: unquoted, "?" is a glob and could expand to a
# single-character file name in the current directory.
fn_parse_args() {
  while getopts bsch opt; do
    case "$opt" in
      b)
        MODE_BULK=1 ;;
      s)
        MODE_SCAN=1 ;;
      c)
        COLOR=1 ;;
      h)
        usage
        exit 0 ;;
      # \? - unknown flag
      \?)
        usage
        exit 1 ;;
    esac
  done
}
fn_parse_args "$@"
# Write the `text` field of the message selected by $ID to stdout, using
# `doveadm fetch` for account $USER.
# Globals read: DOVEADM, USER, ID.
fn_fetch_text() {
  # $ID is intentionally unquoted: it holds several query words
  # ("mailbox-guid <guid> uid <uid>") that must reach doveadm as separate
  # arguments.
  "$DOVEADM" fetch -u "$USER" text $ID
}
# Filter stdin -> stdout, dropping the boilerplate lines of rspamc
# learn / fuzzy_add output: the "Results for file" header, the filename and
# scan_time fields, the hashes list (opening line, 128-hex-digit entries,
# closing bracket) and empty lines. Everything else passes through.
fn_filter_learn_output() {
  # grep -E rather than the obsolescent egrep, which newer GNU grep releases
  # warn about on every invocation.
  grep -Ev '^(Results for file: stdin \(0\.[[:digit:]]{3} seconds\)|filename = "stdin";|scan_time = 0\.[[:digit:]]{6};|hashes \[| {4}"[[:xdigit:]]{128}",|]|^$)$'
}
# Run `rspamadm control fuzzy_sync` and discard its standard output
# (stderr is left alone so failures remain visible).
# Globals read: RSPAMADM.
fn_fuzzy_sync() {
  "$RSPAMADM" control fuzzy_sync >/dev/null
}
# Scan the message selected by $ID with `rspamc symbols`, sending $SETTINGS
# as a "settings" header, and print the result without the summary lines
# (Results/Metric/Action/Spam/Score/Urls/Emails) and without empty lines.
# A single blank line is printed afterwards as a separator.
# Globals read: RSPAMC_CONNECT, SETTINGS (plus those used by fn_fetch_text).
fn_list_symbols() {
  # $RSPAMC_CONNECT is intentionally unquoted (option + argument).
  # The settings header is quoted: the JSON contains [...] which would
  # otherwise be treated as a glob pattern.
  fn_fetch_text |
    rspamc $RSPAMC_CONNECT --header="settings:$SETTINGS" symbols |
    grep -Ev '^Results for file: stdin|^\[Metric: default\]|^Action: |^Spam: |^Score: |^Urls: |^Emails: |^$'
  echo
}
# Same scan as the long symbol listing, but the "Message-ID: " line is
# filtered out as well. Prints the remaining rspamc output followed by one
# blank separator line.
# Globals read: RSPAMC_CONNECT, SETTINGS (plus those used by fn_fetch_text).
fn_list_symbols_short() {
  # $RSPAMC_CONNECT is intentionally unquoted (option + argument).
  # The settings header is quoted: the JSON contains [...] which would
  # otherwise be treated as a glob pattern.
  fn_fetch_text |
    rspamc $RSPAMC_CONNECT --header="settings:$SETTINGS" symbols |
    grep -Ev '^Results for file: stdin|^\[Metric: default\]|^Action: |^Spam: |^Score: |^Urls: |^Emails: |^$|^Message-ID: '
  echo
}
# Filter stdin -> stdout for rspamc learn / fuzzy_add output.
# When COLOR is set and the external `histring` tool is available, the
# stream is passed through a chain of histring invocations keyed on
# "success", two specific expected error messages, and any other line
# starting with "error". Otherwise the input is copied through unchanged.
fn_highlight() {
  # Explicit if/else instead of `test && (...) || cat`: with that form the
  # fallback `cat` also ran whenever the histring chain returned non-zero.
  if [ -n "$COLOR" ] && command -v histring >/dev/null 2>&1; then
    histring -c green -fE '^success ' |
      histring -c white -fE '^error = "all learn conditions denied learning (ham|spam) in default classifier"' |
      histring -c white -fE '^HTTP error: 404, No content to generate fuzzy for flag [0-9]+' |
      histring -fE '^error[ :]'
  else
    cat
  fi
}
# Filter stdin -> stdout for the "==> <category>" banner lines.
# When COLOR is set and the external `histring` tool is available, lines
# ending in " ham" and " spam" are passed through histring (green is
# requested for ham); otherwise the input is copied through unchanged.
fn_highlight_cathegory() {
  # Explicit if/else instead of `test && (...) || cat`: with that form the
  # fallback `cat` also ran whenever the histring chain returned non-zero.
  if [ -n "$COLOR" ] && command -v histring >/dev/null 2>&1; then
    histring -c green -fE ' ham$' |
      histring -E ' spam$'
  else
    cat
  fi
}
# Scan-only pass: for each training mailbox (ham, prob, spam) print a
# "==> <mailbox>" banner followed by a blank line, then list the Rspamd
# symbols of every UNDELETED message in it. Nothing is learned or flagged.
# Globals read: DOVEADM, USER, FOLDER_HAM, FOLDER_PROB, FOLDER_SPAM.
# Globals written: EACH_FOLDER, and (inside the pipeline subshell) GUID,
# MSG_UID, ID.
fn_scan_folders() {
  for EACH_FOLDER in "$FOLDER_HAM" "$FOLDER_PROB" "$FOLDER_SPAM"; do
    # printf instead of `echo -e`, which is not portable across /bin/sh
    # implementations (some print the "-e" literally).
    printf '==> %s\n\n' "$EACH_FOLDER"
    "$DOVEADM" search -u "$USER" mailbox "$EACH_FOLDER" UNDELETED |
      # The second field is read into MSG_UID, not UID: UID is a readonly
      # variable in bash, so `read ... UID` fails there and the loop body
      # would never run when /bin/sh is bash.
      while read -r GUID MSG_UID; do
        ID="mailbox-guid $GUID uid $MSG_UID"
        fn_list_symbols
      done
  done
}

# -s: scan only, then stop before the learning section.
if [ -n "$MODE_SCAN" ]; then
  fn_scan_folders
  exit 0
fi
# Learn a single message and flag it \Deleted.
# Arguments: $1 - mailbox GUID, $2 - message UID (as printed by
#            `doveadm search`).
# Steps: look up the message's mailbox, strip FOLDER_PREFIX to get the
# category, pick the fuzzy flag/weight for it, optionally list symbols,
# run rspamc learn_<class> and fuzzy_add, optionally sync fuzzy storage and
# list symbols again, then add the \Deleted flag.
# Globals read: DOVEADM, USER, FOLDER_PREFIX, RSPAMC_CONNECT, MODE_BULK.
# Globals written: ID, CATHEGORY, FUZZY_FLAG, FUZZY_WEIGHT.
# Returns: 1 if the category is not spam/prob/ham (nothing is learned or
#          flagged in that case), 0 otherwise.
fn_learn_message() {
  ID="mailbox-guid $1 uid $2"
  # $ID is intentionally unquoted: it must split into separate query words.
  CATHEGORY=$("$DOVEADM" -f flow fetch -u "$USER" mailbox $ID |
    sed -En "s|^mailbox=${FOLDER_PREFIX}||p")
  # printf instead of the non-portable `echo -e`.
  printf '==> %s\n' "$CATHEGORY" | fn_highlight_cathegory
  case "$CATHEGORY" in
    spam)
      # FUZZY_DENIED
      FUZZY_FLAG=11
      FUZZY_WEIGHT=20 ;;
    prob)
      # "prob" messages are learned as spam, with their own fuzzy flag and
      # a lower weight.
      CATHEGORY="spam"
      # FUZZY_PROB
      FUZZY_FLAG=12
      FUZZY_WEIGHT=6 ;;
    ham)
      # FUZZY_WHITE
      FUZZY_FLAG=13
      FUZZY_WEIGHT=20 ;;
    *)
      echo "Unknown cathegory: $CATHEGORY" >&2
      return 1 ;;
  esac
  [ -n "$MODE_BULK" ] || fn_list_symbols
  printf '%-12s' "learn_$CATHEGORY:"
  fn_fetch_text | rspamc $RSPAMC_CONNECT "learn_$CATHEGORY" | fn_filter_learn_output | fn_highlight
  printf '%-12s' "fuzzy_add:"
  fn_fetch_text | rspamc $RSPAMC_CONNECT -w "$FUZZY_WEIGHT" -f "$FUZZY_FLAG" fuzzy_add | fn_filter_learn_output | fn_highlight
  echo
  # Outside bulk mode, sync after each message and show the symbols again.
  [ -n "$MODE_BULK" ] || {
    fn_fuzzy_sync
    fn_list_symbols_short
  }
  "$DOVEADM" flags add -u "$USER" '\Deleted' $ID
  # Only an unknown category is reported as failure, so a failing
  # `flags add` does not abort the run.
  return 0
}

# Learn every UNDELETED message found in the three training mailboxes.
# The loop runs in a pipeline subshell, so an `exit` inside it cannot stop
# the script by itself; the trailing `|| exit 1` propagates the abort so
# that nothing is synced or expunged after an unknown category.
# The second field is read into MSG_UID because UID is readonly in bash.
"$DOVEADM" search -u "$USER" \
  \( mailbox "$FOLDER_HAM" OR mailbox "$FOLDER_PROB" OR mailbox "$FOLDER_SPAM" \) \
  UNDELETED |
  while read -r GUID MSG_UID; do
    fn_learn_message "$GUID" "$MSG_UID" || exit 1
  done || exit 1
# In bulk mode no sync was done after the individual messages, so run a
# single fuzzy sync now.
if [ -n "$MODE_BULK" ]; then
  fn_fuzzy_sync
fi
#$DOVEADM expunge -u $USER mailbox Trash ALL
# Workaround for dovecot2-2.2.21
# The sequence set is quoted so the shell cannot glob-expand "1:*" against
# file names in the current directory.
"$DOVEADM" expunge -u "$USER" mailbox Trash '1:*'
# Expunge everything flagged \Deleted in the training mailboxes and in
# Inbox / Junk.
"$DOVEADM" expunge -u "$USER" \
  \( \
    mailbox "$FOLDER_HAM" \
    OR mailbox "$FOLDER_PROB" \
    OR mailbox "$FOLDER_SPAM" \
    OR mailbox Inbox \
    OR mailbox Junk \
  \) \
  DELETED
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment