Skip to content

Instantly share code, notes, and snippets.

@kensanata
Last active December 21, 2015 04:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kensanata/6248849 to your computer and use it in GitHub Desktop.
Save kensanata/6248849 to your computer and use it in GitHub Desktop.
Report on the anti-spam measures that are most effective on Emacs Wiki.
;;; anti-spam-report.el --- Work with Emacs Wiki's anti-spam data
;;
;; Copyright (C) 2013 Alex Schroeder <alex@gnu.org>
;;
;; This program is free software: you can redistribute it and/or modify it under
;; the terms of the GNU General Public License as published by the Free Software
;; Foundation, either version 3 of the License, or (at your option) any later
;; version.
;;
;; This program is distributed in the hope that it will be useful, but WITHOUT
;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
;; FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
;; details.
;;
;; You should have received a copy of the GNU General Public License along with
;; GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
;;
;; You probably want to download all the necessary files to a local
;; directory, first. Use `anti-spam-download-files' to do that.
;;
;; Next, prepare the report using `anti-spam-report'.
;;
;;; Code:
(defun anti-spam-download-files (dir log hosts content regexps)
  "Download spammer.log, BannedHosts, BannedContent and BannedRegexps.
DIR is the directory the files are saved to.  LOG, HOSTS, CONTENT
and REGEXPS are the URLs to fetch.  Each response is saved in DIR
under the last path component of its URL.

The downloads are asynchronous: this function only starts the
requests and returns.  When a request fails, the failure is
reported with a message and nothing is written for that URL."
  (interactive
   (list (read-directory-name
          "Directory to save them to? "
          "/tmp")
         (read-from-minibuffer
          "spammer.log URL? "
          "http://www.emacswiki.org/emacs/spammer.log")
         (read-from-minibuffer
          "BannedHosts URL? "
          "http://www.emacswiki.org/emacs/raw/BannedHosts")
         (read-from-minibuffer
          "BannedContent URL? "
          "http://www.emacswiki.org/emacs/raw/BannedContent")
         (read-from-minibuffer
          "BannedRegexps URL? "
          "http://www.emacswiki.org/emacs/raw/BannedRegexps")))
  (dolist (url (list log hosts content regexps))
    (url-retrieve url
                  ;; DIR and URL are handed over as callback arguments so
                  ;; the callback does not depend on the bindings that are
                  ;; live when it eventually runs.
                  (lambda (status dir url)
                    (let ((err (plist-get status :error)))
                      (if err
                          ;; Do not save an error response as if it were
                          ;; the requested file.
                          (message "Downloading %s failed: %S" url err)
                        ;; Drop the first paragraph of the response buffer
                        ;; (the headers) and the line break after it.
                        (goto-char (point-min))
                        (forward-paragraph 1)
                        ;; Clamp the end: with an empty body, point is
                        ;; already at the end of the buffer.
                        (delete-region (point-min)
                                       (min (1+ (point)) (point-max)))
                        (write-file
                         (expand-file-name
                          (url-file-nondirectory url)
                          dir)
                         t))))
                  (list dir url)))
  (message "Downloading..."))
(defun anti-spam-extract-rule (str)
  "Reduce the log message STR to a short \"LIST: RULE\" label.
If STR contains a quoted rule followed by \"matched\" and a path
ending in BannedRegexps or BannedContent, return
\"BannedRegexps: RULE\" or \"BannedContent: RULE\".  If STR
contains \"Host or IP matched RULE\", return \"BannedHosts: RULE\".
Any other STR is returned unchanged."
  (if (string-match "\"\\(.*\\)\" matched.*/Banned\\(Regexps\\|Content\\)"
                    str)
      ;; Group 1 is the quoted rule, group 2 the list it came from.
      (let ((rule (match-string 1 str))
            (list-kind (match-string 2 str)))
        (format "Banned%s: %s" list-kind rule))
    (if (string-match "Host or IP matched \\(.*\\)" str)
        (format "BannedHosts: %s" (match-string 1 str))
      str)))
(defun anti-spam-load-spammer-log (filename)
  "Read spammer.log from Emacs Wiki.
FILENAME is the local copy of the log.

Returns (REASONS COUNT DAYS).
REASONS is a hash-table mapping each extracted reason to the
number of lines it appeared on.
COUNT is the number of lines parsed.
DAYS is the number of individual days found.

Two line formats are understood, both tab-separated:
- three fields: the first is the day (slashes are turned into
  dashes), the third holds the reason after the first \" - \";
- four fields: the first ten characters of the first field are
  the day, the fourth field is the reason.
Any other line signals an error."
  (let* ((name (file-name-nondirectory filename))
         (days (make-hash-table :test 'equal))
         (reasons (make-hash-table :test 'equal))
         (lines (with-temp-buffer
                  (message "Reading %s..." name)
                  (insert-file-contents filename)
                  ;; A record whose continuation starts with \"http\" on
                  ;; the next line is joined back into a single line.
                  (message "Fixing %s..." name)
                  (goto-char (point-min))
                  (while (re-search-forward "\n+http" nil t)
                    (replace-match " http" t t))
                  (message "Splitting %s..." name)
                  (split-string (buffer-string) "\n" t)))
         (count (length lines)))
    (message "Parsing %s..." name)
    (dolist (line lines)
      (let* ((fields (split-string line "\t"))
             (nfields (length fields))
             day reason)
        (cond ((= nfields 3)
               (let* ((text (nth 2 fields))
                      (sep (string-match " - " text)))
                 ;; Without the separator there is no reason to extract;
                 ;; leave REASON nil so the line is reported below.
                 (when sep
                   (setq day (replace-regexp-in-string "/" "-" (car fields))
                         reason (substring text (+ 3 sep))))))
              ;; The day is the first ten characters, so a shorter first
              ;; field cannot be parsed.
              ((and (= nfields 4)
                    (>= (length (car fields)) 10))
               (setq day (substring (car fields) 0 10)
                     reason (nth 3 fields))))
        (unless reason
          (error "Cannot parse line '%s'" line))
        (puthash day (1+ (gethash day days 0)) days)
        (let ((key (anti-spam-extract-rule reason)))
          (puthash key (1+ (gethash key reasons 0)) reasons))))
    (list reasons count (hash-table-count days))))
(defun anti-spam-report (filename)
  "Report which anti-spam rules matched most often in spammer.log.
FILENAME is the local copy of spammer.log from Emacs Wiki.

The report is written to the buffer *Anti Spam Report*, which is
then displayed.  It shows the number of rejected edits, the
number of days found in the log, the average per day and the (at
most) 50 rules with the most matches, most frequent first."
  (interactive "fWhere is spammer.log? ")
  ;; Plain `nth' instead of `destructuring-bind': the latter needs the
  ;; `cl' library, which this file never loads.
  (let* ((result (anti-spam-load-spammer-log filename))
         (reasons (nth 0 result))
         (count (nth 1 result))
         (days (nth 2 result))
         ;; Bound locally so that repeated runs start from an empty list.
         top-reasons)
    (message "Number crunching...")
    (maphash (lambda (key value)
               (setq top-reasons (cons (cons key value) top-reasons)))
             reasons)
    (setq top-reasons (sort top-reasons (lambda (a b) (> (cdr a) (cdr b)))))
    ;; Keep the top 50; with fewer entries there is nothing to cut off.
    (let ((tail (nthcdr 49 top-reasons)))
      (when tail
        (setcdr tail nil)))
    (message "Preparing report...")
    (let ((buf (get-buffer-create "*Anti Spam Report*")))
      (with-current-buffer buf
        (erase-buffer)
        (insert (format "Total spam rejected: %d\n" count))
        (insert (format "Total days surveyed: %d\n" days))
        (insert " (days without a single spam are skipped)\n")
        (insert (format "Average spam rejected per day: %.2f\n"
                        ;; An empty log has zero days; avoid dividing by it.
                        (if (zerop days)
                            0.0
                          (/ (float count) days))))
        (newline)
        (insert "Which rules are effective?\n")
        (newline)
        (insert " Matches Rule\n")
        (dolist (cell top-reasons)
          (insert (format "%8d %s\n" (cdr cell) (car cell)))))
      (message "Preparing report...done")
      (switch-to-buffer buf))))
(defun anti-spam-prune-list (log filename)
  "Read spammer.log from Emacs Wiki and prune one of the lists.
LOG is the filename of spammer.log,
FILENAME is the filename of either BannedHosts, BannedContent or BannedRegexps.

A line of FILENAME is kept when the text before its first space,
prefixed with the name of the file and \": \", is one of the
reasons found in LOG.  Lines starting with # are always kept and
are not counted.  All other lines are dropped.  The surviving
lines are shown, in their original order, in a buffer named
*Suggested NAME*; FILENAME itself is not modified."
  (interactive "fWhere is spammer.log? \nfWhich file do you want to prune? ")
  ;; Only the reasons table is needed from the parsed log.  Taking the
  ;; `car' avoids `destructuring-bind', which needs the `cl' library
  ;; that this file never loads.
  (let* ((reasons (car (anti-spam-load-spammer-log log)))
         kept-lines
         (name (file-name-nondirectory filename))
         (lines-kept 0)
         (lines-pruned 0)
         (lines (with-temp-buffer
                  (message "Reading %s..." name)
                  (insert-file-contents filename)
                  ;; Empty lines are omitted, so every line has at
                  ;; least one character for the `aref' below.
                  (split-string (buffer-string) "\n" t)))
         (buf (get-buffer-create
               (format "*Suggested %s*" name))))
    (message "Pruning %s..." name)
    (dolist (line lines)
      ;; don't count comments
      (cond ((= (aref line 0) ?#)
             (setq kept-lines (cons line kept-lines)))
            ;; The lookup key has the same \"NAME: RULE\" shape as the
            ;; keys of the reasons table.
            ((gethash (concat name ": " (car (split-string line " "))) reasons)
             (setq lines-kept (1+ lines-kept)
                   kept-lines (cons line kept-lines)))
            (t
             (setq lines-pruned (1+ lines-pruned)))))
    (with-current-buffer buf
      (erase-buffer)
      ;; KEPT-LINES was built by consing, so it is in reverse order.
      (dolist (line (nreverse kept-lines))
        (insert line)
        (newline)))
    (message "%d expressions kept, %d lines pruned" lines-kept lines-pruned)
    (switch-to-buffer buf)))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment