Source

mh-e / mh-junk.el

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
;;; mh-junk.el --- Interface to anti-spam measures

;; Copyright (C) 2003 Free Software Foundation, Inc.

;; Author: Satyaki Das <satyaki@theforce.stanford.edu>,
;;         Bill Wohler <wohler@newt.com>
;; Maintainer: Bill Wohler <wohler@newt.com>
;; Keywords:  mail, spam

;; This file is part of GNU Emacs.

;; GNU Emacs is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 2, or (at your option)
;; any later version.

;; GNU Emacs is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.

;; You should have received a copy of the GNU General Public License
;; along with GNU Emacs; see the file COPYING.  If not, write to the
;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
;; Boston, MA 02111-1307, USA.

;;; Commentary:

;; Spam handling in MH-E.

;;; Change Log:

;;; Code:

(require 'mh-e)

;; Interactive functions callable from the folder buffer
;;;###mh-autoload
(defun mh-junk-blacklist (range)
  "Blacklist RANGE as spam.

Check the documentation of `mh-interactive-range' to see how RANGE is read in
interactive use.

First the appropriate function is called depending on the value of
`mh-junk-choice'. Then if `mh-junk-mail-folder' is a string then the message is
refiled to that folder. If nil, the message is deleted.

To change the spam program being used, customize `mh-junk-program'. Directly
setting `mh-junk-choice' is not recommended.

The documentation for the following functions describes what setup is needed
for the different spam fighting programs:

  - `mh-bogofilter-blacklist'
  - `mh-spamprobe-blacklist'
  - `mh-spamassassin-blacklist'"
  (interactive (list (mh-interactive-range "Blacklist")))
  (let ((blacklist-func (nth 1 (assoc mh-junk-choice mh-junk-function-alist))))
    (unless blacklist-func
      (error "Customize `mh-junk-program' appropriately"))
    (let ((dest (cond ((null mh-junk-mail-folder) nil)
                      ((equal mh-junk-mail-folder "") "+")
                      ((eq (aref mh-junk-mail-folder 0) ?+)
                       mh-junk-mail-folder)
                      ((eq (aref mh-junk-mail-folder 0) ?@)
                       (concat mh-current-folder "/"
                               (substring mh-junk-mail-folder 1)))
                      (t (concat "+" mh-junk-mail-folder)))))
      (mh-iterate-on-range msg range
        (funcall (symbol-function blacklist-func) msg)
        (if dest
            (mh-refile-a-msg nil (intern dest))
          (mh-delete-a-msg nil)))
      (mh-next-msg))))

;;;###mh-autoload
(defun mh-junk-whitelist (range)
  "Whitelist RANGE incorrectly classified as spam.

Check the documentation of `mh-interactive-range' to see how RANGE is read in
interactive use.

First the appropriate function is called depending on the value of
`mh-junk-choice'. Then the message is refiled to `mh-inbox'.

To change the spam program being used, customize `mh-junk-program'. Directly
setting `mh-junk-choice' is not recommended."
  (interactive (list (mh-interactive-range "Whitelist")))
  (let ((whitelist-func (nth 2 (assoc mh-junk-choice mh-junk-function-alist))))
    (unless whitelist-func
      (error "Customize `mh-junk-program' appropriately"))
    (mh-iterate-on-range msg range
      (funcall (symbol-function whitelist-func) msg)
      (mh-refile-a-msg nil (intern mh-inbox)))
    (mh-next-msg)))



;; Bogofilter Interface

(defvar mh-bogofilter-executable (executable-find "bogofilter"))

(defun mh-bogofilter-blacklist (msg)
  "Classify MSG as spam.
Tell bogofilter that the message is spam.

Bogofilter is a Bayesian spam filtering program. Get it from your local
distribution or from:
   http://bogofilter.sourceforge.net/

You first need to teach bogofilter. This is done by running

   bogofilter -n < good-message

on every good message, and

   bogofilter -s < spam-message

on every spam message. Most Bayesian filters need 1000 to 5000 of each to
start doing a good job.

To use bogofilter, add the following .procmailrc recipes which you can also
find in the bogofilter man page:

   # Bogofilter
   :0fw
   | bogofilter -u -e -p

   :0
   * ^X-Bogosity: Yes, tests=bogofilter
   $SPAM

Bogofilter continues to feed the messages it classifies back into its
database. Occasionally it misses, and those messages need to be reclassified.
MH-E can do this for you. Use \\[mh-junk-blacklist] to reclassify messges in
your +inbox as spam, and \\[mh-junk-whitelist] to reclassify messages in your
spambox as good messages."
  (unless mh-bogofilter-executable
    (error "Couldn't find the bogofilter executable"))
  (let ((msg-file (mh-msg-filename msg mh-current-folder)))
    (call-process mh-bogofilter-executable msg-file 0 nil "-Ns")))

(defun mh-bogofilter-whitelist (msg)
  "Reinstate incorrectly filtered MSG.
Train bogofilter to think of the message as non-spam."
  (unless mh-bogofilter-executable
    (error "Couldn't find the bogofilter executable"))
  (let ((msg-file (mh-msg-filename msg mh-current-folder)))
    (call-process mh-bogofilter-executable msg-file 0 nil "-Sn")))



;; Spamprobe Interface

(defvar mh-spamprobe-executable (executable-find "spamprobe"))

(defun mh-spamprobe-blacklist (msg)
  "Classify MSG as spam.
Tell spamprobe that the message is spam.

Spamprobe is a Bayesian spam filtering program. More info about the program can
be found at:
   http://spamprobe.sourceforge.net

Here is a procmail recipe to stores incoming spam mail into the folder +spam
and good mail in /home/user/Mail/mdrop/mbox. This recipe is provided as an
example in the spamprobe man page.

   PATH=/bin:/usr/bin:/usr/local/bin
   DEFAULT=/home/user/Mail/mdrop/mbox
   SPAM=/home/user/Mail/spam/.

   # Spamprobe filtering
   :0
   SCORE=| spamprobe receive
   :0 wf
   | formail -I \"X-SpamProbe: $SCORE\"
   :0 a:
   *^X-SpamProbe: SPAM
   $SPAM

Occasionally some good mail gets misclassified as spam. You can use
\\[mh-junk-whitelist] to reclassify that as good mail."
  (unless mh-spamprobe-executable
    (error "Couldn't find the spamprobe executable"))
  (let ((msg-file (mh-msg-filename msg mh-current-folder)))
    (call-process mh-spamprobe-executable msg-file 0 nil "spam")))

(defun mh-spamprobe-whitelist (msg)
  "Reinstate incorrectly filtered MSG.
Train spamprobe to think of the message as non-spam."
  (unless mh-spamprobe-executable
    (error "Couldn't find the spamprobe executable"))
  (let ((msg-file (mh-msg-filename msg mh-current-folder)))
    (call-process mh-spamprobe-executable msg-file 0 nil "good")))



;; Spamassassin Interface

(defvar mh-spamassassin-executable (executable-find "spamassassin"))
(defvar mh-sa-learn-executable (executable-find "sa-learn"))

(defun mh-spamassassin-blacklist (msg)
  "Blacklist MSG.
This is done by sending the message to Razor and by appending the sender to
~/.spamassassin/user_prefs in a blacklist_from rule. If sa-learn is available,
the message is also recategorized as spam.

Spamassassin is an excellent spam filter. For more information, see:
  http://spamassassin.org/.

I ran \"spamassassin -t\" on every mail message in my archive and ran an
analysis in Gnumeric to find that the standard deviation of good mail
scored under 5 (coincidentally, the spamassassin default for \"spam\").

Furthermore, I observed that there weren't any messages with a score of 8
or more that were interesting, so I added a couple of points to be
conservative and send any message with a score of 10 or more down the
drain. You might want to use a score of 12 or 13 to be really conservative.
I have found that this really decreases the amount of junk to review.

Messages with a score of 5-9 are set aside for later review. The major
weakness of rules-based filters is a plethora of false positives\; I catch one
or two legitimate messages in here a week, so it is worthwhile to check.

You might choose to do this analysis yourself to pick a good score for
deleting spam sight unseen, or you might pick a score out of a hat, or you
might choose to be very conservative and not delete any messages at all.

Based upon this discussion, here is what the associated ~/.procmailrc
entries look like. These rules appear before my list filters so that spam
sent to mailing lists gets pruned too.

   #
   # Spam
   #
   :0fw
   | spamc

   # Anything with a spam level of 10 or more is junked immediately.
   :0:
   * ^X-Spam-Level: ..........
   /dev/null

   :0
   * ^X-Spam-Status: Yes
   $SPAM

If you don't use \"spamc\", use \"spamassassin -P -a\".

A handful of spam does find its way into +inbox. In this case, use
\\[mh-junk-blacklist] to add a \"blacklist_from\" line to
~/spamassassin/user_prefs, delete the message, and send the message to the
Razor, so that others might not see this spam.

Over time, you see some patterns in the blacklisted addresses and can
replace several lines with wildcards. For example, it is clear that High
Speed Media is the biggest bunch of jerks on the Net. Here are some of the
entries I have for them, and the list continues to grow.

   blacklist_from	*@*-hsm-*.com
   blacklist_from	*@*182*643*.com
   blacklist_from	*@*antarhsm*.com
   blacklist_from	*@*h*speed*
   blacklist_from	*@*hsm*182*.com
   blacklist_from	*@*hsm*643*.com
   blacklist_from	*@*hsmridi2983cslt227.com
   blacklist_from	*@*list*hsm*.com
   blacklist_from	*@h*s*media*
   blacklist_from	*@hsmdrct.com
   blacklist_from	*@hsmridi2983csltsite.com

The function `mh-spamassassin-identify-spammers' is provided that shows the
frequency counts of the host and domain names in your blacklist_from
entries. This can be helpful when editing the blacklist_from entries.

In versions of spamassassin (2.50 and on) that support a Bayesian classifier,
\\[mh-junk-blacklist] uses the sa-learn program to recategorize the message as
spam. Neither MH-E, nor spamassassin, rebuilds the database after adding
words, so you will need to run \"sa-learn --rebuild\" periodically. This can
be done by adding the following to your crontab:

  0 * * * *	sa-learn --rebuild > /dev/null 2>&1"
  (unless mh-spamassassin-executable
    (error "Couldn't find the spamassassin executable"))
  (let ((current-folder mh-current-folder)
        (msg-file (mh-msg-filename msg mh-current-folder))
        (sender))
    (save-excursion
      (message "Giving this message the Razor...")
      (mh-truncate-log-buffer)
      (call-process mh-spamassassin-executable msg-file mh-log-buffer nil
                    "--report" "--remove-from-whitelist")
      (when mh-sa-learn-executable
          (message "Recategorizing this message as spam...")
          (call-process mh-sa-learn-executable msg-file mh-log-buffer nil
                        "--single" "--spam" "--local" "--no-rebuild"))
      (message "Blacklisting address...")
      (set-buffer (get-buffer-create mh-temp-buffer))
      (erase-buffer)
      (call-process (expand-file-name mh-scan-prog mh-progs) nil t nil
                    (format "%s" msg) current-folder
                    "-format" "%<(mymbox{from})%|%(addr{from})%>")
      (goto-char (point-min))
      (if (search-forward-regexp "^\\(.+\\)$" nil t)
          (progn
            (setq sender (match-string 0))
            (mh-spamassassin-add-rule "blacklist_from" sender)
            (message "Blacklisting address...done"))
        (message "Blacklisting address...not done (from my address)")))))

(defun mh-spamassassin-whitelist (msg)
  "Whitelist MSG.
Add a whitelist_from rule to the ~/.spamassassin/user_prefs file. If sa-learn
is available, then the message is recategorized as ham."
  (unless mh-spamassassin-executable
    (error "Couldn't find the spamassassin executable"))
  (let ((msg-file (mh-msg-filename msg mh-current-folder))
        (show-buffer (get-buffer mh-show-buffer))
        from)
    (save-excursion
      (set-buffer (get-buffer-create mh-temp-buffer))
      (erase-buffer)
      (message "Removing spamassassin markup from message...")
      (call-process mh-spamassassin-executable msg-file mh-temp-buffer nil
                    "--remove-markup")
      (if show-buffer
          (kill-buffer show-buffer))
      (write-file msg-file)
      (when mh-sa-learn-executable
        (message "Recategorizing this message as ham...")
        (call-process mh-sa-learn-executable msg-file mh-temp-buffer nil
                      "--single" "--ham" "--local --no-rebuild"))
      (message "Whitelisting address...")
      (setq from (car (ietf-drums-parse-address (mh-get-header-field "From:"))))
      (kill-buffer nil)
      (unless (equal from "")
        (mh-spamassassin-add-rule "whitelist_from" from))
      (message "Whitelisting address...done"))))

(defun mh-spamassassin-add-rule (rule body)
  "Add a new rule to ~/.spamassassin/user_prefs.
The name of the rule is RULE and its body is BODY."
  (save-window-excursion
    (let* ((line (format "%s\t%s\n" rule body))
           (case-fold-search t)
           (file (expand-file-name "~/.spamassassin/user_prefs"))
           (buffer-exists (find-buffer-visiting file)))
      (find-file file)
      (if (not (search-forward (format "\n%s" line) nil t))
          (progn
            (goto-char (point-max))
            (insert (if (bolp) "" "\n") line)
            (save-buffer)))
      (if (not buffer-exists)
          (kill-buffer nil)))))

(defun mh-spamassassin-identify-spammers ()
  "Identifies spammers who are repeat offenders.

For each blacklist_from entry from the last blank line of
~/.spamassassin/user_prefs to the end of the file, a list of host and domain
names along with their frequency counts is displayed. This information can be
used to replace multiple blacklist_from entries with a single wildcard entry
such as:

  blacklist_from *@*amazingoffersdirect2u.com"
  (interactive)
  (let* ((file (expand-file-name "~/.spamassassin/user_prefs"))
         (domains (make-hash-table :test 'equal)))
    (find-file file)
    ;; Only consider entries between last blank line and end of file.
    (goto-char (1- (point-max)))
    (search-backward-regexp "^$")
    ;; Perform frequency count.
    (save-excursion
      (while (search-forward-regexp "^blacklist_from\\s-*\\(.*\\)@\\(.*\\)$"
                                    nil t)
        (let ((host (match-string 2))
              value)
          ;; Remove top-level-domain from hostname.
          (setq host (cdr (reverse (split-string host "\\."))))
          ;; Add counts for each host and domain part.
          (while host
            (setq value (gethash (car host) domains))
            (puthash (car host) (1+ (if (not value) 0 value)) domains)
            (setq host (cdr host))))))

    ;; Output
    (delete-other-windows)
    (pop-to-buffer (get-buffer-create "*MH-E Spammer Frequencies*"))
    (erase-buffer)
    (maphash '(lambda (key value) ""
                (if (> value 2)
                    (insert (format "%s %s\n" key value))))
             domains)
    (sort-numeric-fields 2 (point-min) (point-max))
    (reverse-region (point-min) (point-max))
    (goto-char (point-min))))

(provide 'mh-junk)

;;; Local Variables:
;;; indent-tabs-mode: nil
;;; sentence-end-double-space: nil
;;; End:

;;; mh-junk.el ends here
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.