Commits

Ezio Melotti committed af0e43d

Replace the symlinks to the spambayes detector/extension with the actual files.

  • Participants
  • Parent commits 9e2dce5

Comments (0)

Files changed (2)

File detectors/spambayes.py

-../../spambayes_integration/detectors/spambayes.py
+
+import xmlrpclib
+import socket
+import time
+import math
+import re
+
+from roundup.exceptions import Reject
+
+# Matches revision references such as "r1234", "rev 1234" or
+# "revision 1234".  There is no leading word boundary, so an "r" followed
+# by digits inside a longer word matches as well.
+REVPAT = re.compile(r'(r[0-9]+\b|rev(ision)? [0-9]+\b)')
+
+def extract_classinfo(db, klass, nodeid, newvalues):
+    if None == nodeid:
+        node = newvalues
+        content = newvalues['content']
+    else:
+        node = db.getnode(klass.classname, nodeid)
+        content = klass.get(nodeid, 'content')
+
+    if node.has_key('creation') or node.has_key('date'):
+        nodets = node.get('creation', node.get('date')).timestamp()
+    else:
+        nodets = time.time()
+
+    if node.has_key('author') or node.has_key('creator'):
+        authorid = node.get('author', node.get('creator'))
+    else:
+        authorid = db.getuid()
+
+    authorage = nodets - db.getnode('user', authorid)['creation'].timestamp()
+
+    tokens = ["klass:%s" % klass.classname,
+              "author:%s" % authorid,
+              "authorage:%d" % int(math.log(authorage)),
+              "hasrev:%s" % (REVPAT.search(content) is not None)]
+
+
+    return (content, tokens)
+
+def check_spambayes(db, content, tokens):
+    try:
+        spambayes_uri = db.config.detectors['SPAMBAYES_URI']
+    except KeyError, e:
+        return (False, str(e))
+
+    try:
+        server = xmlrpclib.ServerProxy(spambayes_uri, verbose=False)
+    except IOError, e:
+        return (False, str(e))
+
+
+    try:
+        prob = server.score({'content':content}, tokens, {})
+        return (True, prob)
+    except (socket.error, xmlrpclib.Error), e:
+        return (False, str(e))
+
+
+def check_spam(db, klass, nodeid, newvalues):
+    """Auditor to score a website submission."""
+
+
+    if newvalues.has_key('spambayes_score'):
+        if not db.security.hasPermission('SB: May Classify', db.getuid()):
+            raise ValueError, "You don't have permission to spamclassify messages"
+        # Don't do anything if we're explicitly setting the score
+        return
+
+    if not newvalues.has_key('content'):
+        # No need to invoke spambayes if the content of the message
+        # is unchanged.
+        return
+
+    (content, tokens) = extract_classinfo(db, klass, nodeid, newvalues)
+    (success, other) = check_spambayes(db, content, tokens)
+    if success:
+        newvalues['spambayes_score'] = other
+        newvalues['spambayes_misclassified'] = False
+    else:
+        newvalues['spambayes_score'] = -1
+        newvalues['spambayes_misclassified'] = True
+
+def init(database):
+    """Initialize auditor."""
+    database.msg.audit('create', check_spam)
+    database.msg.audit('set', check_spam)
+    database.file.audit('create', check_spam)
+    database.file.audit('set', check_spam)

File extensions/spambayes.py

-../../spambayes_integration/extensions/spambayes.py
+import re, math
+from roundup.cgi.actions import Action
+from roundup.cgi.exceptions import *
+
+import xmlrpclib, socket
+
+# Matches revision references such as "r1234", "rev 1234" or
+# "revision 1234".  There is no leading word boundary, so an "r" followed
+# by digits inside a longer word matches as well.
+REVPAT = re.compile(r'(r[0-9]+\b|rev(ision)? [0-9]+\b)')
+
+def extract_classinfo(db, classname, nodeid):
+    node = db.getnode(classname, nodeid)
+
+    authorage = node['creation'].timestamp() - \
+                db.getnode('user', node.get('author', node.get('creator')))['creation'].timestamp()
+
+    authorid = node.get('author', node.get('creator'))
+
+    content = db.getclass(classname).get(nodeid, 'content')
+
+    tokens = ["klass:%s" % classname,
+              "author:%s" % authorid,
+              "authorage:%d" % int(math.log(authorage)),
+              "hasrev:%s" % (REVPAT.search(content) is not None)]
+
+    return (content, tokens)
+
+def train_spambayes(db, content, tokens, is_spam):
+    # spambayes training is now disabled; only leave
+    # spam classification UI
+    return True, None
+    spambayes_uri = db.config.detectors['SPAMBAYES_URI']
+
+    server = xmlrpclib.ServerProxy(spambayes_uri, verbose=False)
+    try:
+        server.train({'content':content}, tokens, {}, is_spam)
+        return (True, None)
+    except (socket.error, xmlrpclib.Error), e:
+        return (False, str(e))
+
+
+class SpambayesClassify(Action):
+    permissionType = 'SB: May Classify'
+
+    def handle(self):
+        (content, tokens) = extract_classinfo(self.db,
+                                              self.classname, self.nodeid)
+
+        if self.form.has_key("trainspam"):
+            is_spam = True
+        elif self.form.has_key("trainham"):
+            is_spam = False
+
+        (status, errmsg) = train_spambayes(self.db, content, tokens,
+                                           is_spam)
+
+        node = self.db.getnode(self.classname, self.nodeid)
+        props = {}
+
+        if status:
+            if node.get('spambayes_misclassified', False):
+                props['spambayes_misclassified'] = True
+
+            props['spambayes_score'] = 1.0
+
+            s = " SPAM"
+            if not is_spam:
+                props['spambayes_score'] = 0.0
+                s = " HAM"
+            self.client.ok_message.append(self._('Message classified as') + s)
+        else:
+            self.client.error_message.append(self._('Unable to classify message, got error:') + errmsg)
+
+        klass = self.db.getclass(self.classname)
+        klass.set(self.nodeid, **props)
+        self.db.commit()
+
+def sb_is_spam(obj):
+    cutoff_score = float(obj._db.config.detectors['SPAMBAYES_SPAM_CUTOFF'])
+    try:
+        score = obj['spambayes_score']
+    except KeyError:
+        return False
+    return score >= cutoff_score
+
+def init(instance):
+    instance.registerAction("spambayes_classify", SpambayesClassify)
+    instance.registerUtil('sb_is_spam', sb_is_spam)
+