1. David Krentzlin
  2. vandusen-markov

Commits

David Krentzlin  committed 4955c1a

first impl

  • Participants
  • Branches default

Comments (0)

Files changed (3)

File markov-impl.scm

View file
  • Ignore whitespace
+;; 
+;; %%HEADER%%
+;; 
+
+(define message-body
+  (let ((rx (irregex '(seq #\: (+ (~ #\:)) #\: (submatch (+ any))))))
+    (lambda (m)
+      (irregex-match-substring (irregex-match rx (irc:message-body m)) 1))))
+
+
+(define (make-dictionary) (make-hash-table string-ci=?))
+(define dictionary? hash-table?)
+(define dictionary-set!   hash-table-set!)
+(define (dictionary-ref dict key #!optional (default #f))
+  (hash-table-ref/default dict key default))
+(define dictionary-values hash-table-values)
+
+
+(define (learn-tokens! dict token-list)
+  (for-each-token-pair
+   (lambda (token successor)
+     (when successor
+       (if (not (token-pair-known? dict token successor))
+           (learn-token-pair! dict token successor)
+           (update-token-pair! dict token successor))))
+   token-list))
+
+(define (for-each-token-pair proc list-of-tokens)
+  (let loop ((ls list-of-tokens))
+    (unless (null? ls)
+     (cond
+      ((null? (cdr ls))
+       (proc (car ls) #f)
+       (loop (cdr ls)))
+      (else
+       (proc (car ls) (cadr ls))
+       (loop (cdr ls)))))))
+
+(define (token-pair-known? dict token successor)
+  (and-let* ((_  (dictionary-ref dict token))
+             (__ (dictionary-ref _ successor)))
+            #t))
+
+(define (learn-token-pair! dict token successor)
+  (unless (dictionary-ref dict token)
+    (dictionary-set! dict token (make-dictionary)))
+  (let ((successors (dictionary-ref dict token)))
+    (dictionary-set! successors successor 1.0)))
+
+(define (update-token-pair! dict token successor)
+  (and-let* ((successors (dictionary-ref dict token))
+             (amount     (dictionary-ref successors successor)))
+    (dictionary-set! successors successor (+ amount 1.0))))
+
+(define (token-pair-ref dict token successor)
+  (let ((successors (dictionary-ref dict token)))
+    (and successors (dictionary-ref successors successor))))

File tests/markov-spec.scm

View file
  • Ignore whitespace
+(load "../markov-impl.scm")
+(use srfi-1)
+
+(context "Dictionary"
+         (before each:
+            (set! ($ 'dict) (make-dictionary))
+            (subject-set! ($ 'dict)))
+
+         (it should (be a dictionary))
+
+         (it "returns #f if one wants to retrieve a value from an empty dictonary"
+             (expect (dictionary-ref ($' dict) "key") (be false)))
+
+         (it "returns value if it exists in the dictionary"
+             (dictionary-set! ($ 'dict) "key" "value")
+             (expect (dictionary-ref ($  'dict) "key") (be "value")))
+
+         (it "returns #f if the given key does not exist in the dictionary"
+             (dictionary-set! ($ 'dict) "key" "value")
+             (expect (dictionary-ref ($ 'dict) "key2") (be false)))
+
+         (it "replaces the value of a key if inserted multiple times"
+             (dictionary-set! ($ 'dict) "key" "value1")
+             (expect (dictionary-ref ($ 'dict)  "key") (be "value1"))
+             (dictionary-set! ($ 'dict) "key" "value2")
+             (expect (dictionary-ref ($ 'dict) "key") (be "value2")))
+
+         (it "returns a list of values"
+             (dictionary-set! ($ 'dict) "key" "value")
+             (dictionary-set! ($ 'dict) "key1" "value1")
+             (dictionary-set! ($ 'dict) "key2" "value2")
+             (expect (lset= string-ci=? (dictionary-values ($ 'dict)) (list "value" "value1" "value2"))
+                     (be true))))
+
+(context "Tokenize input")
+
+(context "Learning"
+
+         (before each:
+                 (set! ($ 'dict) (make-dictionary)))
+         
+         (it "learns unknown token"
+             (learn-tokens! ($ 'dict) (list "just" "a" "test"))
+             (expect (token-pair-ref ($ 'dict) "just" "a") (be 1.0)))
+
+         (it "updates known tokens"
+             (learn-tokens! ($ 'dict) (list "just" "a" "test" "just" "a"))
+             (expect (token-pair-ref ($ 'dict) "just" "a") (be 2.0))))

File vandusen-markov.scm

View file
  • Ignore whitespace
+;; 
+;; %%HEADER%%
+;; 
+
+(module vanduse-markov ()
+
+(define message-body
+  (let ((rx (irregex '(seq #\: (+ (~ #\:)) #\: (submatch (+ any))))))
+    (lambda (m)
+      (irregex-match-substring (irregex-match rx (irc:message-body m)) 1))))
+
+
+
+(plugin 'markov
+        (lambda ()
+          (let ((tokens-seen (make-hash-table string=?))
+                (probabilities (make-hash-table string=?))
+                (autolearn #f)
+                (dictionary-quota #f))
+
+
+            (define (add-tokens! token successor)
+              (unless (hash-table-ref tokens-seen token)
+                (hash-table-set! tokens-seen (make-hash-table string=?)))
+              (let ((next-tokens (hash-table-ref tokens-seen token)))
+                (hash-table-set! next-tokens successor 1.0)))
+
+            (define (update-tokens! token successor)
+              (let* ((text-tokens (hash-table-ref tokens-seen token))
+                     (count       (hash-table-ref next-tokens successor)))
+                (hash-table-set! next-tokens successor (+ count 1.0))))
+
+            (define (tokens-known? token successor)
+              (and-*let ((_  (hash-table-ref tokens-seen token))
+                         (__ (hash-table-ref _ successor)))
+                        #t))
+            
+            (define (learn-tokens! tokens)
+              (for-each-token-with-successor
+               (lambda (token successor)
+                 (unless (hash-table-ref dictionary token)
+                   (hash-table-set! tokens-seen token (make-hash-table string=?)))
+              
+                 (when successor
+                   (if (not (tokens-known? token successor))
+                       (add-tokens! token successor)
+                       (update-tokens! token successor))))
+               tokens))
+
+            (define (update-probability! tokens)
+              (hash-table-clear! probabilities)
+              (hash-table-for-each
+               tokens-seen
+               (lambda (token successors)
+                 (let ((total (sum (hash-table-values successors)))
+                       (probs (make-hash-table string=?)))
+                   (hash-table-for-each
+                    successors
+                    (lambda (next_token count)
+                      (hash-table-set! pobs next_token (/ count total))))
+                   (hash-table-set! probabilities token probs)))))
+
+            (define (learn-text text)
+              (let ((tokens    (tokenize-text text)))
+                (learn-tokens! tokens)
+                (update-probability!)))
+
+            (define (generate-text trigger length) #t)
+
+            (define (!markov-clear m . rest)
+              (hash-table-clear! tokens-seen)
+              (hash-table-clear! probabilities))
+
+            (define (!markov-auto-learn m mode)
+              (cond
+               ((string=? mode "on")
+                (set! autolearn #t)
+                (reply-to m "autolearning enabled"))
+               ((string=? mode "off")
+                (set! autolearn #f)
+                (reply-to m "autolearning disabled"))
+               (else (reply-to m "I don't understand"))))
+
+            (command 'markov-clear
+                     '(: "markov" (+ space) "clear")
+                     (lambda (m . rest)
+                       (hash-table-clear! dictionary)
+                       (reply-to m (format "Dictionary has been cleared"))))
+
+            (command 'markov-auto-learn
+                     '(: "markov" (+ space) "autolearn" (+ space) (submatch (or "on" "off")))
+              )
+
+            (command 'markov-learn
+                     '(: "markov" (+ space) "learn" (+ any))
+                     (lambda (m text)
+                       (if (dictionary-quota-exceeded?)
+                         (reply-to m "Sorry. Could not learn, because the dictionary quota is exceeded")
+                         (begin
+                           (learn-text text)
+                           (reply-to m "Aye!")))))
+
+            (command 'marko-set-quota
+                     '(: "markov" (+ space) "quota" (submatch (or "disabled" "show"  (+ numeric))))
+                     (lambda (m value)
+                       (cond
+                        ((string=? value "disabled")
+                         (set! dictionary-quota #f)
+                         (reply-to m "Disabled dictionary quota"))
+                        ((string=? value "show")
+                         (if (number? dictionary-quota)
+                             (reply-to m "Dictionary quota is currently set to ~A" dictionary-quota)
+                             (reply-to m "Dictionary quota is currently disabled")))
+                        (else
+                         (set! dictionary-quota (string->number value))
+                         (reply-to m (format "Aye! Set dictionary quota to ~A" value))))))
+
+            (command 'markov-say
+                     '(: "markov" (+ (~ space))  (? (+ space) (submatch (+ numeric))))
+                     (lambda (m trigger . value)
+                       (let ((length (if (null? value) 30 (car value))))
+                         (reply-to m (generate-text trigger length)))))
+
+            (message-handler (lambda (message)
+                               (when autolearn
+                                 (unless (dictionary-quota-exceeded?)
+                                   (learn-text (message-body m))))))))))