Commits

David Krentzlin  committed fa0f3da

implemented tokenization

  • Participants
  • Parent commits 4955c1a

Comments (0)

Files changed (2)

File markov-impl.scm

 ;; Return the entry stored under SUCCESSOR in the dictionary that DICT
 ;; holds for TOKEN, or #f when DICT has no entry for TOKEN.
 (define (token-pair-ref dict token successor)
   (cond ((dictionary-ref dict token)
          => (lambda (followers) (dictionary-ref followers successor)))
         (else #f)))
+
+;; Split INPUT into a list of tokens, using every run of one or more
+;; whitespace characters as a separator.
+(define (tokenize-input input)
+  (let ((separator '(: (+ space))))
+    (irregex-split separator input)))

File tests/markov-spec.scm

 (load "../markov-impl.scm")
-(use srfi-1)
+(use srfi-1 format)
 
-(context "Dictionary"
-         (before each:
-            (set! ($ 'dict) (make-dictionary))
-            (subject-set! ($ 'dict)))
+(context "Markov"
+  (before each:
+          (set! ($ 'dict) (make-dictionary))
+          (subject-set! ($ 'dict)))
+  
+  (context "Dictionary"
+           (it should (be a dictionary))
+           
+           ;; The dictionary is created fresh before each example, so it holds
+           ;; no keys here and the lookup is expected to yield #f.
+           (it "returns #f if one wants to retrieve a value from an empty dictionary"
+               (expect (dictionary-ref ($ 'dict) "key") (be false)))
 
-         (it should (be a dictionary))
+           (it "returns value if it exists in the dictionary"
+               (dictionary-set! ($ 'dict) "key" "value")
+               (expect (dictionary-ref ($  'dict) "key") (be "value")))
+           
+           (it "returns #f if the given key does not exist in the dictionary"
+               (dictionary-set! ($ 'dict) "key" "value")
+               (expect (dictionary-ref ($ 'dict) "key2") (be false)))
+           
+           (it "replaces the value of a key if inserted multiple times"
+               (dictionary-set! ($ 'dict) "key" "value1")
+               (expect (dictionary-ref ($ 'dict)  "key") (be "value1"))
+               (dictionary-set! ($ 'dict) "key" "value2")
+               (expect (dictionary-ref ($ 'dict) "key") (be "value2")))
+           
+           (it "returns a list of values"
+               (dictionary-set! ($ 'dict) "key" "value")
+               (dictionary-set! ($ 'dict) "key1" "value1")
+               (dictionary-set! ($ 'dict) "key2" "value2")
+               (expect (lset= string-ci=? (dictionary-values ($ 'dict)) (list "value" "value1" "value2"))
+                       (be true))))
 
-         (it "returns #f if one wants to retrieve a value from an empty dictonary"
-             (expect (dictionary-ref ($' dict) "key") (be false)))
+  (context "Tokenize input"
+           (let ((expected (list "just" "a" "test")))
+             (it "splits on space"
+                 (expect (tokenize-input "just a    test")
+                         (be expected)))
+             (it "splits on tabs"
+                 (expect (tokenize-input (format #f "just ~T a test"))
+                         (be expected)))
+             
+             (it "splits on newline"
+                 (expect (tokenize-input (format #f "just ~% a test"))
+                         (be expected)))))
 
-         (it "returns value if it exists in the dictionary"
-             (dictionary-set! ($ 'dict) "key" "value")
-             (expect (dictionary-ref ($  'dict) "key") (be "value")))
+  (context "Learning"
+           (it "learns unknown token"
+               (learn-tokens! ($ 'dict) (list "just" "a" "test"))
+               (expect (token-pair-ref ($ 'dict) "just" "a") (be 1.0)))
 
-         (it "returns #f if the given key does not exist in the dictionary"
-             (dictionary-set! ($ 'dict) "key" "value")
-             (expect (dictionary-ref ($ 'dict) "key2") (be false)))
-
-         (it "replaces the value of a key if inserted multiple times"
-             (dictionary-set! ($ 'dict) "key" "value1")
-             (expect (dictionary-ref ($ 'dict)  "key") (be "value1"))
-             (dictionary-set! ($ 'dict) "key" "value2")
-             (expect (dictionary-ref ($ 'dict) "key") (be "value2")))
-
-         (it "returns a list of values"
-             (dictionary-set! ($ 'dict) "key" "value")
-             (dictionary-set! ($ 'dict) "key1" "value1")
-             (dictionary-set! ($ 'dict) "key2" "value2")
-             (expect (lset= string-ci=? (dictionary-values ($ 'dict)) (list "value" "value1" "value2"))
-                     (be true))))
-
-(context "Tokenize input")
-
-(context "Learning"
-
-         (before each:
-                 (set! ($ 'dict) (make-dictionary)))
-         
-         (it "learns unknown token"
-             (learn-tokens! ($ 'dict) (list "just" "a" "test"))
-             (expect (token-pair-ref ($ 'dict) "just" "a") (be 1.0)))
-
-         (it "updates known tokens"
-             (learn-tokens! ($ 'dict) (list "just" "a" "test" "just" "a"))
-             (expect (token-pair-ref ($ 'dict) "just" "a") (be 2.0))))
+           (it "updates known tokens"
+               (learn-tokens! ($ 'dict) (list "just" "a" "test" "just" "a"))
+               (expect (token-pair-ref ($ 'dict) "just" "a") (be 2.0)))))