Commits

Ning Sun committed 367a149

nouns filter

Comments (0)

Files changed (3)

slacker-client-demo/src/slacker_client_demo/core.clj

 
 (defn -main [subreddit & args]
   (let [reddits (top-titles subreddit)
-        words-stats (apply word-count reddits)]
+        words-stats (apply nouns-count reddits)]
     (pprint words-stats)))
 

slacker-server-demo/model/en-pos-maxent.bin

Binary file added.

slacker-server-demo/src/redday/stats.clj

 (ns redday.stats
   (:require [clojure.string :as string])
-  (:require [opennlp.nlp :as nlp]))
+  (:require [opennlp.nlp :as nlp])
+  (:require [opennlp.tools.filters :as nlp-filters]))
 
 (def tokenizer (nlp/make-tokenizer "model/en-token.bin"))
+(def pos-tag (nlp/make-pos-tagger "model/en-pos-maxent.bin"))
 
-(defn word-count [& sentence]
-  (let [words (mapcat tokenizer (map string/lower-case sentence))]
+(defn- filter-words [sentence]
+  (map first (nlp-filters/nouns (pos-tag (tokenizer sentence)))))
+
+(defn nouns-count [& sentences]
+  (let [words (mapcat filter-words (map string/lower-case sentences))]
     (frequencies words)))