Commits

Miki Tebeka committed 2b008ac

Text jobs

Comments (0)

Files changed (1)

         [clj-time.format :only (formatter unparse)]
         [incanter.core :only (save)]
         [incanter.charts :only (bar-chart)]
+        [clojure.java.io :only (reader)]
         [clojure.contrib.string :only (trim lower-case split)])
   (:import java.io.File))
 
 (defn day-only [time] ; Builds a new date-time from only the year, month and day of `time`.
   (date-time (year time) (month time) (day time)))
 
-(defn gen-chart [records job]
-  (let [result (map-reduce (:map job) (:reduce job) records)
-        filtered (if-let [f (:filter job)] (f result) result)
-        xs (sort (keys filtered))
-        ys (map #(filtered %) xs)
+(defn gen-chart [result job] ; Builds a bar chart from an already-computed result map and the job's labels.
+  (let [xs (sort (keys result)) ; x values: the result keys in sorted order
+        ys (map #(result %) xs) ; y values: looked up in the same order as xs
         fxs (map (:x-format job identity) xs)] ; x labels: formatted with the job's :x-format, or left as-is when absent
     (bar-chart fxs ys :title (:title job) :x-label (:x-label job) 
                :y-label (:y-label job))))
 
+(defn outfile [job] ; Returns the output path for a job: charts/<:outfile>.<ext>
+  (let [ext (if (:text job) "txt" "png")] ; jobs with a truthy :text get .txt, all others .png
+    (format "charts/%s.%s" (:outfile job) ext)))
+
+(defn gen-text [result job] ; Returns the top entries of result as one string of "key: value" lines.
+  (let [top (take (:max job 100) (reverse (sort-by result (keys result)))) ; keys ordered by descending value, capped at :max (default 100)
+        top-vals (map result top)] ; the values, in the same order as top
+    (with-out-str (dorun (map #(println (format "%s: %s" %1 %2)) top top-vals))))) ; dorun forces the lazy map so the printing happens inside with-out-str
+
+
 (defn run-job [records job] ; Map-reduces records with the job's :map/:reduce and writes the output to (outfile job).
-  (let [chart (gen-chart records job)]
-    (save chart (str "charts/" (:filename job) ".png"))))
+  (let [result (map-reduce (:map job) (:reduce job) records)
+        [genfn savefn] (if (:text job) 
+                         [gen-text spit] 
+                         [gen-chart (fn [f o] (save o f))])] ; wrapper flips the arguments so save is called like spit: (path, content)
+    (savefn (outfile job) (genfn result job))))
 
 (defn time-fmt [fmt]
   (fn [dt]
 (defn sum [values] ; Returns the sum of all items in values.
   (reduce + values))
 
-(defn max-n-filter [n results]
-  (let [xs (take n (reverse (sort-by results (keys results))))]
-    (zipmap xs (map results xs))))
+(defn load-stop-words
+  "Reads stop-words.txt from the current working directory and returns
+  its lines as a set of strings, one element per line."
+  []
+  (with-open [rdr (reader "stop-words.txt")] ; close the file handle even if reading throws
+    (set (line-seq rdr)))) ; line-seq is lazy: set realises every line before the reader is closed
+
+(def *stop-words* (load-stop-words)) ; stop-word set, loaded once when this def is evaluated
+
+(defn stop-word? [s] ; True when s is a member of *stop-words*.
+  (contains? *stop-words* s))
+
+(defn ok-word? [token] ; True when token is worth counting: longer than 2 and not a stop word.
+  (and (> (count token) 2) ; rejects tokens of length 2 or less
+       (not (stop-word? token))))
 
 (def numlines {
     :map (fn [record] [[(month-only (:time record)) 1]])
     :title "Lines/Month"
     :x-label "Month"
     :y-label "Lines"
-    :filename "lines"
+    :outfile "lines"
     :x-format (time-fmt "MMM")})
 
 (def numusers {
     :title "Users/Month"
     :x-label "Month"
     :y-label "Users"
-    :filename "users"
+    :outfile "users"
     :x-format (time-fmt "MMM")})
 
 (def active { ; Job spec: line count per user.
     :map (fn [record] (if-let [u (:user record)] [[u 1]] [])) ; one [user 1] pair per record; nothing when :user is nil or false
     :reduce (fn [key values] (sum values)) ; total of the emitted 1s for each user
-    :title "Lines/User"
-    :x-label "User"
-    :y-label "Lines"
-    :filter (partial max-n-filter 10)
-    :filename "active"})
+    :outfile "active"
+    :text true
+    :max 10}) ; :text routes this job to gen-text in run-job; :max caps the output at 10 entries
 
 (def words { ; Job spec: occurrence count per token, written as text.
-    :map (fn [record] (map (fn [tok] [tok 1]) (:tokens record)))
+    :map (fn [record] 
+            (map (fn [tok] [tok 1]) (filter ok-word? (:tokens record)))) ; one [token 1] pair per token that passes ok-word?
     :reduce (fn [key values] (reduce + values))
-    :title "Words"
-    :x-label "Word"
-    :y-label "Count"
-    :filename "words"})
+    :text true
+    :max 100
+    :outfile "words"
+    :count 100}) ; NOTE(review): no code visible here reads :count (gen-text uses :max) — confirm it is still needed
 
-(use '[incanter.core :only (view)])
+(def *jobs* [numlines numusers active words]) ; every job spec that -main runs
+
 (defn -main [] ; Entry point: loads records from "logs" and runs every job in *jobs* over them.
-  (let [records (load-data "logs")]
-    (let [chart (gen-chart records active)]
-      (view chart))))
+  (let [records (load-data "logs")
+        run (partial run-job records)] ; run-job with the shared records already supplied
+    (dorun (pmap run *jobs*)) ; pmap is lazy; dorun forces every job to actually run
+    (shutdown-agents))) ; NOTE(review): presumably here so the JVM can exit once pmap's background threads are idle — confirm