Commits

greg elliott committed c6c5844

Add word count, include category in the Solr query, and start adding the compute-audio-length function

  • Participants
  • Parent commits 0ac4587

Comments (0)

Files changed (2)

File search/model.py

     searchStrings = tornado.escape.json_decode(searchStrings).replace(" ","+")
 
     #find all people with skills that match text
-    url = "http://localhost:8983/solr/select/?q=(text_en:" + searchStrings + \
+    url = "http://localhost:8983/solr/select/?q=(category:" + searchStrings + "+OR+text_en:" + searchStrings + \
         "+OR+text_ht:" + searchStrings + "+OR+tags:" + searchStrings + ")+AND+location:" + location + \
           "&version=2.2&wt=json&rows=999999"
     logging.info(url)
 
     return tags
 
+  def wordCount(self):
+    """Prints how often each word occurs in the English text of translated people.
+
+    Reads every document in self.db.people whose completely_translated flag is
+    True, splits the 'en_text' of each dict-valued field on spaces, strips "."
+    and "," from the words and prints every word seen more than 20 times,
+    most frequent first.
+
+    Returns: None"""
+    typesCount = {}
 
+    peopleCollection = self.db.people
+    people = peopleCollection.find({"completely_translated":True})
 
+    for person in people:
+      logging.info(person["asterisk_id"])
+      for item in person.itervalues():
+        # only sub-documents that carry an English text are of interest
+        if not isinstance(item, dict) or 'en_text' not in item:
+          continue
+        # diagnostic output only, so it must not be reported as an error
+        logging.debug(item['en_text'])
+        for word in item['en_text'].split(" "):
+          word = word.replace(".","").replace(",","")
+          # repeated spaces and bare punctuation leave empty strings behind
+          if not word:
+            continue
+          typesCount[word] = typesCount.get(word, 0) + 1
 
+    items = sorted(typesCount.items(), key=lambda option:option[1], reverse=True)
+    for word,count in items:
+      if count > 20:
+        print "%s : %d" % (word,count)
+    return None
+
+
+
+
+

File search/server.py

           (r'/tags', Tags),
           (r'/resume', Resume),
           (r'/searchQuerySolr', SearchQuerySolr),
-          (r'/export', Export)
+          (r'/export', Export),
+          (r'/wordCount', WordCount),
+          (r'/computeAudioLength', ComputeAudioLength)
         ]
         settings = dict(
         login_url="/search/login",
     self.write("Exported %d rows to people_export_for_solr.csv" % count)
 
 
+class WordCount(BaseHandler):
+  """Handler for /wordCount: runs the model's word count and answers "done"."""
+  @tornado.web.authenticated
+  def get(self):
+    model = Model.instance()
+    model.wordCount()
+    self.write("done")
+
+class ComputeAudioLength(BaseHandler):
+  """Handler for /computeAudioLength: measures recordings that lack an 'audio_length'."""
+
+  @tornado.web.authenticated
+  def get(self):
+    """Computes the length in minutes of each person's processed recordings.
+
+    For every dict-valued field of a person without an 'audio_length' entry,
+    sox is run on /recordings/<asterisk_id>/<field>.processed.mp3 and the
+    reported length is stored on the in-memory document.
+
+    Raises: Exception if the sox command does not exit successfully"""
+    conn = pymongo.Connection(options.mongo_host, options.mongo_port)
+    db = conn.konbit_translate
+    # sort() is a cursor method, so the collection has to be queried first
+    people = db.people.find().sort("asterisk_id")
+
+    for i, person in enumerate(people):
+      for key,item in person.iteritems():
+        if isinstance(item, dict) and 'audio_length' not in item:
+          fpath = "/recordings/%d/%s.processed.mp3" % (int(person["asterisk_id"]), key)
+          cmd = "nice sox %s -n stat 2>&1 | grep Length" % fpath
+          statOutput = self.runSystemCommand(cmd)
+          lengthInSeconds = float(statOutput.replace("Length (seconds):", "").strip())
+          lengthInMinutes = lengthInSeconds / 60.0
+          # NOTE(review): the value is only set on the in-memory document;
+          # nothing here writes it back to MongoDB - confirm this is intended.
+          item['audio_length'] = lengthInMinutes
+      # work in progress: stop after the first three people
+      if i >= 2: break
+
+  @staticmethod
+  def runSystemCommand(cmd):
+    """Runs a system command.
+    If it does not exit successfully (error code 0), it raises an exception.
+
+    Returns: the stdout of running the command"""
+    logging.debug("Running command: %s" % cmd)
+    status, output = commands.getstatusoutput(cmd)
+    if int(status) != 0:
+      # report the command that failed (the builtin 'file' was printed before)
+      errorMsg = "Could not process %s; returned %r with message: %r" % (cmd, status, output)
+      raise Exception(errorMsg)
+    return output
+
+
 def main():
   logging.info("Starting KONBIT SEARCH on port %d" % options.port)
   http_server = tornado.httpserver.HTTPServer(Application())