Commits

yanchuan sim committed 8af70d2

ignore 0 counts

Comments (0)

Files changed (1)

ycutils/bagofwords.py

 
     return self.dot_product(other)
   #end def
-  
+
   def __iadd__(self, other):
     """Adds two :class:`BOW` in place.
 
     :param other: the other BOW object to add."""
-    
+
     for w, c in other.iteritems(): self[w] += c
 
     return self
     :param s: a string of the form in ``word:count`` format."""
     for wc_str in s.split():
       w, c = wc_str.split(':')
+      c = float(c)
+      if c == 0: continue
       self[w] += float(c)
     #end for
   #end def
     return ' '.join([u'{}:{}'.format(w, c) for w, c in self.iteritems()])
   #end def
 
+  def to_sentence(self, sort=False):
+    """Returns a sentence which is equivalent (assuming non-negative integer counts) in content to the :class:`BOW` object.
+
+    :returns: a sentence string."""
+    words = []
+    for w, c in self.iteritems(): words += [w] * int(c)
+
+    if sort: words.sort()
+
+    return ' '.join(words)
+  #end def
+
   def dot_product(self, other):
     """Iterates through words in the counter and multiplies counts for the same words together.
 
     """:returns: the L1-norm of the bag of words vector."""
     return sum(self.itervalues())
   #end def
-  
+
   def normalize(self, sum_to=1.0):
     """Normalizes the counts of words, such that they sum up to :attr:`sum_to`.
-    
+
     :param sum_to: total count of words after normalizing."""
-    
+
     c = self.l1_norm() / float(sum_to)
     for w in self.iterkeys(): self[w] /= c
   #end def
   bow1_norm = 0.0
   bow2_norm = 0.0
 
-  for w, c in bow1.iteritems(): 
+  for w, c in bow1.iteritems():
     dot_prod += c * bow2[w]
     bow1_norm += c * c