Commits

Timo Sulg  committed 7e75886

Reviewed Chapter 8 and fixed some typos.
Added a test case; it still has to be finished.

  • Participants
  • Parent commits 16c883b

Comments (0)

Files changed (4)

File Chapter8/.nltk_frmwrk.py.swp

Binary file added.

File Chapter8/nltk_frmwrk.py

         return  SynonymPhraseStopWordFilter(stop_filter, self.synonym_cache,
                 self.phrase_cache)
 
-class SynonymPhraseStopWordFilter(Filter):
+class SynonymPhraseStopWordFilter(TokenFilter):
     '''
    '''
     def __init__(self, input_stream, synonym_cache, phrase_cache):
             synonyms = self.synonym_cache.get(text)
             if synonyms:
                 for synonym in synonyms:
-                    syn_token = Token(synonom, cur_token.start_offset()
+                    syn_token = Token(synonom, cur_token.start_offset(),
                                 cur_token.end_offset(), "synonym")
                     syn_token.set_pos(0)
                     self.token_stack.put(syn_token)
                 token = token_stream.next()
                 if token:
                     text += " "
+            except:
+                pass
         return text
 
-
 class PhraseCache(Cache):
     ''' determines whether a phrase is of interest to us'''
     def __init__(self):
     def is_valid(text):
         result = False
         key = self.get_stemmed_text(text)
-        if self.valid_phrases.has_key(key)
-        return result
+        return self.valid_phrases.get_key(key, None)
 
 class SynonymCache(Cache):
     ''' '''
 
     def get_synonym(text):
         key = self.get_stemmed_text(text)
-        return = self.synonyms.get(key, None)
+        return self.synonyms.get(key, None)
 
 class TagCache(Cache):
     ''' '''
 
 #TODO: refactor it
     def normalize(self, tag_magnitudes):
-        if tag_magnitudes != None || len(tag_magnitudes) > 0:
+        if tag_magnitudes != None or len(tag_magnitudes) > 0:
             sum_sqd = sum([x.magnitude**2 for x in tag_magnitudes])
         if sum_sqd == 0.0:
             sum_sqd = 1.0 / len(tag_magnitudes)
         self.values[tm.tag] = norm_tag
 
     def get_tagmagnitudes(self):
-         ''' return sorted list of values of tagmagnitudes'''
+        ''' return sorted list of values of tagmagnitudes'''
         results = self.values.values()
         return sorted(results, reverse = True)
+
     def merge_magnitudes(a,b):
         return math.sqrt(a*a + b*b)
     #TODO: refactor it by using numpy
         pass
 
     #TODO: refactor it - too many exit points and awful nesting logic
-    def merge_tms(self, a, b)
+    def merge_tms(self, a, b):
         ''' merges tag magnitudes '''
         if a == None:
             if b == None:
     for token in ts:
         print token
 
-def demo_basic_operations()
+def demo_basic_operations():
     print("Demo of using tag magnitudes")
     tag_cache = TagCache()
     tm_list = [] #[TagMagnitudes]

File Chapter8/nltk_frmwrk.pyc

Binary file added.

File Chapter8/test_nltk_frmwrk.py

+'''
+Test cases for Chapter8 example - nltk framework
+
+'''
+
+import unittest
+from nltk_frmwrk import *
+
+class TestPorterStemStopWordAnalyzer(unittest.TestCase):
+    ''' tests functionality and doc examples for
+        PorterStemStopWordAnalyzer
+    '''
+    def setUp(self):
+        self.sw_analyzer = PorterStemStopWordAnalyzer()
+
+    def tearDown(self):
+        del self.sw_analyzer
+
+    def test_initialization(self):
+        self.assertIsNotNone(self.sw_analyzer.stopwords)
+
+class TestSynonymPhraseStopWordAnalyzer(unittest.TestCase):
+    '''
+    '''
+    def setUp(self):
+        pass
+    def tearDown(self):
+        pass
+    def test_initialization(self):
+        pass
+
+if __name__ == "__main__":
+    unittest.main()