Commits

Flávio Coelho committed 0039701

exemplo de nlp melhorado

  • Participants
  • Parent commits 144a6be

Comments (0)

Files changed (1)

         self.words = corpus.words(fid)
         self.raw = corpus.raw(fid)
         self.sents = self.sent_seg(self.raw)
+        self.fd = None
         
     def find_ngrams(self,texto, palavra, n):
         """
         return fd
 
     def frequencia_de_palavras(self):
-        fd = nltk.FreqDist(w.lower() for w in self.words() if w not in stopwords)
+        """
+        Calcula frequencia de palavras no texto
+        """
+        if not self.fd:
+            self.fd = nltk.FreqDist(w.lower() for w in self.words() if w not in stopwords)
+        return self.fd
 
     def sent_seg(self,texto):
         return sent_tokenizer.tokenize(texto)
             if palavra in sent:
                 pos = sent.index(palavra)
                 left = ' '.join(sent[:pos])
-                right = ' '.join(sent[pos+1:])
-                print '%*s %s %-*s' %\
-                    (contexto, left[-contexto:], palavra, contexto, right[:contexto])
-        
+                right = ' '.join(sent[pos+len(palavra):])
+                print '%s %s %s'%(left[-contexto:], palavra, right[:contexto])
+
 if __name__=="__main__":
     from nltk.corpus import machado
-    
-    t1 = machado.words('romance/marm05.txt')
-    t1_raw = machado.raw('romance/marm05.txt')
-    #~ print machado.fileids()
+    import locale
+    print "numero de textos disponiveis: ",len(machado.fileids())
     PT = ProcessaTexto(machado,'romance/marm05.txt')
     ngs = PT.find_ngrams(PT.words, 'olho',4)
     for ng,c in ngs.iteritems():
         print ' '.join(ng), c
     frases = PT.sent_seg(PT.raw)
-    PT.concordance('dar',30)
+    PT.concordance('Lobo',30)
+    print locale.getlocale()
+    #T = nltk.Text(machado.words('romance/marm05.txt'))
+    #T.plot(100)