Commits

Mikhail Korobov committed cc8bacc

Correct whitespace in Corpora.raw()

Comments (0)

Files changed (2)

opencorpora/__init__.py

         return self.__doc__
 
     def raw(self, fileids=None, categories=None):
-        return " ".join(self.iterwords(fileids, categories))
+        return "\n\n\n".join(
+            d.as_text() for d in
+            self.iterdocuments(fileids, categories)
+        )
 
     def words(self, fileids=None, categories=None):
         return list(self.iterwords(fileids, categories))

tests/test_corpora.py

 
     def test_raw(self):
         raw = self.corpus.raw(categories='Автор:Яна Сарно')
-        self.assertEqual(len(raw), 2100)
+        self.assertEqual(len(raw), 2053)
         self.assertIn('биеннале', raw)
 
         self.assertEqual(raw, self.corpus.raw(3))