Commits

Mikhail Korobov committed 56158b3

Add categories support for Corpora.catalog (optionally filter the catalog by categories)

  • Participants
  • Parent commits cd9d879

Comments (0)

Files changed (2)

opencorpora/__init__.py

         """
         Returns a list of corpus documents.
 
-        XXX: it can be very slow and memory-consuming if fileids is None;
-        use iterdocuments or pass fileids when possible.
+        XXX: it can be very slow and memory-consuming if fileids
+        and categories are both None; use iterdocuments or
+        pass fileids/categories when possible.
         """
         return list(self.iterdocuments(fileids, categories))
 
 
         return result
 
-    def catalog(self):
+    def catalog(self, categories=None):
         """
         Returns information about documents in corpora:
         a list of tuples (doc_id, doc_title).
         """
+        ids = self._filter_ids(None, categories)
         doc_meta = self._get_meta()
-        return [(doc_id, doc_meta[doc_id].title) for doc_id in doc_meta]
+        return [(doc_id, doc_meta[doc_id].title) for doc_id in ids]
 
     def get_document(self, doc_id):
         """

tests/test_corpora.py

             ('3', '00022 Последнее восстание в Сеуле'),
             ('4', '00023 За кота - ответишь!'),
         ])
-
+        self.assertEqual(self.corpus.catalog('Тема:ЧасКор:Культура*'), [
+            ('3', '00022 Последнее восстание в Сеуле'),
+            ('4', '00023 За кота - ответишь!'),
+        ])
         self.assertEqual(self.corpus.fileids(), ['1', '2', '3', '4'])
 
     def test_raw_loading(self):