Commits

Jakub Wilk committed f55e425

xhocr: add TODOs.

  • Participants
  • Parent commit 5f1bb6c

Comments (0)

Files changed (2)

misc/xhocr/hocr-corpus

             print('</chunk>')
 
     def merge_words(self, elements):
+        # TODO: Add support for UAX#29.
         elements = list(elements)
         base_element = elements[0]
         base_title_pattern = hocr.parse_title(base_element)[0]
             if lang:
                 lang = bcp47.from_tesseract(lang)
                 tag += ':' + lang
+            # TODO: Add font attribute.
             try:
                 prev_wconf = welements[(text, tag)]
             except LookupError:
                 welements[(text, tag)] = wconf
             else:
                 welements[(text, tag)] = max(wconf, prev_wconf)
-            # TODO: check if bounding boxes are matching
+            # TODO: Check if bounding boxes are matching.
             if wconf > max_wconf:
                 max_wconf = wconf
                 max_element = element

misc/xhocr/hocr.py

                         elem=xmlutils.repr(e),
                         value=e.get('title'),
                     )
-            # TODO: check if bounding boxes are matching
+            # TODO: Check if bounding boxes are matching.
             if wconf > max_wconf:
                 max_wconf = wconf
                 max_element = element