Victor Gavro committed 36d11b6

Added language code detection from HTML and stricter argument validation

Comments (0)

Files changed (1)


         return bool(' '.join(block)))
+    def parse_language_code(self, document):
+        #in case this is part of document
+        root = document.getroottree().getroot()
+        code = root.attrib.get('lang', '') or root.attrib.get('xml:lang', '')
+        return code.split('-')[0].lower() or None
     def translate(self, document, target_language, source_language=None, **kwargs):
         if not isinstance(document, html.HtmlElement):
+            if not isinstance(document, basestring):
+                raise ValueError('Document must be string or lxml.html.HtmlElement')
             document = html.fromstring(document)
+        if not source_language:
+            #try to determine language from html
+            source_language = self.parse_language_code(document)
         source_blocks = self._parse_blocks(document)
         translation_map = dict.fromkeys(source_blocks)
