Commits

Ezio Melotti committed 11a31eb

#13987: HTMLParser is now able to handle EOFs in the middle of a construct.

  • Participants
  • Parent commits d40e1e1
  • Branches 2.7

Comments (0)

Files changed (3)

File Lib/HTMLParser.py

                 else:
                     break
                 if k < 0:
-                    if end:
-                        self.error("EOF in middle of construct")
-                    break
+                    if not end:
+                        break
+                    k = rawdata.find('>', i + 1)
+                    if k < 0:
+                        k = rawdata.find('<', i + 1)
+                        if k < 0:
+                            k = i + 1
+                    else:
+                        k += 1
+                    self.handle_data(rawdata[i:k])
                 i = self.updatepos(i, k)
             elif startswith("&#", i):
                 match = charref.match(rawdata, i)

File Lib/test/test_htmlparser.py

     def test_starttag_junk_chars(self):
         self._run_check("</>", [])
         self._run_check("</$>", [('comment', '$')])
-        self._parse_error("</")
-        self._parse_error("</a")
+        self._run_check("</", [('data', '</')])
+        self._run_check("</a", [('data', '</a')])
         self._parse_error("<a<a>")
         self._run_check("</a<a>", [('endtag', 'a<a')])
-        self._parse_error("<!")
-        self._parse_error("<a")
-        self._parse_error("<a foo='bar'")
-        self._parse_error("<a foo='bar")
-        self._parse_error("<a foo='>'")
-        self._parse_error("<a foo='>")
+        self._run_check("<!", [('data', '<!')])
+        self._run_check("<a", [('data', '<a')])
+        self._run_check("<a foo='bar'", [('data', "<a foo='bar'")])
+        self._run_check("<a foo='bar", [('data', "<a foo='bar")])
+        self._run_check("<a foo='>'", [('data', "<a foo='>'")])
+        self._run_check("<a foo='>", [('data', "<a foo='>")])
 
     def test_valid_doctypes(self):
         # from http://www.w3.org/QA/2002/04/valid-dtd-list.html
File Misc/NEWS

 Library
 -------
 
+- Issue #13987: HTMLParser is now able to handle EOFs in the middle of a
+  construct.
+
 - Issue #13015: Fix a possible reference leak in defaultdict.__repr__.
   Patch by Suman Saha.