Commits

Armin Rigo  committed c4a6668

Tests and fix (thanks defnull): handle zero-width matches differently in
greedy repetition operators, in what is hopefully the same way as
CPython.

  • Participants
  • Parent commits 1b71922

Comments (0)

Files changed (2)

File rpython/rlib/rsre/rsre_core.py

                 marks = p.marks
                 enum = p.enum.move_to_next_result(ctx)
             #
-            # zero-width match protection
             min = ctx.pat(ppos+1)
-            if self.num_pending >= min:
-                while enum is not None and ptr == ctx.match_end:
-                    enum = enum.move_to_next_result(ctx)
-                    # matched marks for zero-width assertions
-                    marks = ctx.match_marks
-            #
             if enum is not None:
                 # matched one more 'item'.  record it and continue.
+                last_match_length = ctx.match_end - ptr
                 self.pending = Pending(ptr, marks, enum, self.pending)
                 self.num_pending += 1
                 ptr = ctx.match_end
                 marks = ctx.match_marks
-                match_more = True
-            else:
-                # 'item' no longer matches.
-                if self.num_pending >= min:
-                    # try to match 'tail' if we have enough 'item'
-                    result = sre_match(ctx, tailppos, ptr, marks)
-                    if result is not None:
-                        self.subresult = result
-                        self.cur_ptr = ptr
-                        self.cur_marks = marks
-                        return self
-                match_more = False
+                if last_match_length == 0 and self.num_pending >= min:
+                    # zero-width protection: after an empty match, if there
+                    # are enough matches, don't try to match more.  Instead,
+                    # fall through to trying to match 'tail'.
+                    pass
+                else:
+                    match_more = True
+                    continue
+
+            # 'item' no longer matches, or it just matched the empty string
+            # and we already have at least 'min' matches (see above).
+            if self.num_pending >= min:
+                # try to match 'tail' if we have enough 'item'
+                result = sre_match(ctx, tailppos, ptr, marks)
+                if result is not None:
+                    self.subresult = result
+                    self.cur_ptr = ptr
+                    self.cur_marks = marks
+                    return self
+            match_more = False
 
 class MinUntilMatchResult(AbstractUntilMatchResult):
 

File rpython/rlib/rsre/test/test_search.py

     def test_empty_maxuntil(self):
         r_code, r = get_code_and_re(r'(a?)+y')
         assert r.match('y')
+        assert r.match('aaayaaay').span() == (0, 4)
         res = rsre_core.match(r_code, 'y')
         assert res
+        res = rsre_core.match(r_code, 'aaayaaay')
+        assert res and res.span() == (0, 4)
         #
         r_code, r = get_code_and_re(r'(a?){4,6}y')
         assert r.match('y')
         res = rsre_core.match(r_code, 'y')
         assert res
 
+    def test_empty_maxuntil_2(self):
+        r_code, r = get_code_and_re(r'X(.*?)+X')
+        assert r.match('XfooXbarX').span() == (0, 5)
+        assert r.match('XfooXbarX').span(1) == (4, 4)
+        res = rsre_core.match(r_code, 'XfooXbarX')
+        assert res.span() == (0, 5)
+        assert res.span(1) == (4, 4)
+
     def test_empty_minuntil(self):
         r_code, r = get_code_and_re(r'(a?)+?y')
         #assert not r.match('z') -- CPython bug (at least 2.5) eats all memory