Devin Jeanpierre avatar Devin Jeanpierre committed 8b64eab

Fixed infinite recursion problem with zero-width-assertions

Consider the regex '(?=)*'. That shouldn't recurse forever.

Strangely, Python allows (?=)*, but not, say, ()* . And so on.

See the following Python-List thread for when I asked Python-List
about it. As of this commit the discussion is only beginning,
maybe it'll get somewhere. :)

Comments (0)

Files changed (4)


     def testNonconsumingnessNegativeAssertion(self):
         self.assertEquivalent("(?!b)", "a")
+    def testAssertionsDontLoopForever(self):
+        self.assertEquivalent("(?=)*", "")
 if __name__ == '__main__':


         # mark_stack_base restoring before restoring marks
         self.assertEqual(re.match('(a)(?:(?=(b)*)c)*', 'abb').groups(),
                          ('a', None))
+        # this still doesn't work because grouping is wrong.
         self.assertEqual(re.match('(a)((?!(b)*))*', 'abb').groups(),
                          ('a', None, None))
         self.program = [] # in the form: (instruction, args)
         self.programs = [self.program]
         self.next_label = itertools.count().next
+        # more readable not to reuse next_label
+        self.next_progress_point = itertools.count().next
         self.upcoming_labels = []
         self.label2index = {}
         self.labels2indexes = [self.label2index]
         self.l_append(start, vm.SPLIT, splits)
-        self.label_next(findone)
+        self.l_append(findone, vm.PROGRESS, (self.next_progress_point(),))
         self.append(vm.JUMP, (start,))
 # This is unfortunately made necessary by some stuff.
 NOOP = "NOOP" # just go to the next instruction.
+# this permits $*
+# don't repeat zero-width matches to infinity. Just once is fine.
+# Specifically:
+#   """Fail if we haven't read anything since the last time we saw this
+#      particular PROGRESS instruction."""
 # Here we have the FORK instruction!
 # A fork produces a new "process". See the PikeProcess class for details.
         self.in_s = in_s
         self.threadlist = []
         self.pc2thread = {}
+        self.seen_progress_pcs = set()
     def __iter__(self):
         return iter(self.threadlist)
         elif op.opcode == SPLIT:
             for arg in op.arguments:
+        elif op.opcode == PROGRESS:
+            prog_point, = op.arguments
+            if prog_point in self.seen_progress_pcs:
+                return
+            else:
+                self.seen_progress_pcs.add(prog_point)
+                self.add_thread(thread.increment())
         elif op.opcode in [START_SAVE, END_SAVE]:
             slot, = op.arguments
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.