Commits

Guido van Rossum  committed 09e6b4e

Latest from Jeffrey Ollie.
Infinite failure stack, some bugs fixed (fastmap, star_jump, register bug).

  • Participants
  • Parent commits b0584bf
  • Branches legacy-trunk

Comments (0)

Files changed (2)

File Modules/regexpr.c

-/*
-
-regexpr.c
-
-Author: Tatu Ylonen <ylo@ngs.fi>
-
-Copyright (c) 1991 Tatu Ylonen, Espoo, Finland
-
-Permission to use, copy, modify, distribute, and sell this software
-and its documentation for any purpose is hereby granted without fee,
-provided that the above copyright notice appear in all copies.  This
-software is provided "as is" without express or implied warranty.
-
-Created: Thu Sep 26 17:14:05 1991 ylo
-Last modified: Mon Nov  4 17:06:48 1991 ylo
-Ported to Think C: 19 Jan 1992 guido@cwi.nl
-
-This code draws many ideas from the regular expression packages by
-Henry Spencer of the University of Toronto and Richard Stallman of the
-Free Software Foundation.
-
-Emacs-specific code and syntax table code is almost directly borrowed
-from GNU regexp.
-
-*/
+/* regexpr.c
+ *
+ * Author: Tatu Ylonen <ylo@ngs.fi>
+ *
+ * Copyright (c) 1991 Tatu Ylonen, Espoo, Finland
+ *
+ * Permission to use, copy, modify, distribute, and sell this software
+ * and its documentation for any purpose is hereby granted without
+ * fee, provided that the above copyright notice appear in all copies.
+ * This software is provided "as is" without express or implied
+ * warranty.
+ *
+ * Created: Thu Sep 26 17:14:05 1991 ylo
+ * Last modified: Mon Nov  4 17:06:48 1991 ylo
+ * Ported to Think C: 19 Jan 1992 guido@cwi.nl
+ *
+ * This code draws many ideas from the regular expression packages by
+ * Henry Spencer of the University of Toronto and Richard Stallman of
+ * the Free Software Foundation.
+ *
+ * Emacs-specific code and syntax table code is almost directly borrowed
+ * from GNU regexp.
+ *
+ * Bugs fixed and lots of reorganization by Jeffrey C. Ollie, April
+ * 1997.  Thanks for bug reports and ideas from Andrew Kuchling, Tim
+ * Peters, Guido van Rossum, Ka-Ping Yee, Sjoerd Mullender, and
+ * probably one or two others that I'm forgetting.
+ *
+ * $Id$ */
 
 #include "config.h" /* For Win* specific redefinition of printf c.s. */
 
-#include "myproto.h" /* For Py_PROTO macro --Guido */
+#include "myproto.h" /* For PROTO macro --Guido */
 
 #include <stdio.h>
+
+#ifndef NDEBUG
+#define NDEBUG 1
+#endif
+
 #include <assert.h>
 #include "regexpr.h"
 
 #endif /* __STDC__ */
 #endif /* THINK_C */
 
-#define MACRO_BEGIN do {
-#define MACRO_END } while (0)
+/* The stack implementation is taken from an idea by Andrew Kuchling.
+ * It's a doubly linked list of arrays. The advantage of this over a
+ * simple linked list is that the number of mallocs required is
+ * reduced. It also makes it possible to statically allocate enough
+ * space so that small patterns don't ever need to call malloc.
+ *
+ * The advantage over a single array that is periodically realloced
+ * when more space is needed is that we avoid ever copying the
+ * stack. */
+
+/* item_t is the basic stack element.  Defined as a union of
+ * structures so that registers, failure points, and counters can
+ * all be pushed/popped from the stack.  There's nothing built into
+ * the item to keep track of whether a certain stack item is a
+ * register, a failure point, or a counter. */
+
+typedef union item_t
+{
+      /* Saved register state, as pushed by SET_REG_START/SET_REG_END
+       * and restored by POP_FAILURE. */
+      struct
+      {
+	    /* register number (index into the state's per-register arrays) */
+	    int num;
+	    /* previous value of state.changed[num] */
+	    int level;
+	    /* previous value of state.start[num] */
+	    char *start;
+	    /* previous value of state.end[num] */
+	    char *end;
+      } reg;
+      /* A failure (backtrack) point, as pushed by PUSH_FAILURE and
+       * UPDATE_FAILURE. */
+      struct
+      {
+	    /* state.count at the time of the push */
+	    int count;
+	    /* state.level at the time of the push */
+	    int level;
+	    /* 0 when pushed by PUSH_FAILURE, 1 when pushed by UPDATE_FAILURE */
+	    int phantom;
+	    /* code position saved with the failure point */
+	    char *code;
+	    /* text position saved with the failure point */
+	    char *text;
+      } fail;
+      /* Counter entry.  NOTE(review): none of the stack macros defined
+       * alongside this type reads or writes it -- confirm where it is
+       * used. */
+      struct
+      {
+	    int num;
+	    int level;
+	    int count;
+      } cntr;
+} item_t;
+
+#define STACK_PAGE_SIZE 256
+#define NUM_REGISTERS 256
+
+/* A 'page' of stack items: a fixed-size array of STACK_PAGE_SIZE
+ * items plus links to the neighbouring pages. */
+
+typedef struct item_page_t
+{
+      item_t items[STACK_PAGE_SIZE];
+      /* Previous page.  The STACK_* macros treat a NULL prev as the
+       * bottom of the stack. */
+      struct item_page_t *prev;
+      /* Next page; NULL until STACK_NEXT allocates and links one. */
+      struct item_page_t *next;
+} item_page_t;
+
+
+typedef struct match_state
+{
+      /* Structure to encapsulate the stack. */
+      struct
+      {
+	    /* Index of the next free slot in the current page (the
+	     * top item, if any, is at index - 1).  To pop an item:
+	     * if index == 0, move to the previous page and set
+	     * index = STACK_PAGE_SIZE - 1; otherwise decrement
+	     * index.  To push an item: if index == STACK_PAGE_SIZE,
+	     * move to the next page (allocating and linking in a
+	     * new page if there is none) and set index = 0; then
+	     * store the item at index and increment index. */
+	    int index;
+	    item_page_t *current; /* Pointer to the current page. */
+	    item_page_t first; /* First page is embedded in the state, so it never needs malloc. */
+      } stack;
+      /* Current start and end of each register. */
+      char *start[NUM_REGISTERS];
+      char *end[NUM_REGISTERS];
+
+      /* changed[reg] is the value 'level' had when register reg was
+       * last saved on the stack; SET_REG_START/SET_REG_END save the
+       * register again only when changed[reg] < level. */
+      int changed[NUM_REGISTERS];
+      /* The number of registers that have been pushed onto the stack
+       * since the last failure point. */
+      int count;
+      /* Used to control when registers need to be pushed onto the
+       * stack.  Set to 1 by NEW_STATE, incremented by PUSH_FAILURE
+       * and UPDATE_FAILURE, restored by POP_FAILURE. */
+      int level;
+      /* The number of failure points on the stack. */
+      int point;
+} match_state;
+
+/* Discard the top 'count' stack items.
+ *
+ * Subtracts 'count' from stack.index and, while the index is
+ * negative, steps stack.current back one page and adds
+ * STACK_PAGE_SIZE to the index.  'on_error' is executed when there
+ * is no previous page to step back to.
+ *
+ * The expansion is a bare statement sequence (not wrapped in a
+ * block) and 'count' is substituted without parentheses, so use it
+ * only as a complete statement inside braces and pass a simple
+ * expression for 'count'.  'stack' is pasted in front of '.', so it
+ * must name the stack structure itself, not a pointer to it. */
+
+#define STACK_DISCARD(stack, count, on_error) \
+stack.index -= count; \
+while (stack.index < 0) \
+{ \
+   if (stack.current->prev == NULL) \
+      on_error; \
+   stack.current = stack.current->prev; \
+   stack.index += STACK_PAGE_SIZE; \
+}
+
+/* Store a pointer to the previous item on the stack. Used to pop an
+ * item off of the stack.
+ *
+ * Moves the stack position back by one item (crossing to the
+ * previous page when index is 0) and stores the address of that item
+ * in 'top'.  'on_error' is executed when index is 0 and there is no
+ * previous page, i.e. the stack is empty.
+ *
+ * The expansion is an if/else followed by a separate assignment and
+ * is not wrapped in a block: use it only as a complete statement
+ * inside braces, never as the unbraced body of an if or loop. */
+
+#define STACK_PREV(stack, top, on_error) \
+if (stack.index == 0) \
+{ \
+   if (stack.current->prev == NULL) \
+      on_error; \
+   stack.current = stack.current->prev; \
+   stack.index = STACK_PAGE_SIZE - 1; \
+} \
+else \
+   stack.index--; \
+top = &(stack.current->items[stack.index])
+
+/* Store a pointer to the next item on the stack. Used to push an item
+ * on to the stack.
+ *
+ * When the current page is full (index == STACK_PAGE_SIZE) the macro
+ * moves to the next page, first allocating it with malloc and linking
+ * it in if it does not exist yet; 'on_error' is executed if that
+ * malloc fails.  'top' then receives the address of the slot at
+ * index, and index is incremented.  The slot's contents are not
+ * initialised here; the caller fills them in.
+ *
+ * The expansion is an if statement followed by a separate assignment
+ * and is not wrapped in a block: use it only as a complete statement
+ * inside braces. */
+
+#define STACK_NEXT(stack, top, on_error) \
+if (stack.index == STACK_PAGE_SIZE) \
+{ \
+   if (stack.current->next == NULL) \
+   { \
+      stack.current->next = malloc(sizeof(item_page_t)); \
+      if (stack.current->next == NULL) \
+         on_error; \
+      stack.current->next->prev = stack.current; \
+      stack.current->next->next = NULL; \
+   } \
+   stack.current = stack.current->next; \
+   stack.index = 0; \
+} \
+top = &(stack.current->items[stack.index++])
+
+/* Store a pointer to the item that is 'count' items back in the
+ * stack. STACK_BACK(stack, top, 1, on_error) is equivalent to
+ * STACK_TOP(stack, top, on_error).
+ *
+ * The stack itself is not modified: the walk uses private copies of
+ * the page pointer and index.  'on_error' is executed when the walk
+ * would have to go back past the first page.
+ *
+ * The macro body is a block that declares locals named 'index' and
+ * 'current'; the 'count' and 'top' arguments are evaluated inside
+ * that block, so they must not refer to caller variables with those
+ * names. */
+
+#define STACK_BACK(stack, top, count, on_error) \
+{ \
+   int index; \
+   item_page_t *current; \
+   current = stack.current; \
+   index = stack.index - (count); \
+   while (index < 0) \
+   { \
+      if (current->prev == NULL) \
+	 on_error; \
+      current = current->prev; \
+      index += STACK_PAGE_SIZE; \
+   } \
+   top = &(current->items[index]); \
+}
+
+/* Store a pointer to the top item on the stack. Execute the
+ * 'on_error' code if there are no items on the stack.
+ *
+ * The stack position is left unchanged.  When index is 0 the top
+ * item is the last slot of the previous page; otherwise it is the
+ * slot at index - 1 of the current page.
+ *
+ * The expansion is a bare if/else (no enclosing block and no
+ * trailing semicolon): use it as a complete statement inside
+ * braces. */
+
+#define STACK_TOP(stack, top, on_error) \
+if (stack.index == 0) \
+{ \
+   if (stack.current->prev == NULL) \
+      on_error; \
+   top = &(stack.current->prev->items[STACK_PAGE_SIZE - 1]); \
+} \
+else \
+   top = &(stack.current->items[stack.index - 1])
+
+/* Test to see if the stack is empty */
+
+#define STACK_EMPTY(stack) ((stack.index == 0) && \
+			    (stack.current->prev == NULL))
+
+
+/* Initialize a state object.
+ *
+ * Zeroes the whole match_state (registers, counters, stack index and
+ * the page links of the embedded first page), points the stack at
+ * that first page and sets the level to 1.
+ *
+ * Expands to three separate statements that are not wrapped in a
+ * block: use it only as a complete statement inside braces. */
+
+#define NEW_STATE(state) \
+memset(&state, 0, sizeof(match_state)); \
+state.stack.current = &state.stack.first; \
+state.level = 1
+
+/* Free any memory that might have been malloc'd.
+ *
+ * Unlinks and frees every page that follows the embedded first page.
+ * On exit state.stack.first.next is NULL.  stack.current is used as
+ * the scratch pointer for the walk, so if any page was freed it is
+ * left pointing at the last freed page; run NEW_STATE before using
+ * the state again. */
+
+#define FREE_STATE(state) \
+while(state.stack.first.next != NULL) \
+{ \
+   state.stack.current = state.stack.first.next; \
+   state.stack.first.next = state.stack.current->next; \
+   free(state.stack.current); \
+}
+
+/* Return the start of register 'reg' */
+
+#define GET_REG_START(state, reg) (state.start[reg])
+
+/* Return the end of register 'reg' */
+
+#define GET_REG_END(state, reg) (state.end[reg])
+
+/* Set the start of register 'reg'. If the state of the register needs
+ * saving, push it on the stack.
+ *
+ * The register needs saving when state.changed[reg] < state.level.
+ * In that case the register's current start, end and changed value
+ * are pushed as a 'reg' item, changed[reg] is set to the current
+ * level and state.count is incremented.  'on_error' is executed if
+ * the push cannot get a new stack page.
+ *
+ * 'reg' is evaluated several times.  The expansion is an if block
+ * followed by a separate assignment and is not wrapped in a block:
+ * use it only as a complete statement inside braces. */
+
+#define SET_REG_START(state, reg, text, on_error) \
+if(state.changed[reg] < state.level) \
+{ \
+   item_t *item; \
+   STACK_NEXT(state.stack, item, on_error); \
+   item->reg.num = reg; \
+   item->reg.start = state.start[reg]; \
+   item->reg.end = state.end[reg]; \
+   item->reg.level = state.changed[reg]; \
+   state.changed[reg] = state.level; \
+   state.count++; \
+} \
+state.start[reg] = text
+
+/* Set the end of register 'reg'. If the state of the register needs
+ * saving, push it on the stack.
+ *
+ * The register needs saving when state.changed[reg] < state.level.
+ * In that case the register's current start, end and changed value
+ * are pushed as a 'reg' item, changed[reg] is set to the current
+ * level and state.count is incremented.  'on_error' is executed if
+ * the push cannot get a new stack page.
+ *
+ * 'reg' is evaluated several times.  The expansion is an if block
+ * followed by a separate assignment and is not wrapped in a block:
+ * use it only as a complete statement inside braces. */
+
+#define SET_REG_END(state, reg, text, on_error) \
+if(state.changed[reg] < state.level) \
+{ \
+   item_t *item; \
+   STACK_NEXT(state.stack, item, on_error); \
+   item->reg.num = reg; \
+   item->reg.start = state.start[reg]; \
+   item->reg.end = state.end[reg]; \
+   item->reg.level = state.changed[reg]; \
+   state.changed[reg] = state.level; \
+   state.count++; \
+} \
+state.end[reg] = text
+
+/* Push a new failure point that records the code position 'xcode'
+ * and the text position 'xtext'.
+ *
+ * The item also records the current state.count and state.level and
+ * is marked as not phantom.  Afterwards state.count is reset to 0
+ * and both state.level and state.point are incremented.  'on_error'
+ * is executed if the push cannot get a new stack page. */
+
+#define PUSH_FAILURE(state, xcode, xtext, on_error) \
+{ \
+   item_t *item; \
+   STACK_NEXT(state.stack, item, on_error); \
+   item->fail.code = xcode; \
+   item->fail.text = xtext; \
+   item->fail.count = state.count; \
+   item->fail.level = state.level; \
+   item->fail.phantom = 0; \
+   state.count = 0; \
+   state.level++; \
+   state.point++; \
+}
+
+/* Update the last failure point with a new position in the text. */
+
+/* Two earlier variants, left disabled: */
+
+/* #define UPDATE_FAILURE(state, xtext, on_error) \ */
+/* { \ */
+/*    item_t *item; \ */
+/*    STACK_DISCARD(state.stack, state.count, on_error); \ */
+/*    STACK_TOP(state.stack, item, on_error); \ */
+/*    item->fail.text = xtext; \ */
+/*    state.count = 0; \ */
+/* } */
+
+/* #define UPDATE_FAILURE(state, xtext, on_error) \ */
+/* { \ */
+/*    item_t *item; \ */
+/*    STACK_BACK(state.stack, item, state.count + 1, on_error); \ */
+/*    item->fail.text = xtext; \ */
+/* } */
+
+/* The version in use.  It looks up the item state.count + 1 entries
+ * back, i.e. the failure point lying beneath the registers saved
+ * since it was pushed.
+ *
+ * - If that failure point is not a phantom, it is left untouched and
+ *   a new phantom failure point is pushed on top, with the same code
+ *   position and 'xtext' as its text position; state.count is reset
+ *   and state.level and state.point are incremented, as in
+ *   PUSH_FAILURE.
+ * - If it is already a phantom, the registers saved since it are
+ *   discarded from the stack (without being restored), its text
+ *   position is overwritten with 'xtext', state.count is reset and
+ *   state.level is incremented; state.point is unchanged.
+ *
+ * 'on_error' is executed if the stack is too shallow or a new page
+ * cannot be allocated. */
+
+#define UPDATE_FAILURE(state, xtext, on_error) \
+{ \
+   item_t *item; \
+   STACK_BACK(state.stack, item, state.count + 1, on_error); \
+   if (!item->fail.phantom) \
+   { \
+      item_t *item2; \
+      STACK_NEXT(state.stack, item2, on_error); \
+      item2->fail.code = item->fail.code; \
+      item2->fail.text = xtext; \
+      item2->fail.count = state.count; \
+      item2->fail.level = state.level; \
+      item2->fail.phantom = 1; \
+      state.count = 0; \
+      state.level++; \
+      state.point++; \
+   } \
+   else \
+   { \
+      STACK_DISCARD(state.stack, state.count, on_error); \
+      STACK_TOP(state.stack, item, on_error); \
+      item->fail.text = xtext; \
+      state.count = 0; \
+      state.level++; \
+   } \
+}
+
+/* Pop the most recent usable failure point and load its code and
+ * text positions into 'xcode' and 'xtext'.
+ *
+ * Each round first pops the state.count register items saved since
+ * the failure point, restoring every register's start, end and
+ * changed value, then pops the failure point itself and restores
+ * state.count and state.level from it, decrementing state.point.
+ * Rounds repeat for as long as the popped failure point has a NULL
+ * text position.
+ *
+ * 'on_error' is executed if the stack runs out while registers are
+ * being restored; 'on_empty' is executed if it runs out where a
+ * failure point was expected. */
+
+#define POP_FAILURE(state, xcode, xtext, on_empty, on_error) \
+{ \
+   item_t *item; \
+   do \
+   { \
+      while(state.count > 0) \
+      { \
+         STACK_PREV(state.stack, item, on_error); \
+         state.start[item->reg.num] = item->reg.start; \
+         state.end[item->reg.num] = item->reg.end; \
+         state.changed[item->reg.num] = item->reg.level; \
+         state.count--; \
+      } \
+      STACK_PREV(state.stack, item, on_empty); \
+      xcode = item->fail.code; \
+      xtext = item->fail.text; \
+      state.count = item->fail.count; \
+      state.level = item->fail.level; \
+      state.point--; \
+   } \
+   while (item->fail.text == NULL); \
+}
 
 enum regexp_compiled_ops /* opcodes for compiled regexp */
 {
   Cwordend,		/* match at end of word */
   Cwordbound,		/* match if at word boundary */
   Cnotwordbound,	/* match if not at word boundary */
-#ifdef emacs
-  Cemacs_at_dot,	/* emacs only: matches at dot */
-#endif /* emacs */
   Csyntaxspec,		/* matches syntax code (1 byte follows) */
   Cnotsyntaxspec	/* matches if syntax code does not match (1 byte foll)*/
 };
   Rwordend,		/* end of word */
   Rwordbound,		/* word bound */
   Rnotwordbound,	/* not word bound */
-#ifdef emacs
-  Remacs_at_dot,	/* emacs: at dot */
-  Remacs_syntaxspec,	/* syntaxspec */
-  Remacs_notsyntaxspec,	/* notsyntaxspec */
-#endif /* emacs */
   Rnum_ops
 };
 
 #define NUM_LEVELS  5    /* number of precedence levels in use */
 #define MAX_NESTING 100  /* max nesting level of operators */
 
-#ifdef emacs
-
-/* This code is for emacs compatibility only. */
-
-#include "config.h"
-#include "lisp.h"
-#include "buffer.h"
-#include "syntax.h"
-
-/* emacs defines NULL in some strange way? */
-#undef NULL
-#define NULL 0
-
-#else /* emacs */
-
 #define SYNTAX(ch) re_syntax_table[(unsigned char)(ch)]
 #define Sword 1
 
-#ifdef SYNTAX_TABLE
-char *re_syntax_table;
-#else
 static char re_syntax_table[256];
-#endif /* SYNTAX_TABLE */
 
-#endif /* emacs */
-
-static void re_compile_initialize Py_PROTO((void));
-static void re_compile_initialize()
+static void re_compile_initialize(void)
 {
-  int a;
+   int a;
   
-#if !defined(emacs) && !defined(SYNTAX_TABLE)
-  static int syntax_table_inited = 0;
-  
-  if (!syntax_table_inited)
-    {
+   static int syntax_table_inited = 0;
+   
+   if (!syntax_table_inited)
+   {
       syntax_table_inited = 1;
       memset(re_syntax_table, 0, 256);
       for (a = 'a'; a <= 'z'; a++)
-	re_syntax_table[a] = Sword;
+	 re_syntax_table[a] = Sword;
       for (a = 'A'; a <= 'Z'; a++)
-	re_syntax_table[a] = Sword;
+	 re_syntax_table[a] = Sword;
       for (a = '0'; a <= '9'; a++)
-	re_syntax_table[a] = Sword;
-    }
-#endif /* !emacs && !SYNTAX_TABLE */
-  re_compile_initialized = 1;
-  for (a = 0; a < 256; a++)
-    {
+	 re_syntax_table[a] = Sword;
+   }
+   re_compile_initialized = 1;
+   for (a = 0; a < 256; a++)
+   {
       regexp_plain_ops[a] = Rnormal;
       regexp_quoted_ops[a] = Rnormal;
-    }
-  for (a = '0'; a <= '9'; a++)
-    regexp_quoted_ops[a] = Rmemory;
-  regexp_plain_ops['\134'] = Rquote;
-  if (regexp_syntax & RE_NO_BK_PARENS)
-    {
+   }
+   for (a = '0'; a <= '9'; a++)
+      regexp_quoted_ops[a] = Rmemory;
+   regexp_plain_ops['\134'] = Rquote;
+   if (regexp_syntax & RE_NO_BK_PARENS)
+   {
       regexp_plain_ops['('] = Ropenpar;
       regexp_plain_ops[')'] = Rclosepar;
-    }
-  else
-    {
+   }
+   else
+   {
       regexp_quoted_ops['('] = Ropenpar;
       regexp_quoted_ops[')'] = Rclosepar;
-    }
-  if (regexp_syntax & RE_NO_BK_VBAR)
-    regexp_plain_ops['\174'] = Ror;
-  else
-    regexp_quoted_ops['\174'] = Ror;
-  regexp_plain_ops['*'] = Rstar;
-  if (regexp_syntax & RE_BK_PLUS_QM)
-    {
+   }
+   if (regexp_syntax & RE_NO_BK_VBAR)
+      regexp_plain_ops['\174'] = Ror;
+   else
+      regexp_quoted_ops['\174'] = Ror;
+   regexp_plain_ops['*'] = Rstar;
+   if (regexp_syntax & RE_BK_PLUS_QM)
+   {
       regexp_quoted_ops['+'] = Rplus;
       regexp_quoted_ops['?'] = Roptional;
-    }
-  else
-    {
+   }
+   else
+   {
       regexp_plain_ops['+'] = Rplus;
       regexp_plain_ops['?'] = Roptional;
-    }
-  if (regexp_syntax & RE_NEWLINE_OR)
-    regexp_plain_ops['\n'] = Ror;
-  regexp_plain_ops['\133'] = Ropenset;
-  regexp_plain_ops['\136'] = Rbol;
-  regexp_plain_ops['$'] = Reol;
-  regexp_plain_ops['.'] = Ranychar;
-  if (!(regexp_syntax & RE_NO_GNU_EXTENSIONS))
-    {
-#ifdef emacs
-      regexp_quoted_ops['='] = Remacs_at_dot;
-      regexp_quoted_ops['s'] = Remacs_syntaxspec;
-      regexp_quoted_ops['S'] = Remacs_notsyntaxspec;
-#endif /* emacs */
+   }
+   if (regexp_syntax & RE_NEWLINE_OR)
+      regexp_plain_ops['\n'] = Ror;
+   regexp_plain_ops['\133'] = Ropenset;
+   regexp_plain_ops['\136'] = Rbol;
+   regexp_plain_ops['$'] = Reol;
+   regexp_plain_ops['.'] = Ranychar;
+   if (!(regexp_syntax & RE_NO_GNU_EXTENSIONS))
+   {
       regexp_quoted_ops['w'] = Rwordchar;
       regexp_quoted_ops['W'] = Rnotwordchar;
       regexp_quoted_ops['<'] = Rwordbeg;
       regexp_quoted_ops['B'] = Rnotwordbound;
       regexp_quoted_ops['`'] = Rbegbuf;
       regexp_quoted_ops['\''] = Rendbuf;
-    }
-  if (regexp_syntax & RE_ANSI_HEX)
-    regexp_quoted_ops['v'] = Rextended_memory;
-  for (a = 0; a < Rnum_ops; a++)
-    regexp_precedences[a] = 4;
-  if (regexp_syntax & RE_TIGHT_VBAR)
-    {
+   }
+   if (regexp_syntax & RE_ANSI_HEX)
+      regexp_quoted_ops['v'] = Rextended_memory;
+   for (a = 0; a < Rnum_ops; a++)
+      regexp_precedences[a] = 4;
+   if (regexp_syntax & RE_TIGHT_VBAR)
+   {
       regexp_precedences[Ror] = 3;
       regexp_precedences[Rbol] = 2;
       regexp_precedences[Reol] = 2;
-    }
-  else
-    {
+   }
+   else
+   {
       regexp_precedences[Ror] = 2;
       regexp_precedences[Rbol] = 3;
       regexp_precedences[Reol] = 3;
-    }
-  regexp_precedences[Rclosepar] = 1;
-  regexp_precedences[Rend] = 0;
-  regexp_context_indep_ops = (regexp_syntax & RE_CONTEXT_INDEP_OPS) != 0;
-  regexp_ansi_sequences = (regexp_syntax & RE_ANSI_HEX) != 0;
+   }
+   regexp_precedences[Rclosepar] = 1;
+   regexp_precedences[Rend] = 0;
+   regexp_context_indep_ops = (regexp_syntax & RE_CONTEXT_INDEP_OPS) != 0;
+   regexp_ansi_sequences = (regexp_syntax & RE_ANSI_HEX) != 0;
 }
 
-int re_set_syntax(syntax)
-int syntax;
+int re_set_syntax(int syntax)
 {
-  int ret;
-
-  ret = regexp_syntax;
-  regexp_syntax = syntax;
-  re_syntax = syntax; /* Exported copy */
-  re_compile_initialize();
-  return ret;
+   int ret;
+   
+   ret = regexp_syntax;
+   regexp_syntax = syntax;
+   re_syntax = syntax; /* Exported copy */
+   re_compile_initialize();
+   return ret;
 }
 
-static int hex_char_to_decimal Py_PROTO((int));
-static int hex_char_to_decimal(ch)
-int ch;
+static int hex_char_to_decimal(int ch)
 {
-  if (ch >= '0' && ch <= '9')
-    return ch - '0';
-  if (ch >= 'a' && ch <= 'f')
-    return ch - 'a' + 10;
-  if (ch >= 'A' && ch <= 'F')
-    return ch - 'A' + 10;
-  return 16;
+   if (ch >= '0' && ch <= '9')
+      return ch - '0';
+   if (ch >= 'a' && ch <= 'f')
+      return ch - 'a' + 10;
+   if (ch >= 'A' && ch <= 'F')
+      return ch - 'A' + 10;
+   return 16;
 }
 
-char *re_compile_pattern(regex, size, bufp)
-char *regex;
-int size;
-regexp_t bufp;
+static void re_compile_fastmap_aux(char *code,
+				   int pos,
+				   char *visited,
+				   char *can_be_null,
+				   char *fastmap)
 {
-  int a, pos, op, current_level, level, opcode;
-  int pattern_offset = 0, alloc;
-  int starts[NUM_LEVELS * MAX_NESTING], starts_base;
-  int future_jumps[MAX_NESTING], num_jumps;
-  unsigned char ch = '\0';
-  char *pattern, *translate;
-  int next_register, paren_depth, num_open_registers, open_registers[RE_NREGS];
-  int beginning_context;
+   int a;
+   int b;
+   int syntaxcode;
+   
+   if (visited[pos])
+      return;  /* we have already been here */
+   visited[pos] = 1;
+   for (;;)
+      switch (code[pos++])
+      {
+	 case Cend:
+	 {
+	    *can_be_null = 1;
+	    return;
+	 }
+	 case Cbol:
+	 case Cbegbuf:
+	 case Cendbuf:
+	 case Cwordbeg:
+	 case Cwordend:
+	 case Cwordbound:
+	 case Cnotwordbound:
+	 {
+	    break;
+	 }
+	 case Csyntaxspec:
+	 {
+	    syntaxcode = code[pos++];
+	    for (a = 0; a < 256; a++)
+	       if (SYNTAX(a) == syntaxcode)
+		  fastmap[a] = 1;
+	    return;
+	 }
+	 case Cnotsyntaxspec:
+	 {
+	    syntaxcode = code[pos++];
+	    for (a = 0; a < 256; a++)
+	       if (SYNTAX(a) != syntaxcode)
+		  fastmap[a] = 1;
+	    return;
+	 }
+	 case Ceol:
+	 {
+	    fastmap['\n'] = 1;
+	    if (*can_be_null == 0)
+	       *can_be_null = 2; /* can match null, but only at end of buffer*/
+	    return;
+	 }
+	 case Cset:
+	 {
+	    for (a = 0; a < 256/8; a++)
+	       if (code[pos + a] != 0)
+		  for (b = 0; b < 8; b++)
+		     if (code[pos + a] & (1 << b))
+			fastmap[(a << 3) + b] = 1;
+	    pos += 256/8;
+	    return;
+	 }
+	 case Cexact:
+	 {
+	    fastmap[(unsigned char)code[pos]] = 1;
+	    return;
+	 }
+	 case Canychar:
+	 {
+	    for (a = 0; a < 256; a++)
+	       if (a != '\n')
+		  fastmap[a] = 1;
+	    return;
+	 }
+	 case Cstart_memory:
+	 case Cend_memory:
+	 {
+	    pos++;
+	    break;
+	 }
+	 case Cmatch_memory:
+	 {
+	    for (a = 0; a < 256; a++)
+	       fastmap[a] = 1;
+	    *can_be_null = 1;
+	    return;
+	 }
+	 case Cjump:
+	 case Cdummy_failure_jump:
+	 case Cupdate_failure_jump:
+	 case Cstar_jump:
+	 {
+	    a = (unsigned char)code[pos++];
+	    a |= (unsigned char)code[pos++] << 8;
+	    pos += (int)(short)a;
+	    if (visited[pos])
+	    {
+	       /* argh... the regexp contains empty loops.  This is not
+		  good, as this may cause a failure stack overflow when
+		  matching.  Oh well. */
+	       /* this path leads nowhere; pursue other paths. */
+	       return;
+	    }
+	    visited[pos] = 1;
+	    break;
+	 }
+	 case Cfailure_jump:
+	 {
+	    a = (unsigned char)code[pos++];
+	    a |= (unsigned char)code[pos++] << 8;
+	    a = pos + (int)(short)a;
+	    re_compile_fastmap_aux(code, a, visited, can_be_null, fastmap);
+	    break;
+	 }
+	 default:
+	 {
+	    abort();  /* probably some opcode is missing from this switch */
+	    /*NOTREACHED*/
+	 }
+      }
+}
 
-#define NEXTCHAR(var)			\
-  MACRO_BEGIN				\
-    if (pos >= size)			\
-      goto ends_prematurely;		\
-    (var) = regex[pos];			\
-    pos++;				\
-  MACRO_END
+/* Build a fastmap for the compiled code in 'buffer', scanning from
+ * offset 'pos'.
+ *
+ * 'used' is the number of code bytes in use; a zeroed scratch array
+ * of that many bytes is handed to re_compile_fastmap_aux() as its
+ * 'visited' markers.  The scratch array lives on the C stack when it
+ * fits in 512 bytes and is malloc'd (and freed before returning)
+ * otherwise.  '*can_be_null' is cleared and the 256-byte 'fastmap'
+ * is zeroed before the scan fills them in.
+ *
+ * Returns 1 on success, 0 if the scratch array cannot be allocated
+ * (in which case the outputs are left untouched). */
+static int re_do_compile_fastmap(char *buffer,
+				 int used,
+				 int pos,
+				 char *can_be_null,
+				 char *fastmap)
+{
+   char stack_marks[512];
+   char *marks = stack_marks;
+
+   /* Too big for the on-stack array: fall back to the heap. */
+   if (used > sizeof(stack_marks))
+   {
+      marks = malloc(used);
+      if (marks == NULL)
+         return 0;
+   }
+
+   *can_be_null = 0;
+   memset(fastmap, 0, 256);
+   memset(marks, 0, used);
+
+   re_compile_fastmap_aux(buffer, pos, marks, can_be_null, fastmap);
+
+   if (marks != stack_marks)
+      free(marks);
+   return 1;
+}
 
-#define ALLOC(amount)				\
-  MACRO_BEGIN					\
-    if (pattern_offset+(amount) > alloc)	\
-      {						\
-	alloc += 256 + (amount);		\
-	pattern = realloc(pattern, alloc);	\
-	if (!pattern)				\
-	  goto out_of_memory;			\
-      }						\
-  MACRO_END
+/* Fill in bufp->fastmap, bufp->can_be_null and bufp->anchor for an
+ * already compiled pattern.
+ *
+ * Does nothing when the pattern has no fastmap buffer or when the
+ * fastmap is already marked accurate.  If the fastmap cannot be
+ * computed (allocation failure in re_do_compile_fastmap) the function
+ * returns without setting the anchor or marking the fastmap
+ * accurate. */
+void re_compile_fastmap(regexp_t bufp)
+{
+   if (bufp->fastmap == NULL || bufp->fastmap_accurate)
+      return;
+   assert(bufp->used > 0);
+   if (!re_do_compile_fastmap(bufp->buffer,
+                              bufp->used,
+                              0,
+                              &bufp->can_be_null,
+                              bufp->fastmap))
+      return;
+
+   /* The anchor is derived from the very first opcode. */
+   switch (bufp->buffer[0])
+   {
+      case Cbol:
+         bufp->anchor = 1;   /* begline */
+         break;
+      case Cbegbuf:
+         bufp->anchor = 2;   /* begbuf */
+         break;
+      default:
+         bufp->anchor = 0;   /* none */
+         break;
+   }
+   bufp->fastmap_accurate = 1;
+}
+
+/* 
+ * star is coded as:
+ * 1: failure_jump 2
+ *    ... code for operand of star
+ *    star_jump 1
+ * 2: ... code after star
+ *
+ * We change the star_jump to update_failure_jump if we can determine
+ * that it is safe to do so; otherwise we change it to an ordinary
+ * jump.
+ *
+ * plus is coded as
+ *
+ *    jump 2
+ * 1: failure_jump 3
+ * 2: ... code for operand of plus
+ *    star_jump 1
+ * 3: ... code after plus
+ *
+ * For star_jump considerations this is processed identically to star.
+ *
+ */
+
+/* Rewrite one Cstar_jump instruction.
+ *
+ * 'code' points at the two-byte displacement that follows the
+ * Cstar_jump opcode.  The opcode is replaced by Cupdate_failure_jump
+ * (with the displacement moved past the loop's Cfailure_jump) when
+ * the scan below shows that this is safe, and by a plain Cjump
+ * otherwise.
+ *
+ * Returns 1 once the jump has been rewritten.  Returns 0, leaving the
+ * jump untouched, if Cend or an unrecognised opcode is met while
+ * scanning the loop body. */
+static int re_optimize_star_jump(regexp_t bufp, char *code)
+{
+   char map[256];
+   char can_be_null;
+   char *p1;
+   char *p2;
+   /* Must be unsigned: it is used as an index into map[].  With a
+    * plain char that happens to be signed, pattern bytes >= 0x80
+    * would turn into a negative index and read outside the array. */
+   unsigned char ch;
+   int a;
+   int b;
+
+   /* Decode the signed 16-bit little-endian displacement. */
+   a = (unsigned char)*code++;
+   a |= (unsigned char)*code++ << 8;
+   a = (int)(short)a;
+
+   p1 = code + a + 3; /* skip the failure_jump */
+   assert(p1[-3] == Cfailure_jump);
+   p2 = code;
+   /* p1 points inside loop, p2 points to after loop */
+   if (!re_do_compile_fastmap(bufp->buffer, bufp->used,
+			      p2 - bufp->buffer, &can_be_null, map))
+      goto make_normal_jump;
+   
+   /* If we might introduce a new update point inside the
+    * loop, we can't optimize because then update_jump would
+    * update a wrong failure point.  Thus we have to be
+    * quite careful here.
+    */
+      
+   /* loop until we find something that consumes a character */
+  loop_p1:
+   switch (*p1++)
+   {
+      case Cbol:
+      case Ceol:
+      case Cbegbuf:
+      case Cendbuf:
+      case Cwordbeg:
+      case Cwordend:
+      case Cwordbound:
+      case Cnotwordbound:
+      {
+	 goto loop_p1;
+      }
+      case Cstart_memory:
+      case Cend_memory:
+      {
+	 p1++;
+	 goto loop_p1;
+      }
+      case Cexact:
+      {
+	 /* The loop starts with a literal: unsafe if the code after
+	  * the loop can start with the same character. */
+	 ch = (unsigned char)*p1++;
+	 if (map[ch])
+	    goto make_normal_jump;
+	 break;
+      }
+      case Canychar:
+      {
+	 for (b = 0; b < 256; b++)
+	    if (b != '\n' && map[b])
+	       goto make_normal_jump;
+	 break;
+      }
+      case Cset:
+      {
+	 for (b = 0; b < 256; b++)
+	    if ((p1[b >> 3] & (1 << (b & 7))) && map[b])
+	       goto make_normal_jump;
+	 p1 += 256/8;
+	 break;
+      }
+      default:
+      {
+	 goto make_normal_jump;
+      }
+   }
+   /* now we know that we can't backtrack. */
+   /* Scan the rest of the loop body up to this star_jump; any nested
+    * jump rules out the optimization. */
+   while (p1 != p2 - 3)
+   {
+      switch (*p1++)
+      {
+	 case Cend:
+	 {
+	    return 0;
+	 }
+	 case Cbol:
+	 case Ceol:
+	 case Canychar:
+	 case Cbegbuf:
+	 case Cendbuf:
+	 case Cwordbeg:
+	 case Cwordend:
+	 case Cwordbound:
+	 case Cnotwordbound:
+	 {
+	    break;
+	 }
+	 case Cset:
+	 {
+	    p1 += 256/8;
+	    break;
+	 }
+	 case Cexact:
+	 case Cstart_memory:
+	 case Cend_memory:
+	 case Cmatch_memory:
+	 case Csyntaxspec:
+	 case Cnotsyntaxspec:
+	 {
+	    p1++;
+	    break;
+	 }
+	 case Cjump:
+	 case Cstar_jump:
+	 case Cfailure_jump:
+	 case Cupdate_failure_jump:
+	 case Cdummy_failure_jump:
+	 {
+	    goto make_normal_jump;
+	 }
+	 default:
+	 {
+	    return 0;
+	 }
+      }
+   }
+
+   /* Safe: turn the star_jump into an update_failure_jump. */
+   code -= 3;
+   a += 3;  /* jump to after the Cfailure_jump */
+   code[0] = Cupdate_failure_jump;
+   code[1] = a & 0xff;
+   code[2] = a >> 8;
+   return 1;
+
+  make_normal_jump:
+   code -= 3;
+   *code = Cjump;
+   return 1;
+}
+
+/* Walk the compiled pattern in bufp->buffer from its first opcode,
+ * stepping over each instruction and its operand bytes, and pass
+ * every Cstar_jump to re_optimize_star_jump().
+ *
+ * Returns 1 when Cend is reached.  Returns 0 if an unrecognised
+ * opcode is met or if re_optimize_star_jump() returns 0. */
+static int re_optimize(regexp_t bufp)
+{
+   char *pc = bufp->buffer;
+
+   for (;;)
+   {
+      switch (*pc++)
+      {
+         case Cend:
+            return 1;
+
+         /* opcodes without operand bytes */
+         case Canychar:
+         case Cbol:
+         case Ceol:
+         case Cbegbuf:
+         case Cendbuf:
+         case Cwordbeg:
+         case Cwordend:
+         case Cwordbound:
+         case Cnotwordbound:
+            break;
+
+         /* opcode followed by a 256-bit set */
+         case Cset:
+            pc += 256/8;
+            break;
+
+         /* opcodes followed by one operand byte */
+         case Cexact:
+         case Cstart_memory:
+         case Cend_memory:
+         case Cmatch_memory:
+         case Csyntaxspec:
+         case Cnotsyntaxspec:
+            pc++;
+            break;
+
+         case Cstar_jump:
+            if (!re_optimize_star_jump(bufp, pc))
+               return 0;
+            /* fall through */
+
+         /* jumps are followed by a two-byte displacement */
+         case Cupdate_failure_jump:
+         case Cjump:
+         case Cdummy_failure_jump:
+         case Cfailure_jump:
+            pc += 2;
+            break;
+
+         default:
+            return 0;
+      }
+   }
+}
+
+#define NEXTCHAR(var) \
+{ \
+   if (pos >= size) \
+      goto ends_prematurely; \
+   (var) = regex[pos]; \
+   pos++; \
+}
+
+#define ALLOC(amount) \
+{ \
+   if (pattern_offset+(amount) > alloc) \
+   { \
+      alloc += 256 + (amount); \
+      pattern = realloc(pattern, alloc); \
+      if (!pattern) \
+	 goto out_of_memory; \
+   } \
+}
 
 #define STORE(ch) pattern[pattern_offset++] = (ch)
 
 
 #define SET_LEVEL_START starts[starts_base + current_level] = pattern_offset
 
-#define PUSH_LEVEL_STARTS if (starts_base < (MAX_NESTING-1)*NUM_LEVELS) \
-		            starts_base += NUM_LEVELS;			\
-                          else						\
-			    goto too_complex
+#define PUSH_LEVEL_STARTS \
+   if (starts_base < (MAX_NESTING-1)*NUM_LEVELS) \
+      starts_base += NUM_LEVELS; \
+   else \
+      goto too_complex
 
 #define POP_LEVEL_STARTS starts_base -= NUM_LEVELS
 
-#define PUT_ADDR(offset,addr)				\
-  MACRO_BEGIN						\
-    int disp = (addr) - (offset) - 2;			\
-    pattern[(offset)] = disp & 0xff;			\
-    pattern[(offset)+1] = (disp>>8) & 0xff;		\
-  MACRO_END
+#define PUT_ADDR(offset,addr) \
+{ \
+   int disp = (addr) - (offset) - 2; \
+   pattern[(offset)] = disp & 0xff; \
+   pattern[(offset)+1] = (disp>>8) & 0xff; \
+}
 
-#define INSERT_JUMP(pos,type,addr)			\
-  MACRO_BEGIN						\
-    int a, p = (pos), t = (type), ad = (addr);		\
-    for (a = pattern_offset - 1; a >= p; a--)		\
-      pattern[a + 3] = pattern[a];			\
-    pattern[p] = t;					\
-    PUT_ADDR(p+1,ad);					\
-    pattern_offset += 3;				\
-  MACRO_END
-
+#define INSERT_JUMP(pos,type,addr) \
+{ \
+   int a, p = (pos), t = (type), ad = (addr); \
+   for (a = pattern_offset - 1; a >= p; a--) \
+      pattern[a + 3] = pattern[a]; \
+   pattern[p] = t; \
+   PUT_ADDR(p+1,ad); \
+   pattern_offset += 3; \
+}
 #define SETBIT(buf,offset,bit) (buf)[(offset)+(bit)/8] |= (1<<((bit) & 7))
 
-#define SET_FIELDS				\
-  MACRO_BEGIN					\
-    bufp->allocated = alloc;			\
-    bufp->buffer = pattern;			\
-    bufp->used = pattern_offset;		\
-  MACRO_END
+#define SET_FIELDS \
+{ \
+   bufp->allocated = alloc; \
+   bufp->buffer = pattern; \
+   bufp->used = pattern_offset; \
+}
     
-#define GETHEX(var)						\
-  MACRO_BEGIN							\
-    char gethex_ch, gethex_value;				\
-    NEXTCHAR(gethex_ch);					\
-    gethex_value = hex_char_to_decimal(gethex_ch);		\
-    if (gethex_value == 16)					\
-      goto hex_error;						\
-    NEXTCHAR(gethex_ch);					\
-    gethex_ch = hex_char_to_decimal(gethex_ch);			\
-    if (gethex_ch == 16)					\
-      goto hex_error;						\
-    (var) = gethex_value * 16 + gethex_ch;			\
-  MACRO_END
+#define GETHEX(var) \
+{ \
+   char gethex_ch, gethex_value; \
+   NEXTCHAR(gethex_ch); \
+   gethex_value = hex_char_to_decimal(gethex_ch); \
+   if (gethex_value == 16) \
+      goto hex_error; \
+   NEXTCHAR(gethex_ch); \
+   gethex_ch = hex_char_to_decimal(gethex_ch); \
+   if (gethex_ch == 16) \
+      goto hex_error; \
+   (var) = gethex_value * 16 + gethex_ch; \
+}
 
-#define ANSI_TRANSLATE(ch)				\
-  MACRO_BEGIN						\
-    switch (ch)						\
-      {							\
-      case 'a':						\
-      case 'A':						\
-	ch = 7; /* audible bell */			\
-	break;						\
-      case 'b':						\
-      case 'B':						\
-	ch = 8; /* backspace */				\
-	break;						\
-      case 'f':						\
-      case 'F':						\
-	ch = 12; /* form feed */			\
-	break;						\
-      case 'n':						\
-      case 'N':						\
-	ch = 10; /* line feed */			\
-	break;						\
-      case 'r':						\
-      case 'R':						\
-	ch = 13; /* carriage return */			\
-	break;						\
-      case 't':						\
-      case 'T':						\
-	ch = 9; /* tab */				\
-	break;						\
-      case 'v':						\
-      case 'V':						\
-	ch = 11; /* vertical tab */			\
-	break;						\
-      case 'x': /* hex code */				\
-      case 'X':						\
-	GETHEX(ch);					\
-	break;						\
-      default:						\
-	/* other characters passed through */		\
-	if (translate)					\
-	  ch = translate[(unsigned char)ch];		\
-	break;						\
-      }							\
-  MACRO_END
+#define ANSI_TRANSLATE(ch)  \
+{ \
+   switch (ch) \
+   { \
+      case 'a': \
+      case 'A': \
+      { \
+	 ch = 7; /* audible bell */ \
+	 break; \
+      } \
+      case 'b': \
+      case 'B': \
+      { \
+	 ch = 8; /* backspace */ \
+	 break; \
+      } \
+      case 'f': \
+      case 'F': \
+      { \
+	 ch = 12; /* form feed */ \
+	 break; \
+      } \
+      case 'n': \
+      case 'N': \
+      { \
+	 ch = 10; /* line feed */ \
+	 break; \
+      } \
+      case 'r': \
+      case 'R': \
+      { \
+	 ch = 13; /* carriage return */ \
+	 break; \
+      } \
+      case 't': \
+      case 'T': \
+      { \
+	 ch = 9; /* tab */ \
+	 break; \
+      } \
+      case 'v': \
+      case 'V': \
+      { \
+	 ch = 11; /* vertical tab */ \
+	 break; \
+      } \
+      case 'x': /* hex code */ \
+      case 'X': \
+      { \
+	 GETHEX(ch); \
+	 break; \
+      } \
+      default: \
+      { \
+	 /* other characters passed through */ \
+	 if (translate) \
+	    ch = translate[(unsigned char)ch]; \
+	 break; \
+      } \
+   } \
+}
 
-  if (!re_compile_initialized)
-    re_compile_initialize();
-  bufp->used = 0;
-  bufp->fastmap_accurate = 0;
-  bufp->uses_registers = 0;
-  translate = bufp->translate;
-  pattern = bufp->buffer;
-  alloc = bufp->allocated;
-  if (alloc == 0 || pattern == NULL)
-    {
+/* re_compile_pattern: compile the SIZE bytes starting at REGEX into
+ * the opcode buffer owned by BUFP.
+ *
+ * Each input character is classified through regexp_plain_ops[] (or
+ * regexp_quoted_ops[] after a quote character) and the resulting
+ * operator is handled according to regexp_precedences[].  Forward
+ * jumps emitted for alternation are remembered in future_jumps[] and
+ * patched with PUT_ADDR once parsing drops back to a lower precedence
+ * level; a dummy Rend operator at end of input forces that final
+ * patching pass.  On success the compiled buffer is passed to
+ * re_optimize().
+ *
+ * Returns NULL on success, otherwise a pointer to a constant string
+ * describing the problem.  SET_FIELDS is run on every exit path
+ * (presumably to store the buffer bookkeeping back into BUFP).
+ */
+char *re_compile_pattern(char *regex, int size, regexp_t bufp)
+{
+   int a;
+   int pos;
+   int op;
+   int current_level;
+   int level;
+   int opcode;
+   int pattern_offset, alloc;
+   int starts[NUM_LEVELS * MAX_NESTING];
+   int starts_base;
+   int future_jumps[MAX_NESTING];
+   int num_jumps;
+   unsigned char ch;
+   char *pattern;
+   char *translate;
+   int next_register;
+   int paren_depth;
+   int num_open_registers;
+   int open_registers[RE_NREGS];
+   int beginning_context;
+
+   if (!re_compile_initialized)
+      re_compile_initialize();
+   bufp->used = 0;
+   bufp->fastmap_accurate = 0;
+   bufp->uses_registers = 0;
+   translate = bufp->translate;
+   pattern = bufp->buffer;
+   alloc = bufp->allocated;
+   if (alloc == 0 || pattern == NULL)
+   {
       alloc = 256;
       pattern = malloc(alloc);
       if (!pattern)
-	goto out_of_memory;
-    }
-  pattern_offset = 0;
-  starts_base = 0;
-  num_jumps = 0;
-  current_level = 0;
-  SET_LEVEL_START;
-  num_open_registers = 0;
-  next_register = 1;
-  paren_depth = 0;
-  beginning_context = 1;
-  op = -1;
-  /* we use Rend dummy to ensure that pending jumps are updated (due to
-     low priority of Rend) before exiting the loop. */
-  pos = 0;
-  while (op != Rend)
-    {
+	 goto out_of_memory;
+   }
+   pattern_offset = 0;
+   starts_base = 0;
+   num_jumps = 0;
+   current_level = 0;
+   SET_LEVEL_START;
+   num_open_registers = 0;
+   next_register = 1;
+   paren_depth = 0;
+   beginning_context = 1;
+   op = -1;
+   /* we use Rend dummy to ensure that pending jumps are updated (due to
+      low priority of Rend) before exiting the loop. */
+   pos = 0;
+   while (op != Rend)
+   {
       if (pos >= size)
-	op = Rend;
+	 op = Rend;
       else
-	{
-	  NEXTCHAR(ch);
-	  if (translate)
+      {
+	 NEXTCHAR(ch);
+	 if (translate)
 	    ch = translate[(unsigned char)ch];
-	  op = regexp_plain_ops[(unsigned char)ch];
-	  if (op == Rquote)
-	    {
-	      NEXTCHAR(ch);
-	      op = regexp_quoted_ops[(unsigned char)ch];
-	      if (op == Rnormal && regexp_ansi_sequences)
-		ANSI_TRANSLATE(ch);
-	    }
-	}
+	 op = regexp_plain_ops[(unsigned char)ch];
+	 if (op == Rquote)
+	 {
+	    NEXTCHAR(ch);
+	    op = regexp_quoted_ops[(unsigned char)ch];
+	    if (op == Rnormal && regexp_ansi_sequences)
+	       ANSI_TRANSLATE(ch);
+	 }
+      }
       level = regexp_precedences[op];
       /* printf("ch='%c' op=%d level=%d current_level=%d curlevstart=%d\n",
-	     ch, op, level, current_level, CURRENT_LEVEL_START); */
+	 ch, op, level, current_level, CURRENT_LEVEL_START); */
       if (level > current_level)
-	{
-	  for (current_level++; current_level < level; current_level++)
+      {
+	 for (current_level++; current_level < level; current_level++)
 	    SET_LEVEL_START;
-	  SET_LEVEL_START;
-	}
+	 SET_LEVEL_START;
+      }
       else
-	if (level < current_level)
-	  {
+	 if (level < current_level)
+	 {
 	    current_level = level;
 	    for (;num_jumps > 0 &&
-		 future_jumps[num_jumps-1] >= CURRENT_LEVEL_START;
+		    future_jumps[num_jumps-1] >= CURRENT_LEVEL_START;
 		 num_jumps--)
-	      PUT_ADDR(future_jumps[num_jumps-1], pattern_offset);
-	  }
+	       PUT_ADDR(future_jumps[num_jumps-1], pattern_offset);
+	 }
       switch (op)
-	{
-	case Rend:
-	  break;
-	case Rnormal:
-	normal_char:
-	  opcode = Cexact;
-	store_opcode_and_arg: /* opcode & ch must be set */
-	  SET_LEVEL_START;
-	  ALLOC(2);
-	  STORE(opcode);
-	  STORE(ch);
-	  break;
-	case Ranychar:
-	  opcode = Canychar;
-	store_opcode:
-	  SET_LEVEL_START;
-	  ALLOC(1);
-	  STORE(opcode);
-	  break;
-	case Rquote:
-	  abort();
-	  /*NOTREACHED*/
-	case Rbol:
-	  if (!beginning_context)
-	    if (regexp_context_indep_ops)
-	      goto op_error;
-	    else
-	      goto normal_char;
-	  opcode = Cbol;
-	  goto store_opcode;
-	case Reol:
-	  if (!((pos >= size) ||
-		((regexp_syntax & RE_NO_BK_VBAR) ?
-		 (regex[pos] == '\174') :
-		 (pos+1 < size && regex[pos] == '\134' &&
-		  regex[pos+1] == '\174')) ||
-		((regexp_syntax & RE_NO_BK_PARENS)?
-		 (regex[pos] == ')'):
-		 (pos+1 < size && regex[pos] == '\134' &&
-		  regex[pos+1] == ')'))))
-	    if (regexp_context_indep_ops)
-	      goto op_error;
-	    else
-	      goto normal_char;
-	  opcode = Ceol;
-	  goto store_opcode;
-	  /* NOTREACHED */
-	  break;
-	case Roptional:
-	  if (beginning_context)
-	    if (regexp_context_indep_ops)
-	      goto op_error;
-	    else
-	      goto normal_char;
-	  if (CURRENT_LEVEL_START == pattern_offset)
-	    break; /* ignore empty patterns for ? */
-	  ALLOC(3);
-	  INSERT_JUMP(CURRENT_LEVEL_START, Cfailure_jump,
-		      pattern_offset + 3);
-	  break;
-	case Rstar:
-	case Rplus:
-	  if (beginning_context)
-	    if (regexp_context_indep_ops)
-	      goto op_error;
-	    else
-	      goto normal_char;
-	  if (CURRENT_LEVEL_START == pattern_offset)
-	    break; /* ignore empty patterns for + and * */
-	  ALLOC(9);
-	  INSERT_JUMP(CURRENT_LEVEL_START, Cfailure_jump,
-		      pattern_offset + 6);
-	  INSERT_JUMP(pattern_offset, Cstar_jump, CURRENT_LEVEL_START);
-	  if (op == Rplus)  /* jump over initial failure_jump */
-	    INSERT_JUMP(CURRENT_LEVEL_START, Cdummy_failure_jump,
-			CURRENT_LEVEL_START + 6);
-	  break;
-	case Ror:
-	  ALLOC(6);
-	  INSERT_JUMP(CURRENT_LEVEL_START, Cfailure_jump,
-		      pattern_offset + 6);
-	  if (num_jumps >= MAX_NESTING)
-	    goto too_complex;
-	  STORE(Cjump);
-	  future_jumps[num_jumps++] = pattern_offset;
-	  STORE(0);
-	  STORE(0);
-	  SET_LEVEL_START;
-	  break;
-	case Ropenpar:
-	  SET_LEVEL_START;
-	  if (next_register < RE_NREGS)
+      {
+	 case Rend:
+	 {
+	    break;
+	 }
+	 case Rnormal:
+	 {
+	   normal_char:
+	    opcode = Cexact;
+	   store_opcode_and_arg: /* opcode & ch must be set */
+	    SET_LEVEL_START;
+	    ALLOC(2);
+	    STORE(opcode);
+	    STORE(ch);
+	    break;
+	 }
+	 case Ranychar:
+	 {
+	    opcode = Canychar;
+	   store_opcode:
+	    SET_LEVEL_START;
+	    ALLOC(1);
+	    STORE(opcode);
+	    break;
+	 }
+	 case Rquote:
+	 {
+	    abort();
+	    /*NOTREACHED*/
+	 }
+	 case Rbol:
+	 {
+	    if (!beginning_context)
+	       if (regexp_context_indep_ops)
+		  goto op_error;
+	       else
+		  goto normal_char;
+	    opcode = Cbol;
+	    goto store_opcode;
+	 }
+	 case Reol:
+	 {
+	    if (!((pos >= size) ||
+		  ((regexp_syntax & RE_NO_BK_VBAR) ?
+		   (regex[pos] == '\174') :
+		   (pos+1 < size && regex[pos] == '\134' &&
+		    regex[pos+1] == '\174')) ||
+		  ((regexp_syntax & RE_NO_BK_PARENS)?
+		   (regex[pos] == ')'):
+		   (pos+1 < size && regex[pos] == '\134' &&
+		    regex[pos+1] == ')'))))
+	       if (regexp_context_indep_ops)
+		  goto op_error;
+	       else
+		  goto normal_char;
+	    opcode = Ceol;
+	    goto store_opcode;
+	    /* NOTREACHED */
+	    break;
+	 }
+	 case Roptional:
+	 {
+	    if (beginning_context)
+	       if (regexp_context_indep_ops)
+		  goto op_error;
+	       else
+		  goto normal_char;
+	    if (CURRENT_LEVEL_START == pattern_offset)
+	       break; /* ignore empty patterns for ? */
+	    ALLOC(3);
+	    INSERT_JUMP(CURRENT_LEVEL_START, Cfailure_jump,
+			pattern_offset + 3);
+	    break;
+	 }
+	 case Rstar:
+	 case Rplus:
+	 {
+	    if (beginning_context)
+	       if (regexp_context_indep_ops)
+		  goto op_error;
+	       else
+		  goto normal_char;
+	    if (CURRENT_LEVEL_START == pattern_offset)
+	       break; /* ignore empty patterns for + and * */
+	    ALLOC(9);
+	    INSERT_JUMP(CURRENT_LEVEL_START, Cfailure_jump,
+			pattern_offset + 6);
+	    INSERT_JUMP(pattern_offset, Cstar_jump, CURRENT_LEVEL_START);
+	    if (op == Rplus)  /* jump over initial failure_jump */
+	       INSERT_JUMP(CURRENT_LEVEL_START, Cdummy_failure_jump,
+			   CURRENT_LEVEL_START + 6);
+	    break;
+	 }
+	 case Ror:
+	 {
+	    ALLOC(6);
+	    INSERT_JUMP(CURRENT_LEVEL_START, Cfailure_jump,
+			pattern_offset + 6);
+	    if (num_jumps >= MAX_NESTING)
+	       goto too_complex;
+	    STORE(Cjump);
+	    future_jumps[num_jumps++] = pattern_offset;
+	    STORE(0);
+	    STORE(0);
+	    SET_LEVEL_START;
+	    break;
+	 }
+	 case Ropenpar:
+	 {
+	    SET_LEVEL_START;
+	    if (next_register < RE_NREGS)
 	    {
-	      bufp->uses_registers = 1;
-	      ALLOC(2);
-	      STORE(Cstart_memory);
-	      STORE(next_register);
-	      open_registers[num_open_registers++] = next_register;
-	      next_register++;
+	       bufp->uses_registers = 1;
+	       ALLOC(2);
+	       STORE(Cstart_memory);
+	       STORE(next_register);
+	       open_registers[num_open_registers++] = next_register;
+	       next_register++;
 	    }
-	  paren_depth++;
-	  PUSH_LEVEL_STARTS;
-	  current_level = 0;
-	  SET_LEVEL_START;
-	  break;
-	case Rclosepar:
-	  if (paren_depth <= 0)
-	    goto parenthesis_error;
-	  POP_LEVEL_STARTS;
-	  current_level = regexp_precedences[Ropenpar];
-	  paren_depth--;
-	  if (paren_depth < num_open_registers)
+	    paren_depth++;
+	    PUSH_LEVEL_STARTS;
+	    current_level = 0;
+	    SET_LEVEL_START;
+	    break;
+	 }
+	 case Rclosepar:
+	 {
+	    if (paren_depth <= 0)
+	       goto parenthesis_error;
+	    POP_LEVEL_STARTS;
+	    current_level = regexp_precedences[Ropenpar];
+	    paren_depth--;
+	    if (paren_depth < num_open_registers)
 	    {
-	      bufp->uses_registers = 1;
-	      ALLOC(2);
-	      STORE(Cend_memory);
-	      num_open_registers--;
-	      STORE(open_registers[num_open_registers]);
+	       bufp->uses_registers = 1;
+	       ALLOC(2);
+	       STORE(Cend_memory);
+	       num_open_registers--;
+	       STORE(open_registers[num_open_registers]);
 	    }
-	  break;
-	case Rmemory:
-	  if (ch == '0')
-	    goto bad_match_register;
-	  assert(ch >= '0' && ch <= '9');
-	  bufp->uses_registers = 1;
-	  opcode = Cmatch_memory;
-	  ch -= '0';
-	  goto store_opcode_and_arg;
-	case Rextended_memory:
-	  NEXTCHAR(ch);
-	  if (ch < '0' || ch > '9')
-	    goto bad_match_register;
-	  NEXTCHAR(a);
-	  if (a < '0' || a > '9')
-	    goto bad_match_register;
-	  ch = 10 * (a - '0') + ch - '0';
-	  if (ch <= 0 || ch >= RE_NREGS)
-	    goto bad_match_register;
-	  bufp->uses_registers = 1;
-	  opcode = Cmatch_memory;
-	  goto store_opcode_and_arg;
-	case Ropenset:
-	  {
-	    int complement,prev,offset,range,firstchar;
+	    break;
+	 }
+	 case Rmemory:
+	 {
+	    if (ch == '0')
+	       goto bad_match_register;
+	    assert(ch >= '0' && ch <= '9');
+	    bufp->uses_registers = 1;
+	    opcode = Cmatch_memory;
+	    ch -= '0';
+	    goto store_opcode_and_arg;
+	 }
+	 case Rextended_memory:
+	 {
+	    NEXTCHAR(ch);
+	    if (ch < '0' || ch > '9')
+	       goto bad_match_register;
+	    NEXTCHAR(a);
+	    if (a < '0' || a > '9')
+	       goto bad_match_register;
+	    /* ch was read first and is the tens digit; a is the units
+	       digit.  The previous formula had the two swapped. */
+	    ch = 10 * (ch - '0') + (a - '0');
+	    if (ch <= 0 || ch >= RE_NREGS)
+	       goto bad_match_register;
+	    bufp->uses_registers = 1;
+	    opcode = Cmatch_memory;
+	    goto store_opcode_and_arg;
+	 }
+	 case Ropenset:
+	 {
+	    int complement;
+	    int prev;
+	    int offset;
+	    int range;
+	    int firstchar;
 	    
 	    SET_LEVEL_START;
 	    ALLOC(1+256/8);
 	    STORE(Cset);
 	    offset = pattern_offset;
 	    for (a = 0; a < 256/8; a++)
-	      STORE(0);
+	       STORE(0);
 	    NEXTCHAR(ch);
 	    if (translate)
-	      ch = translate[(unsigned char)ch];
+	       ch = translate[(unsigned char)ch];
 	    if (ch == '\136')
-	      {
-		complement = 1;
-		NEXTCHAR(ch);
-		if (translate)
+	    {
+	       complement = 1;
+	       NEXTCHAR(ch);
+	       if (translate)
 		  ch = translate[(unsigned char)ch];
-	      }
+	    }
 	    else
-	      complement = 0;
+	       complement = 0;
 	    prev = -1;
 	    range = 0;
 	    firstchar = 1;
 	    while (ch != '\135' || firstchar)
-	      {
-		firstchar = 0;
-		if (regexp_ansi_sequences && ch == '\134')
+	    {
+	       firstchar = 0;
+	       if (regexp_ansi_sequences && ch == '\134')
+	       {
+		  NEXTCHAR(ch);
+		  ANSI_TRANSLATE(ch);
+	       }
+	       if (range)
+	       {
+		  for (a = prev; a <= (int)ch; a++)
+		     SETBIT(pattern, offset, a);
+		  prev = -1;
+		  range = 0;
+	       }
+	       else
+		  if (prev != -1 && ch == '-')
+		     range = 1;
+		  else
 		  {
-		    NEXTCHAR(ch);
-		    ANSI_TRANSLATE(ch);
+		     SETBIT(pattern, offset, ch);
+		     prev = ch;
 		  }
-		if (range)
-		  {
-		    for (a = prev; a <= (int)ch; a++)
-		      SETBIT(pattern, offset, a);
-		    prev = -1;
-		    range = 0;
-		  }
-		else
-		  if (prev != -1 && ch == '-')
-		    range = 1;
-		  else
-		    {
-		      SETBIT(pattern, offset, ch);
-		      prev = ch;
-		    }
-		NEXTCHAR(ch);
-		if (translate)
+	       NEXTCHAR(ch);
+	       if (translate)
 		  ch = translate[(unsigned char)ch];
-	      }
+	    }
 	    if (range)
-	      SETBIT(pattern, offset, '-');
+	       SETBIT(pattern, offset, '-');
 	    if (complement)
-	      {
-		for (a = 0; a < 256/8; a++)
+	    {
+	       for (a = 0; a < 256/8; a++)
 		  pattern[offset+a] ^= 0xff;
-	      }
+	    }
 	    break;
-	  }
-	case Rbegbuf:
-	  opcode = Cbegbuf;
-	  goto store_opcode;
-	case Rendbuf:
-	  opcode = Cendbuf;
-	  goto store_opcode;
-	case Rwordchar:
-	  opcode = Csyntaxspec;
-	  ch = Sword;
-	  goto store_opcode_and_arg;
-	case Rnotwordchar:
-	  opcode = Cnotsyntaxspec;
-	  ch = Sword;
-	  goto store_opcode_and_arg;
-	case Rwordbeg:
-	  opcode = Cwordbeg;
-	  goto store_opcode;
-	case Rwordend:
-	  opcode = Cwordend;
-	  goto store_opcode;
-	case Rwordbound:
-	  opcode = Cwordbound;
-	  goto store_opcode;
-	case Rnotwordbound:
-	  opcode = Cnotwordbound;
-	  goto store_opcode;
-#ifdef emacs
-	case Remacs_at_dot:
-	  opcode = Cemacs_at_dot;
-	  goto store_opcode;
-	case Remacs_syntaxspec:
-	  NEXTCHAR(ch);
-	  if (translate)
-	    ch = translate[(unsigned char)ch];
-	  opcode = Csyntaxspec;
-	  ch = syntax_spec_code[(unsigned char)ch];
-	  goto store_opcode_and_arg;
-	case Remacs_notsyntaxspec:
-	  NEXTCHAR(ch);
-	  if (translate)
-	    ch = translate[(unsigned char)ch];
-	  opcode = Cnotsyntaxspec;
-	  ch = syntax_spec_code[(unsigned char)ch];
-	  goto store_opcode_and_arg;
-#endif /* emacs */
-	default:
-	  abort();
-	}
+	 }
+	 case Rbegbuf:
+	 {
+	    opcode = Cbegbuf;
+	    goto store_opcode;
+	 }
+	 case Rendbuf:
+	 {
+	    opcode = Cendbuf;
+	    goto store_opcode;
+	 }
+	 case Rwordchar:
+	 {
+	    opcode = Csyntaxspec;
+	    ch = Sword;
+	    goto store_opcode_and_arg;
+	 }
+	 case Rnotwordchar:
+	 {
+	    opcode = Cnotsyntaxspec;
+	    ch = Sword;
+	    goto store_opcode_and_arg;
+	 }
+	 case Rwordbeg:
+	 {
+	    opcode = Cwordbeg;
+	    goto store_opcode;
+	 }
+	 case Rwordend:
+	 {
+	    opcode = Cwordend;
+	    goto store_opcode;
+	 }
+	 case Rwordbound:
+	 {
+	    opcode = Cwordbound;
+	    goto store_opcode;
+	 }
+	 case Rnotwordbound:
+	 {
+	    opcode = Cnotwordbound;
+	    goto store_opcode;
+	 }
+	 default:
+	 {
+	    abort();
+	 }
+      }
       beginning_context = (op == Ropenpar || op == Ror);
-    }
-  if (starts_base != 0)
-    goto parenthesis_error;
-  assert(num_jumps == 0);
-  ALLOC(1);
-  STORE(Cend);
-  SET_FIELDS;
-  return NULL;
+   }
+   if (starts_base != 0)
+      goto parenthesis_error;
+   assert(num_jumps == 0);
+   ALLOC(1);
+   STORE(Cend);
+   SET_FIELDS;
+   if(!re_optimize(bufp))
+      return "Optimization error";
+   return NULL;
 
- op_error:
-  SET_FIELDS;
-  return "Badly placed special character";
+  op_error:
+   SET_FIELDS;
+   return "Badly placed special character";
 
- bad_match_register:
-  SET_FIELDS;
-  return "Bad match register number";
+  bad_match_register:
+   SET_FIELDS;
+   return "Bad match register number";
+   
+  hex_error:
+   SET_FIELDS;
+   return "Bad hexadecimal number";
+   
+  parenthesis_error:
+   SET_FIELDS;
+   return "Badly placed parenthesis";
+   
+  out_of_memory:
+   SET_FIELDS;
+   return "Out of memory";
+   
+  ends_prematurely:
+   SET_FIELDS;
+   return "Regular expression ends prematurely";
 
- hex_error:
-  SET_FIELDS;
-  return "Bad hexadecimal number";
+  too_complex:
+   SET_FIELDS;
+   return "Regular expression too complex";
+}
 
- parenthesis_error:
-  SET_FIELDS;
-  return "Badly placed parenthesis";
-
- out_of_memory:
-  SET_FIELDS;
-  return "Out of memory";
-
- ends_prematurely:
-  SET_FIELDS;
-  return "Regular expression ends prematurely";
-
- too_complex:
-  SET_FIELDS;
-  return "Regular expression too complex";
-}
 #undef CHARAT
 #undef NEXTCHAR
 #undef GETHEX
 #undef SETBIT
 #undef SET_FIELDS
 
-static void re_compile_fastmap_aux
-	Py_PROTO((char *, int, char *, char *, char *));
-static void re_compile_fastmap_aux(code, pos, visited, can_be_null, fastmap)
-char *code, *visited, *can_be_null, *fastmap;
-int pos;
+/* PREFETCH: jump to the enclosing function's `fail' label when the
+ * subject pointer `text' has reached `textend'.  Both variables and
+ * the label must exist where the macro is expanded.  Wrapped in
+ * do { } while (0) so that an `else' following a use site cannot bind
+ * to the macro's own `if'. */
+#define PREFETCH do { if (text == textend) goto fail; } while (0)
+
+/* NEXTCHAR(var): fetch the next subject byte into var and advance
+ * `text'; PREFETCH jumps to `fail' first if the text is exhausted.
+ * When `translate' is non-NULL the byte is mapped through it.
+ *
+ * `translate' is a plain char table, so the looked-up value is cast
+ * to unsigned char: otherwise, where char is signed, bytes >= 0x80
+ * would come back negative and later be used as an index or compared
+ * against unsigned values.  The do { } while (0) wrapper makes the
+ * macro a single statement. */
+#define NEXTCHAR(var) \
+do { \
+   PREFETCH; \
+   var = (unsigned char)*text++; \
+   if (translate) \
+      var = (unsigned char)translate[var]; \
+} while (0)
+
+int re_match(regexp_t bufp,
+	     char *string,
+	     int size,
+	     int pos,
+	     regexp_registers_t old_regs)
 {
-  int a, b, syntaxcode;
+  char *code;
+  char *translate;
+  char *text;
+  char *textstart;
+  char *textend;
+  int a;
+  int b;
+  int ch;
+  int reg;
+  int match_end;
+  char *regstart;
+  char *regend;
+  int regsize;
+  match_state state;
+  
+  assert(pos >= 0 && size >= 0);
+  assert(pos <= size);
+  
+  text = string + pos;
+  textstart = string;
+  textend = string + size;
+  
+  code = bufp->buffer;
+  
+  translate = bufp->translate;
+/*   translated = NULL; */
+/*   if (bufp->translate) */
+/*   { */
+/*      char *t1; */
+/*      char *t2; */
+     
+/*      translated = malloc(size); */
+/*      if (translated == NULL) */
+/* 	goto error; */
 
-  if (visited[pos])
-    return;  /* we have already been here */
-  visited[pos] = 1;
-  for (;;)
-    switch (code[pos++])
-      {
-      case Cend:
-	*can_be_null = 1;
-	return;
-      case Cbol:
-      case Cbegbuf:
-      case Cendbuf:
-      case Cwordbeg:
-      case Cwordend:
-      case Cwordbound:
-      case Cnotwordbound:
-#ifdef emacs
-      case Cemacs_at_dot:
-#endif /* emacs */
-	break;
-      case Csyntaxspec:
-	syntaxcode = code[pos++];
-	for (a = 0; a < 256; a++)
-	  if (SYNTAX(a) == syntaxcode)
-	    fastmap[a] = 1;
-	return;
-      case Cnotsyntaxspec:
-	syntaxcode = code[pos++];
-	for (a = 0; a < 256; a++)
-	  if (SYNTAX(a) != syntaxcode)
-	    fastmap[a] = 1;
-	return;
-      case Ceol:
-	fastmap['\n'] = 1;
-	if (*can_be_null == 0)
-	  *can_be_null = 2;  /* can match null, but only at end of buffer*/
-	return;
-      case Cset:
-	for (a = 0; a < 256/8; a++)
-	  if (code[pos + a] != 0)
-	    for (b = 0; b < 8; b++)
-	      if (code[pos + a] & (1 << b))
-		fastmap[(a << 3) + b] = 1;
-	pos += 256/8;
-	return;
-      case Cexact:
-	fastmap[(unsigned char)code[pos]] = 1;
-	return;
-      case Canychar:
-	for (a = 0; a < 256; a++)
-	  if (a != '\n')
-	    fastmap[a] = 1;
-	return;
-      case Cstart_memory:
-      case Cend_memory:
-	pos++;
-	break;
-      case Cmatch_memory:
-	/* should this ever happen for sensible patterns??? */
-	*can_be_null = 1;
-	return;
-      case Cjump:
-      case Cdummy_failure_jump:
-      case Cupdate_failure_jump:
-      case Cstar_jump:
-	a = (unsigned char)code[pos++];
-	a |= (unsigned char)code[pos++] << 8;
-	pos += (int)(short)a;
-	if (visited[pos])
-	  {
-	    /* argh... the regexp contains empty loops.  This is not
-	       good, as this may cause a failure stack overflow when
-	       matching.  Oh well. */
-	    /* this path leads nowhere; pursue other paths. */
-	    return;
-	  }
-	visited[pos] = 1;
-	break;
-      case Cfailure_jump:
-	a = (unsigned char)code[pos++];
-	a |= (unsigned char)code[pos++] << 8;
-	a = pos + (int)(short)a;
-	re_compile_fastmap_aux(code, a, visited, can_be_null, fastmap);
-	break;
-      default:
-	abort();  /* probably some opcode is missing from this switch */
+/*      t1 = string; */
+/*      t2 = translated; */
+/*      while(t1 < textend) */
+/* 	*t2++ = bufp->translate[*t1++]; */
+     
+/*      text = translated + pos; */
+/*      textstart = translated; */
+/*      textend = translated + size; */
+/*   } */
+  
+  NEW_STATE(state);
+  
+  continue_matching:
+  switch (*code++)
+  {
+     case Cend:
+     {
+	match_end = text - textstart;
+	if (old_regs)
+	{
+	   old_regs->start[0] = pos;
+	   old_regs->end[0] = match_end;
+	   if (!bufp->uses_registers)
+	   {
+	      for (a = 1; a < RE_NREGS; a++)
+	      {
+		 old_regs->start[a] = -1;
+		 old_regs->end[a] = -1;
+	      }
+	   }
+	   else
+	   {
+	      for (a = 1; a < RE_NREGS; a++)
+	      {
+		 if ((GET_REG_START(state, a) == NULL) ||
+		     (GET_REG_END(state, a) == NULL))
+		 {
+		    old_regs->start[a] = -1;
+		    old_regs->end[a] = -1;
+		    continue;
+		 }
+		 old_regs->start[a] = GET_REG_START(state, a) - textstart;
+		 old_regs->end[a] = GET_REG_END(state, a) - textstart;
+	      }
+	   }
+	}
+/* 	if(translated) */
+/* 	   free(translated); */
+	FREE_STATE(state);
+	return match_end - pos;
+     }
+     case Cbol:
+     {
+	if (text == textstart || text[-1] == '\n')
+	   goto continue_matching;
+	goto fail;
+     }
+     case Ceol:
+     {
+	if (text == textend || *text == '\n')
+	   goto continue_matching;
+	goto fail;
+     }
+     case Cset:
+     {
+	NEXTCHAR(ch);
+	if (code[ch/8] & (1<<(ch & 7)))
+	{
+	   code += 256/8;
+	   goto continue_matching;
+	}
+	goto fail;
+     }
+     case Cexact:
+     {
+	NEXTCHAR(ch);
+	if (ch != (unsigned char)*code++)
+	   goto fail;
+/* 	{ */
+/* 	   char *p1 = code - 2; */
+/* 	   ch = *(code - 1); */
+/* 	   POP_FAILURE(state, code, text, goto done_matching, goto error); */
+/* 	   while ((code == p1) && (*text != ch)) */
+/* 	      POP_FAILURE(state, code, text, goto done_matching, goto error); */
+/* 	   if ((code == p1) && (*text == ch)) */
+/* 	   { */
+/* 	      code += 2; */
+/* 	      text++; */
+/* 	   } */
+/* 	} */
+	goto continue_matching;
+     }
+     case Canychar:
+     {
+	NEXTCHAR(ch);
+	if (ch == '\n')
+	   goto fail;