Markus Mottl avatar Markus Mottl committed 5bd8216

Updated to pcre-5.0

Comments (0)

Files changed (5)

+2006-06-11:  Updated to pcre-5.0!
+
+             New representation for callbacks: they now take only one
+             argument (a record of the callback data).
+
+             Added partial matching and auto callouts.
+
 2006-01-16:  Updated OCamlMakefile.
 
              Removed a superfluous binding.
 name="pcre"
-version="5.08.1"
+version="5.9.0"
 description="Perl Compatibility Regular Expressions"
 requires=""
 archive(byte)="pcre.cma"
 (*
    PCRE-OCAML - Perl Compatibility Regular Expressions for OCaml
 
-   Copyright (C) 1999-2005  Markus Mottl
+   Copyright (C) 1999-2006  Markus Mottl
    email: markus.mottl@gmail.com
    WWW:   http://www.ocaml.info
 
 
 (* Public exceptions and their registration with the C runtime *)
 
+exception Partial
+exception BadPartial
 exception BadPattern of string * int
 exception BadUTF8
 exception BadUTF8Offset
 (* Registers exceptions with the C runtime and caches polymorphic variants *)
 let _ =
   Callback.register_exception "Pcre.Not_found" Not_found;
+  Callback.register_exception "Pcre.Partial" Partial;
+  Callback.register_exception "Pcre.BadPartial" BadPartial;
   Callback.register_exception "Pcre.BadPattern" (BadPattern ("", 0));
   Callback.register_exception "Pcre.BadUTF8" BadUTF8;
   Callback.register_exception "Pcre.BadUTF8Offset" BadUTF8Offset;
 (* Compilation flags *)
 
 type cflag =
-  [ `CASELESS
+  [
+  | `CASELESS
   | `MULTILINE
   | `DOTALL
   | `EXTENDED
   | `UNGREEDY
   | `UTF8
   | `NO_UTF8_CHECK
-  | `NO_AUTO_CAPTURE ]
+  | `NO_AUTO_CAPTURE
+  | `AUTO_CALLOUT
+  ]
 
 let int_of_cflag = function
   | `CASELESS -> 0x0001
   | `UTF8 -> 0x0800
   | `NO_AUTO_CAPTURE -> 0x1000
   | `NO_UTF8_CHECK -> 0x2000
+  | `AUTO_CALLOUT -> 0x4000
 
 let coll_icflag icflag flag = int_of_cflag flag lor icflag
 let cflags flags = List.fold_left coll_icflag 0 flags
 (* Runtime flags *)
 
 type rflag =
-  [ `ANCHORED
+  [
+  | `ANCHORED
   | `NOTBOL
   | `NOTEOL
-  | `NOTEMPTY ]
+  | `NOTEMPTY
+  | `PARTIAL
+  ]
 
 let int_of_rflag = function
   | `ANCHORED -> 0x0010
   | `NOTBOL   -> 0x0080
   | `NOTEOL   -> 0x0100
   | `NOTEMPTY -> 0x0400
+  | `PARTIAL -> 0x8000
 
 let coll_irflag irflag flag = int_of_rflag flag lor irflag
 let rflags flags = List.fold_left coll_irflag 0 flags
   | 0x0080 -> `NOTBOL
   | 0x0100 -> `NOTEOL
   | 0x0400 -> `NOTEMPTY
+  | 0x8000 -> `PARTIAL
   | _ -> failwith "Pcre.rflag_list: unknown runtime flag"
 
-let all_rflags = [0x0010; 0x0080; 0x0100; 0x0400]
+let all_rflags = [0x0010; 0x0080; 0x0100; 0x0400; 0x8000]
 
 let rflag_list irflags =
   let coll flag_list flag =
 
 type substrings = string * int array
 
+type callout_data =
+  {
+    callout_number : int;
+    substrings : substrings;
+    start_match : int;
+    current_position : int;
+    capture_top : int;
+    capture_last : int;
+    pattern_position : int;
+    next_item_length : int;
+  }
+
+type callout = callout_data -> unit
+
 let get_subject (subj, _) = subj
 
 let num_of_subs (_, ovector) = Array.length ovector / 3
 let get_named_substring_ofs rex name substrings =
   get_substring_ofs substrings (get_stringnumber rex name)
 
-type callout = substrings -> int -> int -> int -> int -> int -> unit
-
 external unsafe_pcre_exec :
   irflag -> regexp -> int -> string ->
   int -> int array -> callout option
 (*
    PCRE-OCAML - Perl Compatibility Regular Expressions for OCaml
 
-   Copyright (C) 1999-2005  Markus Mottl
+   Copyright (C) 1999-2006  Markus Mottl
    email: markus.mottl@gmail.com
    WWW:   http://www.ocaml.info
 
 
 (** {6 Exceptions} *)
 
+(** [Partial] gets raised when a string matched the pattern partially. *)
+exception Partial
+
+(** [BadPartial] gets raised when a pattern contains items that cannot
+    be used together with partial matching. *)
+exception BadPartial
+
 (** [BadPattern (msg, pos)] gets raised when the regular expression is
     malformed. The reason is in [msg], the position of the error in the
     pattern in [pos]. *)
                          efficiency reasons. WARNING: invalid UTF8
                          strings may cause a crash then! *)
   | `NO_AUTO_CAPTURE (** Disables the use of numbered capturing parentheses *)
+  | `AUTO_CALLOUT    (** Automatically inserts callouts with id 255
+                         before each pattern item *)
   ]
 
 val cflags : cflag list -> icflag
 
 (** Runtime flags *)
 type rflag =
-  [ `ANCHORED   (** Treats pattern as if it were anchored *)
-  | `NOTBOL     (** Beginning of string is not treated as beginning of line *)
-  | `NOTEOL     (** End of string is not treated as end of line *)
-  | `NOTEMPTY   (** Empty strings are not considered to be a valid match *)
+  [ `ANCHORED  (** Treats pattern as if it were anchored *)
+  | `NOTBOL    (** Beginning of string is not treated as beginning of line *)
+  | `NOTEOL    (** End of string is not treated as end of line *)
+  | `NOTEMPTY  (** Empty strings are not considered to be a valid match *)
+  | `PARTIAL   (** Turns on partial matching *)
   ]
 
 val rflags : rflag list -> irflag
 
 (** {6 Callouts} *)
 
+type callout_data =
+  {
+    callout_number : int; (** Callout number *)
+    substrings : substrings; (** Substrings matched so far *)
+    start_match : int;  (** Subject start offset of current match attempt *)
+    current_position : int;  (** Subject offset of current match pointer *)
+    capture_top : int;  (** Number of the highest captured substring so far *)
+    capture_last : int;  (** Number of the most recently captured substring *)
+    pattern_position : int;  (** Offset of next match item in pattern string *)
+    next_item_length : int;  (** Length of next match item in pattern string *)
+  }
+
 (** Type of callout functions *)
-type callout =
-  substrings -> (** Substrings matched so far *)
-  int ->        (** Offset at which current match attempt started in subject *)
-  int ->        (** Offset within the subject of the current match pointer *)
-  int ->        (** Number of the highest captured substring so far *)
-  int ->        (** Number of the most recently captured substring *)
-  int ->        (** Callout number *)
-  unit
-(** Callout functions have the form:
-
-    [callout
-      substrings match_start current_position
-      capture_top capture_last callout_number]
-
-    They are indicated in patterns as "(?Cn)" where "n" is a
-    [callout_number] ranging from 0 to 255. Substrings captured so far
-    are accesible as usual via [substrings]. You will have to consider
+type callout = callout_data -> unit
+(** Callouts are referred to in patterns as "(?Cn)" where "n" is a
+    [callout_number] ranging from 0 to 255.  Substrings captured so far
+    are accessible as usual via [substrings].  You will have to consider
     [capture_top] and [capture_last] to know about the current state of
     valid substrings.
 
     By raising exception [Backtrack] within a callout function, the user
     can force the pattern matching engine to backtrack to other possible
-    solutions. Other exceptions will terminate matching immediately and
-    return control to OCaml. *)
+    solutions.  Other exceptions will terminate matching immediately
+    and return control to OCaml.
+*)
 
 
 (** {6 Matching of patterns and subpattern extraction} *)
 /*
    PCRE-OCAML - Perl Compatibility Regular Expressions for OCaml
 
-   Copyright (C) 1999-2005  Markus Mottl
+   Copyright (C) 1999-2006  Markus Mottl
    email: markus.mottl@gmail.com
    WWW:   http://www.ocaml.info
 
 
 /* Contents of callout data */
 struct cod {
-  value v_substrings;  /* Substrings matched so far */
-  value v_cof;         /* Callout function */
-  value v_exn;         /* Possible exception raised by callout function */
+  value *v_substrings_p;  /* Pointer to substrings matched so far */
+  value *v_cof_p;         /* Pointer to callout function */
+  value v_exn;            /* Possible exception raised by callout function */
 };
 
 /* Cache for exceptions */
 static value *pcre_exc_Not_found     = NULL;  /* Exception [Not_found] */
+static value *pcre_exc_Partial       = NULL;  /* Exception [Partial] */
+static value *pcre_exc_BadPartial    = NULL;  /* Exception [BadPartial] */
 static value *pcre_exc_BadPattern    = NULL;  /* Exception [BadPattern] */
 static value *pcre_exc_BadUTF8       = NULL;  /* Exception [BadUTF8] */
 static value *pcre_exc_BadUTF8Offset = NULL;  /* Exception [BadUTF8Offset] */
   if (cod != NULL) {
     /* Callout is available */
     value v_res;
-    const value v_substrings = cod->v_substrings;
+
+    /* Allocate the OCaml [callout_data] record.  It has 8 fields
+       (callout_number, substrings, start_match, current_position,
+       capture_top, capture_last, pattern_position, next_item_length),
+       which are stored into Field 0..7 below, so the block must be
+       8 words: a smaller size would make the last stores write past
+       the end of the freshly allocated block. */
+    value v_callout_data = caml_alloc_small(8, 0);
+
+    const value v_substrings = *cod->v_substrings_p;
 
     const int capture_top = cb->capture_top;
     int subgroups2 = capture_top << 1;
     const int *ovec_src = cb->offset_vector + subgroups2_1;
     long int *ovec_dst = &Field(Field(v_substrings, 1), 0) + subgroups2_1;
 
-    value params[6];
-    value *pptr = &params[0];
-
     /* Copy preliminary substring information */
     while (subgroups2--) {
       *ovec_dst = Val_int(*ovec_src);
       --ovec_src; --ovec_dst;
     }
 
-    /* Set up parameter array */
-    *pptr = v_substrings; ++pptr;
-    *pptr = Val_int(cb->start_match); ++pptr;
-    *pptr = Val_int(cb->current_position); ++pptr;
-    *pptr = Val_int(capture_top); ++pptr;
-    *pptr = Val_int(cb->capture_last); ++pptr;
-    *pptr = Val_int(cb->callout_number);
+    Field(v_callout_data, 0) = Val_int(cb->callout_number);
+    Field(v_callout_data, 1) = v_substrings;
+    Field(v_callout_data, 2) = Val_int(cb->start_match);
+    Field(v_callout_data, 3) = Val_int(cb->current_position);
+    Field(v_callout_data, 4) = Val_int(capture_top);
+    Field(v_callout_data, 5) = Val_int(cb->capture_last);
+    Field(v_callout_data, 6) = Val_int(cb->pattern_position);
+    Field(v_callout_data, 7) = Val_int(cb->next_item_length);
 
     /* Perform callout */
-    v_res = callbackN_exn(cod->v_cof, 6, params);
+    v_res = callback_exn(*cod->v_cof_p, v_callout_data);
 
     if (Is_exception_result(v_res)) {
       /* Callout raised an exception */
 CAMLprim value pcre_ocaml_init(value unit)
 {
   pcre_exc_Not_found     = caml_named_value("Pcre.Not_found");
+  pcre_exc_Partial       = caml_named_value("Pcre.Partial");
+  pcre_exc_BadPartial    = caml_named_value("Pcre.BadPartial");
   pcre_exc_BadPattern    = caml_named_value("Pcre.BadPattern");
   pcre_exc_BadUTF8       = caml_named_value("Pcre.BadUTF8");
   pcre_exc_InternalError = caml_named_value("Pcre.InternalError");
       if (ret < 0) {
         switch(ret) {
           case PCRE_ERROR_NOMATCH : raise_constant(*pcre_exc_Not_found);
+          case PCRE_ERROR_PARTIAL : raise_constant(*pcre_exc_Partial);
           case PCRE_ERROR_MATCHLIMIT : raise_constant(*pcre_exc_MatchLimit);
+          case PCRE_ERROR_BADPARTIAL : raise_constant(*pcre_exc_BadPartial);
           case PCRE_ERROR_BADUTF8 : raise_constant(*pcre_exc_BadUTF8);
           case PCRE_ERROR_BADUTF8_OFFSET :
             raise_constant(*pcre_exc_BadUTF8Offset);
       char *subj = malloc(sizeof(char) * len);
       int *ovec = malloc(sizeof(int) * subgroups3);
       int ret;
-      struct cod cod = { (value) NULL, (value) NULL, (value) NULL };
+      struct cod cod = { (value *) NULL, (value *) NULL, (value) NULL };
       struct pcre_extra new_extra = { PCRE_EXTRA_CALLOUT_DATA, NULL, 0, NULL };
 
       memcpy(subj, ocaml_subj, len);
 
-      Begin_roots2(v_rex, v_cof);
+      Begin_roots3(v_rex, v_cof, v_substrings);
         Begin_roots2(v_subj, v_ovec);
           v_substrings = caml_alloc_small(2, 0);
         End_roots();
         Field(v_substrings, 0) = v_subj;
         Field(v_substrings, 1) = v_ovec;
 
-        cod.v_substrings = v_substrings;
-        cod.v_cof = v_cof;
+        cod.v_substrings_p = &v_substrings;
+        cod.v_cof_p = &v_cof;
         new_extra.callout_data = &cod;
 
         if (extra == NULL) {
         free(ovec);
         switch(ret) {
           case PCRE_ERROR_NOMATCH : raise_constant(*pcre_exc_Not_found);
+          case PCRE_ERROR_PARTIAL : raise_constant(*pcre_exc_Partial);
           case PCRE_ERROR_MATCHLIMIT : raise_constant(*pcre_exc_MatchLimit);
+          case PCRE_ERROR_BADPARTIAL : raise_constant(*pcre_exc_BadPartial);
           case PCRE_ERROR_BADUTF8 : raise_constant(*pcre_exc_BadUTF8);
           case PCRE_ERROR_BADUTF8_OFFSET :
             raise_constant(*pcre_exc_BadUTF8Offset);
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.