Commits

camlspotter committed d92a872

escape char fix

  • Participants
  • Parent commits 935c960

Comments (0)

Files changed (10)

+exception Error of int * int * string
+
 type ctype =
     | Cd (* [d], [i], [n], [l], [L], or [N]: convert an integer
 	    argument to signed decimal. *)
 
 val compile_conversion : conversion -> [`String of string | `Star] list
 
-val from_string : char list -> string -> t
+val from_string : char list -> string -> t * int * string 
+(** Tokens and stop position and remains. 
+    Lexing stops when it finds a non-escaped special char 
+*)
 
 val from_string_to_classic :
   char list
   -> t 
   * (int list * [`Applied of (inlined_arg * int) | `Var of int] list) 
   * string
+  * int (** end pos *)
+  * string  (** remains *)
+
 		    inlined_arg = inlined_arg }) :: st) lexbuf
     }
 	
-  | ([^ '%' '$' '\\']+ as s) { 
-      format specials (String s :: st) lexbuf 
+  | ([^ '%' '$' '\\'] as s) { 
+      if List.mem s specials then
+        (* Special char. We must stop here *)
+        List.rev st
+      else 
+        format specials (String (String.make 1 s) :: st) lexbuf 
     }
 
   | "\\" (['\\' 'n' 't' 'r' 'b']  as char) {
 
 {
 
-let from_string specials s = format specials [] (Lexing.from_string s)
+let from_string specials s = 
+  let lexbuf = Lexing.from_string s in
+  let tokens = format specials [] lexbuf in
+  let endpos = lexeme_end lexbuf in
+  tokens,
+  endpos,
+  String.sub s endpos (String.length s - endpos)
 ;;
 
 
 
 (* CR jfuruse: bad name *)
 let from_string_to_classic specials s =
-  let t = from_string specials s in
+  let t, end_pos, rems = from_string specials s in
   let parameters, t' = parameters t in
   let application = parameters_to_application parameters in
-  t, application, to_string t'
+  t, application, to_string t', end_pos, rems
 ;;
 }
     named_callouts : (string * int) list; (* caml extension *)
   }
 
-val from_string : string -> token list
-  (* parse "regexp" *)
+val from_string : string -> string * token list * string
+(** Parse "regexp". Returns regexp in string and tokens, and its flags *)
 
-val replace_from_string : string -> token list * string * string
+val replace_from_string : string -> string * token list * string
   (* parse "regexp/replace".
     return (token * regexp * replace) 
   *)
 	let res = tokens lexbuf in
 	re :: res
       }
-  | ['^' '$' '|'] 
+  | ['^' '$' '|']  (* CR jfuruse: $var is not supported *)
       {
         let re = MetaChar (char_lexeme lexbuf) in
 	let res = tokens lexbuf in
 
 let from_string s =
   match from_string_gen s with
-  | regexp, _, "" -> regexp
-  | _, endpos, s -> 
+  | regexp, _, "" -> s, regexp, ""
+  | regexp, endpos, rem when rem.[0] = '/' -> (* flags *)
+      String.sub s 0 endpos, regexp, String.sub rem 1 (String.length rem - 1)
+  | _, endpos, rem -> 
       raise (Error (endpos, endpos + 1,
-		   Printf.sprintf "unknown regexp character %C" s.[0]))
+		    Printf.sprintf "unknown regexp character %C" rem.[0]))
 
 let replace_from_string s =
   match from_string_gen s with
 	raise (Error (endpos, endpos + 1, 
 		     Printf.sprintf "unknown regexp character %C" rem.[0]))
       else 
-	( regexp, 
-	  String.sub s 0 endpos,
+	( String.sub s 0 endpos,
+	  regexp, 
 	  String.sub rem 1 (String.length rem - 1) )
 
 type typ = {
   module Top = Pa_top.Register(Syntax)
 
   let parse specials _loc _loc_var_opt s =
-    let t = Lexformat.from_string specials s in
-    let e, inlined_f = Compile.compile _loc t in
-    inlined_f (Top.may_put_in_top e)
+    match Lexformat.from_string specials s with
+    | _, pos, rem when rem <> "" -> 
+        raise (Lexformat.Error (pos, pos + 1,
+		                Printf.sprintf "Unescaped special character %C found" rem.[0]))
+    | t, _, _ -> 
+        let e, inlined_f = Compile.compile _loc t in
+        inlined_f (Top.may_put_in_top e)
   ;;
 end
 
 
   let parse _loc _loc_var_opt s =
     match Pa_format.parse ['`'] _loc _loc_var_opt s with
-    | `Const s -> <:expr<command $s$>>
-    | `Fun (_abss, f) -> 
+    | (`Const _ | `Fun _), pos, rem when rem <> "" -> 
+        raise (Lexformat.Error (pos, pos + 1,
+		                Printf.sprintf "Unescaped special character %C found" rem.[0]))
+    | `Const s, _, _ -> <:expr<command $s$>>
+    | `Fun (_abss, f), _, _ -> 
 	f <:expr<
 	    Printf.ksprintf command
 	  >>
   (* open Printf *)
   open Lexformat
 
-  let parse  specials _loc _loc_var_opt s =
-    let t, (abss, apps), fmt = Lexformat.from_string_to_classic specials s in
+  let parse specials _loc _loc_var_opt s =
+    let t, (abss, apps), fmt, end_pos, rems = Lexformat.from_string_to_classic specials s in
     let string_constant =
       List.for_all (function 
 	| String _ | Escaped _ -> true
 	| Conv _ -> false) t
     in
-    if string_constant then `Const <:expr<$str:fmt$>>
-    else 
-      `Fun (abss, fun fnct ->
-	let base = <:expr<$fnct$ $str:fmt$>> in
-	let id n = Printf.sprintf "id%d" n in
-	let rec put_apps e = function
-	  | [] -> e
-	  | `Var n ::xs -> 
-	      let id = id n in
-	      put_apps <:expr<$exp:e$ $lid:id$>> xs
-	  | `Applied ((Arg_expr str | Arg_var str), _pos) :: xs ->
-	      put_apps 
-	      <:expr<$exp:e$ $exp:AntiquotSyntax.parse_expr _loc str$>> 
-		xs
-	  | `Applied (Arg_rex_ref var, _pos) :: xs ->
-              let meth = match var with
-                | '0' .. '9' -> Printf.sprintf "_%c" var
-                | '`' -> "_left"
-                | '\'' -> "_right"
-                | '&' -> "_0"
-                | '+' -> "_last"
-                | _ -> assert false
-              in
-	      put_apps 
-	      <:expr<$exp:e$ __rex_group#$meth$ >> 
-		xs
-	in
-	let rec put_abss e = function
-	  | [] -> e
-	  | n::ns ->
-	      let id = id n in 
-	      put_abss <:expr<fun $lid:id$ -> $exp:e$>> ns
-	in
-	put_abss (put_apps base apps) abss)
+    let v = 
+      if string_constant then `Const <:expr<$str:fmt$>>
+      else 
+        `Fun (abss, fun fnct ->
+	  let base = <:expr<$fnct$ $str:fmt$>> in
+	  let id n = Printf.sprintf "id%d" n in
+	  let rec put_apps e = function
+	    | [] -> e
+	    | `Var n ::xs -> 
+	        let id = id n in
+	        put_apps <:expr<$exp:e$ $lid:id$>> xs
+	    | `Applied ((Arg_expr str | Arg_var str), _pos) :: xs ->
+	        put_apps 
+	        <:expr<$exp:e$ $exp:AntiquotSyntax.parse_expr _loc str$>> 
+		  xs
+	    | `Applied (Arg_rex_ref var, _pos) :: xs ->
+                let meth = match var with
+                  | '0' .. '9' -> Printf.sprintf "_%c" var
+                  | '`' -> "_left"
+                  | '\'' -> "_right"
+                  | '&' -> "_0"
+                  | '+' -> "_last"
+                  | _ -> assert false
+                in
+	        put_apps 
+	        <:expr<$exp:e$ __rex_group#$meth$ >> 
+		  xs
+	  in
+	  let rec put_abss e = function
+	    | [] -> e
+	    | n::ns ->
+	        let id = id n in 
+	        put_abss <:expr<fun $lid:id$ -> $exp:e$>> ns
+	  in
+	  put_abss (put_apps base apps) abss)
+    in
+    v, end_pos, rems
 end
     -> 'a (* CR jfuruse: bad type *)
     -> string 
     -> [> `Const of expr
-       | `Fun of int list * (expr -> expr) ]
+       | `Fun of int list * (expr -> expr) ] * int (** end_pos *) * string (** remains *)
 end
 
   let parse_rex_quotation _loc q =
     let loc = Loc.join (Loc.move `start q.q_shift _loc) in
-    let rex, flags = 
-      match split_by_non_escaped_slash q.q_contents with
-      | [ rex; flags ] -> rex, flags
-      | [ rex ] -> rex, ""
-      | [] -> "", ""
-      | _ -> raise (Stream.Error 
-		       (Printf.sprintf "too many / in pcre match expr %S" q.q_contents))
-    in
+    let rex, tokens, flags = Lexrex.from_string q.q_contents in
     let parse_flag = function
       | 'i' -> <:expr<`CASELESS>>
       | 'm' -> <:expr<`MULTLINE>>
       List.fold_left (fun acc sw ->
 	<:expr<$sw$ :: $acc$>>) <:expr<[]>> (iter [] 0)
     in
-    let tokens = Lexrex.from_string rex in
     let typ = Lexrex.type_regexp tokens in
     build_rex loc rex typ flags
   ;;
 
   let parse_rex_replace_quotation _loc q =
     let loc = Loc.join (Loc.move `start q.q_shift _loc) in
+    let rex, tokens, replace_ = Lexrex.replace_from_string q.q_contents in
+    let replace, _, flags = Pa_format.parse ['/'] _loc None replace_ in
 
-    (* CR jfuruse: concating back with '/' is ugly *)
-    let rex_replace, flags = 
-      match split_by_non_escaped_slash q.q_contents with
-      | [ rex; replace; flags ] -> 
-(*
-          Format.eprintf "s3 %s // %s // %s@." rex replace flags;
-*)
-          rex ^ "/" ^ replace, flags
-      | [ rex; replace ] -> 
-(*
-          Format.eprintf "s2 %s // %s@." rex replace;
-*)
-          rex ^ "/" ^ replace, ""
-      | [] -> raise (Stream.Error 
-		       (Printf.sprintf "no / in pcre replace expr %S" q.q_contents))
-      | _ -> raise (Stream.Error 
-		       (Printf.sprintf "too many /s in pcre replace expr %S" q.q_contents))
-    in
     let replace_global = ref false in
     let parse_flag = function
       | 'i' -> Some <:expr<`CASELESS>>
       List.fold_left (fun acc sw ->
 	<:expr<$sw$ :: $acc$>>) <:expr<[]>> (iter [] 0)
     in
-    let tokens, rex, replace = Lexrex.replace_from_string rex_replace in
     let typ = Lexrex.type_regexp tokens in
     let rex = build_rex loc rex typ flags in
     (* CR jfuruse: we can unify these to substitute_substrings(_first) *)
-    match Pa_format.parse ['/'] _loc(*?*) None replace  with
+    match replace with
     | `Const replace ->
         if !replace_global then
           <:expr<
           >>
     | `Fun (_abss, _) -> 
         raise (Stream.Error 
-		  (Printf.sprintf "non closed template %S" replace))
+		  (Printf.sprintf "non closed template %S" replace_))
   ;;
 
 (* manual call of quotation expander, since <:rex<>> has special
 
   let parse _loc _loc_var_opt s =
     match Pa_format.parse ['"'] _loc _loc_var_opt s with
-    | `Const c -> c
-    | `Fun (_abss, f) -> f <:expr<Printf.sprintf>>
+    | (`Const _ | `Fun _), pos, rem when rem <> "" -> 
+        raise (Lexformat.Error (pos, pos + 1,
+		                Printf.sprintf "Unescaped special character %C found" rem.[0]))
+    | `Const c, _, _ -> c
+    | `Fun (_abss, f), _, _ -> f <:expr<Printf.sprintf>>
   ;;
 
   let _ =