camlspotter avatar camlspotter committed 68da189

better escaping implemented

Comments (0)

Files changed (12)

 exception Error of int * int * string
+val errorf : Lexing.lexbuf -> ('a, unit, string, 'b) format4 -> 'a
+val errorf_at : int -> int -> ('a, unit, string, 'b) format4 -> 'a
 
 type ctype =
     | Cd (* [d], [i], [n], [l], [L], or [N]: convert an integer
 
 and token =
     | String of string (* invariant: no $ % *)
+    | Char of char (* invariant: no $ % *)
     | Conv of conversion
     | Escaped of char
 
 
 val compile_conversion : conversion -> [`String of string | `Star] list
 
-val from_string : char list -> string -> t * int * string 
-(** Tokens and stop position and remains. 
+val from_string : char list -> string -> t * string option
+(** Tokens and remains if stopped at special chars.
     Lexing stops when it finds a non-escaped special char 
 *)
 
   -> t 
   * (int list * [`Applied of (inlined_arg * int) | `Var of int] list) 
   * string
-  * int (** end pos *)
-  * string  (** remains *)
+  * string option (** remains *)
 
 
 and token =
     | String of string (* invariant: no $ % *)
+    | Char of char (* invariant: no $ % *)
     | Conv of conversion
     | Escaped of char
 
   | String s -> s
   | Escaped '"' -> "\\\""
   | Escaped '\\' -> "\\\\"
-  | Escaped char -> String.make 1 char
+  | Char char | Escaped char -> String.make 1 char
   | Conv conv -> conversion_to_string conv
 
 and to_string tokens =
+  (* CR jfuruse: Char tends to continue! Inefficient! Use Buffer! *)
   String.concat "" (List.map token_to_string tokens)
 
 exception Error of int * int * string
 
-let error lexbuf s =
-  raise (Error (lexeme_start lexbuf, lexeme_end lexbuf, s))
+let errorf lexbuf fmt =
+  Printf.ksprintf (fun s -> raise (Error (lexeme_start lexbuf, lexeme_end lexbuf, s)))
+    fmt
+
+let errorf_at start end_ fmt =
+  Printf.ksprintf (fun s -> raise (Error (start, end_, s)))
+    fmt
 
 let check_conversion lexbuf conv =
   let no_inlined_arg () =
     if conv.inlined_arg <> None then
-      error lexbuf 
-	"This conversion cannot take an inlined argument"
+      errorf lexbuf "This conversion cannot take an inlined argument"
   in
   let no_flag_width_precision () =
     if conv.flags <> [] then 
-      error lexbuf "This conversion cannot take flags";
+      errorf lexbuf "This conversion cannot take flags";
     if conv.width <> None then
-      error lexbuf "This conversion cannot take a width";
+      errorf lexbuf "This conversion cannot take a width";
     if conv.precision <> None then
-      error lexbuf "This conversion cannot take a precision";
+      errorf lexbuf "This conversion cannot take a precision";
   in
   (* just no perfect quick checks *)
   begin match conv.ctype with
 ;;
 }
 
-rule ctype spcs = parse
+rule ctype stopat = parse
   | ( "d" | "i" | "n" | "l" | "L" | "N" ) { Cd }
   | "u" { Cu }
   | "x" { Cx }
   | "a" { Ca }
   | "t" { Ct }
   | "{" { 
-      let fmt = format spcs [] lexbuf in
-      enclose '}' lexbuf;
-      Cformat fmt 
+      match format stopat [] lexbuf with
+      | fmt, None -> 
+          enclose '}' lexbuf;
+          Cformat fmt 
+      | _, Some (pos, c) ->
+          errorf_at pos (pos+1) "illegal format type %C" c
     }
   | "(" { 
-      let fmt = format spcs [] lexbuf in
-      enclose ')' lexbuf;
-      Cformat_subst fmt
+      match format stopat [] lexbuf with
+      | fmt, None -> 
+          enclose ')' lexbuf;
+          Cformat_subst fmt
+      | _, Some (pos, c) -> 
+          errorf_at pos (pos+1) "illegal format type %C" c
     }
   | "!" { Cflush }
   | "%" { Cpercent }
-  | _ as c { error lexbuf (Printf.sprintf "illegal format type %C" c) }
+  | _ as c { errorf lexbuf "illegal format type %C" c }
 
 and enclose str = parse
   | "%" ([ ')' '}' ] as s) {
       if s = str then ()
-      else error lexbuf 
-	(Printf.sprintf "wrong format closing (%%%c expected)" str)
+      else errorf lexbuf "wrong format closing (%%%c expected)" str
     }
 
 and flag = parse
 	Arg_expr exp, pos
       }
   | "" {
-      error lexbuf "illegal $-expression"
+      errorf lexbuf "illegal $-expression"
     }
 
 and inlined_arg = parse
 		    inlined_arg = inlined_arg }) :: st) lexbuf
     }
 	
-  | ([^ '%' '$' '\\'] as s) { 
-      if List.mem s specials then
+  | ([^ '%' '$' '\\'] as c) { 
+      if List.mem c specials then
         (* Special char. We must stop here *)
-        List.rev st
-      else 
-        format specials (String (String.make 1 s) :: st) lexbuf 
+        List.rev st, Some (lexeme_start lexbuf (* the char is already loaded! *), c)
+      else
+        format specials (Char c :: st) lexbuf 
     }
 
   | "\\" (['\\' 'n' 't' 'r' 'b']  as char) {
     }
 
   | "\\" (_ as char) {
-      if List.mem char specials then
         format specials (Escaped char :: st) lexbuf
-      else
-        failwith (Printf.sprintf "Lexformat: illegal escaped char %C" char)
       }
 
   | "$" 
 		      ctype = Cs;
 		      inlined_arg = Some exp_pos }) :: st) lexbuf 
       }
-  | eof { List.rev st }
+  | eof { List.rev st, None }
 
 and exp st = parse
   | "}" { st }
   | "\\" { exp (st ^ "\\") lexbuf }
   | ([^ '\\' '}']+ as s) { exp (st ^ s) lexbuf }
   | _ as c { 
-      error lexbuf (Printf.sprintf "illegal char in ${exp}: %C" c) }
+      errorf lexbuf "illegal char in ${exp}: %C" c }
   | eof { 
-      error lexbuf "unterminated ${exp}"
+      errorf lexbuf "unterminated ${exp}"
     }
 
 {
 
 let from_string specials s = 
   let lexbuf = Lexing.from_string s in
-  let tokens = format specials [] lexbuf in
-  let endpos = lexeme_end lexbuf in
-  tokens,
-  endpos,
-  String.sub s endpos (String.length s - endpos)
+  let tokens, rest = format specials [] lexbuf in
+  let rems = match rest with
+    | Some (pos, _) -> Some (String.sub s pos (String.length s - pos))
+    | None -> None
+  in
+  tokens, rems
 ;;
 
 
   conv'
 
 and parameters_token token = match token with
-  | String _ | Escaped _ -> [], token
+  | String _ | Char _ | Escaped _ -> [], token
   | Conv conv -> 
       let params, conv' = parameters_conv conv in
       params, Conv conv'
 
 (* CR jfuruse: bad name *)
 let from_string_to_classic specials s =
-  let t, end_pos, rems = from_string specials s in
+  let t, rems = from_string specials s in
   let parameters, t' = parameters t in
   let application = parameters_to_application parameters in
-  t, application, to_string t', end_pos, rems
+  t, application, to_string t', rems
 ;;
 }
 	raise (Error (endpos, endpos + 1, 
 		     Printf.sprintf "unknown regexp character %C" rem.[0]))
       else 
-	( String.sub s 0 endpos,
-	  regexp, 
-	  String.sub rem 1 (String.length rem - 1) )
+        let rex = String.sub s 0 endpos in
+	let rem = String.sub rem 1 (String.length rem - 1) in
+        rex, regexp, rem
 
 type typ = {
     num_of_groups : int;
 
     let compile_token _loc buf pos = function
       | String s -> Buffer.add_string buf s; [], []
+      | Char c -> Buffer.add_char buf c; [], []
       | Escaped c -> Buffer.add_char buf c; [], []
       | Conv conv -> compile_conv _loc buf pos conv
     ;;
 
   let parse specials _loc _loc_var_opt s =
     match Lexformat.from_string specials s with
-    | _, pos, rem when rem <> "" -> 
+    | _, Some rem -> 
+        let pos = String.length s - String.length rem in
         raise (Lexformat.Error (pos, pos + 1,
 		                Printf.sprintf "Unescaped special character %C found" rem.[0]))
-    | t, _, _ -> 
+    | t, None -> 
         let e, inlined_f = Compile.compile _loc t in
         inlined_f (Top.may_put_in_top e)
   ;;
   module Parser = Parser_compiler(Syntax)
 
   let _ = 
+    Syntax.Quotation.add "$fmt"
+      Syntax.Quotation.DynAst.expr_tag (Parser.parse ['"']);
+
     Syntax.Quotation.add "fmt"
-      Syntax.Quotation.DynAst.expr_tag (Parser.parse ['"']);
+      Syntax.Quotation.DynAst.expr_tag (Parser.parse []);
+
+    Syntax.Quotation.add "$fmt"
+      Syntax.Quotation.DynAst.str_item_tag
+      (fun _loc _loc_var_opt s ->
+	let e = Parser.parse ['"'] _loc _loc_var_opt s in
+	<:str_item< $exp:e$ >>);
 
     Syntax.Quotation.add "fmt"
       Syntax.Quotation.DynAst.str_item_tag
       (fun _loc _loc_var_opt s ->
-	let e = Parser.parse ['"'] _loc _loc_var_opt s in
+	let e = Parser.parse [] _loc _loc_var_opt s in
 	<:str_item< $exp:e$ >>);
   ;;
 end
 
   module Pa_format = Pa_format.Make(Syntax) 
 
-  let parse _loc _loc_var_opt s =
-    match Pa_format.parse ['`'] _loc _loc_var_opt s with
-    | (`Const _ | `Fun _), pos, rem when rem <> "" -> 
+  let parse stopat _loc _loc_var_opt s =
+    match Pa_format.parse stopat _loc _loc_var_opt s with
+    | (`Const _ | `Fun _), Some rem ->
+        let pos = String.length s - String.length rem in
         raise (Lexformat.Error (pos, pos + 1,
 		                Printf.sprintf "Unescaped special character %C found" rem.[0]))
-    | `Const s, _, _ -> <:expr<command $s$>>
-    | `Fun (_abss, f), _, _ -> 
+    | `Const s, _ -> <:expr<command $s$>>
+    | `Fun (_abss, f), _ -> 
 	f <:expr<
 	    Printf.ksprintf command
 	  >>
   let _ =
     Syntax.Quotation.add "qx"
       Syntax.Quotation.DynAst.expr_tag
-      parse;
+      (parse []);
 
-   Syntax.Quotation.add "qx"
+    Syntax.Quotation.add "$qx"
+      Syntax.Quotation.DynAst.expr_tag
+      (parse ['`']);
+
+    Syntax.Quotation.add "qx"
       Syntax.Quotation.DynAst.str_item_tag
       (fun _loc _loc_var_opt s ->
-	let e = parse _loc _loc_var_opt s in
+	let e = parse [] _loc _loc_var_opt s in
+	<:str_item< $exp:e$ >>);
+
+    Syntax.Quotation.add "$qx"
+      Syntax.Quotation.DynAst.str_item_tag
+      (fun _loc _loc_var_opt s ->
+	let e = parse ['`'] _loc _loc_var_opt s in
 	<:str_item< $exp:e$ >>)
     ;;
 end
   open Lexformat
 
   let parse specials _loc _loc_var_opt s =
-    let t, (abss, apps), fmt, end_pos, rems = Lexformat.from_string_to_classic specials s in
+    let t, (abss, apps), fmt, rems = Lexformat.from_string_to_classic specials s in
     let string_constant =
       List.for_all (function 
-	| String _ | Escaped _ -> true
+	| String _ | Char _ | Escaped _ -> true
 	| Conv _ -> false) t
     in
     let v = 
 	  in
 	  put_abss (put_apps base apps) abss)
     in
-    v, end_pos, rems
+    v, rems
 end
     -> 'a (* CR jfuruse: bad type *)
     -> string 
     -> [> `Const of expr
-       | `Fun of int list * (expr -> expr) ] * int (** end_pos *) * string (** remains *)
+       | `Fun of int list * (expr -> expr) ] * string option (** remains *)
 end
   let parse_rex_replace_quotation _loc q =
     let loc = Loc.join (Loc.move `start q.q_shift _loc) in
     let rex, tokens, replace_ = Lexrex.replace_from_string q.q_contents in
-    let replace, _, flags = Pa_format.parse ['/'] _loc None replace_ in
-
+    let replace, flags_ = Pa_format.parse ['/'] _loc None replace_ in
+    let flags = match flags_ with
+      | None -> ""
+      | Some flags_ -> String.sub flags_ 1 (String.length flags_ - 1)
+    in
     let replace_global = ref false in
     let parse_flag = function
       | 'i' -> Some <:expr<`CASELESS>>
 
   module Pa_format = Pa_format.Make(Syntax) 
 
-  let parse _loc _loc_var_opt s =
-    match Pa_format.parse ['"'] _loc _loc_var_opt s with
-    | (`Const _ | `Fun _), pos, rem when rem <> "" -> 
+  let parse stopat _loc _loc_var_opt s =
+    match Pa_format.parse stopat _loc _loc_var_opt s with
+    | (`Const _ | `Fun _), Some rem ->
+        let pos = String.length s - String.length rem in
         raise (Lexformat.Error (pos, pos + 1,
 		                Printf.sprintf "Unescaped special character %C found" rem.[0]))
-    | `Const c, _, _ -> c
-    | `Fun (_abss, f), _, _ -> f <:expr<Printf.sprintf>>
+    | `Const c, _ -> c
+    | `Fun (_abss, f), _ -> f <:expr<Printf.sprintf>>
   ;;
 
   let _ = 
+    Syntax.Quotation.add "$qq"
+      Syntax.Quotation.DynAst.expr_tag (parse ['"']);
+
     Syntax.Quotation.add "qq"
-      Syntax.Quotation.DynAst.expr_tag parse;
+      Syntax.Quotation.DynAst.expr_tag (parse []);
+
+    Syntax.Quotation.add "$qq"
+      Syntax.Quotation.DynAst.str_item_tag
+      (fun _loc _loc_var_opt s ->
+	let e = parse ['"'] _loc _loc_var_opt s in
+	<:str_item< $exp:e$ >>);
 
     Syntax.Quotation.add "qq"
       Syntax.Quotation.DynAst.str_item_tag
       (fun _loc _loc_var_opt s ->
-	let e = parse _loc _loc_var_opt s in
+	let e = parse [] _loc _loc_var_opt s in
 	<:str_item< $exp:e$ >>);
   ;;
     
 	  | _ -> err (Unknown_pcre_command cmd) (Loc.of_lexbuf lexbuf)
 	} 
 
-    | "$`" { QUOTATION { q_name = "qx"; (* Quote for eXecution *)
+    | "$`" { QUOTATION { q_name = "$qx"; (* Quote for eXecution *)
 			 q_loc = "";
 			 q_shift = 2; (* precedent chars? *)
 			 q_contents =  parse (xdollar '`' None) c } }
-    | "$\"" { QUOTATION { q_name = "qq"; (* double Quote *)
+    | "$\"" { QUOTATION { q_name = "$qq"; (* double Quote *)
 			  q_loc = "";
 			  q_shift = 2; (* precedent chars? *)
 			  q_contents =  parse (xdollar '"' None) c } }
-    | "$%\"" { QUOTATION { q_name = "fmt"; (* ForMaT *)
+    | "$%\"" { QUOTATION { q_name = "$fmt"; (* ForMaT *)
 			   q_loc = "";
 			   q_shift = 3; (* precedent chars? *)
 			   q_contents =  parse (xdollar '"' None) c } }

test/test_cformat.ml

   prerr_endline "special char escaping test";
   assert (Cformat.sprintf $%"hello\"%d\"world" 2 = "hello\"2\"world");
   assert (Cformat.sprintf <:fmt<hello\"%d\"world>> 2 = "hello\"2\"world");
-  assert (Cformat.sprintf <:fmt<h"ello\"%d\"world>> 2 = "hello\"2\"world"); (* CR jfuruse: BUGGY! *)
-  
+  assert (Cformat.sprintf <:fmt<h"ello\"%d\"world>> 2 = "h\"ello\"2\"world"); (* CR jfuruse: BUGGY! *)

test/test_regexp_subst.ml

 *)
 
 let s = $s/l+/x/g "hello world";;
-let _ = assert (s = "hexo worxd");;
+let () = assert (s = "hexo worxd");;
 
 let s = $s/l+/$&$0/g "hello world";;
-let _ = assert (s = "hellllo worlld");;
+let () = assert (s = "hellllo worlld");;
 
 let s = <:s<l+/$&$0>> "hello world";;
-let _ = assert (s = "hellllo world");;
+let () = assert (s = "hellllo world");;
 
 let s = <:s<l+/$&$0/g>> "hello world";;
-let _ = assert (s = "hellllo worlld");;
+let () = assert (s = "hellllo worlld");;
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.