Commits

Anonymous committed bf5d96d

+ javascript escape's unescape enumeratee

Comments (0)

Files changed (2)

 
   end
 ;
+
+
+module Js_escape(IO : It_Types.MonadIO)
+ :
+  sig
+    exception Js_bad_escape of string;
+
+    value unescape : (Iteratees.Make(IO)).enumeratee char uchar 'a;
+
+    module I : sig exception Iteratees_err_msg of exn; end;
+  end
+ =
+  struct
+
+    exception Js_bad_escape of string;
+
+    module I = Iteratees.Make(IO);
+
+    open I;
+
+    value it_bad_esc fmt = Printf.ksprintf
+      (fun s -> throw_err (Js_bad_escape s)) fmt
+    ;
+
+    value it_hex exp_chars =
+      joinI (limit exp_chars
+        (gather_to_string))
+      >>= fun str ->
+      let str_len = String.length str in
+      if str_len <> exp_chars
+      then
+        it_bad_esc "expected hex of %i chars, found only %i chars"
+          exp_chars str_len
+      else
+        match
+          (try Scanf.sscanf str "%x%!" (fun n -> Some n)
+           with [ _ -> None ])
+        with
+        [ None -> it_bad_esc "expected hex number, found %S" str
+        | Some n -> return n
+        ]
+    ;
+
+    value unescape_utf16 : enumeratee char utf16item 'a = fun it ->
+      let rec unescape_utf16 (it : iteratee int 'a) =
+        break_feed ((=) '%') gather_to_string >>= fun it_str ->
+        (joinI & return it_str) >>= fun str ->
+        let it = feed_it
+          it
+          (let arr = Array.init
+             (String.length str) (fun i -> Char.code str.[i]) in
+           Chunk (S.of_array arr)
+          ) in
+        get_stream_eof >>= fun opt_opt_err ->
+        match opt_opt_err with
+        [ Some None -> return it
+        | Some (Some err) -> throw_err err
+        | None ->
+            match it with
+            [ IE_done _ | IE_cont (Some _) _ -> return it
+            | IE_cont None k ->
+                junk (* '%' *) >>= fun () ->
+                peek >>= fun opt_c ->
+                match opt_c with
+                [ None -> it_bad_esc "eof after '%%'"
+                | Some c ->
+                    (if c = 'u' || c = 'U'
+                     then
+                       junk >>= fun () ->
+                       it_hex 4
+                     else
+                       it_hex 2
+                    ) >>= fun code ->
+                    let it =
+                      liftI
+                        ( (k (chunk_of code)) >>% fun (it, _sl) ->
+                          IO.return it
+                        )
+                    in
+                      unescape_utf16 it
+                ]
+            ]
+        ]
+      in
+        unescape_utf16 it
+    ;
+
+
+    value unescape it = joinI & unescape_utf16 (UTF8.utf8_of_utf16 it)
+    ;
+
+  end
+;
+
+
+
+
+
+
+
+
+
+
 value () = P.printf "after functor app\n%!";
 open I;
 
+module Je = It_misc.Js_escape(IO);
+
 value mprintf fmt =
   Printf.ksprintf
     (fun s ->
 value string_of_e e =
   let (e, p) =
     match e with
-    [ Iteratees_err_msg e -> (e, "")
+    [ Iteratees_err_msg e | Je.I.Iteratees_err_msg e -> (e, "")
     | EIO (e, p) -> (e, p)
     | e -> (e, "")
     ]
 ;
 
 
+value test_js_unescape () =
+( dbg "test_js_unescape"
+; assert ((
+    runA & enum_pure_nchunk
+           (expl "ab%2C*.cd%u0439%u0446%u0443ef%uD834%uDF06gh") 1
+           (joinI & Je.unescape stream2list)
+    ) = `Ok
+           [ Char.code 'a'; Char.code 'b'; Char.code ','
+           ; Char.code '*'; Char.code '.'; Char.code 'c'
+           ; Char.code 'd'
+           ; 0x439; 0x446; 0x443; Char.code 'e'; Char.code 'f'
+           ; 0x1D306
+           ; Char.code 'g'; Char.code 'h'
+           ]
+  )
+; dbg "passed"
+)
+;
+
+
 value () =
   ( P.printf "TESTS BEGIN.\n"
 
 
   ; test_forms ()
 
+  ; test_js_unescape ()
+
   ; P.printf "TESTS END.\n"
   );