Commits

Dmitry Grebeniuk  committed 973b55f

+ Cd_Buffer.Buffer.Copy_on_modify, + Strings.Utf8.escaped

  • Participants
  • Parent commits 7ffe253

Comments (0)

Files changed (6)

 Library cadastr
   Path:       src
   BuildDepends: num
-  Modules:    Cadastr, Monoid, Cd_All, Cd_Int, Cd_List, Cd_Ops, Cd_Byte, Cd_Bytes, Cd_Chars, Cd_Strings, Cd_Array, Cd_Typeinfo, Cdt, Cd_Types, Cd_Option, Cd_Num, Cd_Tuples, Cd_Bool, TlsArray, TlsArray_st, TlsArray_mt, TlsRef, Cd_Ref, Cd_Partapp, Cd_Int64, Cd_Ser, Cd_SortedArray, Cd_SortedArraySet, Cd_Utf8
+  Modules:    Cadastr, Monoid, Cd_All, Cd_Int, Cd_List, Cd_Ops, Cd_Byte, Cd_Bytes, Cd_Chars, Cd_Strings, Cd_Array, Cd_Typeinfo, Cdt, Cd_Types, Cd_Option, Cd_Num, Cd_Tuples, Cd_Bool, TlsArray, TlsArray_st, TlsArray_mt, TlsRef, Cd_Ref, Cd_Partapp, Cd_Int64, Cd_Ser, Cd_SortedArray, Cd_SortedArraySet, Cd_Utf8, Cd_Buffer, Cd_StringsCommon
   NativeOpt:       -w A
   ByteOpt:         -w A
 
 (* setup.ml generated for the first time by OASIS v0.2.1~alpha1 *)
 
 (* OASIS_START *)
-(* DO NOT EDIT (digest: 111c334fd57d418d56fbcf6c7e5d3ccd) *)
+(* DO NOT EDIT (digest: 9c144fc0bd0d50d3be834222339ab3ad) *)
 (*
    Regenerated by OASIS v0.2.1~alpha1
    Visit http://oasis.forge.ocamlcore.org for more information and
                            "Cd_Ser";
                            "Cd_SortedArray";
                            "Cd_SortedArraySet";
-                           "Cd_Utf8"
+                           "Cd_Utf8";
+                           "Cd_Buffer";
+                           "Cd_StringsCommon"
                         ];
                       lib_internal_modules = [];
                       lib_findlib_parent = None;

File src/cadastr.mllib

 # OASIS_START
-# DO NOT EDIT (digest: f5b86c8c387d7e4e1b259e92e5e4cfbc)
+# DO NOT EDIT (digest: f92214abc54f998f4ed261c0b0f78ea2)
 Cadastr
 Monoid
 Cd_All
 Cd_SortedArray
 Cd_SortedArraySet
 Cd_Utf8
+Cd_Buffer
+Cd_StringsCommon
 # OASIS_STOP

File src/cd_Buffer.ml

+module Buffer
+ =
+  struct
+
+
+    include Buffer;
+
+
+
+    module Copy_on_modify
+     :
+      sig
+        type t;
+        value on_string : string -> t;
+        value add_char : t -> char -> unit;
+        value add_substring : t -> string -> int -> int -> unit;
+        value add_string : t -> string -> unit;
+        value contents : t -> string;
+      end
+     =
+      struct
+
+        module Scom = Cd_StringsCommon;
+
+        type t = ref tcont
+         and tcont =
+          [ Orig of string and ref int  (* current offset *)
+          | Buf of Buffer.t
+          ]
+        ;
+
+        value on_string s = ref (Orig s (ref 0))
+        ;
+
+        value contents t =
+          match t.val with
+          [ Orig base { val = base_ofs } ->
+              if base_ofs = String.length base
+              then base
+              else String.sub base 0 base_ofs
+          | Buf b ->
+              Buffer.contents b
+          ]
+        ;
+
+        value rec add_substring t str ofs len =
+          let () = assert (len >= 0) in
+          if len = 0
+          then
+            ()
+          else
+            if not (Scom.is_valid_substring str ofs len)
+            then
+              invalid_arg "Cd_Buffer.Copy_on_write.add_substring"
+            else
+              match t.val with
+              [ Buf b ->
+                  Buffer.add_substring b str ofs len
+              | Orig base ref_base_ofs ->
+                  let base_ofs = ref_base_ofs.val in
+                  let base_len = String.length base in
+                  let copy_rerun () =
+                    let new_buf = Buffer.create
+                      (max base_len (base_ofs + len)) in
+                    ( Buffer.add_substring new_buf base 0 base_ofs
+                    ; t.val := Buf new_buf
+                    ; add_substring t str ofs len
+                    )
+                  in
+                  let base_left = base_len - base_ofs in
+                  let () = assert (base_left >= 0) in
+                  if base_left = 0
+                  then
+                    copy_rerun ()
+                  else
+                    let piece_len = min base_left len in
+                    let () = assert (piece_len > 0) in
+                    if Scom.substring_eq
+                         ~s1:base ~ofs1:base_ofs
+                         ~s2:str ~ofs2:ofs
+                         ~len:piece_len
+                    then
+                      ( ref_base_ofs.val := base_ofs + piece_len
+                      ; add_substring t
+                          str (ofs + piece_len) (len - piece_len)
+                      )
+                    else
+                      copy_rerun ()
+              ]
+        ;
+
+        value add_string t s = add_substring t s 0 (String.length s)
+        ;
+
+        (* to optimize later, maybe: *)
+        value add_char t c = add_string t (String.make 1 c)
+        ;
+        (* как вариант -- сделать строку из "\000..\255" и брать
+           оттуда через add_substring.*)
+
+      end
+    ;
+
+
+  end
+;

File src/cd_Strings.ml

 
             value chop_prefix : ~string:string -> ~prefix:string -> string;
 
+            (* follows the conventions of stdlib's String.escaped,
+               but doesn't escape unicode characters. *)
+            value escaped : string -> string;
+
           end
          =
           struct
             (* some real utf8 work *)
 
             module U = Cd_Utf8;
-
-            (* does not check bounds! *)
-            value substring_eq ~s1 ~ofs1 ~s2 ~ofs2 ~len =
-              inner 0
-              where rec inner i =
-                if i = len
-                then True
-                else
-                  if s1.[ofs1 + i] = s2.[ofs2 + i]
-                  then inner (i + 1)
-                  else False
-            ;
+            module Scom = Cd_StringsCommon;
 
             value prefix_bytes ~string ~prefix =
               let sb = length_bytes string
                     then raise
                       (U.Bad_utf8 "end of string, incomplete character")
                     else
-                      if substring_eq
+                      if Scom.substring_eq
                            ~s1:string ~ofs1:i
                            ~s2:prefix ~ofs2:i
                            ~len:sc
               else string
             ;
 
+
+            module Com = Cd_Buffer.Buffer.Copy_on_modify;
+
+            value escaped s =
+              let r = Com.on_string s in
+              let len = length_bytes s in
+              let esc_dec c =
+                ( Com.add_char r '\\'
+                ; Com.add_string r (string_of_int (Char.code c))
+                )
+              in
+              ( loop 0
+              ; Com.contents r
+              )
+              where rec loop i =
+                if i = len
+                then
+                  ()
+                else
+                  let advance =
+                    try
+                      let cl = U.char_length s i in
+                      if i + cl > len
+                      then
+                        (* incomplete utf8 char *)
+                        let left = len - i in
+                        let () =
+                          for j = 0 to left - 1 do
+                            ( esc_dec s.[i + j] )
+                          done
+                        in
+                          left
+                      else
+                        if cl = 1
+                        then
+                          ( match s.[i] with
+                            [ '\x08' -> Com.add_string r "\\b"
+                            | '\x09' -> Com.add_string r "\\t"
+                            | '\x0A' -> Com.add_string r "\\n"
+                            | '\x0D' -> Com.add_string r "\\r"
+                            | '\"' -> Com.add_string r "\\\""
+                            | '\\' -> Com.add_string r "\\\\"
+                            | c when c < '\x20' -> esc_dec c
+                            | c -> Com.add_char r c
+                            ]
+                          ; 1
+                          )
+                        else
+                          ( Com.add_substring r s i cl
+                          ; cl
+                          )
+                    with
+                    [ U.Bad_utf8 _ -> ( esc_dec s.[i] ; 1 )
+                    ]
+                  in
+                    loop (i + advance)
+            ;
+
+
           end  (* Utf8.String *)
         ;
 

File src/cd_StringsCommon.ml

+(* common functions used both in Cd_Strings and Cd_Buffer
+   which depend on each other
+*)
+
+
+value is_valid_substring str ofs len =
+     ofs >= 0
+  && len >= 0
+  && ofs + len <= String.length str
+;
+
+(* does not check bounds! *)
+value substring_eq ~s1 ~ofs1 ~s2 ~ofs2 ~len =
+  if    not (is_valid_substring s1 ofs1 len)
+     || not (is_valid_substring s2 ofs2 len)
+  then invalid_arg "Cd_StringsCommon.substring_eq"
+  else
+  inner 0
+  where rec inner i =
+    if i = len
+    then True
+    else
+      if s1.[ofs1 + i] = s2.[ofs2 + i]
+      then inner (i + 1)
+      else False
+;
+