Commits

jhwoodyatt  committed 6fe6402

Initial submit of OCaml NAE XML Support Library.

  • Participants
  • Parent commits e4d6b77

Comments (0)

Files changed (1)

File xml/xml_parser.ml

+(*---------------------------------------------------------------------------*
+  IMPLEMENTATION  xml_parser.ml
+
+  Copyright (c) 2003-2004, James H. Woodyatt
+  All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions
+  are met:
+
+    Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+
+    Redistributions in binary form must reproduce the above copyright
+    notice, this list of conditions and the following disclaimer in
+    the documentation and/or other materials provided with the
+    distribution
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+  FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+  COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+  INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+  SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+  HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+  STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+  OF THE POSSIBILITY OF SUCH DAMAGE. 
+ *---------------------------------------------------------------------------*)
+
+(*
+module X = struct
+    let tag = "[Xml_parser] "
+    
+    let printf fmt =
+        print_string tag;
+        Printf.printf fmt
+    
+    let sprintf (fmt : ('a, unit, string) format) =
+        Printf.sprintf (Obj.magic (tag ^ (Obj.magic fmt)))
+end
+*)
+
+type 'a t = (Xml_event.t, 'a) Cf_parser.t
+
+open Cf_parser.Op
+open Cf_lexer.Op
+
+let character_data =
+    Cf_parser.tok begin function
+        | Xml_event.T_character_data data, _ -> Some data
+        | _ -> None
+    end
+
+let element_start =
+    Cf_parser.tok begin function
+        | Xml_event.T_element_start (tag, attrs), _ -> Some (tag, attrs)
+        | _ -> None
+    end
+
+let element_end =
+    Cf_parser.tok begin function
+        | Xml_event.T_element_end tag, _ -> Some tag
+        | _ -> None
+    end
+
+let processing_instruction =
+    Cf_parser.tok begin function
+        | Xml_event.T_processing_instruction pi, _ -> Some pi
+        | _ -> None
+    end
+
+let cdata_start =
+    let sat =
+        Cf_parser.sat begin function
+            | Xml_event.T_cdata_start, _ -> true
+            | _ -> false
+        end
+    in
+    sat >>= fun _ -> ~:()
+
+let cdata_end =
+    let sat =
+        Cf_parser.sat begin function
+            | Xml_event.T_cdata_end, _ -> true
+            | _ -> false
+        end
+    in
+    sat >>= fun _ -> ~:()
+
+let comment =
+    Cf_parser.tok begin function
+        | Xml_event.T_comment text, _ -> Some text
+        | _ -> None
+    end
+
+let default_text =
+    Cf_parser.tok begin function
+        | Xml_event.T_default_text text, _ -> Some text
+        | _ -> None
+    end
+
+let xml_decl =
+    Cf_parser.tok begin function
+        | Xml_event.T_xml_decl decl, _ -> Some decl
+        | _ -> None
+    end
+
+let end_of_document =
+    let sat =
+        Cf_parser.sat begin function
+            | Xml_event.T_end_of_document, _ -> true
+            | _ -> false
+        end
+    in
+    sat >>= fun _ -> ~:()
+
+let accumulated_character_data =
+    let rec inner_loop q s =
+        match character_data s with
+        | None ->
+            if Cf_deque.empty q then None else Some (q, s)
+        | Some (data, s) ->
+            inner_loop (Cf_deque.A.push (data, 0, String.length data) q) s
+    in
+    let cdata_section ()=
+        cdata_start >>= fun _ ->
+        inner_loop Cf_deque.nil >>= fun q ->
+        cdata_end >>= fun _ ->
+        ~:q
+    in
+    let outer_pass =
+        Cf_parser.alt [
+            inner_loop Cf_deque.nil;
+            cdata_section ();
+        ]
+    in
+    let rec outer_loop q s =
+        match outer_pass s with
+        | None -> Some (q, s)
+        | Some (q', s) -> outer_loop (Cf_deque.catenate q q') s
+    in
+    outer_loop Cf_deque.nil >>= fun q ->
+    ~:(Cf_seq.reverse (Cf_deque.B.to_seq q))
+
+let optional_whitespace =
+    let rec get seq =
+        match character_data seq with
+        | Some (s, seq) when loop s (String.length s) 0 -> get seq
+        | Some (_, seq) -> Some ((), seq)
+        | _ ->
+            match default_text seq with
+            | Some (s, seq) when loop s (String.length s) 0 -> get seq
+            | _ -> Some ((), seq)
+    and loop s n i =
+        if i >= n then
+            true
+        else
+            match String.unsafe_get s i with
+            | (' ' | '\t' | '\r' | '\n' as ch) -> loop s n (succ i)
+            | ch -> false
+    in
+    fun seq -> get seq
+
+exception Invalid of Xml_event.t
+
+let invalid seq =
+    match Lazy.force seq with
+    | Cf_seq.Z -> None
+    | Cf_seq.P (event, _) -> raise (Invalid event)
+
+type space_handling_t = S_default | S_preserve
+
+(*
+val validated_element:
+    tag:string -> attr:(Xml_event.position_t -> 'a -> string * string -> 'a) ->
+    content:('a -> 'a t) -> ?space:space_handling_t -> 'a -> 'a t
+*)
+
+let validated_element ~tag ~attr ~content ?(space = S_default) =
+    let rec attributes ~pos ~acc = function
+        | [] -> acc
+        | hd :: tl -> attributes ~pos ~acc:(attr pos acc hd) tl
+    and e_start acc seq =
+        match Lazy.force seq with
+        | Cf_seq.P ((Xml_event.T_element_start (tag', attlist), pos), tl) ->
+            if tag = tag' then
+                Some (attributes ~pos ~acc attlist, tl)
+            else
+                None
+        | _ ->
+            None
+    and e_end seq =
+        match Lazy.force seq with
+        | Cf_seq.P ((Xml_event.T_element_end tag', pos as event), tl) ->
+            if tag = tag' then Some ((), tl) else raise (Invalid event)
+        | _ ->
+            None
+    in
+    let content acc =
+        match space with
+        | S_preserve ->
+            content acc
+        | S_default ->
+            optional_whitespace >>= fun () ->
+            content acc >>= fun x ->
+            optional_whitespace >>= fun () ->
+            ~:x
+    in
+    fun acc ->
+        e_start acc >>= fun acc ->
+        content acc >>= fun acc ->
+        e_end >>= fun () ->
+        ~:acc
+
+let split_attribute_value =
+    let is_white x = function
+        | '\009' | '\010' | '\013' | '\032' -> x
+        | _ -> not x
+    in
+    let x_white = !+(!^(is_white true)) in
+    let x_token = !+(!^(is_white false)) in
+    let cursor = new Cf_lexer.cursor 0 in
+    let lexer = Cf_lexer.create !@[
+        x_token $^ (fun x -> Some x);
+        x_white $= None;
+    ] in
+    fun s ->
+        let rec loop acc sx =
+            match lexer sx with
+            | None -> Some (List.rev acc, Cf_seq.first sx)
+            | Some (None, sx) -> loop acc sx
+            | Some (Some tok, sx) -> loop (tok :: acc) sx
+        in
+        loop [] (Cf_parser.X.weave cursor s)
+
+let standalone_document content =
+    xml_decl >>= fun decl ->
+    match decl with
+    | {
+        Xml_event.xml_version = Some "1.0";
+        Xml_event.xml_standalone = Xml_expat.SA_yes;
+      } ->
+        begin
+            optional_whitespace >>= fun () ->
+            content >>= fun x ->
+            optional_whitespace >>= fun () ->
+            end_of_document >>= fun () ->
+            ~:x
+        end
+    | _ ->
+        Cf_parser.nil
+
+(*--- End of File [ xml_parser.ml ] ---*)