ysulsky avatar ysulsky committed 03de30f Merge

merge

Comments (0)

Files changed (36)

 glob:base/sexplib/_oasis
 glob:base/sexplib/_tags
 glob:base/sexplib/configure
+glob:base/sexplib/conv_test.byte
 glob:base/sexplib/lib/META
 glob:base/sexplib/lib/lexer.ml
 glob:base/sexplib/lib/parser.ml
 glob:base/sexplib/setup.data
 glob:base/sexplib/setup.log
 glob:base/sexplib/setup.ml
+glob:base/sexplib/sexp_test.byte
 glob:base/sexplib/syntax/pa_sexp_conv.mllib
 glob:base/sexplib/top/sexplib_top.mllib
 glob:base/type_conv/INSTALL

base/async/unix/lib/reader.ml

 
 let gen_read_sexp ?parse_pos t parse =
   Deferred.create (fun result ->
-    let rec loop ~ws_only parse_fun =
+    let rec loop ~cont_state parse_fun =
       nonempty_buffer t (function
         | `Eof ->
-          if ws_only then Ivar.fill result `Eof
-          else begin
-            (* The sexp parser doesn't know that a token ends at EOF, so we add a space to
-               be sure. *)
+          begin
+            (* The sexp parser doesn't know that a token ends at EOF, so we
+               add a space to be sure. *)
             match parse_fun ~pos:0 ~len:1 space with
             | Sexp.Done (sexp, parse_pos) ->
-              Ivar.fill result (`Ok (sexp, parse_pos))
+                Ivar.fill result (`Ok (sexp, parse_pos))
+            | Sexp.Cont (Sexp.Cont_state.Parsing_whitespace, _) ->
+                Ivar.fill result `Eof
             | Sexp.Cont _ ->
-              failwiths "Reader.read_sexp got unexpected eof" t <:sexp_of< t >>
+                failwiths "Reader.read_sexp got unexpected eof"
+                  t <:sexp_of< t >>
           end
         | `Ok ->
           match parse_fun ~pos:t.pos ~len:t.available t.buf with
           | Sexp.Done (sexp, parse_pos) ->
-            consume t (parse_pos.Sexp.Parse_pos.buf_pos - t.pos);
-            Ivar.fill result (`Ok (sexp, parse_pos));
-          | Sexp.Cont (ws_only, parse_fun) ->
-            t.available <- 0;
-            loop ~ws_only parse_fun)
+              consume t (parse_pos.Sexp.Parse_pos.buf_pos - t.pos);
+              Ivar.fill result (`Ok (sexp, parse_pos));
+          | Sexp.Cont (cont_state, parse_fun) ->
+              t.available <- 0;
+              loop ~cont_state parse_fun)
     in
     let parse ~pos ~len buf =
-      (* [parse_pos] will be threaded through the entire reading process by the sexplib
-         code.  Every occurrence of [parse_pos] above will be identical to the [parse_pos]
-         defined here. *)
+      (* [parse_pos] will be threaded through the entire reading process by
+         the sexplib code.  Every occurrence of [parse_pos] above will be
+         identical to the [parse_pos] defined here. *)
       let parse_pos =
         match parse_pos with
         | None -> Sexp.Parse_pos.create ~buf_pos:pos ()
       in
       parse ?parse_pos:(Some parse_pos) ?len:(Some len) buf
     in
-    loop ~ws_only:true parse)
+    loop ~cont_state:Sexp.Cont_state.Parsing_whitespace parse)
 ;;
 
 type 'a read = ?parse_pos : Sexp.Parse_pos.t -> 'a

base/bin_prot/lib/bin_prot.odocl

+# OASIS_START
+# DO NOT EDIT (digest: 587ff1270b3446e9490aea8b3ff16a98)
+Binable
+Nat0
+Common
+Unsafe_common
+Unsafe_write_c
+Unsafe_read_c
+Size
+Write_ml
+Read_ml
+Write_c
+Read_c
+Std
+Type_class
+Map_to_safe
+Utils
+# OASIS_STOP

base/bin_prot/oasis.sh

 
           flag ["compile"; "ocaml"; "cpp"] cpp;
 
+          flag ["compile"; "ocaml"] (S [A "-w"; A "@Ae" ]);
+
           let cflags =
             let flags =
               [

base/core/extended/lib/command.ml

     | `Ok tbl -> (fun flag ->
       match partial_match tbl flag with
       | `Exact (_, v)
-      | `Partial (_, ({full_flag_required = false} as v)) -> Some v.spec
-      | `Partial (_, ({full_flag_required = true} as v)) ->
+      | `Partial (_, ({full_flag_required = false; _} as v)) -> Some v.spec
+      | `Partial (_, ({full_flag_required = true; _} as v)) ->
         eprintf "Note: cannot abbreviate flag \"%s\".\n%!" v.name; None
       | `Ambiguous l ->
         eprintf "Note: flag \"%s\" is an ambiguous prefix: %s\n%!"
       | `None -> None)
   ;;
 
-  let help { name = name; doc = doc; aliases = aliases}  =
+  let help { name = name; doc = doc; aliases = aliases; _}  =
     if String.is_prefix doc ~prefix:" " then
       (name, String.lstrip doc) ::
         List.map aliases

base/core/extended/lib/core_command.ml

             (x ^ " " ^ arg, sprintf "same as \"%s\"" name))
   end
 
-  let align {name; doc; aliases; action = _} =
+  let align {name; doc; aliases; _} =
     let (name, doc) =
       match String.lsplit2 doc ~on:' ' with
       | None | Some ("", _) -> (name, String.strip doc)

base/core/extended/lib/documented_match_statement.ml

 
 let match_ t x =
   match List.filter t.specific_cases
-    ~f:(fun { pattern = x' } -> List.exists x' ~f:(fun y -> x = y)) with
+    ~f:(fun { pattern = x'; _ } -> List.exists x' ~f:(fun y -> x = y)) with
   | case1::case2::_ -> failwithf "pattern appears twice in documented_match (%s,%s)"
     case1.documentation case2.documentation ()
   | [case] -> case.value ()

base/core/extended/lib/exception_check.ml

       U.set_nonblock s;
       while true do
         try
-          let { U.Select_fds.read = rd } =
+          let { U.Select_fds.read = rd; _ } =
             U.select ~read:(s :: !clients) ~write:[] ~except:[]
               ~timeout:(- 1.0) ()
           in

base/core/extended/lib/loggers.ml

     let
         {
           tm_year = m_year; tm_mon = m_month; tm_mday = m_mday;
-          tm_hour = m_hour; tm_min = m_min; tm_sec = m_sec;
+          tm_hour = m_hour; tm_min = m_min; tm_sec = m_sec; _
         } = localtime mtime in
     let m_sec = float m_sec +. mod_float mtime 1. in
     sprintf "%04d-%02d-%02d/%02d:%02d:%05.2f"

base/core/extended/lib/sys_utils.ml

 
   let sample_exn pid =
     let module P = Procfs.Process in
-    let {P.Stat.utime; stime} = (Procfs.with_pid_exn pid).P.stat in
+    let {P.Stat.utime; stime; _} = (Procfs.with_pid_exn pid).P.stat in
     { jiffies = Big_int.add_big_int utime stime;
       time = Time.now () }
 
     t.s0 <- t.s1;
     t.s1 <- sample_exn t.pid
 
-  let cpu_use {jps; s0={jiffies=j0;time=t0}; s1={jiffies=j1;time=t1}} =
+  let cpu_use {jps; s0={jiffies=j0;time=t0}; s1={jiffies=j1;time=t1}; _} =
     let my_jps =
       Big_int.float_of_big_int (Big_int.sub_big_int j1 j0)
       /. Time.Span.to_sec (Time.diff t1 t0)

base/core/extended/oasis.sh

       List.concat (List.map f flags)
     in
     flag ["compile"; "c"] (S cflags);
+    flag ["compile"; "ocaml"] (S [A "-w"; A "@Ae" ]);
 
     dispatch_default e
   | e -> dispatch_default e

base/core/lib/bigstring_stubs.c

 
 typedef off_t file_offset;
 
-#define IO_BUFFER_SIZE 4096
+#define IO_BUFFER_SIZE 65536
 
 struct channel {
   int fd;                       /* Unix file descriptor */

base/core/lib/in_channel.ml

 let set_binary_mode = Pervasives.set_binary_mode_in
 
 let input_all t =
-  (* We use 4096 because that is the size of OCaml's IO buffers. *)
-  let buf_size = 4096 in
+  (* We use 65536 because that is the size of OCaml's IO buffers. *)
+  let buf_size = 65536 in
   let buf = String.create buf_size in
   let buffer = Buffer.create buf_size in
   let rec loop () =

base/core/lib/misc.c

 /* Fix the broken close_(in/out) function which does not release the
    caml lock. */
 
-#define IO_BUFFER_SIZE 4096
+#define IO_BUFFER_SIZE 65536
 
 typedef long file_offset;
 

base/core/oasis.sh

       List.concat (List.map f flags)
     in
     flag ["compile"; "c"] (S cflags);
+    flag ["compile"; "ocaml"] (S [A "-w"; A "@Ae" ]);
 
     dispatch_default e
   | e -> dispatch_default e

base/sexplib/.hgignore.in

 _build
 _tags
 lib/sexplib.odocl
+conv_test.byte
+sexp_test.byte

base/sexplib/README

-    
+      
                                         
                           README: library "Sexplib"
                           *************************
             =====================================================
                              Author: Markus Mottl 
                             ======================
-                             New York, 2012-03-20
+                             New York, 2012-04-20
                              ====================
   
 
 4.1  Lexical conventions of S-expression
 ========================================
   
-  Whitespace, which consists of space, newline, carriage return, horizontal
-tab and form feed, is ignored unless within an OCaml-string, where it is
-treated according to OCaml-conventions. The semicolon introduces comments.
-Comments are ignored, and range up to the next newline character. The left
-parenthesis opens a new list, the right parenthesis closes it again. Lists can
-be empty. The double quote denotes the beginning and end of a string following
-the lexical conventions of OCaml (see OCaml-manual for details). All
-characters other than double quotes, left- and right parentheses, and
-whitespace are considered part of a contiguous string.
+  Whitespace, which consists of space, newline, horizontal tab, and form feed,
+is ignored unless within an OCaml-string, where it is treated according to
+OCaml-conventions. The left parenthesis opens a new list, the right one closes
+it again. Lists can be empty. The double quote denotes the beginning and end
+of a string following the lexical conventions of OCaml (see the OCaml-manual
+for details). All characters other than double quotes, left- and right
+parentheses, whitespace, carriage return, and comment-introducing characters
+or sequences (see next paragraph) are considered part of a contiguous string.
+  
+  A line comment is introduced using a semicolon, which comments out all text
+up to the end of the next newline character. The sequence '%;' introduces an
+S-expression comment. This means that the next S-expression, which must be
+syntactically correct and may be an atom (quoted or unquoted) or list,
+following this two-character sequence will be ignored. Whitespace or other
+comments between this sequence and the subsequent S-expression are ignored.
+Block comments are opened with '#|' and closed with '|#'. They can be nested
+and require that double-quotes within the block balance and contain
+syntactically correct OCaml-strings, similar to quoted atoms. These
+OCaml-strings may contain comment characters without causing parsing problems.
   
 
 4.2  Grammar of S-expressions
       {
         x : int option;
         y : int sexp_option;
-      }
+      } with sexp
 >>
   
   The type 'sexp_option' is equivalent to ordinary options, but is treated
 similar to the type 'sexp_option'. They assume the empty list, empty array,
 and false value respectively as default values.
   
+  More complex default values can be specified explicitly using several
+constructs, e.g.:
+<<  let z_test v = v > 42
+  
+    type t =
+      {
+        x : int with default(42);
+        y : int with default(3), sexp_drop_default;
+        z : int with default(3), sexp_drop_if(z_test);
+      } with sexp
+>>
+  
+  The 'default' record field extension above is supported by the underlying
+preprocessor library 'type_conv' and specifies the intended default value for
+a record field in its argument. Sexplib will use this information to generate
+code which will set this record field to the default value if an S-expression
+omits this field. If a record is converted to an S-expression, record fields
+with default values will be emitted as usual. Specifying 'sexp_drop_default'
+will add a test for polymorphic equality to the generated code such that a
+record field containing its default value will be suppressed in the resulting
+S-expression. This option requires the presence of a default value.
+'sexp_drop_if' on the other hand does not require a default. Its argument must
+be a function, which will receive the current record value. If the result of
+this function is 'true', then the record field will be suppressed in the
+resulting S-expression.
+  
+  The above extensions can be quite creatively used together with manifest
+types, functors, and first-class modules to make the emission of record fields
+or the definition of their default values configurable at runtime.
+  
 
 4.7  Conversion of sum types
 ============================

base/sexplib/doc/README.tex

   Copyright \quad (C) \quad \theyear \quad \janeshort \quad\\
   Author: Markus Mottl
 }
-\date{New York, 2012-03-20}
+\date{New York, 2012-04-20}
 
 % DOCUMENT
 \begin{document}
 
 \subsection{Lexical conventions of S-expression}
 
-Whitespace, which consists of space, newline, carriage return, horizontal
-tab and form feed, is ignored unless within an OCaml-string, where it
-is treated according to OCaml-conventions.  The semicolon introduces
-comments.  Comments are ignored, and range up to the next newline
-character.  The left parenthesis opens a new list, the right parenthesis
-closes it again.  Lists can be empty.  The double quote denotes the
-beginning and end of a string following the lexical conventions of OCaml
-(see OCaml-manual for details).  All characters other than double quotes,
-left- and right parentheses, and whitespace are considered part of a
-contiguous string.
+Whitespace, which consists of space, newline, horizontal tab, and form feed,
+is ignored unless within an OCaml-string, where it is treated according to
+OCaml-conventions.  The left parenthesis opens a new list, the right one closes
+it again.  Lists can be empty.  The double quote denotes the beginning and end
+of a string following the lexical conventions of OCaml (see the OCaml-manual
+for details).  All characters other than double quotes, left- and right
+parentheses, whitespace, carriage return, and comment-introducing characters
+or sequences (see next paragraph) are considered part of a contiguous string.\\
+\\
+A line comment is introduced using a semicolon, which comments out all
+text up to the end of the next newline character.  The sequence \verb=%;=
+introduces an S-expression comment.  This means that the next S-expression,
+which must be syntactically correct and may be an atom (quoted or unquoted)
+or list, following this two-character sequence will be ignored.  Whitespace
+or other comments between this sequence and the subsequent S-expression are
+ignored.  Block comments are opened with \verb=#|= and closed with \verb=|#=.
+They can be nested and require that double-quotes within the block balance
+and contain syntactically correct OCaml-strings, similar to quoted atoms.
+These OCaml-strings may contain comment characters without causing parsing
+problems.
 
 \subsection{Grammar of S-expressions}
 
     {
       x : int option;
       y : int sexp_option;
-    }
+    } with sexp
 \end{verbatim}
 
 The type \verb=sexp_option= is equivalent to ordinary options, but is
 \\
 The types \verb=sexp_list=, \verb=sexp_array=, and \verb=sexp_bool= can be
 used in ways similar to the type \verb=sexp_option=.  They assume the empty
-list, empty array, and false value respectively as default values.
+list, empty array, and false value respectively as default values.\\
+\\
+More complex default values can be specified explicitly using several
+constructs, e.g.:
+
+\begin{verbatim}
+  let z_test v = v > 42
+
+  type t =
+    {
+      x : int with default(42);
+      y : int with default(3), sexp_drop_default;
+      z : int with default(3), sexp_drop_if(z_test);
+    } with sexp
+\end{verbatim}
+
+\noindent The \verb=default= record field extension above is supported by the
+underlying preprocessor library \verb=type_conv= and specifies the intended
+default value for a record field in its argument.  Sexplib will use this
+information to generate code which will set this record field to the default
+value if an S-expression omits this field.  If a record is converted to an
+S-expression, record fields with default values will be emitted as usual.
+Specifying \verb=sexp_drop_default= will add a test for polymorphic equality
+to the generated code such that a record field containing its default value
+will be suppressed in the resulting S-expression.  This option requires the
+presence of a default value.  \verb=sexp_drop_if= on the other hand does
+not require a default.  Its argument must be a function, which will receive
+the current record value.  If the result of this function is \verb=true=,
+then the record field will be suppressed in the resulting S-expression.\\
+\\
+The above extensions can be quite creatively used together with manifest
+types, functors, and first-class modules to make the emission of record
+fields or the definition of their default values configurable at runtime.
 
 \subsection{Conversion of sum types}
 

base/sexplib/lib/conv.ml

         | Parse_error pe ->
             let ppos =
               match pe.parse_state with
-              | `Sexp { parse_pos } | `Annot { parse_pos } -> parse_pos
+              | `Sexp { parse_pos; _ } | `Annot { parse_pos; _ } -> parse_pos
             in
             List [
               Atom "Sexplib.Sexp.Parse_error";

base/sexplib/lib/lexer.mll

   open Parser
 
   let char_for_backslash = function
-    | 'n' -> '\n'
-    | 't' -> '\t'
-    | 'b' -> '\b'
-    | 'r' -> '\r'
-    | c   -> c
+    | 'n' -> '\010'
+    | 'r' -> '\013'
+    | 'b' -> '\008'
+    | 't' -> '\009'
+    | c -> c
 
-  let double_nl = "\013\010"
+  let lf = '\010'
 
   let dec_code c1 c2 c3 =
     100 * (Char.code c1 - 48) + 10 * (Char.code c2 - 48) + (Char.code c3 - 48)
       else d2 - 48 in
     val1 * 16 + val2
 
-  let found_newline lexbuf diff =
-    let curr_p = lexbuf.lex_curr_p in
+  let found_newline ({ lex_curr_p; _ } as lexbuf) diff =
     lexbuf.lex_curr_p <-
       {
-        curr_p with
-        pos_lnum = curr_p.pos_lnum + 1;
-        pos_bol = max 1 (curr_p.pos_cnum - diff);
+        lex_curr_p with
+        pos_lnum = lex_curr_p.pos_lnum + 1;
+        pos_bol = lex_curr_p.pos_cnum - diff;
       }
 
-  let get_lexeme_len lexbuf = lexbuf.lex_curr_pos - lexbuf.lex_start_pos
+  let lexeme_len lexbuf = lexeme_end lexbuf - lexeme_start lexbuf
+
+  let main_failure lexbuf msg =
+    let { pos_lnum; pos_bol; pos_cnum; _ } = lexeme_start_p lexbuf in
+    let msg =
+      sprintf
+        "Sexplib.Lexer.main: %s at line %d char %d"
+        msg pos_lnum (pos_cnum - pos_bol)
+    in
+    failwith msg
 }
 
-let newline = ('\010' | '\013' | "\013\010")
-let space = [' ' '\009' '\012']
-let whitespace = [' ' '\010' '\013' '\009' '\012']
-let backslash_escapes = ['\\' '"' '\'' 'n' 't' 'b' 'r']
+let lf = '\010'
+let lf_cr = ['\010' '\013']
+let dos_newline = "\013\010"
+let blank = [' ' '\009' '\012']
+let unquoted = [^ ';' '(' ')' '"'] # blank # lf_cr
+let digit = ['0'-'9']
+let hexdigit = digit | ['a'-'f' 'A'-'F']
+
+let unquoted_start =
+  unquoted # ['#' '|'] | '#' unquoted # ['|'] | '|' unquoted # ['#']
 
 rule main buf = parse
-  | newline { found_newline lexbuf 1; main buf lexbuf }
-  | space+ | ';' [^ '\n' '\r']* { main buf lexbuf }
+  | lf | dos_newline { found_newline lexbuf 0; main buf lexbuf }
+  | blank+ | ';' (_ # lf_cr)* { main buf lexbuf }
   | '(' { LPAREN }
   | ')' { RPAREN }
   | '"'
       {
-        scan_string buf lexbuf;
+        scan_string buf (lexeme_start_p lexbuf) lexbuf;
         let str = Buffer.contents buf in
         Buffer.clear buf;
         STRING str
       }
-  | ([^ ';' '(' ')' '"'] # whitespace)+ as str { STRING str }
+  | "#;" { SEXP_COMMENT }
+  | "#|"
+      {
+        scan_block_comment buf [lexeme_start_p lexbuf] lexbuf;
+        main buf lexbuf
+      }
+  | "|#" { main_failure lexbuf "illegal end of comment" }
+  | unquoted_start unquoted* ("#|" | "|#") unquoted*
+      { main_failure lexbuf "comment tokens in unquoted atom" }
+  | "#" | unquoted_start unquoted* as str { STRING str }
   | eof { EOF }
 
-and scan_string buf = parse
+and scan_string buf start = parse
   | '"' { () }
-  | '\\' ['\010' '\013'] [' ' '\009']*
+  | '\\' lf [' ' '\t']*
       {
-        let len = get_lexeme_len lexbuf in
-        found_newline lexbuf (len - 2);
-        scan_string buf lexbuf
+        found_newline lexbuf (lexeme_len lexbuf - 2);
+        scan_string buf start lexbuf
       }
-  | '\\' "\013\010" [' ' '\009']*
+  | '\\' dos_newline [' ' '\t']*
       {
-        let len = get_lexeme_len lexbuf in
-        found_newline lexbuf (len - 3);
-        scan_string buf lexbuf
+        found_newline lexbuf (lexeme_len lexbuf - 3);
+        scan_string buf start lexbuf
       }
-  | '\\' (backslash_escapes as c)
+  | '\\' (['\\' '\'' '"' 'n' 't' 'b' 'r' ' '] as c)
       {
         Buffer.add_char buf (char_for_backslash c);
-        scan_string buf lexbuf
+        scan_string buf start lexbuf
       }
-  | '\\' (['0'-'9'] as c1) (['0'-'9'] as c2) (['0'-'9']  as c3)
+  | '\\' (digit as c1) (digit as c2) (digit as c3)
       {
         let v = dec_code c1 c2 c3 in
         if v > 255 then (
-          let pos = lexbuf.lex_curr_p in
+          let { pos_lnum; pos_bol; pos_cnum; _ } = lexeme_end_p lexbuf in
           let msg =
             sprintf
               "Sexplib.Lexer.scan_string: \
                illegal escape at line %d char %d: `\\%c%c%c'"
-              pos.pos_lnum (pos.pos_cnum - pos.pos_bol - 3)
+              pos_lnum (pos_cnum - pos_bol - 3)
               c1 c2 c3 in
           failwith msg);
         Buffer.add_char buf (Char.chr v);
-        scan_string buf lexbuf
+        scan_string buf start lexbuf
       }
-  | '\\' 'x' (['0'-'9' 'a'-'f' 'A'-'F'] as c1) (['0'-'9' 'a'-'f' 'A'-'F'] as c2)
+  | '\\' 'x' (hexdigit as c1) (hexdigit as c2)
       {
         let v = hex_code c1 c2 in
-        if v > 255 then (
-          let pos = lexbuf.lex_curr_p in
-          let msg =
-            sprintf
-              "Sexplib.Lexer.scan_string: \
-               illegal escape at line %d char %d: `\\x%c%c'"
-              pos.pos_lnum (pos.pos_cnum - pos.pos_bol - 3)
-              c1 c2 in
-          failwith msg);
         Buffer.add_char buf (Char.chr v);
-        scan_string buf lexbuf
+        scan_string buf start lexbuf
       }
   | '\\' (_ as c)
       {
         Buffer.add_char buf '\\';
         Buffer.add_char buf c;
-        scan_string buf lexbuf
+        scan_string buf start lexbuf
       }
-  | ['\010' '\013'] as c
+  | lf
       {
-        found_newline lexbuf 1;
-        Buffer.add_char buf c;
-        scan_string buf lexbuf
+        found_newline lexbuf 0;
+        Buffer.add_char buf lf;
+        scan_string buf start lexbuf
       }
-  | "\013\010"
+  | ([^ '\\' '"'] # lf)+
       {
-        found_newline lexbuf 2;
-        Buffer.add_string buf double_nl;
-        scan_string buf lexbuf
+        let ofs = lexeme_start lexbuf in
+        let len = lexeme_end lexbuf - ofs in
+        Buffer.add_substring buf lexbuf.lex_buffer ofs len;
+        scan_string buf start lexbuf
       }
-  | [^ '\\' '"']+
+  | eof
       {
-        let ofs = lexbuf.lex_start_pos in
-        let len = lexbuf.lex_curr_pos - ofs in
-        Buffer.add_substring buf lexbuf.lex_buffer ofs len;
-        scan_string buf lexbuf
+        let msg =
+          sprintf
+            "Sexplib.Lexer.scan_string: unterminated string at line %d char %d"
+            start.pos_lnum (start.pos_cnum - start.pos_bol)
+        in
+        failwith msg
       }
-  | eof { failwith "Sexplib.Lexer.scan_string: unterminated string" }
+
+and scan_block_comment buf locs = parse
+  | ('#'* | '|'*) lf
+      { found_newline lexbuf 0; scan_block_comment buf locs lexbuf }
+  | (('#'* | '|'*) [^ '"' '#' '|'] # lf)+ { scan_block_comment buf locs lexbuf }
+  | ('#'* | '|'*) '"'
+      {
+        let cur = lexeme_end_p lexbuf in
+        let start = { cur with pos_cnum = cur.pos_cnum - 1 } in
+        scan_string buf start lexbuf;
+        Buffer.clear buf;
+        scan_block_comment buf locs lexbuf
+      }
+  | '#'+ '|'
+    {
+      let cur = lexeme_end_p lexbuf in
+      let start = { cur with pos_cnum = cur.pos_cnum - 2 } in
+      scan_block_comment buf (start :: locs) lexbuf
+    }
+  | '|'+ '#'
+      {
+        match locs with
+        | [_] -> ()
+        | _ :: t -> scan_block_comment buf t lexbuf
+        | [] -> assert false  (* impossible *)
+      }
+  | eof
+      {
+        match locs with
+        | [] -> assert false
+        | { pos_lnum; pos_bol; pos_cnum; _ } :: _ ->
+            let msg =
+              sprintf "Sexplib.Lexer.scan_block_comment: \
+                unterminated block comment at line %d char %d"
+                pos_lnum (pos_cnum - pos_bol)
+            in
+            failwith msg
+      }
 
 {
   let main ?buf =

base/sexplib/lib/parser.mly

 %}
 
 %token <string> STRING
-%token LPAREN RPAREN EOF
+%token LPAREN RPAREN SEXP_COMMENT EOF
 
 %start sexp
 %type <Type.t> sexp
   | LPAREN rev_sexps_aux RPAREN { Type.List (List.rev $2) }
   | error { parse_failure "sexp" }
 
+sexp_comment
+  : SEXP_COMMENT sexp { () }
+  | SEXP_COMMENT sexp_comments sexp { () }
+
+sexp_comments
+  : sexp_comment { () }
+  | sexp_comments sexp_comment { () }
+
 sexp_opt
   : sexp { Some $1 }
+  | sexp_comments sexp { Some $2 }
   | EOF { None }
 
 rev_sexps_aux
   : sexp { [$1] }
+  | sexp_comment { [] }
   | rev_sexps_aux sexp { $2 :: $1 }
+  | rev_sexps_aux sexp_comment { $1 }
 
 rev_sexps
   : rev_sexps_aux { $1 }

base/sexplib/lib/pre_sexp.ml

 
 (* Escaping of strings used as atoms in S-expressions *)
 
-let is_special_char c =
-  c <= ' ' || c = '"' || c = '(' || c = ')' || c = ';' || c = '\\'
-
 let must_escape str =
   let len = String.length str in
   len = 0 ||
-    let rec loop ix = is_special_char str.[ix] || ix > 0 && loop (ix - 1) in
+    let rec loop ix =
+      match str.[ix] with
+      | '"' | '(' | ')' | ';' | '\\' -> true
+      | '|' -> ix > 0 && let next = ix - 1 in str.[next] = '#' || loop next
+      | '#' -> ix > 0 && let next = ix - 1 in str.[next] = '|' || loop next
+      | c -> c <= ' ' || ix > 0 && loop (ix - 1)
+    in
     loop (len - 1)
 
 let maybe_esc_str str =
 let get_main_buf buf =
   let buf =
     match buf with
-    | None -> Buffer.create 64
+    | None -> Buffer.create 128
     | Some buf -> buf in
   Lexer.main ~buf
 
   let with_buf_pos t buf_pos = { t with buf_pos }
 end
 
+module Cont_state = struct
+  type t =
+    | Parsing_whitespace
+    | Parsing_atom
+    | Parsing_list
+    | Parsing_sexp_comment
+    | Parsing_block_comment
+
+  let to_string = function
+    | Parsing_whitespace -> "Parsing_whitespace"
+    | Parsing_atom -> "Parsing_atom"
+    | Parsing_list -> "Parsing_list"
+    | Parsing_sexp_comment -> "Parsing_sexp_comment"
+    | Parsing_block_comment -> "Parsing_block_comment"
+end
+
 type ('a, 't) parse_result =
   | Done of 't * Parse_pos.t
-  | Cont of bool * ('a, 't) parse_fun
+  | Cont of Cont_state.t * ('a, 't) parse_fun
 
 and ('a, 't) parse_fun = pos : int -> len : int -> 'a -> ('a, 't) parse_result
 
 
 exception Parse_error of parse_error
 
-let bump_text_line { parse_pos } =
+let bump_text_line { parse_pos; _ } =
   parse_pos.Parse_pos.text_line <- parse_pos.Parse_pos.text_line + 1;
   parse_pos.Parse_pos.text_char <- 0
 
-let bump_text_pos { parse_pos } =
+let bump_text_pos { parse_pos; _ } =
   parse_pos.Parse_pos.text_char <- parse_pos.Parse_pos.text_char + 1
 
 let bump_pos_cont state str ~max_pos ~pos cont =
   parse_pos.Parse_pos.buf_pos <- buf_pos;
   parse_pos.Parse_pos.global_offset <- parse_pos.Parse_pos.global_offset + len
 
-let mk_parse_pos { parse_pos } buf_pos =
+let mk_parse_pos { parse_pos; _ } buf_pos =
   set_parse_pos parse_pos buf_pos;
   parse_pos
 
 let raise_parse_error parse_state location buf_pos err_msg =
-  begin
-    match parse_state with
-    | `Sexp { parse_pos } | `Annot { parse_pos } ->
-        set_parse_pos parse_pos buf_pos;
-        parse_pos.Parse_pos.text_char <- parse_pos.Parse_pos.text_char + 1;
-  end;
-  let parse_error = { location; err_msg; parse_state } in
-  raise (Parse_error parse_error)
+  match parse_state with
+  | `Sexp { parse_pos; _ } | `Annot { parse_pos; _ } ->
+      set_parse_pos parse_pos buf_pos;
+      let parse_error = { location; err_msg; parse_state } in
+      raise (Parse_error parse_error)
 
 let raise_unexpected_char parse_state location buf_pos c =
   let err_msg = sprintf "unexpected character: '%c'" c in
   raise_parse_error parse_state location buf_pos err_msg
 
-(* The code below is derived from lexer.mll in the OCaml distribution,
-   which also uses ASCII codes instead of escape sequences to denote
-   special characters. *)
+let mk_cont_parser cont_parse = (); fun _state str ~max_pos ~pos ->
+  let len = max_pos - pos + 1 in
+  cont_parse ~pos ~len str
 
 (* Macro for generating parsers *)
 #define MK_PARSER( \
     if pos_len > str_len then invalid_arg (loc ^ ": pos + len > str_len"); \
     pos_len - 1 \
   \
-  let mk_cont name cont state = \
-    let ws_only = GET_PSTACK = [] && Buffer.length state.pbuf = 0 in \
+  let mk_cont_state name cont state ~cont_state = \
     let parse_fun = \
       let used_ref = ref false in \
       fun ~pos ~len str -> \
           cont state str ~max_pos ~pos \
         end \
     in \
-    Cont (ws_only, parse_fun) \
+    Cont (cont_state, parse_fun) \
+  \
+  let mk_cont name cont state = \
+    let cont_state = \
+      match GET_PSTACK = [], Buffer.length state.pbuf = 0 with \
+      | true, true -> Cont_state.Parsing_whitespace \
+      | false, true -> Cont_state.Parsing_list \
+      | _, false -> Cont_state.Parsing_atom \
+    in \
+    mk_cont_state name cont state ~cont_state \
   \
   let rec PARSE state str ~max_pos ~pos = \
     if pos > max_pos then mk_cont "parse" PARSE state \
                   bump_pos_cont state str ~max_pos ~pos PARSE) \
       | ' ' | '\009' | '\012' -> bump_pos_cont state str ~max_pos ~pos PARSE \
       | '\010' -> bump_line_cont state str ~max_pos ~pos PARSE \
-      | '\013' -> bump_line_cont state str ~max_pos ~pos parse_nl \
+      | '\013' -> bump_pos_cont state str ~max_pos ~pos parse_nl \
       | ';' -> bump_pos_cont state str ~max_pos ~pos parse_comment \
       | '"' -> \
           REGISTER_POS1 \
           bump_pos_cont state str ~max_pos ~pos parse_quoted \
       | c -> \
           REGISTER_POS \
-          add_bump_pos state str ~max_pos ~pos c parse_atom \
+          let parse = \
+            match c with \
+            | '#' -> maybe_parse_comment \
+            | '|' -> maybe_parse_close_comment \
+            | _ -> parse_atom \
+          in \
+          add_bump_pos state str ~max_pos ~pos c parse \
   \
   and parse_nl state str ~max_pos ~pos = \
     if pos > max_pos then mk_cont "parse_nl" parse_nl state \
     else \
-      let pos = if GET_CHAR = '\010' then pos + 1 else pos in \
-      PARSE state str ~max_pos ~pos \
+      let c = GET_CHAR in \
+      if c = '\010' then bump_line_cont state str ~max_pos ~pos PARSE \
+      else raise_unexpected_char (MK_PARSE_STATE state) "parse_nl" pos c \
   \
   and parse_comment state str ~max_pos ~pos = \
     if pos > max_pos then mk_cont "parse_comment" parse_comment state \
     else \
       match GET_CHAR with \
       | '\010' -> bump_line_cont state str ~max_pos ~pos PARSE \
-      | '\013' -> bump_line_cont state str ~max_pos ~pos parse_nl \
+      | '\013' -> bump_pos_cont state str ~max_pos ~pos parse_nl \
       | _ -> bump_pos_cont state str ~max_pos ~pos parse_comment \
   \
+  and maybe_parse_comment state str ~max_pos ~pos = \
+    if pos > max_pos then \
+      mk_cont "maybe_parse_comment" maybe_parse_comment state \
+    else \
+      match GET_CHAR with \
+      | ';' -> bump_pos_cont state str ~max_pos ~pos parse_sexp_comment \
+      | '|' -> bump_pos_cont state str ~max_pos ~pos parse_block_comment \
+      | _ -> parse_atom state str ~max_pos ~pos \
+  \
+  and maybe_parse_close_comment state str ~max_pos ~pos = \
+    if pos > max_pos then \
+      mk_cont "maybe_parse_close_comment" maybe_parse_close_comment state \
+    else \
+      if GET_CHAR <> '#' then parse_atom state str ~max_pos ~pos \
+      else \
+        let err_msg = "end of block comment without start" in \
+        raise_parse_error (MK_PARSE_STATE state) \
+          "maybe_parse_close_comment" pos err_msg \
+  \
+  and parse_sexp_comment state str ~max_pos ~pos = \
+    let pbuf_str = "" in \
+    ignore (MK_ATOM); \
+    let old_pstack = GET_PSTACK in \
+    let pstack = [] in \
+    SET_PSTACK; \
+    let rec loop parse state str ~max_pos ~pos = \
+      Buffer.clear state.pbuf; \
+      match parse state str ~max_pos ~pos with \
+      | Done (_sexp, { Parse_pos.buf_pos = pos; _ }) -> \
+          Buffer.clear state.pbuf; \
+          let pstack = old_pstack in \
+          SET_PSTACK; \
+          PARSE state str ~max_pos ~pos \
+      | Cont (_, cont_parse) -> \
+          let parse = mk_cont_parser cont_parse in \
+          mk_cont_state "parse_sexp_comment" (loop parse) state \
+            ~cont_state:Cont_state.Parsing_sexp_comment \
+    in \
+    loop PARSE state str ~max_pos ~pos \
+  \
+  and parse_block_comment ({ pbuf; _ } as state) str ~max_pos ~pos = \
+    let pbuf_str = "" in \
+    ignore (MK_ATOM); \
+    Buffer.clear pbuf; \
+    let rec loop depth state str ~max_pos ~pos = \
+      let rec parse_block_depth state str ~max_pos ~pos = \
+        if pos > max_pos then \
+          mk_cont "parse_block_depth" parse_block_depth state \
+        else \
+          match GET_CHAR with \
+          | '\010' -> bump_line_cont state str ~max_pos ~pos parse_block_depth \
+          | '"' -> \
+              let rec parse_block_quote parse state str ~max_pos ~pos = \
+                match parse state str ~max_pos ~pos with \
+                | Done (_sexp, { Parse_pos.buf_pos = pos; _ }) -> \
+                    Buffer.clear pbuf; \
+                    parse_block_depth state str ~max_pos ~pos \
+                | Cont (_, cont_parse) -> \
+                    let parse = mk_cont_parser cont_parse in \
+                    mk_cont_state "parse_block_quote" \
+                      (parse_block_quote parse) state \
+                      ~cont_state:Cont_state.Parsing_block_comment \
+              in \
+              bump_pos_cont state str ~max_pos ~pos \
+                (parse_block_quote parse_quoted) \
+          | '#' -> bump_pos_cont state str ~max_pos ~pos parse_open_block \
+          | '|' -> bump_pos_cont state str ~max_pos ~pos parse_close_block \
+          | _ -> bump_pos_cont state str ~max_pos ~pos parse_block_depth \
+      and parse_open_block state str ~max_pos ~pos = \
+        if pos > max_pos then \
+          mk_cont "parse_open_block" parse_open_block state \
+        else \
+          if GET_CHAR = '|' then \
+            bump_pos_cont state str ~max_pos ~pos (loop (depth + 1)) \
+          else parse_block_depth state str ~max_pos ~pos \
+      and parse_close_block state str ~max_pos ~pos = \
+        if pos > max_pos then \
+          mk_cont "parse_close_block" parse_close_block state \
+        else \
+          if GET_CHAR = '#' then \
+            let parse = if depth = 1 then PARSE else loop (depth - 1) in \
+            bump_pos_cont state str ~max_pos ~pos parse \
+          else parse_block_depth state str ~max_pos ~pos \
+      in \
+      parse_block_depth state str ~max_pos ~pos \
+    in \
+    loop 1 state str ~max_pos ~pos \
+  \
   and parse_atom state str ~max_pos ~pos = \
     if pos > max_pos then mk_cont "parse_atom" parse_atom state \
     else \
       match GET_CHAR with \
       | ' ' | '\009' | '\012' -> \
           bump_found_atom bump_text_pos state str ~max_pos ~pos PARSE \
+      | '#' as c -> \
+          add_bump_pos state str ~max_pos ~pos c maybe_parse_bad_atom_hash \
+      | '|' as c -> \
+          add_bump_pos state str ~max_pos ~pos c maybe_parse_bad_atom_pipe \
       | '(' -> \
           let pbuf = state.pbuf in \
           let pbuf_str = Buffer.contents pbuf in \
                   bump_pos_cont state str ~max_pos ~pos PARSE) \
       | '\010' -> bump_found_atom bump_text_line state str ~max_pos ~pos PARSE \
       | '\013' -> \
-          bump_found_atom bump_text_line state str ~max_pos ~pos parse_nl \
+          bump_found_atom bump_text_pos state str ~max_pos ~pos parse_nl \
       | ';' -> \
           bump_found_atom bump_text_pos state str ~max_pos ~pos parse_comment \
       | '"' -> \
             bump_text_pos state str ~max_pos ~pos reg_parse_quoted \
       | c -> add_bump_pos state str ~max_pos ~pos c parse_atom \
   \
+  and maybe_parse_bad_atom_pipe state str ~max_pos ~pos = \
+    if pos > max_pos then \
+      mk_cont "maybe_parse_bad_atom_pipe" maybe_parse_bad_atom_pipe state \
+    else \
+      match GET_CHAR with \
+      | '#' -> \
+          let err_msg = "illegal end of block comment in unquoted atom" in \
+          raise_parse_error (MK_PARSE_STATE state) "maybe_parse_bad_atom_pipe" \
+            pos err_msg \
+      | _ -> parse_atom state str ~max_pos ~pos \
+  \
+  and maybe_parse_bad_atom_hash state str ~max_pos ~pos = \
+    if pos > max_pos then \
+      mk_cont "maybe_parse_bad_atom_hash" maybe_parse_bad_atom_hash state \
+    else \
+      match GET_CHAR with \
+      | '|' -> \
+          let err_msg = "illegal start of block comment in unquoted atom" in \
+          raise_parse_error (MK_PARSE_STATE state) "maybe_parse_bad_atom_hash" \
+            pos err_msg \
+      | _ -> parse_atom state str ~max_pos ~pos \
+  \
   and reg_parse_quoted state str ~max_pos ~pos = \
     REGISTER_POS \
     parse_quoted state str ~max_pos ~pos \
+  \
   and parse_quoted state str ~max_pos ~pos = \
     if pos > max_pos then mk_cont "parse_quoted" parse_quoted state \
     else \
               bump_pos_cont state str ~max_pos ~pos PARSE) \
       | '\\' -> bump_pos_cont state str ~max_pos ~pos parse_escaped \
       | '\010' as c -> add_bump_line state str ~max_pos ~pos c parse_quoted \
-      | '\013' as c -> add_bump_line state str ~max_pos ~pos c parse_quoted_nl \
       | c -> add_bump_pos state str ~max_pos ~pos c parse_quoted \
   \
-  and parse_quoted_nl state str ~max_pos ~pos = \
-    if pos > max_pos then mk_cont "parse_quoted_nl" parse_quoted_nl state \
-    else \
-      let pos = \
-        let c = '\010' in \
-        if GET_CHAR = c then ( \
-          Buffer.add_char state.pbuf c; \
-          pos + 1 \
-        ) \
-        else pos \
-      in \
-      parse_quoted state str ~max_pos ~pos \
-  \
   and parse_escaped state str ~max_pos ~pos = \
     if pos > max_pos then mk_cont "parse_escaped" parse_escaped state \
     else \
       match GET_CHAR with \
       | '\010' -> bump_line_cont state str ~max_pos ~pos parse_skip_ws \
-      | '\013' -> bump_line_cont state str ~max_pos ~pos parse_skip_ws_nl \
+      | '\013' -> bump_pos_cont state str ~max_pos ~pos parse_skip_ws_nl \
       | '0' .. '9' as c -> \
           bump_text_pos state; \
           let d = Char.code c - 48 in \
   and parse_skip_ws_nl state str ~max_pos ~pos = \
     if pos > max_pos then mk_cont "parse_skip_ws_nl" parse_skip_ws_nl state \
     else \
-      let pos = if GET_CHAR = '\010' then pos + 1 else pos in \
-      parse_skip_ws state str ~max_pos ~pos \
+      if GET_CHAR = '\010' then \
+        bump_line_cont state str ~max_pos ~pos parse_skip_ws \
+      else begin \
+        Buffer.add_char state.pbuf '\013'; \
+        parse_quoted state str ~max_pos ~pos \
+      end \
   \
   and parse_dec state str ~max_pos ~pos ~count ~d = \
     if pos > max_pos then mk_cont "parse_dec" (parse_dec ~count ~d) state \
   parse_pos.Parse_pos.global_offset + pos - parse_pos.Parse_pos.buf_pos
 
 let mk_annot_pos
-      ({ Parse_pos.text_line = line; text_char = col } as parse_pos) pos =
+      ({ Parse_pos.text_line = line; text_char = col; _ } as parse_pos) pos =
   { Annot.line; col; offset = get_glob_ofs parse_pos pos }
 
 let mk_annot_pos1
-      ({ Parse_pos.text_line = line; text_char = col } as parse_pos) pos =
+      ({ Parse_pos.text_line = line; text_char = col; _ } as parse_pos) pos =
   { Annot.line; col = col + 1; offset = get_glob_ofs parse_pos pos }
 
-let add_annot_pos { parse_pos; pstack } pos =
+let add_annot_pos { parse_pos; pstack; _ } pos =
   pstack.Annot.positions <- mk_annot_pos parse_pos pos :: pstack.Annot.positions
 
-let add_annot_pos1 { parse_pos; pstack } pos =
+let add_annot_pos1 { parse_pos; pstack; _ } pos =
   pstack.Annot.positions <-
     mk_annot_pos1 parse_pos pos :: pstack.Annot.positions
 
-let get_annot_range { parse_pos; pstack } pos =
+let get_annot_range { parse_pos; pstack; _ } pos =
   let start_pos =
     match pstack.Annot.positions with
     | [] -> assert false  (* impossible *)
 let gen_input_rev_sexps my_parse ?parse_pos ?(buf = String.create 8192) ic =
   let rev_sexps_ref = ref [] in
   let buf_len = String.length buf in
-  let rec loop this_parse ~pos ~len ~is_incomplete =
+  let rec loop this_parse ~pos ~len ~cont_state =
     if len > 0 then
       match this_parse ~pos ~len buf with
-      | Done (sexp, ({ Parse_pos.buf_pos } as parse_pos)) ->
+      | Done (sexp, ({ Parse_pos.buf_pos; _ } as parse_pos)) ->
           rev_sexps_ref := sexp :: !rev_sexps_ref;
           let n_parsed = buf_pos - pos in
           let this_parse = mk_this_parse ~parse_pos my_parse in
+          let cont_state = Cont_state.Parsing_whitespace in
           if n_parsed = len then
             let new_len = input ic buf 0 buf_len in
-            loop this_parse ~pos:0 ~len:new_len ~is_incomplete:false
-          else
-            loop this_parse
-              ~pos:buf_pos ~len:(len - n_parsed) ~is_incomplete:false
-      | Cont (ws_only, this_parse) ->
-          loop this_parse
-            ~pos:0 ~len:(input ic buf 0 buf_len) ~is_incomplete:(not ws_only)
-    else if is_incomplete then
-      failwith
-        "Sexplib.Sexp.input_rev_sexps: reached EOF with incomplete S-expression"
-    else !rev_sexps_ref
+            loop this_parse ~pos:0 ~len:new_len ~cont_state
+          else loop this_parse ~pos:buf_pos ~len:(len - n_parsed) ~cont_state
+      | Cont (cont_state, this_parse) ->
+          loop this_parse ~pos:0 ~len:(input ic buf 0 buf_len) ~cont_state
+    else
+      if cont_state = Cont_state.Parsing_whitespace then !rev_sexps_ref
+      else
+        failwith (
+          "Sexplib.Sexp.input_rev_sexps: reached EOF while in state "
+          ^ Cont_state.to_string cont_state)
   in
   let len = input ic buf 0 buf_len in
   let this_parse = mk_this_parse ?parse_pos my_parse in
-  loop this_parse ~pos:0 ~len ~is_incomplete:false
+  loop this_parse ~pos:0 ~len ~cont_state:Cont_state.Parsing_whitespace
 
 let input_rev_sexps ?parse_pos ?buf ic =
   gen_input_rev_sexps parse ?parse_pos ?buf ic
 
 let of_string_bigstring loc this_parse ws_buf get_len get_sub str =
   match this_parse str with
-  | Done (_, { Parse_pos.buf_pos }) when buf_pos <> get_len str ->
+  | Done (_, { Parse_pos.buf_pos; _ }) when buf_pos <> get_len str ->
       let prefix_len = min (get_len str - buf_pos) 20 in
       let prefix = get_sub str buf_pos prefix_len in
       let msg =
       in
       failwith msg
   | Done (sexp, _) -> sexp
-  | Cont (ws_only, this_parse) ->
-      if ws_only then failwith (sprintf "Sexplib.Sexp.%s: whitespace only" loc);
+  | Cont (_, this_parse) ->
       (* When parsing atoms, the incremental parser cannot tell whether
-         it is at the end until it hits whitespace.  We therefore feed
-         it one space to determine whether it is finished. *)
+         it is at the end until it hits whitespace.  We therefore feed it
+         one space to determine whether it is finished. *)
       match this_parse ~pos:0 ~len:1 ws_buf with
       | Done (sexp, _) -> sexp
-      | Cont _ ->
+      | Cont (cont_state, _) ->
+          let cont_state_str = Cont_state.to_string cont_state in
           failwith (
-            sprintf "Sexplib.Sexp.%s: got incomplete S-expression: %s"
-              loc (get_sub str 0 (get_len str)))
+            sprintf
+              "Sexplib.Sexp.%s: incomplete S-expression while in state %s: %s"
+              loc cont_state_str (get_sub str 0 (get_len str)))
 
 let of_string str =
   of_string_bigstring "of_string" parse " " String.length String.sub str
 let gen_load_sexp my_parse ?(strict = true) ?(buf = String.create 8192) file =
   let buf_len = String.length buf in
   let ic = open_in file in
-  let rec loop this_parse =
+  let rec loop this_parse ~cont_state =
     let len = input ic buf 0 buf_len in
     if len = 0 then
-      failwith (sprintf "Sexplib.Sexp.gen_load_sexp: end of file: %s" file)
+      failwith (
+        sprintf "Sexplib.Sexp.gen_load_sexp: EOF in %s while in state %s"
+          file (Cont_state.to_string cont_state))
     else
       match this_parse ~pos:0 ~len buf with
-      | Done (sexp, ({ Parse_pos.buf_pos } as parse_pos))
-        when strict ->
+      | Done (sexp, ({ Parse_pos.buf_pos; _ } as parse_pos)) when strict ->
           let rec strict_loop this_parse ~pos ~len =
             match this_parse ~pos ~len buf with
-            | Done _ | Cont (false, _) ->
+            | Done _ ->
                 failwith (
                   sprintf
-                    "Sexplib.Sexp.gen_load_sexp: more than one S-expression: %s"
-                      file)
-            | Cont (true, this_parse) ->
+                    "Sexplib.Sexp.gen_load_sexp: \
+                     more than one S-expression in file %s"
+                    file)
+            | Cont (cont_state, this_parse) ->
                 let len = input ic buf 0 buf_len in
-                if len = 0 then sexp
-                else strict_loop this_parse ~pos:0 ~len
+                if len > 0 then strict_loop this_parse ~pos:0 ~len
+                else if cont_state = Cont_state.Parsing_whitespace then sexp
+                else
+                  failwith (
+                    sprintf
+                      "Sexplib.Sexp.gen_load_sexp: \
+                      additional incomplete data in state %s loading file %s"
+                      (Cont_state.to_string cont_state) file)
           in
           let this_parse = mk_this_parse ~parse_pos my_parse in
           strict_loop this_parse ~pos:buf_pos ~len:(len - buf_pos)
       | Done (sexp, _) -> sexp
-      | Cont (_, this_parse) -> loop this_parse
+      | Cont (cont_state, this_parse) -> loop this_parse ~cont_state
   in
   try
-    let sexp = loop (mk_this_parse my_parse) in
+    let sexp =
+      loop (mk_this_parse my_parse) ~cont_state:Cont_state.Parsing_whitespace
+    in
     close_in ic;
     sexp
   with exc -> close_in_noerr ic; raise exc
 
   let get_conv_exn ~file ~exc annot_sexp =
     let range = get_range annot_sexp in
-    let { start_pos = { line; col } } = range in
+    let { start_pos = { line; col; _ }; _ } = range in
     let loc = sprintf "%s:%d:%d" file line col in
     Of_sexp_error (Annot.Conv_exn (loc, exc), get_sexp annot_sexp)
 end

base/sexplib/lib/sexp_intf.ml

         [buf_pos] is set to [pos]. *)
   end
 
+  module Cont_state : sig
+    (** State of parser continuations *)
+    type t = Pre_sexp.Cont_state.t =
+      | Parsing_whitespace
+      | Parsing_atom
+      | Parsing_list
+      | Parsing_sexp_comment
+      | Parsing_block_comment
+
+    val to_string : t -> string
+    (** [to_string cont_state] converts state of parser continuation
+        [cont_state] to a string. *)
+  end
+
   (** Type of result from calling {!Sexp.parse}. *)
   type ('a, 't) parse_result = ('a, 't) Pre_sexp.parse_result =
     | Done of 't * Parse_pos.t  (** [Done (t, parse_pos)] finished parsing
                                     an S-expression.  Current parse position
                                     is [parse_pos]. *)
-    | Cont of bool * ('a, 't) parse_fun
-      (** [Cont (ws_only, parse_fun)] met the end of input before completely
+    | Cont of Cont_state.t * ('a, 't) parse_fun
+      (** [Cont (cont_state, parse_fun)] met the end of input before completely
           parsing an S-expression.  The user has to call [parse_fun] to
-          continue parsing the S-expression in another buffer.  If [ws_only]
-          is true, only whitespace has been parsed so far (or comments!).
+          continue parsing the S-expression in another buffer.  [cont_state]
+          is the current parsing state of the continuation.
           NOTE: the continuation may only be called once and will raise
           [Failure] otherwise! *)
 

base/sexplib/lib_test/conv_test.ml

 
 type labeled = string -> foo : unit -> ?bar : int -> float -> float with sexp
 
-let f str ~foo ?(bar = 3) n = float_of_string str +. n +. float bar
+let f str ~foo:_ ?(bar = 3) n = float_of_string str +. n +. float bar
 
 let labeled_sexp : Sexp.t = sexp_of_labeled f
 let labeled : labeled lazy_t = lazy (labeled_of_sexp (labeled_sexp : Sexp.t))

base/sexplib/lib_test/test.sexp

 
   "This string contains decimal \255, hex \xff codes, \
    and other \\ \n escapes."
+
+  A# # ## #x|
 )
+
+; Line comment
+
+#; (
+  S-expression comment
+)
+
+#| #| Nested |# block comment "|#" |#

base/sexplib/oasis.sh

             (S [A "-ppopt"; P "syntax/pa_sexp_conv.cma"]);
           flag ["compile"; "ocaml"; "use_pa_sexp_conv"]
             (S [A "-ppopt"; P "syntax/pa_sexp_conv.cma"]);
+          flag ["compile"; "ocaml"] (S [A "-w"; A "@Ae" ]);
           dispatch_default e
       | e -> dispatch_default e
   end

base/sexplib/syntax/pa_sexp_conv.ml

 open Camlp4
 open PreCast
 
+open Syntax
+
 module Gen = Pa_type_conv.Gen
 
 (* Utility functions *)
 
 (* Generator for converters of OCaml-values to S-expressions *)
 module Generate_sexp_of = struct
+  (* Handling of record defaults *)
+
+  type record_field_handler = [ `keep | `drop_default | `drop_if of Ast.expr ]
+
+  let record_field_handlers = Hashtbl.create 0
+
+  let get_record_field_handler loc =
+    try Hashtbl.find record_field_handlers loc
+    with Not_found -> `keep
+
+  let check_record_field_handler loc =
+    if Hashtbl.mem record_field_handlers loc then
+      Loc.raise loc (Failure "sexp record field handler defined twice")
+
+  let () =
+    Pa_type_conv.add_record_field_generator "sexp_drop_default" (fun loc ->
+      check_record_field_handler loc;
+      Hashtbl.replace record_field_handlers ~key:loc ~data:`drop_default)
+
+  let () =
+    Pa_type_conv.add_record_field_generator_with_arg "sexp_drop_if"
+      Syntax.expr (fun expr_opt loc ->
+        check_record_field_handler loc;
+        let test =
+          match expr_opt with
+          | Some expr -> expr
+          | None -> Loc.raise loc (Failure "could not parse expression")
+        in
+        Hashtbl.replace record_field_handlers ~key:loc ~data:(`drop_if test))
+
+  (* Make abstract calls *)
   let mk_abst_call loc tn rev_path =
     <:expr@loc<
       $id:Gen.ident_of_rev_path loc (("sexp_of_" ^ tn) :: rev_path)$
     let p = <:patt@loc< $lid:name$ = $lid:"v_" ^ name$ >> in
     <:patt@loc< $patt$; $p$ >>
 
-  let sexp_of_default_field patt expr name tp sexp_of empty =
+  let sexp_of_record_field patt expr name tp ?sexp_of test =
     let loc = Ast.loc_of_ctyp tp in
     let patt = mk_rec_patt loc patt name in
     let cnv_expr =
       | `Match matchings ->
           <:expr@loc< fun el -> match el with [ $matchings$ ] >>
     in
+    let cnv_expr =
+      match sexp_of with
+      | None -> cnv_expr
+      | Some sexp_of -> <:expr@loc< $sexp_of$ $cnv_expr$ >>
+    in
     let expr =
       let v_name = <:expr@loc< $lid: "v_" ^ name$ >> in
       <:expr@loc<
         let bnds =
-          if $v_name$ = $empty$ then bnds
+          if $test$ $v_name$ then bnds
           else
-            let arg = $sexp_of$ $cnv_expr$ $v_name$ in
+            let arg = $cnv_expr$ $v_name$ in
             let bnd =
               Sexplib.Sexp.List [Sexplib.Sexp.Atom $str:name$; arg]
             in
     in
     patt, expr
 
+  let sexp_of_default_field patt expr name tp ?sexp_of default =
+    let loc = Ast.loc_of_expr default in
+    sexp_of_record_field patt expr name tp ?sexp_of
+      <:expr@loc< (=) $default$ >>
+
   let sexp_of_record flds_ctyp =
     let flds = Ast.list_of_ctyp flds_ctyp [] in
     let rec coll (patt, expr) = function
           patt, expr
       | <:ctyp@loc< $lid:name$ : mutable sexp_list $tp$ >>
       | <:ctyp@loc< $lid:name$ : sexp_list $tp$ >> ->
-          sexp_of_default_field
-            patt expr name tp <:expr@loc< sexp_of_list >> <:expr@loc< [] >>
+          sexp_of_default_field patt expr name tp
+            ~sexp_of:<:expr@loc< sexp_of_list >> <:expr@loc< [] >>
       | <:ctyp@loc< $lid:name$ : mutable sexp_array $tp$ >>
       | <:ctyp@loc< $lid:name$ : sexp_array $tp$ >> ->
-          sexp_of_default_field
-            patt expr name tp <:expr@loc< sexp_of_array >> <:expr@loc< [||] >>
+          sexp_of_default_field patt expr name tp
+            ~sexp_of:<:expr@loc< sexp_of_array >> <:expr@loc< [||] >>
       | <:ctyp@loc< $lid:name$ : mutable $tp$ >>
       | <:ctyp@loc< $lid:name$ : $tp$ >> ->
-          let patt = mk_rec_patt loc patt name in
-          let vname = <:expr@loc< $lid:"v_" ^ name$ >> in
-          let cnv_expr = unroll_cnv_fp loc vname (sexp_of_type tp) in
-          let expr =
-            <:expr@loc<
-              let arg = $cnv_expr$ in
-              let bnd = Sexplib.Sexp.List [Sexplib.Sexp.Atom $str:name$; arg] in
-              let bnds = [ bnd :: bnds ] in
-              $expr$
-            >>
-          in
-          patt, expr
+          let opt_default = Pa_type_conv.Gen.find_record_default loc in
+          let field_handler = get_record_field_handler loc in
+          begin match opt_default, field_handler with
+          | None, `drop_default -> Loc.raise loc (Failure "no default to drop")
+          | _, `drop_if test -> sexp_of_record_field patt expr name tp test
+          | Some default, `drop_default ->
+              sexp_of_default_field patt expr name tp default
+          | _, `keep ->
+              let patt = mk_rec_patt loc patt name in
+              let vname = <:expr@loc< $lid:"v_" ^ name$ >> in
+              let cnv_expr = unroll_cnv_fp loc vname (sexp_of_type tp) in
+              let expr =
+                <:expr@loc<
+                  let arg = $cnv_expr$ in
+                  let bnd =
+                    Sexplib.Sexp.List [Sexplib.Sexp.Atom $str:name$; arg]
+                  in
+                  let bnds = [ bnd :: bnds ] in
+                  $expr$
+                >>
+              in
+              patt, expr
+          end
       | _ -> assert false  (* impossible *)
     in
     let loc = Ast.loc_of_ctyp flds_ctyp in
       let rec loop (res_tpls, bi_lst, good_patts as acc) = function
         | <:ctyp@loc< $lid:nm$ : $tp$ >> ->
             let fld = <:expr@loc< $lid:nm ^ "_field"$.val >> in
+            let mk_default loc =
+              bi_lst, <:patt@loc< $lid:nm ^ "_value"$ >> :: good_patts
+            in
             let new_bi_lst, new_good_patts =
               match tp with
               | <:ctyp@loc< sexp_bool >> | <:ctyp@loc< mutable sexp_bool >>
               | <:ctyp@loc< sexp_list $_$ >>
               | <:ctyp@loc< mutable sexp_list $_$ >>
               | <:ctyp@loc< sexp_array $_$ >>
-              | <:ctyp@loc< mutable sexp_array $_$ >> ->
-                  bi_lst, <:patt@loc< $lid:nm ^ "_value"$ >> :: good_patts
-              | _ ->
-                  let loc = Ast.loc_of_ctyp tp in
-                  has_nonopt_fields := true;
-                  (
-                    <:expr@loc<
-                      (Pervasives.(=) $fld$ None, $str:nm$) >> :: bi_lst,
-                    <:patt@loc< Some $lid:nm ^ "_value"$ >> :: good_patts
-                  )
+              | <:ctyp@loc< mutable sexp_array $_$ >> -> mk_default loc
+              | <:ctyp@loc< $_$ >> ->
+                  match Pa_type_conv.Gen.find_record_default loc with
+                  | Some _ -> mk_default loc
+                  | None ->
+                      has_nonopt_fields := true;
+                      (
+                        <:expr@loc<
+                          (Pervasives.(=) $fld$ None, $str:nm$) >> :: bi_lst,
+                        <:patt@loc< Some $lid:nm ^ "_value"$ >> :: good_patts
+                      )
             in
             (
               <:expr@loc< $fld$ >> :: res_tpls,
         let rec loop = function
           | <:ctyp@loc< $tp1$; $tp2$ >> ->
               <:rec_binding@loc< $loop tp1$; $loop tp2$ >>
+          | <:ctyp@loc< $lid:nm$ : mutable sexp_list $_$ >>
           | <:ctyp@loc< $lid:nm$ : sexp_list $_$ >> ->
               <:rec_binding@loc<
                 $lid:nm$ =
                   match $lid:nm ^ "_value"$ with
                   [ None -> [] | Some v -> v ]
               >>
+          | <:ctyp@loc< $lid:nm$ : mutable sexp_array $_$ >>
           | <:ctyp@loc< $lid:nm$ : sexp_array $_$ >> ->
               <:rec_binding@loc<
                 $lid:nm$ =
                   match $lid:nm ^ "_value"$ with
                   [ None -> [||] | Some v -> v ]
               >>
+          | <:ctyp@loc< $lid:nm$ : mutable $_$ >>
           | <:ctyp@loc< $lid:nm$ : $_$ >> ->
-              <:rec_binding@loc< $lid:nm$ = $lid:nm ^ "_value"$ >>
+              begin match Pa_type_conv.Gen.find_record_default loc with
+              | None -> <:rec_binding@loc< $lid:nm$ = $lid:nm ^ "_value"$ >>
+              | Some default ->
+                  <:rec_binding@loc<
+                    $lid:nm$ =
+                      match $lid:nm ^ "_value"$ with
+                      [ None -> $default$ | Some v -> v ]
+                  >>
+              end
           | _ -> assert false  (* impossible *)
         in
         <:expr@loc< { $loop flds$ } >>
               match field_name with
               [ $mc_no_args_fields$ ];
               iter tail }
-        | [sexp :: _] ->
+        | [((Sexplib.Sexp.Atom _ | Sexplib.Sexp.List _) as sexp) :: _] ->
             Sexplib.Conv_error.record_only_pairs_expected _tp_loc sexp
         | [] -> () ]
       in
 module Quotations = struct
   let of_sexp_quote loc _loc_name_opt cnt_str =
     Pa_type_conv.set_conv_path_if_not_set loc;
-    let ctyp = Gram.parse_string Syntax.ctyp_quot loc cnt_str in
+    let ctyp = Gram.parse_string ctyp_quot loc cnt_str in
     let fp = Generate_of_sexp.type_of_sexp ctyp in
     let body =
       match fp with
       >>
 
   let () =
-    Syntax.Quotation.add "of_sexp" Syntax.Quotation.DynAst.expr_tag
-      of_sexp_quote
+    Quotation.add "of_sexp" Quotation.DynAst.expr_tag of_sexp_quote
 
   let sexp_of_quote loc _loc_name_opt cnt_str =
     Pa_type_conv.set_conv_path_if_not_set loc;
-    let ctyp = Gram.parse_string Syntax.ctyp_quot loc cnt_str in
+    let ctyp = Gram.parse_string ctyp_quot loc cnt_str in
     Generate_sexp_of.mk_cnv_expr ctyp
 
-  let () =
-    Syntax.Quotation.add "sexp_of" Syntax.Quotation.DynAst.expr_tag
-      sexp_of_quote
+  let () = Quotation.add "sexp_of" Quotation.DynAst.expr_tag sexp_of_quote
 end
 
 (* Add "of_sexp" and "sexp_of" as "sexp" to the set of generators *)

base/sexplib/vim/syntax/sexplib.vim

+" Vim syntax file
+" Language:     S-expressions as used in Sexplib
+" Filenames:    *.sexp
+" Maintainers:  Markus Mottl      <markus.mottl@gmail.com>
+" URL:          http://www.ocaml.info/vim/syntax/sexplib.vim
+" Last Change:  2012 Apr 24 - Added support for new comment styles (MM)
+"               2009 Apr 02 - First release (MM)
+
+" For version 5.x: Clear all syntax items
+" For version 6.x: Quit when a syntax file was already loaded
+if version < 600
+  syntax clear
+elseif exists("b:current_syntax") && b:current_syntax == "sexplib"
+  finish
+endif
+
+" Sexplib is case sensitive.
+syn case match
+
+" Comments
+syn keyword  sexplibTodo contained TODO FIXME XXX NOTE
+syn region   sexplibBlockComment matchgroup=sexplibComment start="#|" matchgroup=sexplibComment end="|#" contains=ALLBUT,sexplibQuotedAtom,sexplibUnquotedAtom,sexplibEncl
+syn match    sexplibSexpComment "#;" skipwhite skipempty nextgroup=sexplibQuotedAtomComment,sexplibUnquotedAtomComment,sexplibListComment,sexplibComment
+syn region   sexplibQuotedAtomComment start=+"+ skip=+\\\\\|\\"+ end=+"+ contained
+syn match    sexplibUnquotedAtomComment /\([^;()" \t#|]\|#[^;()" \t|]\||[^;()" \t#]\)[^;()" \t]*/ contained
+syn region   sexplibListComment matchgroup=sexplibComment start="(" matchgroup=sexplibComment end=")" contained contains=ALLBUT,sexplibEncl,sexplibString,sexplibQuotedAtom,sexplibUnquotedAtom,sexplibTodo,sexplibNumber,sexplibFloat
+syn match    sexplibComment ";.*" contains=sexplibTodo
+
+" Atoms
+syn match    sexplibUnquotedAtom /\([^;()" \t#|]\|#[^;()" \t|]\||[^;()" \t#]\)[^;()" \t]*/
+syn region   sexplibQuotedAtom    start=+"+ skip=+\\\\\|\\"+ end=+"+
+syn match    sexplibNumber        "\<-\=\d\(_\|\d\)*[l|L|n]\?\>"
+syn match    sexplibNumber        "\<-\=0[x|X]\(\x\|_\)\+[l|L|n]\?\>"
+syn match    sexplibNumber        "\<-\=0[o|O]\(\o\|_\)\+[l|L|n]\?\>"
+syn match    sexplibNumber        "\<-\=0[b|B]\([01]\|_\)\+[l|L|n]\?\>"
+syn match    sexplibFloat         "\<-\=\d\(_\|\d\)*\.\?\(_\|\d\)*\([eE][-+]\=\d\(_\|\d\)*\)\=\>"
+
+" Lists
+syn region   sexplibEncl transparent matchgroup=sexplibEncl start="(" matchgroup=sexplibEncl end=")" contains=ALLBUT,sexplibParenErr
+
+" Errors
+syn match    sexplibUnquotedAtomErr /\([^;()" \t#|]\|#[^;()" \t|]\||[^;()" \t#]\)[^;()" \t]*\(#|\||#\)[^;()" \t]*/
+syn match    sexplibParenErr ")"
+
+" Synchronization
+syn sync minlines=50
+syn sync maxlines=500
+
+" Define the default highlighting.
+" For version 5.7 and earlier: only when not done already
+" For version 5.8 and later: only when an item doesn't have highlighting yet
+if version >= 508 || !exists("did_sexplib_syntax_inits")
+  if version < 508
+    let did_sexplib_syntax_inits = 1
+    command -nargs=+ HiLink hi link <args>
+  else
+    command -nargs=+ HiLink hi def link <args>
+  endif
+
+  HiLink sexplibParenErr            Error
+  HiLink sexplibUnquotedAtomErr     Error
+
+  HiLink sexplibComment             Comment
+  HiLink sexplibSexpComment         Comment
+  HiLink sexplibQuotedAtomComment   Include
+  HiLink sexplibUnquotedAtomComment Comment
+  HiLink sexplibBlockComment        Comment
+  HiLink sexplibListComment         Comment
+
+  HiLink sexplibBoolean             Boolean
+  HiLink sexplibCharacter           Character
+  HiLink sexplibNumber              Number
+  HiLink sexplibFloat               Float
+  HiLink sexplibUnquotedAtom        Identifier
+  HiLink sexplibEncl                Identifier
+  HiLink sexplibQuotedAtom          Keyword
+
+  HiLink sexplibTodo                Todo
+
+  HiLink sexplibEncl                Keyword
+
+  delcommand HiLink
+endif
+
+let b:current_syntax = "sexplib"
+
+" vim: ts=8

base/type_conv/lib/pa_type_conv.ml

 (* Map of "with"-generators for exceptions in signatures *)
 let sig_exn_generators = Hashtbl.create 0
 
-(* Check that there is no argument for generators that do not expect
-   arguments *)
-let no_arg id e typ arg =
+(* Map of "with"-generators for record fields *)
+type record_field_generator = Loc.t -> unit
+
+let record_field_generators = Hashtbl.create 0
+
+(* Check that there is no argument for generators that do not expect any *)
+let no_arg id e arg typ =
   if arg = None then e typ
   else
     failwith (
 let safe_add_gen gens id entry e =
   if Hashtbl.mem gens id then
     failwith ("Pa_type_conv: generator '" ^ id ^ "' defined multiple times")
-  else Hashtbl.add gens id (fun typ arg -> e typ (parse_with entry arg))
+  else Hashtbl.add gens id (fun arg typ -> e (parse_with entry arg) typ)
 
 (* Register a "with"-generator for types in structures *)
 let add_generator_with_arg ?(is_exn = false) id entry e =
 let add_generator ?is_exn id e =
   add_generator_with_arg ?is_exn id ignore_tokens (no_arg id e)
 
-(* Removes a "with"-generator for types in structures *)
+(* Remove a "with"-generator for types in structures *)
 let rm_generator ?(is_exn = false) id =
   let gens = if is_exn then exn_generators else generators in
   Hashtbl.remove gens id
 let add_sig_generator ?is_exn id e =
   add_sig_generator_with_arg ?is_exn id ignore_tokens (no_arg id e)
 
-(* Removes a "with"-generator for types in signatures *)
+(* Remove a "with"-generator for types in signatures *)
 let rm_sig_generator ?(is_exn = false) id =
   let gens = if is_exn then sig_exn_generators else sig_generators in
   Hashtbl.remove gens id
 
+(* Register a "with"-generator for record fields *)
+let add_record_field_generator_with_arg id entry e =
+  safe_add_gen record_field_generators id entry e
+
+let add_record_field_generator id e =
+  add_record_field_generator_with_arg id ignore_tokens (no_arg id e)
+
+(* Remove a "with"-generator for record fields *)
+let rm_record_field_generator id = Hashtbl.remove record_field_generators id
+
 
 (* General purpose code generation module *)
 
 module Gen = struct
+  (* Map of record field source locations to their default expression *)
+  let record_defaults = Hashtbl.create 0
+
+  let find_record_default loc =
+    try Some (Hashtbl.find record_defaults loc) with Not_found -> None
+
   let gensym =
     let cnt = ref 0 in
     fun ?(prefix = "_x") () ->
 
 (* Functions for interpreting derivation types *)
 
-let find_generator ~name haystack = (); fun tp (needle, arg) ->
+let find_generator ~name haystack = (); fun entry (needle, arg) ->
   let genf =
     try Hashtbl.find haystack needle
     with Not_found ->
       in
       failwith msg
   in
-  genf tp arg
+  genf arg entry
 
 let generate = find_generator ~name:"type" generators
 
   let coll drv der_sis = <:sig_item< $der_sis$; $sig_generate tp drv$ >> in
   List.fold_right coll drvs (SgNil _loc)
 
-let sig_exn_generate = find_generator ~name:"signature exception" sig_exn_generators
+let sig_exn_generate =
+  find_generator ~name:"signature exception" sig_exn_generators
 
 let gen_derived_exn_sigs _loc tp drvs =
   let coll drv der_sis = <:sig_item< $der_sis$; $sig_exn_generate tp drv$ >> in
   List.fold_right coll drvs (SgNil _loc)
 
+let remember_record_field_generators el drvs =
+  let act drv =
+    let gen = find_generator ~name:"record field" record_field_generators in
+    gen el drv
+  in
+  List.iter act drvs
+
 
 (* Syntax extension *)
 
 DELETE_RULE Gram str_item: "module"; a_UIDENT; module_binding0 END;
 
 EXTEND Gram
-  GLOBAL: str_item sig_item;
+  GLOBAL: str_item sig_item label_declaration;
 
   str_item:
     [[
         <:str_item< exception $tds$; $gen_derived_exn_defs _loc tds drvs$ >>
     ]];
 
+  str_item:
+    [[
+      "module"; i = found_module_name; mb = module_binding0 ->
+        pop_conv_path ();
+        <:str_item< module $i$ = $mb$ >>
+    ]];
+
   sig_item:
     [[
       "type"; tds = type_declaration; "with"; drvs = LIST1 generator SEP "," ->
     ]];
 
   sig_item:
-   [[
-     "exception"; cd = constructor_declaration; "with";
-     drvs = LIST1 generator SEP "," ->
-       set_conv_path_if_not_set _loc;
-       <:sig_item< exception $cd$; $gen_derived_exn_sigs _loc cd drvs$ >>
+    [[
+      "exception"; cd = constructor_declaration; "with";
+      drvs = LIST1 generator SEP "," ->
+        set_conv_path_if_not_set _loc;
+        <:sig_item< exception $cd$; $gen_derived_exn_sigs _loc cd drvs$ >>
     ]];
 
-  str_item:
+  label_declaration:
     [[
-      "module"; i = found_module_name; mb = module_binding0 ->
-        pop_conv_path ();
-        <:str_item< module $i$ = $mb$ >>
+      name = a_LIDENT; ":"; tp = poly_type;
+      "with"; drvs = LIST1 generator SEP "," ->
+        remember_record_field_generators _loc drvs;
+        <:ctyp< $lid:name$ : $tp$ >>
+    | "mutable"; name = a_LIDENT; ":"; tp = poly_type;
+      "with"; drvs = LIST1 generator SEP "," ->
+        remember_record_field_generators _loc drvs;
+        <:ctyp< $lid:name$ : mutable $tp$ >>
     ]];
+END
 
-END
+(* Record field defaults *)
+
+(* Add "default" to set of record field generators *)
+let () =
+  add_record_field_generator_with_arg "default" Syntax.expr
+    (fun expr_opt loc ->
+      let default =
+        match expr_opt with
+        | Some expr -> expr
+        | None -> Loc.raise loc (Failure "could not parse default expression")
+      in
+      Hashtbl.replace Gen.record_defaults loc default)

base/type_conv/lib/pa_type_conv.mli

 
 (** {6 Generator registration} *)
 
+val set_conv_path_if_not_set : Loc.t -> unit
+(** [set_conv_path_if_not_set loc] sets the path to the file/module being
+    converted for improved error messages. *)
+
+val get_conv_path : unit -> string
+(** [get_conv_path ()] @return the name to module containing a type
+    as required for error messages. *)
+
 val add_generator : ?is_exn : bool -> string -> (ctyp -> str_item) -> unit
 (** [add_generator ?is_exn name gen] adds the code generator [gen],
     which maps type or exception declarations to structure items, where
 
 val add_generator_with_arg :
   ?is_exn : bool -> string -> 'a Camlp4.PreCast.Gram.Entry.t ->
-  (ctyp -> 'a option -> str_item) -> unit
+  ('a option -> ctyp -> str_item) -> unit
 (** [add_generator_with_arg ?is_exn name entry generator] same as
     [add_generator], but the generator may accept an argument, which is
     parsed with [entry]. *)
 
 val rm_generator : ?is_exn : bool -> string -> unit
-(** [rm_generator ?is_exn name] removes the code generator named [name]. *)
-
-val add_sig_generator :
-  ?is_exn : bool -> string -> (ctyp -> sig_item) -> unit
-(** [add_generator ?is_exn name gen] adds the code generator [gen],
-    which maps type or exception declarations to signature items, where
-    [is_exn] specifies whether the declaration is an exception.  Note that
-    the original type/exception declarations get added automatically in
-    any case.
+(** [rm_generator ?is_exn name] removes the code generator named [name]
+    for types if [is_exn] is [false], or exceptions otherwise.
 
     @param is_exn = [false]
 *)
 
-val set_conv_path_if_not_set : Loc.t -> unit
+val add_sig_generator :
+  ?is_exn : bool -> string -> (ctyp -> sig_item) -> unit
+(** [add_sig_generator ?is_exn name gen] adds the code generator [gen],
+    which maps type or exception declarations to signature items, where
+    [is_exn] specifies whether the declaration is an exception.  Note that the
+    original type/exception declarations get added automatically in any case.
+
+    @param is_exn = [false]
+*)
 
 val add_sig_generator_with_arg :
   ?is_exn : bool -> string -> 'a Camlp4.PreCast.Gram.Entry.t ->
-  (ctyp -> 'a option -> sig_item) -> unit
+  ('a option -> ctyp -> sig_item) -> unit
 (** [add_sig_generator_with_arg ?is_exn name entry generator] same as
     [add_sig_generator], but the generator may accept an argument,
     which is parsed with [entry]. *)
 
 val rm_sig_generator : ?is_exn : bool -> string -> unit
-(** [rm_sig_generator name] removes the code signature generator named
-    [name]. *)
+(** [rm_sig_generator ?is_exn name] removes the signature code generator named
+    [name] for types if [is_exn] is [false], or exceptions otherwise.
 
-val get_conv_path : unit -> string
-(** [get_conv_path ()] @return the name to module containing a type
-    as required for error messages. *)
+    @param is_exn = [false]
+*)
+
+(** Type of record field code generators *)
+type record_field_generator = Loc.t -> unit
+
+val add_record_field_generator : string -> record_field_generator -> unit
+(** [add_record_field_generator gen_name gen] adds the record field code
+    generator [gen] with name [gen_name], which acts on the location
+    identifiying the record field. *)
+
+val add_record_field_generator_with_arg :
+  string -> 'a Camlp4.PreCast.Gram.Entry.t ->
+  ('a option -> record_field_generator) -> unit
+(** [add_record_field_generator_with_arg name entry generator] same as
+    [add_record_field_generator], but the [generator] takes an argument,
+    which is parsed with [entry].  If [None] is passed to the generator,
+    parsing of the argument failed, otherwise [Some arg] will be passed,
+    where [arg] is the successfully parsed argument. *)
+
+val rm_record_field_generator : string -> unit
+(** [rm_record_field_generator name] removes the record field code generator
+    named [name]. *)
 
 
 (** {6 Utility functions} *)
   val drop_variance_annotations : ctyp -> ctyp
   (** [drop_variance_annotations tp] @return the type resulting from dropping
       all variance annotations in [tp]. *)
+
+  val find_record_default : Loc.t -> expr option
+  (** [find_record_default loc] @return the optional default expression
+      associated with the record field at source location [loc] if defined. *)
 end

base/type_conv/oasis.sh

 <lib/pa_type_conv.ml>: syntax_camlp4o
 EOF
 
+make_myocamlbuild $HERE/myocamlbuild.ml <<EOF
+Ocamlbuild_plugin.dispatch
+  begin
+    function
+      | After_rules as e ->
+          flag ["compile"; "ocaml"] (S [A "-w"; A "@Ae" ]);
+          dispatch_default e
+      | e -> dispatch_default e
+  end
+;;
+EOF
+
 cd $HERE
 oasis setup

base/type_conv/syntax/META

-# OASIS_START
-# DO NOT EDIT (digest: f1c75f691f33f8ee2a4b69f96a169153)
-version = "3.0.5"
-description = "Syntax extension for type_conv"
-requires = "camlp4"
-archive(syntax, preprocessor) = "pa_type_conv.cma"
-archive(syntax, toploop) = "pa_type_conv.cma"
-exists_if = "pa_type_conv.cma"
-# OASIS_STOP
-

base/type_conv/syntax/pa_type_conv.ml

-(* Pa_type_conv: Preprocessing Module for Registering Type Conversions *)
-
-open Printf
-open Lexing
-
-open Camlp4
-open PreCast
-open Ast
-
-(* Utility functions *)
-
-let both fa fb (a, b) = fa a, fb b
-
-let get_loc_err loc msg =
-  sprintf "File \"%s\", line %d, characters %d-%d: %s"
-    (Loc.file_name loc) (Loc.start_line loc)
-    (Loc.start_off loc - Loc.start_bol loc)
-    (Loc.stop_off loc - Loc.stop_bol loc)
-    msg
-
-(* To be deleted once the OCaml team fixes Mantis issue #4751. *)
-let hash_variant str =
-  let acc_ref = ref 0 in
-  for i = 0 to String.length str - 1 do