Commits

Markus Mottl committed 73428a0

Updated newline handling in S-expressions to conform with OCaml 4.0

Comments (0)

Files changed (2)

base/sexplib/lib/lexer.mll

     | 't' -> '\009'
     | c -> c
 
-  let dos_newline = "\013\010"
+  let lf = '\010'
 
   let dec_code c1 c2 c3 =
     100 * (Char.code c1 - 48) + 10 * (Char.code c2 - 48) + (Char.code c3 - 48)
     failwith msg
 }
 
+let lf = '\010'
 let lf_cr = ['\010' '\013']
 let dos_newline = "\013\010"
-let newline = lf_cr | dos_newline
 let blank = [' ' '\009' '\012']
 let unquoted = [^ ';' '(' ')' '"'] # blank # lf_cr
 let digit = ['0'-'9']
   unquoted # ['#' '|'] | '#' unquoted # ['|'] | '|' unquoted # ['#']
 
 rule main buf = parse
-  | newline { found_newline lexbuf 0; main buf lexbuf }
+  | lf | dos_newline { found_newline lexbuf 0; main buf lexbuf }
   | blank+ | ';' (_ # lf_cr)* { main buf lexbuf }
   | '(' { LPAREN }
   | ')' { RPAREN }
 
 and scan_string buf start = parse
   | '"' { () }
-  | '\\' lf_cr [' ' '\t']*
+  | '\\' lf [' ' '\t']*
       {
         found_newline lexbuf (lexeme_len lexbuf - 2);
         scan_string buf start lexbuf
         Buffer.add_char buf c;
         scan_string buf start lexbuf
       }
-  | lf_cr as c
+  | lf
       {
         found_newline lexbuf 0;
-        Buffer.add_char buf c;
+        Buffer.add_char buf lf;
         scan_string buf start lexbuf
       }
-  | dos_newline
-      {
-        found_newline lexbuf 0;
-        Buffer.add_string buf dos_newline;
-        scan_string buf start lexbuf
-      }
-  | ([^ '\\' '"'] # lf_cr)+
+  | ([^ '\\' '"'] # lf)+
       {
         let ofs = lexeme_start lexbuf in
         let len = lexeme_end lexbuf - ofs in
       }
 
 and scan_block_comment buf locs = parse
-  | ('#'* | '|'*) newline
-      {
-        found_newline lexbuf 0;
-        scan_block_comment buf locs lexbuf;
-      }
-  | (('#'* | '|'*) [^ '"' '#' '|'] # lf_cr)+
-      { scan_block_comment buf locs lexbuf }
+  | ('#'* | '|'*) lf
+      { found_newline lexbuf 0; scan_block_comment buf locs lexbuf }
+  | (('#'* | '|'*) [^ '"' '#' '|'] # lf)+ { scan_block_comment buf locs lexbuf }
   | ('#'* | '|'*) '"'
       {
         let cur = lexeme_end_p lexbuf in

base/sexplib/lib/pre_sexp.ml

                   bump_pos_cont state str ~max_pos ~pos PARSE) \
       | ' ' | '\009' | '\012' -> bump_pos_cont state str ~max_pos ~pos PARSE \
       | '\010' -> bump_line_cont state str ~max_pos ~pos PARSE \
-      | '\013' -> bump_line_cont state str ~max_pos ~pos parse_nl \
+      | '\013' -> bump_pos_cont state str ~max_pos ~pos parse_nl \
       | ';' -> bump_pos_cont state str ~max_pos ~pos parse_comment \
       | '"' -> \
           REGISTER_POS1 \
   and parse_nl state str ~max_pos ~pos = \
     if pos > max_pos then mk_cont "parse_nl" parse_nl state \
     else \
-      let pos = if GET_CHAR = '\010' then pos + 1 else pos in \
-      PARSE state str ~max_pos ~pos \
+      let c = GET_CHAR in \
+      if c = '\010' then bump_line_cont state str ~max_pos ~pos PARSE \
+      else raise_unexpected_char (MK_PARSE_STATE state) "parse_nl" pos c \
   \
   and parse_comment state str ~max_pos ~pos = \
     if pos > max_pos then mk_cont "parse_comment" parse_comment state \
     else \
       match GET_CHAR with \
       | '\010' -> bump_line_cont state str ~max_pos ~pos PARSE \
-      | '\013' -> bump_line_cont state str ~max_pos ~pos parse_nl \
+      | '\013' -> bump_pos_cont state str ~max_pos ~pos parse_nl \
       | _ -> bump_pos_cont state str ~max_pos ~pos parse_comment \
   \
   and maybe_parse_comment state str ~max_pos ~pos = \
         else \
           match GET_CHAR with \
           | '\010' -> bump_line_cont state str ~max_pos ~pos parse_block_depth \
-          | '\013' -> \
-              bump_line_cont state str ~max_pos ~pos parse_block_depth_nl \
           | '"' -> \
               let rec parse_block_quote parse state str ~max_pos ~pos = \
                 match parse state str ~max_pos ~pos with \
           | '#' -> bump_pos_cont state str ~max_pos ~pos parse_open_block \
           | '|' -> bump_pos_cont state str ~max_pos ~pos parse_close_block \
           | _ -> bump_pos_cont state str ~max_pos ~pos parse_block_depth \
-      and parse_block_depth_nl state str ~max_pos ~pos = \
-        if pos > max_pos then \
-          mk_cont "parse_block_depth_nl" parse_block_depth_nl state \
-        else \
-          let pos = if GET_CHAR = '\010' then pos + 1 else pos in \
-          parse_block_depth state str ~max_pos ~pos \
       and parse_open_block state str ~max_pos ~pos = \
         if pos > max_pos then \
           mk_cont "parse_open_block" parse_open_block state \
                   bump_pos_cont state str ~max_pos ~pos PARSE) \
       | '\010' -> bump_found_atom bump_text_line state str ~max_pos ~pos PARSE \
       | '\013' -> \
-          bump_found_atom bump_text_line state str ~max_pos ~pos parse_nl \
+          bump_found_atom bump_text_pos state str ~max_pos ~pos parse_nl \
       | ';' -> \
           bump_found_atom bump_text_pos state str ~max_pos ~pos parse_comment \
       | '"' -> \
               bump_pos_cont state str ~max_pos ~pos PARSE) \
       | '\\' -> bump_pos_cont state str ~max_pos ~pos parse_escaped \
       | '\010' as c -> add_bump_line state str ~max_pos ~pos c parse_quoted \
-      | '\013' as c -> add_bump_line state str ~max_pos ~pos c parse_quoted_nl \
       | c -> add_bump_pos state str ~max_pos ~pos c parse_quoted \
   \
-  and parse_quoted_nl state str ~max_pos ~pos = \
-    if pos > max_pos then mk_cont "parse_quoted_nl" parse_quoted_nl state \
-    else \
-      let pos = \
-        let c = '\010' in \
-        if GET_CHAR = c then ( \
-          Buffer.add_char state.pbuf c; \
-          pos + 1 \
-        ) \
-        else pos \
-      in \
-      parse_quoted state str ~max_pos ~pos \
-  \
   and parse_escaped state str ~max_pos ~pos = \
     if pos > max_pos then mk_cont "parse_escaped" parse_escaped state \
     else \
       match GET_CHAR with \
       | '\010' -> bump_line_cont state str ~max_pos ~pos parse_skip_ws \
-      | '\013' -> bump_line_cont state str ~max_pos ~pos parse_skip_ws_nl \
+      | '\013' -> bump_pos_cont state str ~max_pos ~pos parse_skip_ws_nl \
       | '0' .. '9' as c -> \
           bump_text_pos state; \
           let d = Char.code c - 48 in \
   and parse_skip_ws_nl state str ~max_pos ~pos = \
     if pos > max_pos then mk_cont "parse_skip_ws_nl" parse_skip_ws_nl state \
     else \
-      let pos = if GET_CHAR = '\010' then pos + 1 else pos in \
-      parse_skip_ws state str ~max_pos ~pos \
+      if GET_CHAR = '\010' then \
+        bump_line_cont state str ~max_pos ~pos parse_skip_ws \
+      else begin \
+        Buffer.add_char state.pbuf '\013'; \
+        parse_quoted state str ~max_pos ~pos \
+      end \
   \
   and parse_dec state str ~max_pos ~pos ~count ~d = \
     if pos > max_pos then mk_cont "parse_dec" (parse_dec ~count ~d) state \
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use the ↑ and ↓ arrow keys to navigate, and press Return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.