Commits

Peter Szilagyi  committed b91a972

ocaml/contrib/ocp-indent: git pull git://github.com/OCamlPro/ocp-indent master

  • Participants
  • Parent commits 6596fb9

Comments (0)

Files changed (80)

 
 bin/ocp-build
 bin/ocp-indent
+bin/ocp-indent.byte
 elisp/contrib/auctex/Makefile
 elisp/contrib/auctex/auctex.el
 elisp/contrib/auctex/auto-loads.el
 elisp/contrib/auctex/tex-site.el
 elisp/contrib/tex-site.el
 elisp/omake/scratch.el
+lib/ocp-indent
 ocaml/contrib/ocaml_inotify/inotify_test
 ocaml/contrib/ocp-indent/.git
 ocaml/contrib/ocp-indent/Makefile.config
 ocaml/omake/OMakefile
 ocp-indent/ocp.ml
 ocp-indent/tuareg.ml
-share/typerex/ocp-indent/ocp-indent.el
-share/typerex/ocp-indent/ocp-indent.vim
+share/typerex/ocp-indent

File ocaml/contrib/ocp-indent/.gitignore

 config.log
 config.status
 ocp-indent
-src/globals.ml
+src/indentVersion.ml
 *~
 version.ocp
 autom4te.cache

File ocaml/contrib/ocp-indent/CHANGELOG

+## 1.0.1
+* Indentation of comments now follows ocamldoc conventions properly
+* Partial indent adapts more closely to manual indentation
+* Various small fixes (indent on empty lines, freeform comments...)

File ocaml/contrib/ocp-indent/Makefile

 $(native) native asm: ocp-build.root ALWAYS
 	ocp-build
 
+bootstrap: ocp-indent
+	./ocp-indent -c match_clause=4 --inplace src/*.mli src/*.ml
 
 sanitize:
 	ocp-build -sanitize
 
 .PHONY: install
 install: ocp-indent
-	cp -f ocp-indent $(prefix)/bin/
+	@if ocp-build -installed | grep -q ocp-indent-bundle; then \
+	  ocp-build -uninstall ocp-indent-bundle; \
+	fi
+	ocp-build -install -install-bundle ocp-indent-bundle -install-lib $(prefix)/lib/ocp-indent -install-bin $(prefix)/bin
 	mkdir -p $(prefix)/share/typerex/ocp-indent/
 	cp -f tools/ocp-indent.el $(prefix)/share/typerex/ocp-indent/
 	cp -f tools/ocp-indent.vim $(prefix)/share/typerex/ocp-indent/
 	@echo
+	@echo
 	@echo "=== ocp-indent installed ==="
+	@echo
 	@echo "To setup tuareg-mode to use ocp-indent, please add the following"
 	@echo "line to your .emacs :"
 	@echo
 
 .PHONY: uninstall
 uninstall:
-	rm -f $(prefix)/bin/ocp-indent
 	rm -rf $(prefix)/share/typerex/ocp-indent
+	ocp-build -uninstall ocp-indent-bundle
 
 .PHONY: test
 test: ocp-indent
 	aclocal -I m4
 	autoconf
 
-ocp-build.root:
+version.ocp: configure.ac
+	@echo "version.ocp not up-to-date, please rerun ./configure"
+	@exit 1
+
+ocp-build.root: version.ocp
 	@if (ocp-build -version 2>/dev/null |\
 	     awk -F'.' '{ exit $$1 > 1 || ($$1 = 1 && $$2 >= 99) }'); then \
 	  echo "Error: you need ocp-build >= 1.99." >&2;\

File ocaml/contrib/ocp-indent/README.md

 
 License: GPL v3.0
 
-## Compilation
+## Installation
 
-You need OCaml and ocp-build installed to compile ocp-indent.
+### Using OPAM
+
+The simplest way to install `ocp-indent` is using [OPAM](http://opam.ocamlpro.com):
+
+```bash
+opam install ocp-indent
+```
+
+### From sources
+
+You can also install `ocp-indent` from sources. You'll need `ocaml (>= 3.12.1)`
+and `ocp-build (>= 1.99.3-beta)`:
 
 ```bash
 ./configure
 ```
 
 If you use opam and want it installed alongside ocaml, you may want to use
-`./configure --prefix $(opam config -var prefix)`.
+`./configure --prefix $(opam config var prefix)`.
 
 ## Usage
 
-* `ocp-indent foo.ml` or `ocp-indent <foo.ml` outputs the file re-indented
-* From emacs, load `emacs.el` to have tuareg-mode automatically use
-  ocp-indent. _Warning_: it is in a very early stage, intended for testing and
-  debug only.
-* From emacs, you can also do a quick test with `C-u M-| ocp-indent`
+The above installation step copies elisp and vim scripts to
+`<prefix>/share/typerex/ocp-indent/`. You then need to load them in
+the editor of your choice to automatically use ocp-indent.
 
-These options should be mostly useful for binding in editors:
-* `ocp-indent --lines <m>-<n>` reindents only from line `<m>` to line `<n>`
-* `ocp-indent --numeric` only outputs the indentation values as integers, for
-  the lines that should be reindented.
+### Emacs
+
+Run the following command to set up tuareg-mode to use `ocp-indent`:
+
+```bash
+echo '(load-file "'"$(opam config var prefix)"'/share/typerex/ocp-indent/ocp-indent.el")' >>~/.emacs
+```
+
+### Vim
+
+Use the following command to tell Vim to use `ocp-indent`:
+
+```bash
+echo 'autocmd FileType ocaml source '"$(opam config var prefix)"'/share/typerex/ocp-indent/ocp-indent.vim' >>~/.vimrc
+```
+
+### Other editors
+
+As `ocp-indent` is a command-line tool, you can easily integrate it with other editors.
+
+```bash
+ocp-indent <src-file> > <dst-file>
+```
+
+You can also tell it to indent only a subset of lines, and to output only the indentation level:
+
+```bash
+ocp-indent <src-file> --lines <l1>-<l2> --numeric
+```
+
+## How does it compare to tuareg ?
+
+We've run some benchmarks on real code-bases and the result is quite
+conclusive. Keep in mind that most existing source files are
+either indented manually or following tuareg standards. You can
+see the results [here](http://htmlpreview.github.com/?https://github.com/AltGr/ocp-indent-tests/blob/master/status.html).
+
+Moreover, as `ocp-indent` has a deep understanding of the OCaml syntax
+it shines on specific cases. See for instance the collection of
+unit-tests
+[here](https://github.com/OCamlPro/ocp-indent/tree/master/tests/passing). The
+currently failing tests can be seen
+[here](http://htmlpreview.github.com/?https://github.com/OCamlPro/ocp-indent/blob/master/tests/failing.html).
+
 
 ## Testing
 
+It's hard to deliver a great indenter without tests. We've built
+`ocp-indent` based on a growing collection of unit-tests. If you find an
+indentation bug, feel free to send us a code snippet that we will
+incorporate into our test suite.
+
+The tests are organized as follows:
+
 * `tests/passing` contains tests that are properly indented and should be left
   unchanged by ocp-indent.
 * `tests/failing` contains tests for which ocp-indent currently returns the
   results in `tests/failing-output`, hence `meld tests/failing{,-output}` should
-  give an overview of currently known bugs.
+  give an overview of currently known bugs (also available online
+  [here](http://htmlpreview.github.com/?https://github.com/OCamlPro/ocp-indent/blob/master/tests/failing.html)).
 * `tests/test.sh` checks the current state against the reference state (checked
   into git).
 * `tests/test.sh --[git-]update` updates the current reference state.
 
 ## Configuration options
 
-A proper configuration engine is being written. For the time being you can use a
-few environment variables defined at the beginning of `src/block.ml`: of most
-use should be `match_clause_indent` (usually 2 or 4, default is 2) and
-`type_indent`.
+By default, `ocp-indent` comes with sensible default options. However,
+you can customize some of the indentation options using command-line
+options. For more details, see:
 
-## Testing ocp-indent with emacs/vim
+```bash
+ocp-indent --config help
+```
 
-`make install` will copy elisp and vim scripts to
-`<prefix>/share/typerex/ocp-indent/`. Just load them in the editor of your
-choice to automatically use ocp-indent (obviously, replacing `<prefix>`):
+There is no built-in support for in-file configuration directives. Yet, some
+editors already provide that feature, and with emacs, starting your file with a
+line like:
 
-Emacs:
 ```
-echo '(load-file "<prefix>/share/typerex/ocp-indent/ocp-indent.el")' >>~/.emacs
+(* -*- ocp-indent-config: in=2 -*- *)
 ```
-will setup tuareg-mode to use ocp-indent.
 
-Vim:
-```
-echo 'autocmd FileType ocaml source <prefix>/share/typerex/ocp-indent/ocp-indent.vim' >>~/.vimrc
-```
+will enable you to have the indentation after `in` set to 2 locally for this
+file.

File ocaml/contrib/ocp-indent/configure

 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.69 for ocp-indent 0.6.2.
+# Generated by GNU Autoconf 2.69 for ocp-indent 1.0.1.
 #
 #
 # Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
 # This configure script is free software; the Free Software Foundation
 # gives unlimited permission to copy, distribute and modify it.
 #
-# Copyright 2012 OcamlPro SAS
+# Copyright 2013 OcamlPro SAS
 ## -------------------- ##
 ## M4sh Initialization. ##
 ## -------------------- ##
 # Identity of this package.
 PACKAGE_NAME='ocp-indent'
 PACKAGE_TARNAME='ocp-indent'
-PACKAGE_VERSION='0.6.2'
-PACKAGE_STRING='ocp-indent 0.6.2'
+PACKAGE_VERSION='1.0.1'
+PACKAGE_STRING='ocp-indent 1.0.1'
 PACKAGE_BUGREPORT=''
 PACKAGE_URL=''
 
   # Omit some internal or obsolete options to make the list less imposing.
   # This message is too long to be a string in the A/UX 3.1 sh.
   cat <<_ACEOF
-\`configure' configures ocp-indent 0.6.2 to adapt to many kinds of systems.
+\`configure' configures ocp-indent 1.0.1 to adapt to many kinds of systems.
 
 Usage: $0 [OPTION]... [VAR=VALUE]...
 
 
 if test -n "$ac_init_help"; then
   case $ac_init_help in
-     short | recursive ) echo "Configuration of ocp-indent 0.6.2:";;
+     short | recursive ) echo "Configuration of ocp-indent 1.0.1:";;
    esac
   cat <<\_ACEOF
 
 test -n "$ac_init_help" && exit $ac_status
 if $ac_init_version; then
   cat <<\_ACEOF
-ocp-indent configure 0.6.2
+ocp-indent configure 1.0.1
 generated by GNU Autoconf 2.69
 
 Copyright (C) 2012 Free Software Foundation, Inc.
 This configure script is free software; the Free Software Foundation
 gives unlimited permission to copy, distribute and modify it.
 
-Copyright 2012 OcamlPro SAS
+Copyright 2013 OcamlPro SAS
 _ACEOF
   exit
 fi
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.
 
-It was created by ocp-indent $as_me 0.6.2, which was
+It was created by ocp-indent $as_me 1.0.1, which was
 generated by GNU Autoconf 2.69.  Invocation command line was
 
   $ $0 $@
 # report actual input values of CONFIG_FILES etc. instead of their
 # values after options handling.
 ac_log="
-This file was extended by ocp-indent $as_me 0.6.2, which was
+This file was extended by ocp-indent $as_me 1.0.1, which was
 generated by GNU Autoconf 2.69.  Invocation command line was
 
   CONFIG_FILES    = $CONFIG_FILES
 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
 ac_cs_version="\\
-ocp-indent config.status 0.6.2
+ocp-indent config.status 1.0.1
 configured by $0, generated by GNU Autoconf 2.69,
   with options \\"\$ac_cs_config\\"
 

File ocaml/contrib/ocp-indent/configure.ac

-AC_INIT(ocp-indent,0.6.2)
-AC_COPYRIGHT(Copyright 2012 OcamlPro SAS)
+AC_INIT(ocp-indent,1.0.1)
+AC_COPYRIGHT(Copyright 2013 OcamlPro SAS)
 
 AC_CONFIG_MACRO_DIR([m4])
 AC_PROG_CC

File ocaml/contrib/ocp-indent/src/approx_common.mli

 (**************************************************************************)
 
 (* Instead of raising an error when a CHAR, INT, INT32, INT64 or NATIVEINT
-overflows, we just changed the returned value to take that into account. *)
+   overflows, we just changed the returned value to take that into account. *)
 type 'a overflow =
   | InRange of 'a
   | Overflow of string

File ocaml/contrib/ocp-indent/src/approx_lexer.mll

 
 (* The table of keywords *)
 
-let create_hashtable n list =
-  let t = Hashtbl.create n in
-  List.iter (fun (x,y) -> Hashtbl.add t x y) list;
+let keywords = [
+  "and", AND;
+  "as", AS;
+  "assert", ASSERT;
+  "begin", BEGIN;
+  "class", CLASS;
+  "constraint", CONSTRAINT;
+  "do", DO;
+  "done", DONE;
+  "downto", DOWNTO;
+  "else", ELSE;
+  "end", END;
+  "exception", EXCEPTION;
+  "external", EXTERNAL;
+  "false", FALSE;
+  "for", FOR;
+  "fun", FUN;
+  "function", FUNCTION;
+  "functor", FUNCTOR;
+  "if", IF;
+  "in", IN;
+  "include", INCLUDE;
+  "inherit", INHERIT;
+  "initializer", INITIALIZER;
+  "lazy", LAZY;
+  "let", LET;
+  "match", MATCH;
+  "method", METHOD;
+  "module", MODULE;
+  "mutable", MUTABLE;
+  "new", NEW;
+  "object", OBJECT;
+  "of", OF;
+  "open", OPEN;
+  "or", OR;
+  "private", PRIVATE;
+  "rec", REC;
+  "sig", SIG;
+  "struct", STRUCT;
+  "then", THEN;
+  "to", TO;
+  "true", TRUE;
+  "try", TRY;
+  "type", TYPE;
+  "val", VAL;
+  "virtual", VIRTUAL;
+  "when", WHEN;
+  "while", WHILE;
+  "with", WITH;
+
+  "mod", INFIXOP3("mod");
+  "land", INFIXOP3("land");
+  "lor", INFIXOP3("lor");
+  "lxor", INFIXOP3("lxor");
+  "lsl", INFIXOP4("lsl");
+  "lsr", INFIXOP4("lsr");
+  "asr", INFIXOP4("asr");
+]
+
+(* extensions *)
+let syntax_extensions = [
+  "lwt", [
+    "for_lwt", FOR;
+    "lwt", LET;
+    "match_lwt", MATCH;
+    "try_lwt", TRY;
+    "while_lwt", WHILE;
+    "finally", BAR;  (* -- no equivalence for this one, this is a hack ! *)
+  ];
+  "mll", [
+    "rule", LET;
+    "parse", FUNCTION;
+  ];
+  "stream", [
+    "parser", FUNCTION;
+  ];
+]
+
+let keyword_table =
+  let t = Hashtbl.create 149 in
+  List.iter (fun (x,y) -> Hashtbl.add t x y) keywords;
   t
 
-let keyword_table =
-  create_hashtable 149 [
-    "and", AND;
-    "as", AS;
-    "assert", ASSERT;
-    "begin", BEGIN;
-    "class", CLASS;
-    "constraint", CONSTRAINT;
-    "do", DO;
-    "done", DONE;
-    "downto", DOWNTO;
-    "else", ELSE;
-    "end", END;
-    "exception", EXCEPTION;
-    "external", EXTERNAL;
-    "false", FALSE;
-    "for", FOR;
-    "fun", FUN;
-    "function", FUNCTION;
-    "functor", FUNCTOR;
-    "if", IF;
-    "in", IN;
-    "include", INCLUDE;
-    "inherit", INHERIT;
-    "initializer", INITIALIZER;
-    "lazy", LAZY;
-    "let", LET;
-    "match", MATCH;
-    "method", METHOD;
-    "module", MODULE;
-    "mutable", MUTABLE;
-    "new", NEW;
-    "object", OBJECT;
-    "of", OF;
-    "open", OPEN;
-    "or", OR;
-(*  "parser", PARSER; *)
-    "private", PRIVATE;
-    "rec", REC;
-    "sig", SIG;
-    "struct", STRUCT;
-    "then", THEN;
-    "to", TO;
-    "true", TRUE;
-    "try", TRY;
-    "type", TYPE;
-    "val", VAL;
-    "virtual", VIRTUAL;
-    "when", WHEN;
-    "while", WHILE;
-    "with", WITH;
-
-    "mod", INFIXOP3("mod");
-    "land", INFIXOP3("land");
-    "lor", INFIXOP3("lor");
-    "lxor", INFIXOP3("lxor");
-    "lsl", INFIXOP4("lsl");
-    "lsr", INFIXOP4("lsr");
-    "asr", INFIXOP4("asr")
-]
+let available_extensions () = List.map fst syntax_extensions
+let enable_extension name =
+  List.iter
+    (fun (x,y) -> Hashtbl.add keyword_table x y)
+    (List.assoc name syntax_extensions)
+let disable_extensions () =
+  Hashtbl.clear keyword_table;
+  List.iter (fun (x,y) -> Hashtbl.add keyword_table x y) keywords
 
 (* To buffer string literals *)
 
 let store_string_char c =
   if !string_index >= String.length (!string_buff) then begin
     let new_buff = String.create (String.length (!string_buff) * 2) in
-      String.blit (!string_buff) 0 new_buff 0 (String.length (!string_buff));
-      string_buff := new_buff
+    String.blit (!string_buff) 0 new_buff 0 (String.length (!string_buff));
+    string_buff := new_buff
   end;
   String.unsafe_set (!string_buff) (!string_index) c;
   incr string_index
 
 let char_for_decimal_code i s =
   let c = 100 * (Char.code(s.[i]) - 48) +
-           10 * (Char.code(s.[i+1]) - 48) +
-                (Char.code(s.[i+2]) - 48) in
+      10 * (Char.code(s.[i+1]) - 48) +
+      (Char.code(s.[i+2]) - 48) in
   if (c < 0 || c > 255) then
     failwith "Bad escaped decimal char"
   else Char.chr c
       if dst >= l then s else String.sub s 0 dst
     else
       match s.[src] with
-          '_' -> remove (src + 1) dst
-        |  c  -> s.[dst] <- c; remove (src + 1) (dst + 1)
+        '_' -> remove (src + 1) dst
+      |  c  -> s.[dst] <- c; remove (src + 1) (dst + 1)
   in remove 0 0
 
 (* Update the current location with file name and line number. *)
     | None -> pos.pos_fname
     | Some s -> s
   in
-  lexbuf.lex_curr_p <- { pos with
+  lexbuf.lex_curr_p <- {
+    pos with
     pos_fname = new_file;
     pos_lnum = if absolute then line else pos.pos_lnum + line;
     pos_bol = pos.pos_cnum - chars;
   };
   lines_starts := (lexbuf.lex_curr_p.pos_lnum, lexbuf.lex_curr_p.pos_bol) :: !lines_starts;
+
 ;;
-
 }
 
 let newline = ('\010' | '\013' | "\013\010")
 let symbolchar =
   ['!' '$' '%' '&' '*' '+' '-' '.' '/' ':' '<' '=' '>' '?' '@' '^' '|' '~']
 let decimal_literal =
-  ['0'-'9'] ['0'-'9' '_']*
-    let hex_literal =
-      '0' ['x' 'X'] ['0'-'9' 'A'-'F' 'a'-'f']['0'-'9' 'A'-'F' 'a'-'f' '_']*
-        let oct_literal =
-          '0' ['o' 'O'] ['0'-'7'] ['0'-'7' '_']*
-            let bin_literal =
-              '0' ['b' 'B'] ['0'-'1'] ['0'-'1' '_']*
-                let int_literal =
-                  decimal_literal | hex_literal | oct_literal | bin_literal
+  (['0'-'9'] ['0'-'9' '_']*)
+let hex_literal =
+  ('0' ['x' 'X'] ['0'-'9' 'A'-'F' 'a'-'f']['0'-'9' 'A'-'F' 'a'-'f' '_']*)
+let oct_literal =
+  ('0' ['o' 'O'] ['0'-'7'] ['0'-'7' '_']*)
+let bin_literal =
+  ('0' ['b' 'B'] ['0'-'1'] ['0'-'1' '_']*)
+let int_literal =
+  (decimal_literal | hex_literal | oct_literal | bin_literal)
 let float_literal =
   ['0'-'9'] ['0'-'9' '_']*
     ('.' ['0'-'9' '_']* )?
-    (['e' 'E'] ['+' '-']? ['0'-'9'] ['0'-'9' '_']*)?
+      (['e' 'E'] ['+' '-']? ['0'-'9'] ['0'-'9' '_']*)?
 
 
-    rule token = parse
-      | newline
-          { update_loc lexbuf None 1 false 0;
-            token lexbuf
-          }
-      | blank +
-          { token lexbuf }
-      | "_"
-          { UNDERSCORE }
-      | "~"
-          { TILDE }
-      | "~" lowercase identchar * ':'
-          { let s = Lexing.lexeme lexbuf in
-            let name = String.sub s 1 (String.length s - 2) in
-            (*
-              if Hashtbl.mem keyword_table name then
-              raise (Error(Keyword_as_label name, Location.curr lexbuf));
-            *)
-            LABEL name }
-      | "?"  { QUESTION }
-      | "??" { QUESTIONQUESTION }
-      | "?" lowercase identchar * ':'
-          { let s = Lexing.lexeme lexbuf in
-            let name = String.sub s 1 (String.length s - 2) in
-            (*
-              if Hashtbl.mem keyword_table name then
-              raise (Error(Keyword_as_label name, Location.curr lexbuf));
-            *)
-            OPTLABEL name }
-      | lowercase identchar *
-          { let s = Lexing.lexeme lexbuf in
-            try
-              Hashtbl.find keyword_table s
-            with Not_found ->
-              LIDENT s }
-      | uppercase identchar *
-          { UIDENT(Lexing.lexeme lexbuf) }      (* No capitalized keywords *)
-      | int_literal
-          { INT (can_overflow cvt_int_literal lexbuf) }
-      | float_literal
-          { FLOAT (remove_underscores(Lexing.lexeme lexbuf)) }
-      | int_literal "l"
-          { INT32 (can_overflow cvt_int32_literal lexbuf) }
-      | int_literal "L"
-          { INT64 (can_overflow cvt_int64_literal lexbuf) }
-      | int_literal "n"
-          { NATIVEINT (can_overflow cvt_nativeint_literal lexbuf) }
-      | "\""
-          { reset_string_buffer();
-            let string_start = lexbuf.lex_start_p in
-            string_start_loc := Lexing.lexeme_start lexbuf;
-            let token = string lexbuf in
-            lexbuf.lex_start_p <- string_start;
-            token }
-      | "'" newline "'"
-          { update_loc lexbuf None 1 false 1;
-            CHAR (InRange (Lexing.lexeme_char lexbuf 1)) }
-      | "'" [^ '\\' '\'' '\010' '\013'] "'"
-          { CHAR( InRange (Lexing.lexeme_char lexbuf 1)) }
-      | "'\\" ['\\' '\'' '"' 'n' 't' 'b' 'r' ' '] "'"
-          { CHAR( InRange (char_for_backslash (Lexing.lexeme_char lexbuf 2))) }
-      | "'\\" ['0'-'9'] ['0'-'9'] ['0'-'9'] "'"
-          { CHAR(can_overflow (char_for_decimal_code 2) lexbuf) }
-      | "'\\" 'x' ['0'-'9' 'a'-'f' 'A'-'F'] ['0'-'9' 'a'-'f' 'A'-'F'] "'"
-          { CHAR( InRange (char_for_hexadecimal_code lexbuf 3)) }
-      | "'\\" _
-          { let l = Lexing.lexeme lexbuf in
-            CHAR ( Overflow l )
-          }
-      | "(*"
-          {
-            let comment_start = lexbuf.lex_start_p in
-            comment_start_loc := [Lexing.lexeme_start lexbuf];
-            let token= comment lexbuf in
-            lexbuf.lex_start_p <- comment_start;
-            token
-          }
-      | "*)"
-          {
-        (*      let loc = Location.curr lexbuf in *)
-        (*        Location.prerr_warning loc Warnings.Comment_not_end; *)
-            lexbuf.Lexing.lex_curr_pos <- lexbuf.Lexing.lex_curr_pos - 1;
-            let curpos = lexbuf.lex_curr_p in
-            lexbuf.lex_curr_p <- { curpos with pos_cnum = curpos.pos_cnum - 1 };
-            STAR
-          }
-      | "<:" identchar * "<"
-          {
-            let start = lexbuf.lex_start_p in
-            quotation_start_loc := Lexing.lexeme_start lexbuf;
-            let token = quotation lexbuf in
-            lexbuf.lex_start_p <- start;
-            token
-          }
-      | "#" [' ' '\t']* (['0'-'9']+ as _num) [' ' '\t']*
-          ("\"" ([^ '\010' '\013' '"' ] * as _name) "\"")?
-          [^ '\010' '\013'] * newline
-          { update_loc lexbuf None 1 false 0;
-            LINE_DIRECTIVE
-          }
-      | "#"  { SHARP }
-      | "&"  { AMPERSAND }
-      | "&&" { AMPERAMPER }
-      | "`"  { BACKQUOTE }
-      | "'"  { QUOTE }
-      | "("  { LPAREN }
-      | ")"  { RPAREN }
-      | "*"  { STAR }
-      | ","  { COMMA }
-      | "->" { MINUSGREATER }
-      | "."  { DOT }
-      | ".." { DOTDOT }
-      | ":"  { COLON }
-      | "::" { COLONCOLON }
-      | ":=" { COLONEQUAL }
-      | ":>" { COLONGREATER }
-      | ";"  { SEMI }
-      | ";;" { SEMISEMI }
-      | "<"  { LESS }
-      | "<-" { LESSMINUS }
-      | "="  { EQUAL }
-      | "["  { LBRACKET }
-      | "[|" { LBRACKETBAR }
-      | "[<" { LBRACKETLESS }
-      | "[>" { LBRACKETGREATER }
-      | "]"  { RBRACKET }
-      | "{"  { LBRACE }
-      | "{<" { LBRACELESS }
-      | "|"  { BAR }
-      | "||" { BARBAR }
-      | "|]" { BARRBRACKET }
-      | ">"  { GREATER }
-      | ">]" { GREATERRBRACKET }
-      | "}"  { RBRACE }
-      | ">}" { GREATERRBRACE }
-      | "!"  { BANG }
+rule token = parse
+  | newline
+      { update_loc lexbuf None 1 false 0;
+        token lexbuf
+      }
+  | blank +
+      { token lexbuf }
+  | "_"
+    { UNDERSCORE }
+  | "~"
+    { TILDE }
+  | "~" lowercase identchar * ':'
+      { let s = Lexing.lexeme lexbuf in
+        let name = String.sub s 1 (String.length s - 2) in
+        (*
+           if Hashtbl.mem keyword_table name then
+           raise (Error(Keyword_as_label name, Location.curr lexbuf));
+        *)
+        LABEL name }
+  | "?"  { QUESTION }
+  | "??" { QUESTIONQUESTION }
+  | "?" lowercase identchar * ':'
+      { let s = Lexing.lexeme lexbuf in
+        let name = String.sub s 1 (String.length s - 2) in
+        (*
+           if Hashtbl.mem keyword_table name then
+           raise (Error(Keyword_as_label name, Location.curr lexbuf));
+        *)
+        OPTLABEL name }
+  | lowercase identchar *
+    { let s = Lexing.lexeme lexbuf in
+      try
+        Hashtbl.find keyword_table s
+      with Not_found ->
+          LIDENT s }
+  | uppercase identchar *
+    { UIDENT(Lexing.lexeme lexbuf) }      (* No capitalized keywords *)
+  | int_literal
+    { INT (can_overflow cvt_int_literal lexbuf) }
+  | float_literal
+    { FLOAT (remove_underscores(Lexing.lexeme lexbuf)) }
+  | int_literal "l"
+    { INT32 (can_overflow cvt_int32_literal lexbuf) }
+  | int_literal "L"
+    { INT64 (can_overflow cvt_int64_literal lexbuf) }
+  | int_literal "n"
+    { NATIVEINT (can_overflow cvt_nativeint_literal lexbuf) }
+  | "\""
+    { reset_string_buffer();
+      let string_start = lexbuf.lex_start_p in
+      string_start_loc := Lexing.lexeme_start lexbuf;
+      let token = string lexbuf in
+      lexbuf.lex_start_p <- string_start;
+      token }
+  | "'" newline "'"
+    { update_loc lexbuf None 1 false 1;
+      CHAR (InRange (Lexing.lexeme_char lexbuf 1)) }
+  | "'" [^ '\\' '\'' '\010' '\013'] "'"
+    { CHAR( InRange (Lexing.lexeme_char lexbuf 1)) }
+  | "'\\" ['\\' '\'' '"' 'n' 't' 'b' 'r' ' '] "'"
+    { CHAR( InRange (char_for_backslash (Lexing.lexeme_char lexbuf 2))) }
+  | "'\\" ['0'-'9'] ['0'-'9'] ['0'-'9'] "'"
+    { CHAR(can_overflow (char_for_decimal_code 2) lexbuf) }
+  | "'\\" 'x' ['0'-'9' 'a'-'f' 'A'-'F'] ['0'-'9' 'a'-'f' 'A'-'F'] "'"
+    { CHAR( InRange (char_for_hexadecimal_code lexbuf 3)) }
+  | "'\\" _
+    { let l = Lexing.lexeme lexbuf in
+      CHAR ( Overflow l )
+    }
+  | "(*"
+    {
+      let comment_start = lexbuf.lex_start_p in
+      comment_start_loc := [Lexing.lexeme_start lexbuf];
+      let token= comment lexbuf in
+      lexbuf.lex_start_p <- comment_start;
+      token
+    }
+  | "*)"
+    {
+      (*      let loc = Location.curr lexbuf in *)
+      (*        Location.prerr_warning loc Warnings.Comment_not_end; *)
+      lexbuf.Lexing.lex_curr_pos <- lexbuf.Lexing.lex_curr_pos - 1;
+      let curpos = lexbuf.lex_curr_p in
+      lexbuf.lex_curr_p <- { curpos with pos_cnum = curpos.pos_cnum - 1 };
+      STAR
+    }
+  | "<:" identchar * "<"
+      {
+        let start = lexbuf.lex_start_p in
+        quotation_start_loc := Lexing.lexeme_start lexbuf;
+        let token = quotation lexbuf in
+        lexbuf.lex_start_p <- start;
+        token
+      }
+  | "#" [' ' '\t']* (['0'-'9']+ as _num) [' ' '\t']*
+    ("\"" ([^ '\010' '\013' '"' ] * as _name) "\"")?
+      [^ '\010' '\013'] * newline
+      { update_loc lexbuf None 1 false 0;
+        LINE_DIRECTIVE
+      }
+  | "#"  { SHARP }
+  | "&"  { AMPERSAND }
+  | "&&" { AMPERAMPER }
+  | "`"  { BACKQUOTE }
+  | "'"  { QUOTE }
+  | "("  { LPAREN }
+  | ")"  { RPAREN }
+  | "*"  { STAR }
+  | ","  { COMMA }
+  | "->" { MINUSGREATER }
+  | "."  { DOT }
+  | ".." { DOTDOT }
+  | ":"  { COLON }
+  | "::" { COLONCOLON }
+  | ":=" { COLONEQUAL }
+  | ":>" { COLONGREATER }
+  | ";"  { SEMI }
+  | ";;" { SEMISEMI }
+  | "<"  { LESS }
+  | "<-" { LESSMINUS }
+  | "="  { EQUAL }
+  | "["  { LBRACKET }
+  | "[|" { LBRACKETBAR }
+  | "[<" { LBRACKETLESS }
+  | "[>" { LBRACKETGREATER }
+  | "]"  { RBRACKET }
+  | "{"  { LBRACE }
+  | "{<" { LBRACELESS }
+  | "|"  { BAR }
+  | "||" { BARBAR }
+  | "|]" { BARRBRACKET }
+  | ">"  { GREATER }
+  | ">]" { GREATERRBRACKET }
+  | "}"  { RBRACE }
+  | ">}" { GREATERRBRACE }
+  | "!"  { BANG }
 
-      | "!=" { INFIXOP0 "!=" }
-      | "+"  { PLUS }
-      | "+." { PLUSDOT }
-      | "-"  { MINUS }
-      | "-." { MINUSDOT }
+  | "!=" { INFIXOP0 "!=" }
+  | "+"  { PLUS }
+  | "+." { PLUSDOT }
+  | "-"  { MINUS }
+  | "-." { MINUSDOT }
 
-      | "!" symbolchar +
-          { PREFIXOP(Lexing.lexeme lexbuf) }
-      | ['~' '?'] symbolchar +
-          { PREFIXOP(Lexing.lexeme lexbuf) }
-      | ['=' '<' '>' '|' '&' '$'] symbolchar *
-          { INFIXOP0(Lexing.lexeme lexbuf) }
-      | ['@' '^'] symbolchar *
-          { INFIXOP1(Lexing.lexeme lexbuf) }
-      | ['+' '-'] symbolchar *
-          { INFIXOP2(Lexing.lexeme lexbuf) }
-      | "**" symbolchar *
-          { INFIXOP4(Lexing.lexeme lexbuf) }
-      | ['*' '/' '%'] symbolchar *
-          { INFIXOP3(Lexing.lexeme lexbuf) }
+  | "!" symbolchar +
+    { PREFIXOP(Lexing.lexeme lexbuf) }
+  | ['~' '?'] symbolchar +
+    { PREFIXOP(Lexing.lexeme lexbuf) }
+  | ['=' '<' '>' '|' '&' '$'] symbolchar *
+    { INFIXOP0(Lexing.lexeme lexbuf) }
+  | ['@' '^'] symbolchar *
+    { INFIXOP1(Lexing.lexeme lexbuf) }
+  | ['+' '-'] symbolchar *
+    { INFIXOP2(Lexing.lexeme lexbuf) }
+  | "**" symbolchar *
+    { INFIXOP4(Lexing.lexeme lexbuf) }
+  | ['*' '/' '%'] symbolchar *
+    { INFIXOP3(Lexing.lexeme lexbuf) }
 
-      | eof { EOF }
-      | _
-          { ILLEGAL_CHAR (Lexing.lexeme_char lexbuf 0)      }
+  | eof { EOF }
+  | _
+    { ILLEGAL_CHAR (Lexing.lexeme_char lexbuf 0)      }
 
-    and quotation = parse
-        ">>" { QUOTATION }
-      | newline
-          { update_loc lexbuf None 1 false 0;
-            quotation lexbuf
-          }
-      | eof { EOF_IN_QUOTATION !quotation_start_loc }
-      | _ { quotation lexbuf }
+and quotation = parse
+    ">>" { QUOTATION }
+  | newline
+      { update_loc lexbuf None 1 false 0;
+        quotation lexbuf
+      }
+  | eof { EOF_IN_QUOTATION !quotation_start_loc }
+  | _ { quotation lexbuf }
 
-    and comment = parse
-    "(*"
+and comment = parse
+  | "(*"
       { comment_start_loc := (Lexing.lexeme_start lexbuf) :: !comment_start_loc;
         comment lexbuf;
       }
-      | "*)"
-          { match !comment_start_loc with
-            | [] -> assert false
-            | [x] ->
-              comment_start_loc := [];
-              comment_stack := (x, Lexing.lexeme_end lexbuf) :: !comment_stack;
-              COMMENT (x, Lexing.lexeme_end lexbuf)
-            | _ :: l -> comment_start_loc := l;
-              comment lexbuf;
-          }
-      | "\""
-          { reset_string_buffer();
-            string_start_loc := Lexing.lexeme_start lexbuf;
-            let s = string lexbuf in
-            reset_string_buffer ();
-            match s with
-              | EOF_IN_STRING _ ->
-                let pos = list_last !comment_start_loc in
-                comment_start_loc := [];
-                EOF_IN_COMMENT pos
-              | STRING _ -> comment lexbuf
-              | _ -> assert false
+  | "*)"
+      { match !comment_start_loc with
+        | [] -> assert false
+        | [x] ->
+            comment_start_loc := [];
+            comment_stack := (x, Lexing.lexeme_end lexbuf) :: !comment_stack;
+            COMMENT (x, Lexing.lexeme_end lexbuf)
+        | _ :: l -> comment_start_loc := l;
+            comment lexbuf;
+      }
+  | "\""
+    { reset_string_buffer();
+      string_start_loc := Lexing.lexeme_start lexbuf;
+      let s = string lexbuf in
+      reset_string_buffer ();
+      match s with
+      | EOF_IN_STRING _ ->
+          let pos = list_last !comment_start_loc in
+          comment_start_loc := [];
+          EOF_IN_COMMENT pos
+      | STRING _ -> comment lexbuf
+      | _ -> assert false
+    }
+  | "''"
+    { comment lexbuf }
+  | "'" newline "'"
+    { update_loc lexbuf None 1 false 1;
+      comment lexbuf
+    }
+  | "'" [^ '\\' '\'' '\010' '\013' ] "'"
+    { comment lexbuf }
+  | "'\\" ['\\' '"' '\'' 'n' 't' 'b' 'r' ' '] "'"
+    { comment lexbuf }
+  | "'\\" ['0'-'9'] ['0'-'9'] ['0'-'9'] "'"
+    { comment lexbuf }
+  | "'\\" 'x' ['0'-'9' 'a'-'f' 'A'-'F'] ['0'-'9' 'a'-'f' 'A'-'F'] "'"
+    { comment lexbuf }
+  | eof
+    {
+      let pos = list_last !comment_start_loc in
+      comment_start_loc := [];
+      EOF_IN_COMMENT pos
+    }
+  | newline
+    { update_loc lexbuf None 1 false 0;
+      comment lexbuf
+    }
+  | _
+    { comment lexbuf }
 
-          }
-      | "''"
-          { comment lexbuf }
-      | "'" newline "'"
-          { update_loc lexbuf None 1 false 1;
-            comment lexbuf
-          }
-      | "'" [^ '\\' '\'' '\010' '\013' ] "'"
-          { comment lexbuf }
-      | "'\\" ['\\' '"' '\'' 'n' 't' 'b' 'r' ' '] "'"
-          { comment lexbuf }
-      | "'\\" ['0'-'9'] ['0'-'9'] ['0'-'9'] "'"
-          { comment lexbuf }
-      | "'\\" 'x' ['0'-'9' 'a'-'f' 'A'-'F'] ['0'-'9' 'a'-'f' 'A'-'F'] "'"
-          { comment lexbuf }
-      | eof
-          {
-            let pos = list_last !comment_start_loc in
-            comment_start_loc := [];
-            EOF_IN_COMMENT pos
-          }
-      | newline
-          { update_loc lexbuf None 1 false 0;
-            comment lexbuf
-          }
-      | _
-          { comment lexbuf }
-
-    and string = parse
+and string = parse
     '"'
       { STRING (get_stored_string ()) }
-      | '\\' newline ([' ' '\t'] * as space)
-          { update_loc lexbuf None 1 false (String.length space);
-            string lexbuf
-          }
-      | '\\' ['\\' '\'' '"' 'n' 't' 'b' 'r' ' ']
-          { store_string_char(char_for_backslash(Lexing.lexeme_char lexbuf 1));
-            string lexbuf }
-      | '\\' ['0'-'9'] ['0'-'9'] ['0'-'9']
-          { (match can_overflow (char_for_decimal_code 1) lexbuf with
-            | Overflow _ ->
-                let s = Lexing.lexeme lexbuf in
-                for i = 0 to String.length s - 1 do store_string_char s.[i] done
-            | InRange c -> store_string_char c);
-            string lexbuf }
-      | '\\' 'x' ['0'-'9' 'a'-'f' 'A'-'F'] ['0'-'9' 'a'-'f' 'A'-'F']
-          { store_string_char(char_for_hexadecimal_code lexbuf 2);
-            string lexbuf }
-      | '\\' _
-          { if in_comment ()
-            then string lexbuf
-            else begin
-          (*  Should be an error, but we are very lax.
-              raise (Error (Illegal_escape (Lexing.lexeme lexbuf),
+  | '\\' newline ([' ' '\t'] * as space)
+      { update_loc lexbuf None 1 false (String.length space);
+        string lexbuf
+      }
+  | '\\' ['\\' '\'' '"' 'n' 't' 'b' 'r' ' ']
+    { store_string_char(char_for_backslash(Lexing.lexeme_char lexbuf 1));
+      string lexbuf }
+  | '\\' ['0'-'9'] ['0'-'9'] ['0'-'9']
+    { (match can_overflow (char_for_decimal_code 1) lexbuf with
+      | Overflow _ ->
+          let s = Lexing.lexeme lexbuf in
+          for i = 0 to String.length s - 1 do store_string_char s.[i] done
+      | InRange c -> store_string_char c);
+      string lexbuf }
+  | '\\' 'x' ['0'-'9' 'a'-'f' 'A'-'F'] ['0'-'9' 'a'-'f' 'A'-'F']
+    { store_string_char(char_for_hexadecimal_code lexbuf 2);
+      string lexbuf }
+  | '\\' _
+    { if in_comment ()
+      then string lexbuf
+      else begin
+        (*  Should be an error, but we are very lax.
+            raise (Error (Illegal_escape (Lexing.lexeme lexbuf),
               Location.curr lexbuf))
-          *)
-              store_string_char (Lexing.lexeme_char lexbuf 0);
-              store_string_char (Lexing.lexeme_char lexbuf 1);
-              string lexbuf
-            end
-          }
-      | newline
-          {
-            update_loc lexbuf None 1 false 0;
-            let s = Lexing.lexeme lexbuf in
-            for i = 0 to String.length s - 1 do
-              store_string_char s.[i];
-            done;
-            string lexbuf
-          }
-      | eof
-          { EOF_IN_STRING !string_start_loc }
-      | _
-          { store_string_char(Lexing.lexeme_char lexbuf 0);
-            string lexbuf }
+        *)
+        store_string_char (Lexing.lexeme_char lexbuf 0);
+        store_string_char (Lexing.lexeme_char lexbuf 1);
+        string lexbuf
+      end
+    }
+  | newline
+    {
+      update_loc lexbuf None 1 false 0;
+      let s = Lexing.lexeme lexbuf in
+      for i = 0 to String.length s - 1 do
+        store_string_char s.[i];
+      done;
+      string lexbuf
+    }
+  | eof
+    { EOF_IN_STRING !string_start_loc }
+  | _
+    { store_string_char(Lexing.lexeme_char lexbuf 0);
+      string lexbuf }
 
-          {
+{
 
-          let rec token_locs lexbuf =
-            match token lexbuf with
-                COMMENT _ -> token_locs lexbuf
-              | EOF_IN_COMMENT _ ->
-                EOF, ( lexbuf.lex_start_p, lexbuf.lex_start_p)
-              | EOF_IN_STRING _ ->
-                EOF, ( lexbuf.lex_start_p, lexbuf.lex_start_p)
-              | token ->
-                token, ( lexbuf.lex_start_p, lexbuf.lex_curr_p)
+let rec token_locs lexbuf = (* Next token from the [token] rule, paired with
+     the (start, end) positions read from [lexbuf]. Comment tokens are
+     skipped by recursing. *)
+  match token lexbuf with
+    COMMENT _ -> token_locs lexbuf
+  | EOF_IN_COMMENT _ -> (* unterminated comment: reported as plain [EOF] *)
+      EOF, ( lexbuf.lex_start_p, lexbuf.lex_start_p)
+  | EOF_IN_STRING _ -> (* unterminated string: reported as plain [EOF] *)
+      EOF, ( lexbuf.lex_start_p, lexbuf.lex_start_p)
+  | token ->
+      token, ( lexbuf.lex_start_p, lexbuf.lex_curr_p)
 
-          let rec token_pos lexbuf =
-            match token lexbuf with
-                COMMENT _ -> token_pos lexbuf
-              | EOF_IN_COMMENT _ ->
-                EOF, ( lexbuf.lex_start_p.pos_cnum, lexbuf.lex_start_p.pos_cnum)
-              | EOF_IN_STRING _ ->
-                EOF, ( lexbuf.lex_start_p.pos_cnum, lexbuf.lex_start_p.pos_cnum)
-              | token ->
-                token, ( lexbuf.lex_start_p.pos_cnum, lexbuf.lex_curr_p.pos_cnum)
+let rec token_pos lexbuf = (* Same filtering as [token_locs], but the range
+     is given as the [pos_cnum] fields of the start and current positions
+     instead of the full position records. *)
+  match token lexbuf with
+    COMMENT _ -> token_pos lexbuf
+  | EOF_IN_COMMENT _ -> (* both ends are the start offset: empty range *)
+      EOF, ( lexbuf.lex_start_p.pos_cnum, lexbuf.lex_start_p.pos_cnum)
+  | EOF_IN_STRING _ ->
+      EOF, ( lexbuf.lex_start_p.pos_cnum, lexbuf.lex_start_p.pos_cnum)
+  | token ->
+      token, ( lexbuf.lex_start_p.pos_cnum, lexbuf.lex_curr_p.pos_cnum)
 
 
-          let token_locs_and_comments lexbuf =
-              let token = token lexbuf in
-              token,  ( lexbuf.lex_start_p, lexbuf.lex_curr_p)
+let token_locs_and_comments lexbuf = (* Unfiltered variant of [token_locs]:
+     whatever the [token] rule returns is passed through unchanged, together
+     with the (start, current) positions of [lexbuf]. *)
+  let token = token lexbuf in
+  token,  ( lexbuf.lex_start_p, lexbuf.lex_curr_p)
 
-          let get_token = token
+let get_token = token (* alias of [token] as bound at this point, i.e. before the filtering redefinition of [token] further down *)
 
-          let token_with_comments = get_token
+let token_with_comments = get_token (* second name for [get_token]: no token is filtered out *)
 
-          let rec token lexbuf =
-            match get_token lexbuf with
-                COMMENT _ -> token lexbuf
-              | EOF_IN_COMMENT _
-              | EOF_IN_STRING _ -> EOF
-              | tok -> tok
+let rec token lexbuf = (* Redefinition hiding the previous [token] (still
+     reachable as [get_token]): comment tokens are skipped, and both
+     [EOF_IN_COMMENT] and [EOF_IN_STRING] are reported as [EOF]. *)
+  match get_token lexbuf with
+    COMMENT _ -> token lexbuf
+  | EOF_IN_COMMENT _
+  | EOF_IN_STRING _ -> EOF
+  | tok -> tok
 
-	  let tokens_of_file filename =
-	    let ic = open_in filename in
-	    try
-	      init ();
-	      let lexbuf = Lexing.from_channel ic in
-	      let rec iter tokens =
-		let token = token_pos lexbuf in
-		match token with
-		    (EOF, _) -> List.rev tokens
-		  | _ -> iter (token :: tokens)
-	      in
-	      let tokens = iter [] in
-	      close_in ic;
-	      tokens
-	    with e -> close_in ic; raise e
+let tokens_of_file filename = (* Lex the file [filename] up to the first [EOF]
+     returned by [token_pos] and return the tokens read before it, in source
+     order, each paired with its offsets. The channel is closed both on
+     success and when an exception escapes. *)
+  let ic = open_in filename in
+  try
+    init (); (* NOTE(review): presumably resets lexer state; defined outside this view *)
+    let lexbuf = Lexing.from_channel ic in
+    let rec iter tokens = (* accumulates in reverse; reversed once at [EOF] *)
+      let token = token_pos lexbuf in
+      match token with
+        (EOF, _) -> List.rev tokens
+      | _ -> iter (token :: tokens)
+    in
+    let tokens = iter [] in
+    close_in ic;
+    tokens
+  with e -> close_in ic; raise e
 
-          let tokens_with_loc_of_string s =
-	      init ();
-	      let lexbuf = Lexing.from_string s in
-	      let rec iter tokens =
-		let token = token_pos lexbuf in
-		match token with
-		    (EOF, _) -> List.rev tokens
-		  | _ -> iter (token :: tokens)
-	      in
-	      let tokens = iter [] in
-	      tokens
+let tokens_with_loc_of_string s = (* Lex the string [s] with [token_pos] and
+     return every token read before the first [EOF], in source order. Each
+     token comes with the pair of [pos_cnum] offsets computed by
+     [token_pos]. *)
+  init (); (* NOTE(review): presumably resets lexer state; defined outside this view *)
+  let lexbuf = Lexing.from_string s in
+  let rec iter tokens = (* accumulates in reverse; reversed once at [EOF] *)
+    let token = token_pos lexbuf in
+    match token with
+      (EOF, _) -> List.rev tokens
+    | _ -> iter (token :: tokens)
+  in
+  let tokens = iter [] in
+  tokens
 
-          let tokens_of_string s =
-	      init ();
-	      let lexbuf = Lexing.from_string s in
-	      let rec iter tokens =
-		let token = token lexbuf in
-		match token with
-		    (EOF) -> List.rev tokens
-		  | _ -> iter (token :: tokens)
-	      in
-	      let tokens = iter [] in
-	      tokens
+let tokens_of_string s = (* Lex the string [s] with the filtering [token]
+     function and return the bare tokens read before the first [EOF], in
+     source order (no position information). *)
+  init (); (* NOTE(review): presumably resets lexer state; defined outside this view *)
+  let lexbuf = Lexing.from_string s in
+  let rec iter tokens = (* accumulates in reverse; reversed once at [EOF] *)
+    let token = token lexbuf in
+    match token with
+      (EOF) -> List.rev tokens
+    | _ -> iter (token :: tokens)
+  in
+  let tokens = iter [] in
+  tokens
 
-	  let lines () = List.rev ( !lines_starts )
+let lines () = List.rev ( !lines_starts ) (* current contents of [lines_starts], reversed; NOTE(review): presumably line starts in source order, confirm where [lines_starts] is filled *)
 
 }

File ocaml/contrib/ocp-indent/src/approx_tokens.ml

 
 (* ADMIN: fabrice *)
 
-  type token =
+type token =
   | AMPERAMPER
   | AMPERSAND
   | AND

File ocaml/contrib/ocp-indent/src/compat.ml

+(* Drop-in extension of the standard [String] module: everything from
+   [String] is re-exported, and [is_space] / [trim] are defined on top. *)
+module String = struct
+  include String
+  (* [is_space c] holds exactly for ' ', form feed ('\012'), '\n', '\r'
+     and '\t'. *)
+  let is_space = function
+    | ' ' | '\012' | '\n' | '\r' | '\t' -> true
+    | _ -> false
+  (* [trim s] is [s] without its leading and trailing [is_space] characters.
+     When nothing has to be removed, [s] itself is returned (no copy); a
+     non-empty string made only of such characters yields [""]. *)
+  let trim s =
+    let len = length s in
+    let i = ref 0 in (* will become the index of the first character kept *)
+    while !i < len && is_space (unsafe_get s !i) do
+      incr i
+    done;
+    let j = ref (len - 1) in (* will become the index of the last character kept *)
+    while !j >= !i && is_space (unsafe_get s !j) do
+      decr j
+    done;
+    if !i = 0 && !j = len - 1 then (* neither index moved: nothing to strip *)
+      s
+    else if !j >= !i then
+      sub s !i (!j - !i + 1)
+    else
+      ""
+end

File ocaml/contrib/ocp-indent/src/indentArgs.ml

     Sys.argv.(0) IndentVersion.version;
   exit 0
 
-let arg_debug = ref false
-let arg_file  = ref false
-let arg_file_out  = ref None
-let arg_lines = ref (None, None)
-let arg_numeric_only = ref false
-let arg_indent = ref IndentConfig.default
-let arg_inplace = ref false
+let debug = ref false
+let file_out  = ref None
+let lines = ref (None, None)
+let numeric = ref false
+let indent_config = ref IndentConfig.default
+let inplace = ref false
 
-let arg_usage =
+type input = InChannel of in_channel (* an input channel; [arg_anon] uses it for [stdin] *)
+           | File of string (* an anonymous command-line argument, stored as given *)
+(* Inputs collected from the command line. [arg_anon] pushes each one at the
+   head, so the list is in reverse order until [parse] reverses it. *)
+let files : input list ref = ref []
+
+let usage =
   Printf.sprintf "%s [options] [filename]" Sys.argv.(0)
 
-let arg_list = ref []
+let arg_list_ref = ref []
 
-let arg_error fmt =
+let error fmt =
   Printf.ksprintf (fun s ->
     Printf.eprintf "Fatal error: %s\n" s;
-    Arg.usage !arg_list arg_usage; exit 2) fmt
+    Arg.usage !arg_list_ref usage; exit 2) fmt
 
 let set_lines str =
   try
-    arg_lines := match Util.string_split '-' str with
-    | [s] ->
-      let li = int_of_string s in Some li, Some li
-    | [s1;""] ->
-      Some (int_of_string s1), None
-    | ["";s2] ->
-      None, Some (int_of_string s2)
-    | [s1;s2] ->
-      Some (int_of_string s1), Some (int_of_string s2)
-    | _ -> arg_error "Bad --lines parameter: %S" str
+    lines := match Util.string_split '-' str with
+      | [s] ->
+          let li = int_of_string s in Some li, Some li
+      | [s1;""] ->
+          Some (int_of_string s1), None
+      | ["";s2] ->
+          None, Some (int_of_string s2)
+      | [s1;s2] ->
+          Some (int_of_string s1), Some (int_of_string s2)
+      | _ -> error "Bad --lines parameter: %S" str
   with
   | Failure "int_of_string" ->
-    arg_error "Bad --lines parameter: %S" str
-
-(*
-let add_file s = match !file with
-  | None   -> file := Some s
-  | Some _ -> error "Unknown parameter %S" s
-in
-*)
+      error "Bad --lines parameter: %S" str
 
 let set_indent s =
   if s = "help" then (print_endline IndentConfig.help; exit 0) else
     try
-      arg_indent := IndentConfig.update_from_string !arg_indent s
+      indent_config := IndentConfig.update_from_string !indent_config s
     with
     | Invalid_argument s ->
-      arg_error "Bad --config parameter %S.\n%s" s IndentConfig.help
+        error "Bad --config parameter %S.\n%s" s IndentConfig.help
     | Failure _ ->
-      arg_error "Bad --config value %S.\n%s" s IndentConfig.help
+        error "Bad --config value %S.\n%s" s IndentConfig.help
 
+let set_output s = match !file_out with (* handler for --output / -o: stores
+     [s] in [file_out] the first time, and reports a second occurrence
+     through [error] *)
+  | None -> file_out := Some s
+  | Some o -> error "--output provided twice (%S and %S)" o s
 
-let _ =
-  arg_list := Arg.align [
-  "--config" , Arg.String set_indent, " ";
-  "-c"       , Arg.String set_indent, "var=value[,var=value...] \
-                                           Configure the indentation \
-                                           parameters. Try \"--config help\"";
-  "--debug"  , Arg.Set arg_debug        , " ";
-  "-d"       , Arg.Set arg_debug        , " Output debug info to stderr";
-  "--inplace", Arg.Set arg_inplace      , " ";
-  "-i"       , Arg.Set arg_inplace      , " Modify file in place";
-  "--lines"  , Arg.String set_lines , " ";
-  "-l"       , Arg.String set_lines , "n1-n2 Only indent the lines in the \
-                                           given interval (eg. 10-12)";
-  "--numeric", Arg.Set arg_numeric_only , " Only print the indentation values, \
-                                           not the contents. Useful in editors";
-  "--version", Arg.Unit version     , " ";
-  "-v"       , Arg.Unit version     , " Display version information and \
-                                           exit";
-  "--output",  Arg.String (fun s -> arg_file_out := Some s), " ";
-  "-o",  Arg.String (fun s -> arg_file_out := Some s),
-                                       "file Save output in file";
-]
+let syntax_ext name = (* handler for --syntax: passes [name] to
+     [Approx_lexer.enable_extension]; if that raises [Not_found], reports
+     the problem through [error], listing
+     [Approx_lexer.available_extensions ()] *)
+  try Approx_lexer.enable_extension name
+  with Not_found ->
+      error "Unknown syntax extension %S. Available choices are %s."
+        name (String.concat ", " (Approx_lexer.available_extensions ()))
 
-let arg_list = !arg_list
+let arg_list = Arg.align [ (* Command-line option table, formatted with
+     [Arg.align]. Long options given a blank doc string are directly
+     followed by their short alias, which carries the actual help text. *)
+    "--config" , Arg.String set_indent, " ";
+    "-c"       , Arg.String set_indent, "var=value[,var=value...] \
+                                         Configure the indentation \
+                                         parameters. Try \"--config help\"";
+    "--debug"  , Arg.Set debug        , " ";
+    "-d"       , Arg.Set debug        , " Output debug info to stderr";
+    "--inplace", Arg.Set inplace      , " ";
+    "-i"       , Arg.Set inplace      , " Modify files in place";
+    "--lines"  , Arg.String set_lines , " ";
+    "-l"       , Arg.String set_lines , "n1-n2 Only indent the lines in the \
+                                         given interval (eg. 10-12)";
+    "--numeric", Arg.Set numeric      , " Only print the indentation values, \
+                                         not the contents. Useful in editors";
+    "--output" , Arg.String set_output, " ";
+    "-o"       , Arg.String set_output, "file Save output \
+                                         to file";
+    "--syntax" , Arg.String syntax_ext, Printf.sprintf "<%s> Handle keywords \
+                                                        for the given syntax \
+                                                        extension"
+      (String.concat "|" (Approx_lexer.available_extensions ()));
+    "--version", Arg.Unit version     , " ";
+    "-v"       , Arg.Unit version     , " Display version information and \
+                                         exit";
+  ]
 
-(*
-Arg.parse (Arg.align options) add_file usage;
-Util.default "/dev/stdin" !file, !lines, !numeric_only, !indent, !debug
+let _ = arg_list_ref := arg_list
 
-let file, lines, numeric_only, indent, debug = init_config ()
-*)
+let arg_anon = function (* anonymous-argument handler: each bare argument is
+     pushed at the head of [files]; "-" stands for standard input *)
+  | "-" -> files := InChannel stdin :: !files
+  | path -> files := File path :: !files
+(* Run [Arg.parse] over [arg_list] with [arg_anon] as anonymous handler,
+   then normalise [files]: the result is stored back into [files] and also
+   returned. *)
+let parse () =
+  Arg.parse arg_list arg_anon usage;
+  let f = match !files with
+    | [] -> [InChannel stdin] (* no input given: fall back to standard input *)
+    (* | _::_::_ when not !inplace -> *)
+    (*     error "Multiple files can only be supplied with --inplace." *)
+    | f -> List.rev f (* [arg_anon] prepends, so restore command-line order *)
+  in
+  files := f;
+  f
 
 (* indent_empty is set if and only if reindenting a single line *)
 let indent_empty () =
-  match !arg_lines with
+  match !lines with
   | Some fst, Some lst when fst = lst -> true
   | _ -> false
 
-let start_line ()=
-  match !arg_lines with None,_ -> 1 | Some n,_ -> n
-
 let in_lines l =
-  match !arg_lines with
-    None, None -> true
+  match !lines with
+  | None, None -> true
   | Some first, Some last -> first <= l && l <= last
   | Some first, None -> first <= l
   | None, Some last -> l <= last

File ocaml/contrib/ocp-indent/src/indentArgs.mli

 
 (* Current configuration: *)
 
-val arg_file: bool ref
-val arg_file_out : string option ref
-(*val arg_lines: int option * int option *)
-val arg_numeric_only: bool ref
-val arg_indent: IndentConfig.t ref
-val arg_debug: bool ref
-val arg_inplace : bool ref
-val arg_error : ('a, unit, string, 'b) format4 -> 'a
+val file_out : string option ref
+val numeric: bool ref
+val indent_config: IndentConfig.t ref
+val debug: bool ref
+val inplace : bool ref
+
+type input = InChannel of in_channel
+           | File of string
+val files : input list ref
+
+val error : ('a, unit, string, 'b) format4 -> 'a
 
 val indent_empty: unit -> bool
-val start_line: unit -> int
 val in_lines: int -> bool
 
-val arg_usage : string
+val usage : string
 val arg_list : (Arg.key * Arg.spec * Arg.doc) list
 
+(** Parses the arguments, initialising the references above, and returns
+    the list of inputs to be processed *)
+val parse : unit -> input list

File ocaml/contrib/ocp-indent/src/indentBlock.ml

   (* Special operators that should break arrow indentation have this prio
      (eg monad operators, >>=) *)
   let prio_flatop = 59
+  let prio_semi = 5
 
   let rec follow = function
     | KAnd k
      - has a inner padding [pad]
      - has a line count [count]
 
-         XXX XXX XXX [
-                            XXX
-                     ]
+             XXX XXX XXX [
+                                XXX
+                         ]
 
-         XXX XXX XXX [
-               XXX
-         ]
+             XXX XXX XXX [
+                   XXX
+             ]
 
-<---l--->
-<----------x-------->
-                     <-pad->
-        <-pad->
-*)
+     <---l--->
+     <----------x-------->
+                         <-pad->
+             <-pad->
+  *)
 
   type t = {
     k:   kind;
    - a node path to go to this block
    - the last token of this block
    - the last token offset
-   - the original indentation for this block *)
+   - the original starting column for this block *)
 type t = {
   path: Path.t;
   last: Nstream.token option;
   | None   -> None
   | Some t -> Some t.token
 
+(* [is_inside_type path] applies [unwind] to [path], stopping on paren-like
+   nodes, [val]/[let] nodes and their bodies, type/external bodies and
+   colons. The answer is [true] when the resulting top node is a [val],
+   [type] or [external] body or a colon. Paren, begin, bracket and brace
+   nodes are looked through (the search resumes on the rest of the path);
+   anything else gives [false].
+   NOTE(review): this reading assumes [unwind] returns the path starting at
+   the first node satisfying the predicate -- confirm against its definition.
+   A more efficient way to do this would be to store a
+   "context-type" in the stack *)
+let rec is_inside_type path =
+  match unwind (function
+      | KParen | KBegin | KBracket | KBrace | KBracketBar
+      | KVal | KLet | KLetIn | KBody (KVal | KLet | KLetIn)
+      | KBody(KType|KExternal) | KColon -> true
+      | _ -> false)
+      path
+  with
+  | {k=KBody(KVal|KType|KExternal) | KColon}::_ -> true
+  | {k=KParen | KBegin | KBracket | KBrace}::p ->
+      is_inside_type p
+  | _ -> false
+
 let stacktrace t =
-    Printf.eprintf "\027[32m%8s\027[m %s\n%!"
-      (match t.last with Some tok -> tok.substr | _ -> "")
-      (to_string t)
+  Printf.eprintf "\027[32m%8s\027[m %s\n%!"
+    (match t.last with Some tok -> tok.substr | _ -> "")
+    (to_string t)
 
 (* different kinds of position:
    [T]: token aligned: the child is aligned with the token position
 
 (* Take a block, a token stream and a token.
    Return the new block stack. *)
-let rec update_path config t stream tok =
+let update_path config t stream tok =
   let open IndentConfig in
   let is_first_line = Region.char_offset tok.region = tok.offset in
   let starts_line = tok.newlines > 0 || is_first_line in
                   | _ -> assert false
                 in
                 let l = paren.t + paren_len + 1 (* usually 1 space *) + pad in
-                Some ({ h with k; l; t=l; pad = h.t - l } :: p)
+                Some ({ h with k; l; t=l } :: p)
             | _ ->
                 match k,h.k with
-                | KExpr pk, KExpr ph when ph = pk -> None
+                | KExpr pk, KExpr ph when ph = pk ->
+                    (* respect the indent of the above same-priority term, we
+                       assume it was already back-indented *)
+                    Some ({ h with k; l=h.t; t=h.t; pad = h.pad } :: p)
                 | _ ->
-                    let l = max 0 (h.t + pad) in
-                    Some ({ h with k; l; t=l; pad = -pad } :: p)
+                    let l = h.t + pad in
+                    if l < 0 then None
+                    else Some ({ h with k; l; t=l; pad = -pad } :: p)
         in
         match negative_indent () with
         | Some p -> p
     | _ -> path
   in
   let before_append_atom = function
-    | {k=KWith(KTry|KMatch as m)}::_ as path ->
+    | {k=KWith(KTry|KMatch as m); pad}::_ as path ->
         (* Special case: 'match with' and no bar for the 1st case:
            we append a virtual bar for alignment *)
         let p =
-          append (KBar m) L ~pad:(config.i_with + 2) path
+          append (KBar m) L ~pad:(max 2 pad) path
         in
         if not starts_line then
           let t = max 0 (t.toff + tok.offset - 2) in
   in
   let atom path =
     let path = before_append_atom path in
-    append expr_atom L ~pad:(max config.i_base (Path.pad path)) path
+    let pad = match path with {k=KExpr _; pad}::_ -> pad | _ -> config.i_base in
+    append expr_atom L ~pad path
   in
   let open_paren k path =
     let path = before_append_atom path in
     let p = append k L (fold_expr path) in
-    match p,next_token_full stream with
-    | {k=KParen|KBegin} :: {k=KArrow _} :: _, _
+    match p with
+    | [] -> []
+    | {k=KParen|KBegin} :: {k=KArrow _} :: _
       when not starts_line ->
         (* Special case: paren/begin after arrow has extra indent
            (see test js-begin) *)
         Path.shift p config.i_base
-    | h::p, Some ({newlines=0} as next) ->
-        if not starts_line then
-          if k <> KParen && k <> KBegin then
-            let l = t.toff + tok.offset in
-            (* set alignment for next lines relative to [ *)
-            { h with l; t=l; pad = next.offset } :: p
-          else
-            h::p
-        else
-          (* set padding for next lines *)
-          { h with pad = next.offset } :: p
-    | _ -> p
+    | h::p as path ->
+        match next_token_full stream with
+        | Some next
+          when Region.start_line next.region = Region.end_line tok.region
+          ->
+            if k = KBegin then path
+            else if starts_line then
+              (* set padding for next lines *)
+              { h with pad = next.offset } :: p
+            else if k = KParen then path
+            else
+              let l = t.toff + tok.offset in
+              (* set alignment for next lines relative to [ *)
+              { h with l; t=l; pad = next.offset } :: p
+        | _ -> path
   in
   let close f path =
     (* Remove the padding for the closing brace/bracket/paren/etc. *)
   let op_prio_align_indent = function
     (* anything else : -10 *)
     (* in -> : 0 *)
-    | SEMI -> 5,L,-2
+    | SEMI -> prio_semi,L,-2
     | AS -> 8,L,config.i_base
     (* special negative indent is only honored at beginning of line *)
     (* then else : 10 *)
     | AMPERSAND | AMPERAMPER -> 50,T,0
     | INFIXOP0 s ->
         (match String.sub s 0 (min 2 (String.length s)) with
-        (* these should deindent fun -> *)
-        | ">>" -> prio_flatop,L,0
-        | "|!" | "|>" -> prio_flatop,T,0
-        | _ -> 60,L,config.i_base)
+         (* these should deindent fun -> *)
+         | ">>" -> prio_flatop,L,0
+         | "|!" | "|>" -> prio_flatop,T,0
+         | _ -> 60,L,config.i_base)
     | EQUAL | LESS | GREATER -> 60,L,config.i_base
     | INFIXOP1 _ -> 70,L,config.i_base
     | COLONCOLON -> 80,L,config.i_base
   in
   let make_infix token path =
     let op_prio, align, indent = op_prio_align_indent token in
-    match unwind_while (fun k -> prio k >= op_prio) path with
-    | Some p ->
-        extend (KExpr op_prio) align ~pad:indent p
-    | None -> (* used as prefix ? Don't apply T indent *)
-        append (KExpr op_prio) L ~pad:(max 0 indent) path
+    match path with
+    | {k=KExpr prio}::_ when prio >= op_prio && prio < prio_max ->
+        (* we are just after another operator (should be an atom).
+           handle as unary (eg. x + -y) : indented but no effect
+           on following expressions *)
+        (* append KNone L path *)
+        append (KExpr prio) L ~pad:(max 0 indent) path
+    | _ ->
+        match unwind_while (fun k -> prio k >= op_prio) path with
+        | Some p ->
+            extend (KExpr op_prio) align ~pad:indent p
+        | None -> (* used as prefix ? Don't apply T indent *)
+            append (KExpr op_prio) L ~pad:(max 0 indent) path
   in
   (* KNone nodes correspond to comments or top-level stuff, they shouldn't be
      taken into account when indenting the next token *)
+  let t0 = t in
   let t = match t.path with {k=KNone}::path -> {t with path}
-    | _ -> t
+                          | _ -> t
   in
   match tok.token with
   | SEMISEMI    -> append KNone L ~pad:0 (unwind_top t.path)
       open_paren KBrace t.path
   | FUNCTION ->
       (match fold_expr t.path with
-      | {k = KBody (KLet|KLetIn) | KArrow(KMatch|KTry)} as l :: _ as p
-        when not starts_line ->
-          append (KWith KMatch) L ~pad:(max l.pad config.i_with) p
-      | p ->
-          append (KWith KMatch) L ~pad:config.i_with p)
+       | l :: _ as p when not starts_line && not config.i_with_never ->
+           append (KWith KMatch) L ~pad:(max l.pad config.i_with) p
+       | p ->
+           append (KWith KMatch) L ~pad:config.i_with p)
   | FUN | FUNCTOR ->
       (match t.path with
-      | {k=KArrow KFun}::p ->
-          replace KFun L (unwind (function KFun -> true | _ -> false) p)
-      | p -> append KFun L (fold_expr p))
+       | {k=KArrow KFun}::p ->
+           replace KFun L (unwind (function KFun -> true | _ -> false) p)
+       | p -> append KFun L (fold_expr p))
   | STRUCT ->
       append KStruct L  (Path.maptop (fun n -> {n with pad=0}) t.path)
   | WHEN ->
-      append KWhen L ~pad:(config.i_base + 2)
+      append KWhen L ~pad:(config.i_base + if starts_line then 0 else 2)
         (unwind (function
-        | KWith(KTry|KMatch) | KBar(KTry|KMatch) | KFun -> true
-        | _ -> false)
+           | KWith(KTry|KMatch) | KBar(KTry|KMatch) | KFun -> true
+           | _ -> false)
            t.path)
   | SIG ->
       append KSig L (Path.maptop (fun n -> {n with pad=0}) t.path)
          is the most robust (for example w.r.t. unfinished expressions) *)
       (* - it's a top Let if it is after a closed expression *)
       (match t.path with
-      | {k=KExpr i}::p when i = prio_max ->
-          append KLet L (unwind_top p)
-      | {k=KNone}::_ | [] ->
-          append KLet L []
-      | _ ->
-          append KLetIn L (fold_expr t.path))
+       | {k=KExpr i}::p when i = prio_max ->
+           append KLet L (unwind_top p)
+       | {k=KNone}::_ | [] ->
+           append KLet L []
+       | _ ->
+           append KLetIn L (fold_expr t.path))
       (* - or if after a specific token *)
       (* if close_top_let t.last then *)
       (*   append KLet L config.i_base (unwind_top t.path) *)
 
   | CONSTRAINT ->
       let path =
-        unwind (function KType | KBody KType | KObject -> true | _ -> false) t.path
+        unwind
+          (function KType | KBody KType | KObject -> true | _ -> false)
+          t.path
       in
       append KLet L path
 
         | _ -> false
       in let path = unwind (unwind_to @* follow) t.path in
       (match path with
-      | {k=KType|KModule|KBody (KType|KModule)}