Source

ocaml-indent / main.ml

The default branch has multiple heads

Full commit
open Sexplib.Conv
open Pos
open Reader

let parse_args () = 
  let rev_paths = ref [] in
  let debug = ref false in
  Arg.parse [
    ("-debug", Arg.Set debug, "debugging")
  ] (fun s -> rev_paths := s :: !rev_paths) "indent paths";
  List.rev !rev_paths, !debug

let paths, debug = parse_args ()

type kind = 
  | K_TOPLET  (* top let *)
  | K_LET     (* local let *)
  | K_LPAREN 
  | K_LBRACE 
  | K_STRUCT 
  | K_SIG 
  | K_TRY 
  | K_MATCH 
  | K_TYPE 
  | K_IF 
  | K_THEN 
  | K_ELSE 
  | K_BEGIN 
  | K_ARROW
  | K_OPEN
  | K_FUNCTION
  | K_FUN
  | K_WHILE
  | K_FOR
  | K_DO
  | K_VAL
  | K_WITH
  | K_BAR
  | K_NONE
with sexp

type stack = (kind * int) list with sexp

let rec get_indent_chars = function
  | [] -> 0
  | (_,i)::bs -> i + get_indent_chars bs

open Parser

let pop f bases = 
  try
    let rec pop = function
      | (token, _) :: bs when f token -> bs
      | (_, _) :: bs -> pop bs
      | [] -> []
    in
    pop bases
  with
  | Exit -> bases

let pop' f bases = 
  try
    let rec pop = function
      | (token, _) :: bs when f token -> bs, token
      | (_, _) :: bs -> pop bs
      | [] -> [], K_NONE
    in
    pop bases
  with
  | Exit -> bases, K_NONE

let pop_before f bases = 
  try
    let rec pop = function
      | ((token, _) :: _ as bs) when f token -> bs
      | (_, _) :: bs -> pop bs
      | [] -> []
    in
    pop bases
  with
  | Exit -> bases

let rec is_top_let = function
  | None -> true
  | Some t ->
      match t with
      | COMMENT -> assert false (* COMMENT must be skipped *)
      | STRUCT | SEMISEMI -> true

      | BEGIN | COLONEQUAL
      | COMMA
      | DO
      | DOWNTO
      | ELSE
      | EQUAL
      | IF
      | IN
      | INFIXOP0 _
      | INFIXOP1 _
      | INFIXOP2 _
      | INFIXOP3 _
      | INFIXOP4 _
      | INITIALIZER
      | LBRACE
      | LBRACELESS
      | LBRACKET
      | LBRACKETBAR
      | LBRACKETLESS
      | LBRACKETGREATER
      | LESSMINUS
      | LPAREN
      | MINUS
      | MINUSDOT
      | MINUSGREATER
      | OR
      | PLUS
      | PLUSDOT
      | PREFIXOP _
      | STAR
      | THEN
      | TO
      | TRY
      | WHEN
      | WHILE -> false

      | _ -> true
;;

let rec top bases = match bases with
  | ((K_TOPLET | K_TYPE), _) :: bs -> bs
  | ((K_STRUCT | K_SIG), _) :: _ -> bases
  | _ :: bs -> top bs
  | [] -> []

let token bases ~prev = function
  | EOF -> assert false

  | MINUSGREATER -> 
      bases, 
      (K_ARROW, 2) :: bases
  | IF -> 
      bases, 
      (K_IF, 2) :: bases
  | THEN -> 
      let bases = pop ((=) K_IF) bases in
      bases,
      (K_THEN, 2) :: bases
  | ELSE -> 
      let bases = pop ((=) K_THEN) bases in
      bases,
      (K_ELSE, 2) :: bases
  | LPAREN -> 
      bases, 
      (K_LPAREN, 2) :: bases
  | RPAREN ->
      let bases = pop ((=) K_LPAREN) bases in
      bases, bases
  | LBRACE -> 
      bases,
      (K_LBRACE, 2) :: bases
  | RBRACE -> 
      let bases = pop ((=) K_LBRACE) bases in
      bases, bases
  | BEGIN  -> 
      bases,
      (K_BEGIN, 2) :: bases
  | END -> 
      let bases = pop ((=) K_BEGIN) bases in
      bases, bases
  | TYPE -> 
      let bases = top bases in
      bases, 
      (K_TYPE, 2) :: bases
  | VAL ->
      let bases = top bases in
      bases,
      (K_VAL, 2) :: bases
  | OPEN -> 
      let bases = top bases in
      bases, 
      (K_OPEN, 2) :: bases
  | FUN -> 
      bases, 
      (K_FUN, 2) :: bases
  | FUNCTION -> 
      bases, 
      (K_FUNCTION, 2) :: bases
  | BAR ->
      (* BAR rewind just before WITH/FUNCTION but cannot exceed LPAREN *)
      let bases, k = pop' (function K_WITH | K_FUNCTION | K_LPAREN -> true | _ -> false) bases in
      bases, (k, 2) :: bases
  | STRUCT -> 
      bases, 
      (K_STRUCT, 2) :: bases
  | SIG  -> 
      bases, 
      (K_SIG, 2) :: bases
  | TRY -> 
      bases,
      (K_TRY, 2) :: bases
  | MATCH ->
      bases,
      (K_MATCH, 2) :: bases
  | WITH ->
      (* typedesc support *)
      let bases = pop (function K_TRY | K_MATCH  -> true | K_TYPE -> raise Exit | _ -> false) bases in
      bases,
      (K_WITH, 2) :: bases
  | LET when is_top_let prev -> 
      let bases = top bases in
      bases,
      (K_TOPLET, 2) :: bases
  | LET -> 
      bases,
      (K_LET, 2) :: bases
  | IN -> 
      let bases = pop ((=) K_LET) bases in
      bases,
      bases
  | SEMISEMI -> 
      let bases = top bases in
      bases, bases
  | WHILE ->
      bases,
      (K_WHILE, 2) :: bases
  | DO ->
      let bases = pop (function K_WHILE | K_FOR -> true | _ -> false) bases in
      bases, 
      (K_DO, 2) :: bases
  | AND ->
      let bases = pop_before (function K_LET | K_TYPE -> true | _ -> false) bases in
      List.tl bases, bases
      
  |UIDENT _|STRING _|PREFIXOP _|OPTLABEL _|NATIVEINT _|LIDENT _|ASSERT|LABEL _
  |INT64 _|INT32 _|INT _
  |FLOAT _|CHAR _

  | INFIXOP4 _| INFIXOP3 _| INFIXOP2 _| INFIXOP1 _| INFIXOP0 _
      
  |COMMENT

  |WHEN|VIRTUAL|UNDERSCORE|TRUE|TO|TILDE|STAR
  |SHARP|SEMI|REC|RBRACKET|QUOTE|QUESTIONQUESTION|QUESTION|PRIVATE|PLUSDOT|PLUS
  |OR|OF|OBJECT|NEW|MUTABLE|MODULE|MINUSDOT|MINUS|METHOD|LESSMINUS|LESS
  |LBRACKETGREATER|LBRACKETLESS|LBRACKETBAR|LBRACKET|LBRACELESS|LAZY
  |INITIALIZER|INHERIT|INCLUDE|GREATERRBRACKET|GREATERRBRACE|GREATER|FUNCTOR
  |FOR|FALSE|EXTERNAL|EXCEPTION|EQUAL|DOWNTO|DOTDOT|DOT|DONE|CONSTRAINT
  |COMMA|COLONGREATER|COLONEQUAL|COLONCOLON|COLON|CLASS|BARRBRACKET|BARBAR|BANG
  |BACKQUOTE|AS|AMPERSAND|AMPERAMPER -> bases, bases

(*
  | _ -> bases, bases
*)
  

type state = {
  bases : stack; (** indentation stack *)
  last_orig_region : Region.t;            (** the last token's region *)
  orig_indent : int;  
  prev : Parser.token option
}

let _ = 
  List.iter (fun path ->
    let ic = open_in path in
    try
      let reader = LexReader.create_from_channel ic in
      let rec loop state = 
        match LexReader.lex reader Lexer.token with
        | EOF -> 
            (* The last white space is gone *)
            print_string "\n"
        | t -> 
            (* region from the source *)
            let orig_region = LexReader.region reader in

            (* space between the last token and the current *)
            let space_between = 
              let last_end = (snd state.last_orig_region).Position.pos_cnum in
              LexReader.substring 
                reader last_end ((fst orig_region).Position.pos_cnum - last_end)
            in

            (* token's string *)
            let substr = LexReader.current_substring reader in

            let new_line = Region.lnum state.last_orig_region <>  Region.lnum orig_region in

            (* original indentation *)
            let orig_indent = if new_line then Region.columns orig_region else state.orig_indent in

            (* the first token of the line may change the indentation of the line *)
            let bases_pre, bases = token state.bases ~prev:state.prev t in

(*
            (* fixed region *)
            let fixed_region = Region.move_chars (indent - orig_indent) orig_region in

            let bases = token tmp_bases ~prevs:state.prevs t fixed_region indent in
*)
            
            let state' = 
              { bases = bases;
                last_orig_region = orig_region; (* or just line number ? *)
                orig_indent = orig_indent;
                prev = if t <> COMMENT then Some t else state.prev (* COMMENT must be skipped *)
              }
            in

            if new_line then begin
              let indent = get_indent_chars bases_pre in
              let space_between = 
                try 
                  let pos = String.rindex space_between '\n' in 
                  String.sub space_between 0 pos
                with
                | _ -> "XXX"
              in

              Printf.printf "%s\n" space_between;

              if debug then begin
                print_string (String.make indent ' ');
                Format.printf "(* %s *)@." (Sexplib.Sexp.to_string_mach (sexp_of_stack bases_pre)); 
              end;

              print_string (String.make indent ' ');
              Printf.printf "%s" substr;
            end else begin
              print_string space_between;
              Printf.printf "%s" substr;
            end;
            loop state'
      in
      loop { bases = []; last_orig_region = Region.zero; orig_indent = 0; prev = None } 

    with
    | e -> 
        close_in ic;
        raise e
  ) paths