Source

ocaml-indent / main.ml

open Sexplib.Conv

let parse_args () = 
  let rev_paths = ref [] in
  Arg.parse [] (fun s -> rev_paths := s :: !rev_paths) "indent paths";
  List.rev !rev_paths

let paths = parse_args ()

module Position = struct
  open Sexplib.Conv

  type t = Lexing.position =  {
    pos_fname : string;
    pos_lnum : int;
    pos_bol : int;
    pos_cnum : int;
  } with sexp

  let to_string t = 
    Printf.sprintf "%s%d:%d" 
      (if t.pos_fname = "" then "" else t.pos_fname ^ ":")
      t.pos_lnum 
      (t.pos_cnum - t.pos_bol)

  let zero = { pos_fname = "";
               pos_lnum = 1;
               pos_bol = 0;
               pos_cnum = 0 }

  let columns p = p.pos_cnum - p.pos_bol
end

module Region = struct
  open Position
  type t = Position.t * Position.t
  let lnum (p,_) = p.pos_lnum
  let columns (p,_) = columns p
  let zero = (Position.zero, Position.zero)
  let move_chars diff (p,p') = 
    { p  with pos_cnum = p .pos_cnum + diff },
    { p' with pos_cnum = p'.pos_cnum + diff } 
end

type kind = [ `TOPLET | `LET | `LPAREN | `LBRACE | `STRUCT | `SIG ] with sexp

type stack = (kind * int) list with sexp

let get_indent_chars = function
  | [] -> 0
  | (_,i)::_ -> i


open Parser

let pop f bases = 
  let rec pop = function
    | (token, _) :: bs when f token -> bs
    | (_, _) :: bs -> pop bs
    | [] -> bases
  in
  pop bases

(* the indent of a line is computed from:
   - the state of the last line
   - the first token of the line
*)
let indent bases ~prev t =
  match t with
  | RPAREN -> pop (function `LPAREN -> true | _ -> false) bases
  | RBRACE -> pop (function `LBRACE -> true | _ -> false) bases
  | END ->    pop (function `STRUCT | `SIG -> true | _ -> false) bases
  | IN ->     pop (function `LET    -> true | _ -> false) bases
  | LET ->
      begin match prev with
      | Some (LPAREN | LBRACE | BEGIN | IN | EQUAL)  (* .. *) -> 
          bases
      | _ -> 
          pop (function `TOPLET -> true | _ -> false) bases
      end
  | _ -> bases

(* 
   bases : If [t] is at the head of the line, [bases] must be updated by [indent]
   region : With indentation fix. It may not the region from the source.
*)

let token bases ~prev t fixed_region current_indent =
  let _columns = Region.columns fixed_region in

  match t with
  | LPAREN -> (`LPAREN, current_indent + 2) :: bases
  | LBRACE -> (`LBRACE, current_indent + 2) :: bases
  | STRUCT -> (`STRUCT, current_indent + 2) :: bases
  | SIG    -> (`SIG, current_indent + 2) :: bases
  | LET -> 
      let bases = indent bases ~prev t in
      begin match prev with
      | Some (LPAREN | LBRACE | BEGIN | IN | EQUAL) (* .. *) -> 
          (`LET, current_indent + 2) :: bases
      | _ -> 
          (`TOPLET, current_indent + 2) :: bases
      end
  | _ -> indent bases ~prev t
;;

module Lexbuf = struct

  type t = {
    mutable buf : Buffer.t;
    mutable pos : int;
  }

  let create () = { buf = Buffer.create 10; pos = 0 }

  let add_substring t s offset len = 
    Buffer.add_substring t.buf s offset len

  let substring t offset len = 
    let offset = offset - t.pos in
    if offset < 0 then assert false;
    try
      Buffer.sub t.buf offset len
    with
    | e -> Format.eprintf "Buffer.sub %S %d %d@." (Buffer.contents t.buf) offset len; raise e

  let forget_before t pos =
    assert (t.pos <= pos);
    let removing = pos - t.pos in
    if removing = 0 then ()
    else begin
      let content = Buffer.sub t.buf removing (Buffer.length t.buf - removing) in
      let buf = Buffer.create (String.length content) in
      Buffer.add_string buf content;
      t.buf <- buf;
      t.pos <- pos
    end
end

module LexReader : sig
  type t
  val create_from_channel : in_channel -> t
  val lex : t -> (Lexing.lexbuf -> 'a) -> 'a
  val substring : t -> int -> int -> string
  val current_substring : t -> string
  val region : t -> Position.t * Position.t
end = struct
  open Lexbuf

  type t = Lexbuf.t * Lexing.lexbuf

  let create_from_channel ic = 
    let buf = Lexbuf.create () in
    let f s n = 
      let read_bytes = input ic s 0 n in
      Lexbuf.add_substring buf s 0 read_bytes;
      read_bytes
    in
    buf, Lexing.from_function f

  let lex (_,lexbuf) f = f lexbuf

  let substring (buf, _) = Lexbuf.substring buf

  let current_substring (buf, lexbuf) = 
    Lexbuf.substring buf
      lexbuf.Lexing.lex_start_p.Position.pos_cnum
      (lexbuf.Lexing.lex_curr_p.Position.pos_cnum - 
         lexbuf.Lexing.lex_start_p.Position.pos_cnum)

  let region (_, lexbuf) = lexbuf.Lexing.lex_start_p, lexbuf.Lexing.lex_curr_p
end

type state = {
  bases : stack; (** indentation stack *)
  last_orig_region : Region.t;            (** the last token's region *)
  orig_indent : int;  
  prev_token : Parser.token option
}

let _ = 
  List.iter (fun path ->
    let ic = open_in path in
    try
      let reader = LexReader.create_from_channel ic in
      let rec loop state = 
        match LexReader.lex reader Lexer.token with
        | EOF -> ()
        | t -> 
            (* region from the source *)
            let orig_region = LexReader.region reader in

            (* space between the last token and the current *)
            let space_between = 
              let last_end = (snd state.last_orig_region).Position.pos_cnum in
              LexReader.substring 
                reader last_end ((fst orig_region).Position.pos_cnum - last_end)
            in

            (* token's string *)
            let substr = LexReader.current_substring reader in

            let new_line = Region.lnum state.last_orig_region <>  Region.lnum orig_region in

            (* original indentation *)
            let orig_indent = if new_line then Region.columns orig_region else state.orig_indent in

            (* the first token of the line may change the indentation of the line *)
            let tmp_bases = 
              if Region.lnum state.last_orig_region <>  Region.lnum orig_region then
                indent state.bases ~prev:state.prev_token t 
              else state.bases
            in

            let indent = get_indent_chars tmp_bases in
            
            (* fixed region *)
            let fixed_region = Region.move_chars (indent - orig_indent) orig_region in

            let bases = token tmp_bases ~prev:state.prev_token t fixed_region indent in
            
            let state' = 
              { bases = bases;
                last_orig_region = orig_region; (* or just line number ? *)
                orig_indent = orig_indent;
                prev_token = Some t
              }
            in

            if new_line then begin
              let space_between = 
                try 
                  let pos = String.rindex space_between '\n' in 
                  String.sub space_between 0 pos
                with
                | _ -> "XXX"
              in

              Printf.printf "%s\n" space_between;

              print_string (String.make indent ' ');
              Format.printf "(* %a *)@." Sexplib.Sexp.pp_hum (sexp_of_stack tmp_bases); 

              print_string (String.make indent ' ');
              Printf.printf "%s" substr;
            end else begin
              print_string space_between;
              Printf.printf "%s" substr;
            end;
            loop state'
      in
      loop { bases = []; last_orig_region = Region.zero; orig_indent = 0; prev_token = None } 

    with
    | e -> 
        close_in ic;
        raise e
  ) paths