Source

erlint / src / formatter.ml

open Token

module Po = Program_options

module R = Return

type error = { line_number : int
	     ; msg         : string
	     }

type error_t =
  | Forced_line_break
  | Msg of string

type warning = error

type parser_state =
  | Top_level
  | Function

type state = { program_options   : Program_options.t
	     (* The number of indentations, not the number of spaces *)
	     ; indentation_level : int
	     (* This is the current line we are reading, starting at 0 *)
	     ; src_line_number   : int
	     (* This is current line number we are writing, starting at 0 *)
	     ; dst_line_number   : int
	     ; parser_state      : parser_state list
	     ; line_builder      : Format_builder.t
	     ; builder           : Format_builder.t
	     ; warnings          : warning list
	     }


let default_state program_options buffer =
  { program_options   = program_options
  ; indentation_level = 0
  ; src_line_number   = 0
  ; dst_line_number   = 0
  ; parser_state      = [Top_level]
  ; line_builder      = Format_builder.create ()
  ; builder           = Format_builder.create ()
  ; warnings          = []
  }


let string_of_token = function
  | Token.Keyword kwd -> kwd
  | Token.Atom atm    -> atm
  | Token.Var var     -> var
  | Token.Char char   -> char
  | Token.Number num  -> num
  | Token.String str  -> str
  | Token.Comment com -> "%" ^ com
  | Token.Newline     -> "\n"
  | Token.Dot         -> "."

let incr ?(step = 1) num = num + step

let incr_src_line state =
  { state with src_line_number = incr state.src_line_number }

let incr_dst_line state =
  { state with
    dst_line_number = incr state.dst_line_number;
    builder = Format_builder.add_token state.builder Newline
  }

(*
 * Kind of lame I'm naming this bind but this is effectively
 * a monad-like thing so might as well stick to the literature?
 *)
let bind f = function
  | R.Success (state, rest) ->
    f state rest
  | R.Failure error ->
    R.Failure error

let fail state msg =
  R.Failure { line_number = state.src_line_number
	    ; msg = msg
	    }

let add_warning state msg =
  { state with
    warnings = { line_number = state.dst_line_number
	       ; msg = msg
	       }::state.warnings
  }

let add_token state token =
  { state with
    line_builder =
      Format_builder.add_token
	state.line_builder
	token
  }

let add_tokens state tokens =
  { state with
    line_builder =
      Format_builder.add_tokens
	state.line_builder
	tokens
  }

let add_space ?(num = 1) state =
  { state with
    line_builder =
      Format_builder.add_space
	state.line_builder
	num
  }


let append_line state =
  { state with
    builder =
      (Format_builder.append
	 state.builder
	 state.line_builder);
    line_builder = Format_builder.create ()
  }

let is_line_too_long state =
  (String.length
     (Format_builder.to_string
	state.line_builder
	string_of_token)
   > state.program_options.Po.max_line_length)

let succeed state rest =
  R.Success (state, rest)

let rec format_top_level state = function
  | Newline::xs ->
    let state' = append_line state in
    format_top_level (incr_dst_line (incr_src_line state')) xs
  | (Comment text)::xs ->
    let state =
      add_token
	state
	(Comment text)
    in
    if is_line_too_long state then begin
      format_top_level
	(add_warning state "Comment longer than maximum line length")
	xs
    end
    else
      format_top_level state xs
  | (Keyword "-")::(Atom "module")::xs ->
    bind
      format_top_level
      (format_module
	 (add_tokens
	    state
	    [Keyword "-"; Atom "module"])
	 xs)
  | (Keyword "-")::(Atom "export")::xs ->
    bind
      format_top_level
      (format_export
	 (add_tokens
	    state
	    [Keyword "-"; Atom "export"])
	 xs)
  | [] ->
    succeed (append_line state) []
  (* TODO: This needs to be removed at some point *)
  | _::xs -> format_top_level state xs
and format_module state = function
  | (Keyword "(")::(Atom mod_name)::(Keyword ")")::Dot::xs ->
    succeed
      (add_tokens
	 state
	 [Keyword "("; Atom mod_name; Keyword ")"; Dot])
      xs
  | _ ->
    fail state "Unknown -module declaration"
and format_export state = function
  | (Keyword "(")::(Keyword "[")::xs -> begin
    let state =
      add_tokens
	state
	[Keyword "("; Keyword "["]
    in
    match format_list_oneline state xs with
      | R.Success (state, rest) ->
	bind
	  format_top_level
	  (format_export_end
	     state
	     rest)
      | R.Failure Forced_line_break ->
	bind
	  format_top_level
	  (format_list_multiline
	     state
	     xs)
      | R.Failure (Msg msg) ->
	fail state msg
  end
  | _ ->
    fail state "Unknown -export declaration"
and format_export_end state = function
  | (Keyword ")")::Dot::xs ->
    succeed
      (add_tokens
	 state
	 [(Keyword ")"); Dot])
      xs
  | _ ->
    fail state "Unexpected tokens in -export"
and format_list_oneline state = function
    | (Keyword "]")::xs ->
      let state =
	add_token
	  state
	  (Keyword "]")
      in
      if is_line_too_long state then
	R.Failure Forced_line_break
      else
	succeed
	  state
	  xs
    | (Keyword ",")::xs ->
      format_list_oneline
	(add_space
	   (add_token
	      state
	      (Keyword ",")))
	xs
    | (Comment _)::_
    | Newline::_ ->
      R.Failure Forced_line_break
    | x::xs ->
      format_list_oneline
	(add_token
	   state
	   x)
	xs
    | [] ->
      R.Failure (Msg "Unexpected end of input")
and format_list_multiline state = function
    | _ -> fail state "Not implemented"

let format_code program_options code =
  (*
   * Initiate the buffer at the number of tokens * 10, just
   * a reasonable round number
   *)
  let buffer = Buffer.create (List.length code * 10) in
  let state = default_state program_options buffer in
  match format_top_level state code with
    | R.Success (state, []) ->
      let code =
	Format_builder.to_string
	  state.builder
	  string_of_token
      in
      R.Success (code, state.warnings)
    | R.Success (state, _) ->
      fail state "Did not consume entire input"
    | R.Failure error ->
      R.Failure error

let format_to_channel program_options out_chan code =
  match format_code program_options code with
    | R.Success (formatted_code, warnings) -> begin
      output_string out_chan formatted_code;
      R.Success warnings
    end
    | R.Failure errors ->
      R.Failure errors