Commits

camlspotter committed 4aed057

update

  • Participants
  • Parent commits 5a65f80

Comments (0)

Files changed (12)

     terminfo
     warnings
     location
+    pos
     parser
+    token
     lexer
+    reader
     main
 
 lexer.cmi: parser.cmi
 # 324 "lexer.mll"
       ( comment_start_loc := [Location.curr lexbuf];
         comment lexbuf;
-        token lexbuf )
+        (* token lexbuf *) )
 # 1321 "lexer.ml"
 
   | 23 ->
         Location.prerr_warning loc Warnings.Comment_start;
         comment_start_loc := [Location.curr lexbuf];
         comment lexbuf;
-        token lexbuf
+        (* token lexbuf *)
       )
 # 1331 "lexer.ml"
 
 # 417 "lexer.mll"
       ( match !comment_start_loc with
         | [] -> assert false
-        | [x] -> comment_start_loc := [];
+        | [x] -> comment_start_loc := []; lexbuf.lex_start_p <- x.Location.loc_start; COMMENT
         | _ :: l -> comment_start_loc := l;
                     comment lexbuf;
        )
   | "(*"
       { comment_start_loc := [Location.curr lexbuf];
         comment lexbuf;
-        token lexbuf }
+        (* token lexbuf *) }
   | "(*)"
       { let loc = Location.curr lexbuf in
         Location.prerr_warning loc Warnings.Comment_start;
         comment_start_loc := [Location.curr lexbuf];
         comment lexbuf;
-        token lexbuf
+        (* token lexbuf *)
       }
   | "*)"
       { let loc = Location.curr lexbuf in
   | "*)"
       { match !comment_start_loc with
         | [] -> assert false
-        | [x] -> comment_start_loc := [];
+        | [x] -> comment_start_loc := []; lexbuf.lex_start_p <- x.Location.loc_start; COMMENT
         | _ :: l -> comment_start_loc := l;
                     comment lexbuf;
        }
 open Sexplib.Conv
+open Pos
+open Reader
 
 let parse_args () = 
   let rev_paths = ref [] in
-  Arg.parse [] (fun s -> rev_paths := s :: !rev_paths) "indent paths";
-  List.rev !rev_paths
+  let debug = ref false in
+  Arg.parse [
+    ("-debug", Arg.Set debug, "debugging")
+  ] (fun s -> rev_paths := s :: !rev_paths) "indent paths";
+  List.rev !rev_paths, !debug
 
-let paths = parse_args ()
+let paths, debug = parse_args ()
 
-module Position = struct
-  open Sexplib.Conv
-
-  type t = Lexing.position =  {
-    pos_fname : string;
-    pos_lnum : int;
-    pos_bol : int;
-    pos_cnum : int;
-  } with sexp
-
-  let to_string t = 
-    Printf.sprintf "%s%d:%d" 
-      (if t.pos_fname = "" then "" else t.pos_fname ^ ":")
-      t.pos_lnum 
-      (t.pos_cnum - t.pos_bol)
-
-  let zero = { pos_fname = "";
-               pos_lnum = 1;
-               pos_bol = 0;
-               pos_cnum = 0 }
-
-  let columns p = p.pos_cnum - p.pos_bol
-end
-
-module Region = struct
-  open Position
-  type t = Position.t * Position.t
-  let lnum (p,_) = p.pos_lnum
-  let columns (p,_) = columns p
-  let zero = (Position.zero, Position.zero)
-  let move_chars diff (p,p') = 
-    { p  with pos_cnum = p .pos_cnum + diff },
-    { p' with pos_cnum = p'.pos_cnum + diff } 
-end
-
-type kind = [ `TOPLET | `LET | `LPAREN | `LBRACE | `STRUCT | `SIG ] with sexp
+type kind = 
+  | K_TOPLET  (* top let *)
+  | K_LET     (* local let *)
+  | K_LPAREN 
+  | K_LBRACE 
+  | K_STRUCT 
+  | K_SIG 
+  | K_TRY 
+  | K_MATCH 
+  | K_TYPE 
+  | K_IF 
+  | K_THEN 
+  | K_ELSE 
+  | K_BEGIN 
+  | K_ARROW
+  | K_OPEN
+  | K_FUNCTION
+  | K_FUN
+  | K_WHILE
+  | K_FOR
+  | K_DO
+  | K_VAL
+  | K_WITH
+  | K_BAR
+  | K_NONE
+with sexp
 
 type stack = (kind * int) list with sexp
 
-let get_indent_chars = function
+let rec get_indent_chars = function
   | [] -> 0
-  | (_,i)::_ -> i
-
+  | (_,i)::bs -> i + get_indent_chars bs
 
 open Parser
 
 let pop f bases = 
-  let rec pop = function
-    | (token, _) :: bs when f token -> bs
-    | (_, _) :: bs -> pop bs
-    | [] -> bases
-  in
-  pop bases
+  try
+    let rec pop = function
+      | (token, _) :: bs when f token -> bs
+      | (_, _) :: bs -> pop bs
+      | [] -> []
+    in
+    pop bases
+  with
+  | Exit -> bases
 
-(* the indent of a line is computed from:
-   - the state of the last line
-   - the first token of the line
-*)
-let indent bases ~prev t =
-  match t with
-  | RPAREN -> pop (function `LPAREN -> true | _ -> false) bases
-  | RBRACE -> pop (function `LBRACE -> true | _ -> false) bases
-  | END ->    pop (function `STRUCT | `SIG -> true | _ -> false) bases
-  | IN ->     pop (function `LET    -> true | _ -> false) bases
-  | LET ->
-      begin match prev with
-      | Some (LPAREN | LBRACE | BEGIN | IN | EQUAL)  (* .. *) -> 
-          bases
-      | _ -> 
-          pop (function `TOPLET -> true | _ -> false) bases
-      end
-  | _ -> bases
+let pop' f bases = 
+  try
+    let rec pop = function
+      | (token, _) :: bs when f token -> bs, token
+      | (_, _) :: bs -> pop bs
+      | [] -> [], K_NONE
+    in
+    pop bases
+  with
+  | Exit -> bases, K_NONE
 
-(* 
-   bases : If [t] is at the head of the line, [bases] must be updated by [indent]
-   region : With indentation fix. It may not the region from the source.
-*)
+let pop_before f bases = 
+  try
+    let rec pop = function
+      | ((token, _) :: _ as bs) when f token -> bs
+      | (_, _) :: bs -> pop bs
+      | [] -> []
+    in
+    pop bases
+  with
+  | Exit -> bases
 
-let token bases ~prev t fixed_region current_indent =
-  let _columns = Region.columns fixed_region in
+let rec is_top_let = function
+  | None -> true
+  | Some t ->
+      match t with
+      | COMMENT -> assert false (* COMMENT must be skipped *)
+      | STRUCT | SEMISEMI -> true
 
-  match t with
-  | LPAREN -> (`LPAREN, current_indent + 2) :: bases
-  | LBRACE -> (`LBRACE, current_indent + 2) :: bases
-  | STRUCT -> (`STRUCT, current_indent + 2) :: bases
-  | SIG    -> (`SIG, current_indent + 2) :: bases
-  | LET -> 
-      let bases = indent bases ~prev t in
-      begin match prev with
-      | Some (LPAREN | LBRACE | BEGIN | IN | EQUAL) (* .. *) -> 
-          (`LET, current_indent + 2) :: bases
-      | _ -> 
-          (`TOPLET, current_indent + 2) :: bases
-      end
-  | _ -> indent bases ~prev t
+      | BEGIN | COLONEQUAL
+      | COMMA
+      | DO
+      | DOWNTO
+      | ELSE
+      | EQUAL
+      | IF
+      | IN
+      | INFIXOP0 _
+      | INFIXOP1 _
+      | INFIXOP2 _
+      | INFIXOP3 _
+      | INFIXOP4 _
+      | INITIALIZER
+      | LBRACE
+      | LBRACELESS
+      | LBRACKET
+      | LBRACKETBAR
+      | LBRACKETLESS
+      | LBRACKETGREATER
+      | LESSMINUS
+      | LPAREN
+      | MINUS
+      | MINUSDOT
+      | MINUSGREATER
+      | OR
+      | PLUS
+      | PLUSDOT
+      | PREFIXOP _
+      | STAR
+      | THEN
+      | TO
+      | TRY
+      | WHEN
+      | WHILE -> false
+
+      | _ -> true
 ;;
 
-module Lexbuf = struct
+let rec top bases = match bases with
+  | ((K_TOPLET | K_TYPE), _) :: bs -> bs
+  | ((K_STRUCT | K_SIG), _) :: _ -> bases
+  | _ :: bs -> top bs
+  | [] -> []
 
-  type t = {
-    mutable buf : Buffer.t;
-    mutable pos : int;
-  }
+let token bases ~prev = function
+  | EOF -> assert false
 
-  let create () = { buf = Buffer.create 10; pos = 0 }
+  | MINUSGREATER -> 
+      bases, 
+      (K_ARROW, 2) :: bases
+  | IF -> 
+      bases, 
+      (K_IF, 2) :: bases
+  | THEN -> 
+      let bases = pop ((=) K_IF) bases in
+      bases,
+      (K_THEN, 2) :: bases
+  | ELSE -> 
+      let bases = pop ((=) K_THEN) bases in
+      bases,
+      (K_ELSE, 2) :: bases
+  | LPAREN -> 
+      bases, 
+      (K_LPAREN, 2) :: bases
+  | RPAREN ->
+      let bases = pop ((=) K_LPAREN) bases in
+      bases, bases
+  | LBRACE -> 
+      bases,
+      (K_LBRACE, 2) :: bases
+  | RBRACE -> 
+      let bases = pop ((=) K_LBRACE) bases in
+      bases, bases
+  | BEGIN  -> 
+      bases,
+      (K_BEGIN, 2) :: bases
+  | END -> 
+      let bases = pop ((=) K_BEGIN) bases in
+      bases, bases
+  | TYPE -> 
+      let bases = top bases in
+      bases, 
+      (K_TYPE, 2) :: bases
+  | VAL ->
+      let bases = top bases in
+      bases,
+      (K_VAL, 2) :: bases
+  | OPEN -> 
+      let bases = top bases in
+      bases, 
+      (K_OPEN, 2) :: bases
+  | FUN -> 
+      bases, 
+      (K_FUN, 2) :: bases
+  | FUNCTION -> 
+      bases, 
+      (K_FUNCTION, 2) :: bases
+  | BAR ->
+      (* BAR rewind just before WITH/FUNCTION but cannot exceed LPAREN *)
+      let bases, k = pop' (function K_WITH | K_FUNCTION | K_LPAREN -> true | _ -> false) bases in
+      bases, (k, 2) :: bases
+  | STRUCT -> 
+      bases, 
+      (K_STRUCT, 2) :: bases
+  | SIG  -> 
+      bases, 
+      (K_SIG, 2) :: bases
+  | TRY -> 
+      bases,
+      (K_TRY, 2) :: bases
+  | MATCH ->
+      bases,
+      (K_MATCH, 2) :: bases
+  | WITH ->
+      (* typedesc support *)
+      let bases = pop (function K_TRY | K_MATCH  -> true | K_TYPE -> raise Exit | _ -> false) bases in
+      bases,
+      (K_WITH, 2) :: bases
+  | LET when is_top_let prev -> 
+      let bases = top bases in
+      bases,
+      (K_TOPLET, 2) :: bases
+  | LET -> 
+      bases,
+      (K_LET, 2) :: bases
+  | IN -> 
+      let bases = pop ((=) K_LET) bases in
+      bases,
+      bases
+  | SEMISEMI -> 
+      let bases = top bases in
+      bases, bases
+  | WHILE ->
+      bases,
+      (K_WHILE, 2) :: bases
+  | DO ->
+      let bases = pop (function K_WHILE | K_FOR -> true | _ -> false) bases in
+      bases, 
+      (K_DO, 2) :: bases
+  | AND ->
+      let bases = pop_before (function K_LET | K_TYPE -> true | _ -> false) bases in
+      List.tl bases, bases
+      
+  |UIDENT _|STRING _|PREFIXOP _|OPTLABEL _|NATIVEINT _|LIDENT _|ASSERT|LABEL _
+  |INT64 _|INT32 _|INT _
+  |FLOAT _|CHAR _
 
-  let add_substring t s offset len = 
-    Buffer.add_substring t.buf s offset len
+  | INFIXOP4 _| INFIXOP3 _| INFIXOP2 _| INFIXOP1 _| INFIXOP0 _
+      
+  |COMMENT
 
-  let substring t offset len = 
-    let offset = offset - t.pos in
-    if offset < 0 then assert false;
-    try
-      Buffer.sub t.buf offset len
-    with
-    | e -> Format.eprintf "Buffer.sub %S %d %d@." (Buffer.contents t.buf) offset len; raise e
+  |WHEN|VIRTUAL|UNDERSCORE|TRUE|TO|TILDE|STAR
+  |SHARP|SEMI|REC|RBRACKET|QUOTE|QUESTIONQUESTION|QUESTION|PRIVATE|PLUSDOT|PLUS
+  |OR|OF|OBJECT|NEW|MUTABLE|MODULE|MINUSDOT|MINUS|METHOD|LESSMINUS|LESS
+  |LBRACKETGREATER|LBRACKETLESS|LBRACKETBAR|LBRACKET|LBRACELESS|LAZY
+  |INITIALIZER|INHERIT|INCLUDE|GREATERRBRACKET|GREATERRBRACE|GREATER|FUNCTOR
+  |FOR|FALSE|EXTERNAL|EXCEPTION|EQUAL|DOWNTO|DOTDOT|DOT|DONE|CONSTRAINT
+  |COMMA|COLONGREATER|COLONEQUAL|COLONCOLON|COLON|CLASS|BARRBRACKET|BARBAR|BANG
+  |BACKQUOTE|AS|AMPERSAND|AMPERAMPER -> bases, bases
 
-  let forget_before t pos =
-    assert (t.pos <= pos);
-    let removing = pos - t.pos in
-    if removing = 0 then ()
-    else begin
-      let content = Buffer.sub t.buf removing (Buffer.length t.buf - removing) in
-      let buf = Buffer.create (String.length content) in
-      Buffer.add_string buf content;
-      t.buf <- buf;
-      t.pos <- pos
-    end
-end
-
-module LexReader : sig
-  type t
-  val create_from_channel : in_channel -> t
-  val lex : t -> (Lexing.lexbuf -> 'a) -> 'a
-  val substring : t -> int -> int -> string
-  val current_substring : t -> string
-  val region : t -> Position.t * Position.t
-end = struct
-  open Lexbuf
-
-  type t = Lexbuf.t * Lexing.lexbuf
-
-  let create_from_channel ic = 
-    let buf = Lexbuf.create () in
-    let f s n = 
-      let read_bytes = input ic s 0 n in
-      Lexbuf.add_substring buf s 0 read_bytes;
-      read_bytes
-    in
-    buf, Lexing.from_function f
-
-  let lex (_,lexbuf) f = f lexbuf
-
-  let substring (buf, _) = Lexbuf.substring buf
-
-  let current_substring (buf, lexbuf) = 
-    Lexbuf.substring buf
-      lexbuf.Lexing.lex_start_p.Position.pos_cnum
-      (lexbuf.Lexing.lex_curr_p.Position.pos_cnum - 
-         lexbuf.Lexing.lex_start_p.Position.pos_cnum)
-
-  let region (_, lexbuf) = lexbuf.Lexing.lex_start_p, lexbuf.Lexing.lex_curr_p
-end
+(*
+  | _ -> bases, bases
+*)
+  
 
 type state = {
   bases : stack; (** indentation stack *)
   last_orig_region : Region.t;            (** the last token's region *)
   orig_indent : int;  
-  prev_token : Parser.token option
+  prev : Parser.token option
 }
 
 let _ = 
       let reader = LexReader.create_from_channel ic in
       let rec loop state = 
         match LexReader.lex reader Lexer.token with
-        | EOF -> ()
+        | EOF -> 
+            (* The last white space is gone *)
+            print_string "\n"
         | t -> 
             (* region from the source *)
             let orig_region = LexReader.region reader in
             let orig_indent = if new_line then Region.columns orig_region else state.orig_indent in
 
             (* the first token of the line may change the indentation of the line *)
-            let tmp_bases = 
-              if Region.lnum state.last_orig_region <>  Region.lnum orig_region then
-                indent state.bases ~prev:state.prev_token t 
-              else state.bases
-            in
+            let bases_pre, bases = token state.bases ~prev:state.prev t in
 
-            let indent = get_indent_chars tmp_bases in
-            
+(*
             (* fixed region *)
             let fixed_region = Region.move_chars (indent - orig_indent) orig_region in
 
-            let bases = token tmp_bases ~prev:state.prev_token t fixed_region indent in
+            let bases = token tmp_bases ~prevs:state.prevs t fixed_region indent in
+*)
             
             let state' = 
               { bases = bases;
                 last_orig_region = orig_region; (* or just line number ? *)
                 orig_indent = orig_indent;
-                prev_token = Some t
+                prev = if t <> COMMENT then Some t else state.prev (* COMMENT must be skipped *)
               }
             in
 
             if new_line then begin
+              let indent = get_indent_chars bases_pre in
               let space_between = 
                 try 
                   let pos = String.rindex space_between '\n' in 
 
               Printf.printf "%s\n" space_between;
 
-              print_string (String.make indent ' ');
-              Format.printf "(* %a *)@." Sexplib.Sexp.pp_hum (sexp_of_stack tmp_bases); 
+              if debug then begin
+                print_string (String.make indent ' ');
+                Format.printf "(* %s *)@." (Sexplib.Sexp.to_string_mach (sexp_of_stack bases_pre)); 
+              end;
 
               print_string (String.make indent ' ');
               Printf.printf "%s" substr;
             end;
             loop state'
       in
-      loop { bases = []; last_orig_region = Region.zero; orig_indent = 0; prev_token = None } 
+      loop { bases = []; last_orig_region = Region.zero; orig_indent = 0; prev = None } 
 
     with
     | e -> 
         close_in ic;
         raise e
   ) paths
+
   | WHEN
   | WHILE
   | WITH
+  | COMMENT
 
 with sexp
   | WHEN
   | WHILE
   | WITH
+  | COMMENT
 
 with sexp
+module Position = struct
+  open Sexplib.Conv
+
+  type t = Lexing.position =  {
+    pos_fname : string;
+    pos_lnum : int;
+    pos_bol : int;
+    pos_cnum : int;
+  } with sexp
+
+  let to_string t = 
+    Printf.sprintf "%s%d:%d" 
+      (if t.pos_fname = "" then "" else t.pos_fname ^ ":")
+      t.pos_lnum 
+      (t.pos_cnum - t.pos_bol)
+
+  let zero = { pos_fname = "";
+               pos_lnum = 1;
+               pos_bol = 0;
+               pos_cnum = 0 }
+
+  let columns p = p.pos_cnum - p.pos_bol
+end
+
+module Region = struct
+  open Position
+  type t = Position.t * Position.t with sexp
+  let lnum (p,_) = p.pos_lnum
+  let columns (p,_) = columns p
+  let zero = (Position.zero, Position.zero)
+  let move_chars diff (p,p') = 
+    { p  with pos_cnum = p .pos_cnum + diff },
+    { p' with pos_cnum = p'.pos_cnum + diff } 
+end
+open Pos
+
+module Lexbuf = struct
+
+  type t = {
+    mutable buf : Buffer.t;
+    mutable pos : int;
+  }
+
+  let create () = { buf = Buffer.create 10; pos = 0 }
+
+  let add_substring t s offset len = 
+    Buffer.add_substring t.buf s offset len
+
+  let substring t offset len = 
+    let offset = offset - t.pos in
+    if offset < 0 then assert false;
+    try
+      Buffer.sub t.buf offset len
+    with
+    | e -> Format.eprintf "Buffer.sub %S %d %d@." (Buffer.contents t.buf) offset len; raise e
+
+  let substring_of_region t (p1, p2) = 
+    substring t p1.Position.pos_cnum (p2.Position.pos_cnum - p1.Position.pos_cnum)
+
+  let forget_before t pos =
+    assert (t.pos <= pos);
+    let removing = pos - t.pos in
+    if removing = 0 then ()
+    else begin
+      let content = Buffer.sub t.buf removing (Buffer.length t.buf - removing) in
+      let buf = Buffer.create (String.length content) in
+      Buffer.add_string buf content;
+      t.buf <- buf;
+      t.pos <- pos
+    end
+end
+
+module LexReader : sig
+  type t
+  val create_from_channel : in_channel -> t
+  val lex : t -> (Lexing.lexbuf -> 'a) -> 'a
+  val substring : t -> int -> int -> string
+  val substring_of_region : t -> Region.t -> string
+  val current_substring : t -> string
+  val region : t -> Region.t
+end = struct
+  open Lexbuf
+
+  type t = Lexbuf.t * Lexing.lexbuf
+
+  let create_from_channel ic = 
+    let buf = Lexbuf.create () in
+    let f s n = 
+      let read_bytes = input ic s 0 n in
+      Lexbuf.add_substring buf s 0 read_bytes;
+      read_bytes
+    in
+    buf, Lexing.from_function f
+
+  let lex (_,lexbuf) f = f lexbuf
+
+  let substring (buf, _) = Lexbuf.substring buf
+  let substring_of_region (buf, _) = Lexbuf.substring_of_region buf
+
+  let current_substring (buf, lexbuf) = 
+    Lexbuf.substring buf
+      lexbuf.Lexing.lex_start_p.Position.pos_cnum
+      (lexbuf.Lexing.lex_curr_p.Position.pos_cnum - 
+         lexbuf.Lexing.lex_start_p.Position.pos_cnum)
+
+  (* It is not correct! *)
+  let region (_, lexbuf) = lexbuf.Lexing.lex_start_p, lexbuf.Lexing.lex_curr_p
+end
+
+let f = function
+  | X -> 1
+  | X | Y -> 2
+  | X -> 1
+  | (X | Y) -> 3
+  | X -> 1
+  | Y -> 
+      1
+  | Y -> 1
+      2
+  | _ -> Z
+  | ( X 
+    | Y
+    | Z ) -> 4
+
+(* comment *)
+      
+let f = 
+  10 
+  + 2 -
+    3
+  + 4
+
+(* comment *)
+let x = 
+  let y = 1
+    + 2
+    - 3
+  and z = foo
+  in
+  y + z
+
+(* comment *)
+f 1 2 3 
+  4 5 6