Commits

camlspotter  committed 3e9a340

p4 quotes and some clean-up

  • Participants
  • Parent commits 1b6fc5b
  • Branches 4.01.0

Comments (0)

Files changed (14)

 .*~$
 tests_out/.*\.ml
 docs/_build
-^lexer\.ml$
+^xlexer\.ml$
 ^linenum\.ml$
 ^\.omake.*
 ocaml-indent
     reader
     tokenstr
     xparser
+    xlexer
     machine
     args
     main
+val paths : string list
+val debug : bool
+val lines : (int * int) option (** invariant: if [lines <> None] then [List.length paths <= 1] *)
+val showstate : bool
+val cursor : (int * int) option
+
       | COMMENT _ -> assert false (* COMMENT must be skipped *)
       | STRUCT | SEMISEMI
 
-      | UIDENT _|STRING _|OPTLABEL _|NATIVEINT _|LIDENT _|LABEL _|INT64 _|INT32 _
+      | UIDENT _|STRING _|P4QUOTE _|OPTLABEL _|NATIVEINT _|LIDENT _|LABEL _|INT64 _|INT32 _
       | INT _|FLOAT _|CHAR _|WITH|VIRTUAL|VAL|UNDERSCORE|TYPE|TRUE|TILDE|SIG|SHARP
       | RPAREN|REC|RBRACKET|RBRACE|QUOTE|QUESTION|PRIVATE|OPEN
       | OF|OBJECT|NEW|MUTABLE|MODULE|METHOD|MATCH|LET|LAZY|INHERIT|INCLUDE
         in
         f bases0
 
-    | INT64 _ | INT32 _ | INT _ | LIDENT _ | UIDENT _ | FLOAT _| CHAR _ | STRING _ | TRUE | FALSE ->
+    | INT64 _ | INT32 _ | INT _ | LIDENT _ | UIDENT _ | FLOAT _| CHAR _ | STRING _ | TRUE | FALSE | P4QUOTE _ ->
         let rec f bases = match bases with
           | { k = KExpr _ } :: _ -> bases0, bases0
           | [] -> bases0, { k = KExpr cols; indent = cols + 2; } :: bases0
 
 module Sexp = Sexplib.Sexp (* No open Sexplib, since Parser corrides with Sexplib.Parser *)
 
+module Parser = Xparser
+
 (** [l] is in the interested area or not? *)
 let check_line l = function
   | None -> `Inside
+module Position : sig
+  type t = Lexing.position = {
+    pos_fname : string;
+    pos_lnum : int;
+    pos_bol : int;
+    pos_cnum : int;
+  } with sexp
+
+  val to_string : t -> string
+  val zero : t
+  val columns : t -> int
+  val compare : t -> t -> int
+  val compare_lines_cols : t -> int * int -> int
+end
+
+module Region : sig
+  type t = Position.t * Position.t with sexp
+  
+  val lnum : Position.t * 'a -> int
+  val columns : Position.t * 'a -> int
+  val zero : Position.t * Position.t
+  val move_chars : int -> Position.t * Position.t -> Position.t * Position.t
+  val contain : Position.t * Position.t -> Position.t -> [> `In | `Out_left | `Out_right ]
+  val contain_lines_cols : Position.t * Position.t -> int * int -> [> `In | `Out_left | `Out_right ]
+end
   let substring_of_region t (p1, p2) = 
     substring t p1.Position.pos_cnum (p2.Position.pos_cnum - p1.Position.pos_cnum)
 
-  let forget_before t pos =
+  let _forget_before t pos =
     assert (t.pos <= pos);
     let removing = pos - t.pos in
     if removing = 0 then ()
   let region (_, lexbuf) = lexbuf.Lexing.lex_start_p, lexbuf.Lexing.lex_curr_p
 end
 
+include LexReader
+(** This module provides the same functionality of [Lexing.from_function] 
+    but the actual substring can be retrievable from the obtained tokens.
+*)
+
+type t (** The reader type *)
+
+val create_from_channel : in_channel -> t
+(** Creation from [in_channel] *)
+
+val lex : t -> (Lexing.lexbuf -> 'a) -> 'a
+(** Run a lexing function once over the reader *)
+
+val substring : t -> int -> int -> string
+(** [substring t pos len] returns the substring of [t] *)
+
+val substring_of_region : t -> Pos.Region.t -> string
+(** Same as [substring] but by [Pos.Region.t] *)
+
+val region : t -> Pos.Region.t
+(** Returns the reader's internal lexbuf's current region. *)
+
+val current_substring : t -> string
+(** Returns the reader's internal lexbuf's current region's substring. *)
+

File tests/else-in-next-line.ml

+let f x = if boo then bar
+  else zee

File tests/p4quote.ml

+let x = <<x>>
+let x = << \ >>
+let x = << >\> >>
 open Pos
-open Reader
 module Parser = Xparser
 open Parser
 
 let of_channel ic = 
   let icoptref = ref (Some ic) in
   try
-    let reader = LexReader.create_from_channel ic in
+    let reader = Reader.create_from_channel ic in
     let rec loop last_region = 
       let token = 
         try
-          LexReader.lex reader Lexer.token 
+          Reader.lex reader Xlexer.token 
         with
-        | Lexer.Error (e, _loc) ->
-            Format.eprintf "%a@." Lexer.report_error e;
+        | Xlexer.Error (e, _loc) ->
+            Format.eprintf "%a@." Xlexer.report_error e;
             assert false
       in
-      let region = LexReader.region reader in
+      let region = Reader.region reader in
       (* token's string *)
-      let substr = LexReader.current_substring reader in
+      let substr = Reader.current_substring reader in
 
       let space_between = 
         let last_end = (snd last_region).Position.pos_cnum in
-        LexReader.substring 
+        Reader.substring 
           reader last_end ((fst region).Position.pos_cnum - last_end)
       in
       let space_between_region = (snd last_region, fst region) in

File tokenstr.mli

   space : Pos.Region.t * string
 }
 
-val of_channel : in_channel -> Parser.token info t
+val of_channel : in_channel -> Xparser.token info t
 
-val of_path : string -> Parser.token info t
+val of_path : string -> Xparser.token info t
 
 val close : 'a t -> unit
 
+(***********************************************************************)
+(*                                                                     *)
+(*                                OCaml                                *)
+(*                                                                     *)
+(*            Xavier Leroy, projet Cristal, INRIA Rocquencourt         *)
+(*                                                                     *)
+(*  Copyright 1996 Institut National de Recherche en Informatique et   *)
+(*  en Automatique.  All rights reserved.  This file is distributed    *)
+(*  under the terms of the Q Public License version 1.0.               *)
+(*                                                                     *)
+(***********************************************************************)
+
+(* The lexer definition *)
+
+{
+open Lexing
+open Misc
+open Xparser
+
+type error =
+  | Illegal_character of char
+  | Illegal_escape of string
+  | Unterminated_comment of Location.t
+  | Unterminated_string
+  | Unterminated_string_in_comment of Location.t
+  | Keyword_as_label of string
+  | Literal_overflow of string
+  | Unterminated_p4_quotation
+;;
+
+exception Error of error * Location.t;;
+
+(* The table of keywords *)
+
+let keyword_table =
+  create_hashtable 149 [
+    "and", AND;
+    "as", AS;
+    "assert", ASSERT;
+    "begin", BEGIN;
+    "class", CLASS;
+    "constraint", CONSTRAINT;
+    "do", DO;
+    "done", DONE;
+    "downto", DOWNTO;
+    "else", ELSE;
+    "end", END;
+    "exception", EXCEPTION;
+    "external", EXTERNAL;
+    "false", FALSE;
+    "for", FOR;
+    "fun", FUN;
+    "function", FUNCTION;
+    "functor", FUNCTOR;
+    "if", IF;
+    "in", IN;
+    "include", INCLUDE;
+    "inherit", INHERIT;
+    "initializer", INITIALIZER;
+    "lazy", LAZY;
+    "let", LET;
+    "match", MATCH;
+    "method", METHOD;
+    "module", MODULE;
+    "mutable", MUTABLE;
+    "new", NEW;
+    "object", OBJECT;
+    "of", OF;
+    "open", OPEN;
+    "or", OR;
+(*  "parser", PARSER; *)
+    "private", PRIVATE;
+    "rec", REC;
+    "sig", SIG;
+    "struct", STRUCT;
+    "then", THEN;
+    "to", TO;
+    "true", TRUE;
+    "try", TRY;
+    "type", TYPE;
+    "val", VAL;
+    "virtual", VIRTUAL;
+    "when", WHEN;
+    "while", WHILE;
+    "with", WITH;
+
+    "mod", INFIXOP3("mod");
+    "land", INFIXOP3("land");
+    "lor", INFIXOP3("lor");
+    "lxor", INFIXOP3("lxor");
+    "lsl", INFIXOP4("lsl");
+    "lsr", INFIXOP4("lsr");
+    "asr", INFIXOP4("asr")
+]
+
+(* To buffer string literals *)
+
+let initial_string_buffer = String.create 256
+let string_buff = ref initial_string_buffer
+let string_index = ref 0
+
+let reset_string_buffer () =
+  string_buff := initial_string_buffer;
+  string_index := 0
+
+let store_string_char c =
+  if !string_index >= String.length (!string_buff) then begin
+    let new_buff = String.create (String.length (!string_buff) * 2) in
+      String.blit (!string_buff) 0 new_buff 0 (String.length (!string_buff));
+      string_buff := new_buff
+  end;
+  String.unsafe_set (!string_buff) (!string_index) c;
+  incr string_index
+
+let store_lexeme lexbuf =
+  let s = Lexing.lexeme lexbuf in
+  for i = 0 to String.length s - 1 do
+    store_string_char s.[i];
+  done
+
+let get_stored_string () =
+  let s = String.sub (!string_buff) 0 (!string_index) in
+  string_buff := initial_string_buffer;
+  s
+
+(* To store the position of the beginning of a string and comment *)
+let string_start_loc = ref Location.none;;
+let comment_start_loc = ref [];;
+let in_comment () = !comment_start_loc <> [];;
+let is_in_string = ref false
+let in_string () = !is_in_string
+let print_warnings = ref true
+
+(* To translate escape sequences *)
+
+let char_for_backslash = function
+  | 'n' -> '\010'
+  | 'r' -> '\013'
+  | 'b' -> '\008'
+  | 't' -> '\009'
+  | c   -> c
+
+let char_for_decimal_code lexbuf i =
+  let c = 100 * (Char.code(Lexing.lexeme_char lexbuf i) - 48) +
+           10 * (Char.code(Lexing.lexeme_char lexbuf (i+1)) - 48) +
+                (Char.code(Lexing.lexeme_char lexbuf (i+2)) - 48) in
+  if (c < 0 || c > 255) then
+    if in_comment ()
+    then 'x'
+    else raise (Error(Illegal_escape (Lexing.lexeme lexbuf),
+                      Location.curr lexbuf))
+  else Char.chr c
+
+let char_for_hexadecimal_code lexbuf i =
+  let d1 = Char.code (Lexing.lexeme_char lexbuf i) in
+  let val1 = if d1 >= 97 then d1 - 87
+             else if d1 >= 65 then d1 - 55
+             else d1 - 48
+  in
+  let d2 = Char.code (Lexing.lexeme_char lexbuf (i+1)) in
+  let val2 = if d2 >= 97 then d2 - 87
+             else if d2 >= 65 then d2 - 55
+             else d2 - 48
+  in
+  Char.chr (val1 * 16 + val2)
+
+(* To convert integer literals, allowing max_int + 1 (PR#4210) *)
+
+let cvt_int_literal s =
+  - int_of_string ("-" ^ s)
+let cvt_int32_literal s =
+  Int32.neg (Int32.of_string ("-" ^ String.sub s 0 (String.length s - 1)))
+let cvt_int64_literal s =
+  Int64.neg (Int64.of_string ("-" ^ String.sub s 0 (String.length s - 1)))
+let cvt_nativeint_literal s =
+  Nativeint.neg (Nativeint.of_string ("-" ^ String.sub s 0
+                                                       (String.length s - 1)))
+
+(* Remove underscores from float literals *)
+
+let remove_underscores s =
+  let l = String.length s in
+  let rec remove src dst =
+    if src >= l then
+      if dst >= l then s else String.sub s 0 dst
+    else
+      match s.[src] with
+        '_' -> remove (src + 1) dst
+      |  c  -> s.[dst] <- c; remove (src + 1) (dst + 1)
+  in remove 0 0
+
+(* recover the name from a LABEL or OPTLABEL token *)
+
+let get_label_name lexbuf =
+  let s = Lexing.lexeme lexbuf in
+  let name = String.sub s 1 (String.length s - 2) in
+  if Hashtbl.mem keyword_table name then
+    raise (Error(Keyword_as_label name, Location.curr lexbuf));
+  name
+;;
+
+(* Update the current location with file name and line number. *)
+
+let update_loc lexbuf file line absolute chars =
+  let pos = lexbuf.lex_curr_p in
+  let new_file = match file with
+                 | None -> pos.pos_fname
+                 | Some s -> s
+  in
+  lexbuf.lex_curr_p <- { pos with
+    pos_fname = new_file;
+    pos_lnum = if absolute then line else pos.pos_lnum + line;
+    pos_bol = pos.pos_cnum - chars;
+  }
+;;
+
+(* Warn about Latin-1 characters used in idents *)
+
+let warn_latin1 lexbuf =
+  Location.prerr_warning (Location.curr lexbuf)
+    (Warnings.Deprecated "ISO-Latin1 characters in identifiers")
+;;
+
+(* Error report *)
+
+open Format
+
+let report_error ppf = function
+  | Illegal_character c ->
+      fprintf ppf "Illegal character (%s)" (Char.escaped c)
+  | Illegal_escape s ->
+      fprintf ppf "Illegal backslash escape in string or character (%s)" s
+  | Unterminated_comment _ ->
+      fprintf ppf "Comment not terminated"
+  | Unterminated_string ->
+      fprintf ppf "String literal not terminated"
+  | Unterminated_string_in_comment _ ->
+      fprintf ppf "This comment contains an unterminated string literal"
+  | Keyword_as_label kwd ->
+      fprintf ppf "`%s' is a keyword, it cannot be used as label name" kwd
+  | Literal_overflow ty ->
+      fprintf ppf "Integer literal exceeds the range of representable \
+                   integers of type %s" ty
+  | Unterminated_p4_quotation ->
+      fprintf ppf "P4 quotation not terminated"
+;;
+
+(*
+  let move_start_p shift c = (* FIXME Please see PR#5820*)
+    let p = c.lexbuf.lex_start_p in
+    c.lexbuf.lex_start_p <- { (p) with pos_cnum = p.pos_cnum + shift }
+*)
+
+let is_p4 = ref true
+let in_quotation = ref false
+}
+
+let newline = ('\010' | "\013\010" )
+let blank = [' ' '\009' '\012']
+let lowercase = ['a'-'z' '_']
+let uppercase = ['A'-'Z']
+let identchar = ['A'-'Z' 'a'-'z' '_' '\'' '0'-'9']
+let lowercase_latin1 = ['a'-'z' '\223'-'\246' '\248'-'\255' '_']
+let uppercase_latin1 = ['A'-'Z' '\192'-'\214' '\216'-'\222']
+let identchar_latin1 =
+  ['A'-'Z' 'a'-'z' '_' '\192'-'\214' '\216'-'\246' '\248'-'\255' '\'' '0'-'9']
+let symbolchar =
+  ['!' '$' '%' '&' '*' '+' '-' '.' '/' ':' '<' '=' '>' '?' '@' '^' '|' '~']
+let decimal_literal =
+  ['0'-'9'] ['0'-'9' '_']*
+let hex_literal =
+  '0' ['x' 'X'] ['0'-'9' 'A'-'F' 'a'-'f']['0'-'9' 'A'-'F' 'a'-'f' '_']*
+let oct_literal =
+  '0' ['o' 'O'] ['0'-'7'] ['0'-'7' '_']*
+let bin_literal =
+  '0' ['b' 'B'] ['0'-'1'] ['0'-'1' '_']*
+let int_literal =
+  decimal_literal | hex_literal | oct_literal | bin_literal
+let float_literal =
+  ['0'-'9'] ['0'-'9' '_']*
+  ('.' ['0'-'9' '_']* )?
+  (['e' 'E'] ['+' '-']? ['0'-'9'] ['0'-'9' '_']*)?
+
+(* from P4's Lexer.mll *)
+let ident = (lowercase|uppercase) identchar*
+let locname = ident
+
+rule token = parse
+  | newline
+      { update_loc lexbuf None 1 false 0;
+        token lexbuf
+      }
+  | blank +
+      { token lexbuf }
+  | "_"
+      { UNDERSCORE }
+  | "~"
+      { TILDE }
+  | "~" lowercase identchar * ':'
+      { LABEL (get_label_name lexbuf) }
+  | "~" lowercase_latin1 identchar_latin1 * ':'
+      { warn_latin1 lexbuf; LABEL (get_label_name lexbuf) }
+  | "?"
+      { QUESTION }
+  | "?" lowercase identchar * ':'
+      { OPTLABEL (get_label_name lexbuf) }
+  | "?" lowercase_latin1 identchar_latin1 * ':'
+      { warn_latin1 lexbuf; OPTLABEL (get_label_name lexbuf) }
+  | lowercase identchar *
+      { let s = Lexing.lexeme lexbuf in
+        try Hashtbl.find keyword_table s
+        with Not_found -> LIDENT s }
+  | lowercase_latin1 identchar_latin1 *
+      { warn_latin1 lexbuf; LIDENT (Lexing.lexeme lexbuf) }
+  | uppercase identchar *
+      { UIDENT(Lexing.lexeme lexbuf) }       (* No capitalized keywords *)
+  | uppercase_latin1 identchar_latin1 *
+      { warn_latin1 lexbuf; UIDENT(Lexing.lexeme lexbuf) }
+  | int_literal
+      { try
+          INT (cvt_int_literal (Lexing.lexeme lexbuf))
+        with Failure _ ->
+          raise (Error(Literal_overflow "int", Location.curr lexbuf))
+      }
+  | float_literal
+      { FLOAT (remove_underscores(Lexing.lexeme lexbuf)) }
+  | int_literal "l"
+      { try
+          INT32 (cvt_int32_literal (Lexing.lexeme lexbuf))
+        with Failure _ ->
+          raise (Error(Literal_overflow "int32", Location.curr lexbuf)) }
+  | int_literal "L"
+      { try
+          INT64 (cvt_int64_literal (Lexing.lexeme lexbuf))
+        with Failure _ ->
+          raise (Error(Literal_overflow "int64", Location.curr lexbuf)) }
+  | int_literal "n"
+      { try
+          NATIVEINT (cvt_nativeint_literal (Lexing.lexeme lexbuf))
+        with Failure _ ->
+          raise (Error(Literal_overflow "nativeint", Location.curr lexbuf)) }
+  | "\""
+      { reset_string_buffer();
+        is_in_string := true;
+        let string_start = lexbuf.lex_start_p in
+        string_start_loc := Location.curr lexbuf;
+        string lexbuf;
+        is_in_string := false;
+        lexbuf.lex_start_p <- string_start;
+        STRING (get_stored_string()) }
+  | "'" newline "'"
+      { update_loc lexbuf None 1 false 1;
+        CHAR (Lexing.lexeme_char lexbuf 1) }
+  | "'" [^ '\\' '\'' '\010' '\013'] "'"
+      { CHAR(Lexing.lexeme_char lexbuf 1) }
+  | "'\\" ['\\' '\'' '"' 'n' 't' 'b' 'r' ' '] "'"
+      { CHAR(char_for_backslash (Lexing.lexeme_char lexbuf 2)) }
+  | "'\\" ['0'-'9'] ['0'-'9'] ['0'-'9'] "'"
+      { CHAR(char_for_decimal_code lexbuf 2) }
+  | "'\\" 'x' ['0'-'9' 'a'-'f' 'A'-'F'] ['0'-'9' 'a'-'f' 'A'-'F'] "'"
+      { CHAR(char_for_hexadecimal_code lexbuf 3) }
+  | "'\\" _
+      { let l = Lexing.lexeme lexbuf in
+        let esc = String.sub l 1 (String.length l - 1) in
+        raise (Error(Illegal_escape esc, Location.curr lexbuf))
+      }
+  | "(*"
+      { let start_loc = Location.curr lexbuf  in
+        comment_start_loc := [start_loc];
+        reset_string_buffer ();
+        let end_loc = comment lexbuf in
+        let s = get_stored_string () in
+        reset_string_buffer ();
+        COMMENT (s, { start_loc with
+                      Location.loc_end = end_loc.Location.loc_end })
+      }
+  | "(*)"
+      { let loc = Location.curr lexbuf  in
+        if !print_warnings then
+          Location.prerr_warning loc Warnings.Comment_start;
+        comment_start_loc := [loc];
+        reset_string_buffer ();
+        let end_loc = comment lexbuf in
+        let s = get_stored_string () in
+        reset_string_buffer ();
+        COMMENT (s, { loc with Location.loc_end = end_loc.Location.loc_end })
+      }
+  | "*)"
+      { let loc = Location.curr lexbuf in
+        Location.prerr_warning loc Warnings.Comment_not_end;
+        lexbuf.Lexing.lex_curr_pos <- lexbuf.Lexing.lex_curr_pos - 1;
+        let curpos = lexbuf.lex_curr_p in
+        lexbuf.lex_curr_p <- { curpos with pos_cnum = curpos.pos_cnum - 1 };
+        STAR
+      }
+
+  | "#" [' ' '\t']* (['0'-'9']+ as num) [' ' '\t']*
+        ("\"" ([^ '\010' '\013' '"' ] * as name) "\"")?
+        [^ '\010' '\013'] * newline
+      { update_loc lexbuf name (int_of_string num) true 0;
+        token lexbuf
+      }
+  | "#"  { SHARP }
+  | "&"  { AMPERSAND }
+  | "&&" { AMPERAMPER }
+  | "`"  { BACKQUOTE }
+  | "'"  { QUOTE }
+  | "("  { LPAREN }
+  | ")"  { RPAREN }
+  | "*"  { STAR }
+  | ","  { COMMA }
+  | "->" { MINUSGREATER }
+  | "."  { DOT }
+  | ".." { DOTDOT }
+  | ":"  { COLON }
+  | "::" { COLONCOLON }
+  | ":=" { COLONEQUAL }
+  | ":>" { COLONGREATER }
+  | ";"  { SEMI }
+  | ";;" { SEMISEMI }
+  | "<"  { LESS }
+  | "<-" { LESSMINUS }
+  | "="  { EQUAL }
+  | "["  { LBRACKET }
+  | "[|" { LBRACKETBAR }
+  | "[<" { LBRACKETLESS }
+  | "[>" { LBRACKETGREATER }
+  | "]"  { RBRACKET }
+  | "{"  { LBRACE }
+  | "{<" { LBRACELESS }
+  | "|"  { BAR }
+  | "||" { BARBAR }
+  | "|]" { BARRBRACKET }
+  | ">"  { GREATER }
+  | ">]" { GREATERRBRACKET }
+  | "}"  { RBRACE }
+  | ">}" { GREATERRBRACE }
+  | "!"  { BANG }
+
+  | "!=" { INFIXOP0 "!=" }
+  | "+"  { PLUS }
+  | "+." { PLUSDOT }
+  | "-"  { MINUS }
+  | "-." { MINUSDOT }
+
+  | "!" symbolchar +
+            { PREFIXOP(Lexing.lexeme lexbuf) }
+  | ['~' '?'] symbolchar +
+            { PREFIXOP(Lexing.lexeme lexbuf) }
+
+  (* P4 special rules, which must become '<' *)
+  | "<<" symbolchar*
+      { 
+        if !is_p4 then begin
+          (* unwind and parse it as a quotation *)
+          lexbuf.lex_curr_pos <- lexbuf.lex_curr_pos - String.length (Lexing.lexeme lexbuf);
+          p4_quotation lexbuf
+        end else INFIXOP0(Lexing.lexeme lexbuf)
+      }
+
+  | "<" [ ':' '@' ]
+      { 
+        if !is_p4 then begin
+          (* unwind and parse it as a quotation *)
+          lexbuf.lex_curr_pos <- lexbuf.lex_curr_pos - String.length (Lexing.lexeme lexbuf);
+          p4_quotation lexbuf
+        end else INFIXOP0(Lexing.lexeme lexbuf)
+      }
+
+  | "$" symbolchar*
+      { 
+        if !in_quotation then begin
+          (* unwind and parse it as a quotation *)
+          lexbuf.lex_curr_pos <- lexbuf.lex_curr_pos - String.length (Lexing.lexeme lexbuf);
+          assert false
+        end else INFIXOP0(Lexing.lexeme lexbuf)
+      }
+
+  | ['=' '<' '>' '|' '&' '$'] symbolchar *
+            { INFIXOP0(Lexing.lexeme lexbuf) }
+  | ['@' '^'] symbolchar *
+            { INFIXOP1(Lexing.lexeme lexbuf) }
+  | ['+' '-'] symbolchar *
+            { INFIXOP2(Lexing.lexeme lexbuf) }
+  | "**" symbolchar *
+            { INFIXOP4(Lexing.lexeme lexbuf) }
+  | ['*' '/' '%'] symbolchar *
+            { INFIXOP3(Lexing.lexeme lexbuf) }
+  | eof { EOF }
+  | _
+      { raise (Error(Illegal_character (Lexing.lexeme_char lexbuf 0),
+                     Location.curr lexbuf))
+      }
+
+and comment = parse
+    "(*"
+      { comment_start_loc := (Location.curr lexbuf) :: !comment_start_loc;
+        store_lexeme lexbuf;
+        comment lexbuf;
+      }
+  | "*)"
+      { match !comment_start_loc with
+        | [] -> assert false
+        | [_] -> comment_start_loc := []; Location.curr lexbuf
+        | _ :: l -> comment_start_loc := l;
+                  store_lexeme lexbuf;
+                  comment lexbuf;
+       }
+  | "\""
+      {
+        string_start_loc := Location.curr lexbuf;
+        store_string_char '"';
+        is_in_string := true;
+        begin try string lexbuf
+        with Error (Unterminated_string, _) ->
+          match !comment_start_loc with
+          | [] -> assert false
+          | loc :: _ ->
+            let start = List.hd (List.rev !comment_start_loc) in
+            comment_start_loc := [];
+            raise (Error (Unterminated_string_in_comment start, loc))
+        end;
+        is_in_string := false;
+        store_string_char '"';
+        comment lexbuf }
+  | "''"
+      { store_lexeme lexbuf; comment lexbuf }
+  | "'" newline "'"
+      { update_loc lexbuf None 1 false 1;
+        store_lexeme lexbuf;
+        comment lexbuf
+      }
+  | "'" [^ '\\' '\'' '\010' '\013' ] "'"
+      { store_lexeme lexbuf; comment lexbuf }
+  | "'\\" ['\\' '"' '\'' 'n' 't' 'b' 'r' ' '] "'"
+      { store_lexeme lexbuf; comment lexbuf }
+  | "'\\" ['0'-'9'] ['0'-'9'] ['0'-'9'] "'"
+      { store_lexeme lexbuf; comment lexbuf }
+  | "'\\" 'x' ['0'-'9' 'a'-'f' 'A'-'F'] ['0'-'9' 'a'-'f' 'A'-'F'] "'"
+      { store_lexeme lexbuf; comment lexbuf }
+  | eof
+      { match !comment_start_loc with
+        | [] -> assert false
+        | loc :: _ ->
+          let start = List.hd (List.rev !comment_start_loc) in
+          comment_start_loc := [];
+          raise (Error (Unterminated_comment start, loc))
+      }
+  | newline
+      { update_loc lexbuf None 1 false 0;
+        store_lexeme lexbuf;
+        comment lexbuf
+      }
+  | _
+      { store_lexeme lexbuf; comment lexbuf }
+
+and string = parse
+    '"'
+      { () }
+  | '\\' newline ([' ' '\t'] * as space)
+      { update_loc lexbuf None 1 false (String.length space);
+        string lexbuf
+      }
+  | '\\' ['\\' '\'' '"' 'n' 't' 'b' 'r' ' ']
+      { store_string_char(char_for_backslash(Lexing.lexeme_char lexbuf 1));
+        string lexbuf }
+  | '\\' ['0'-'9'] ['0'-'9'] ['0'-'9']
+      { store_string_char(char_for_decimal_code lexbuf 1);
+         string lexbuf }
+  | '\\' 'x' ['0'-'9' 'a'-'f' 'A'-'F'] ['0'-'9' 'a'-'f' 'A'-'F']
+      { store_string_char(char_for_hexadecimal_code lexbuf 2);
+         string lexbuf }
+  | '\\' _
+      { if in_comment ()
+        then string lexbuf
+        else begin
+(*  Should be an error, but we are very lax.
+          raise (Error (Illegal_escape (Lexing.lexeme lexbuf),
+                        Location.curr lexbuf))
+*)
+          let loc = Location.curr lexbuf in
+          Location.prerr_warning loc Warnings.Illegal_backslash;
+          store_string_char (Lexing.lexeme_char lexbuf 0);
+          store_string_char (Lexing.lexeme_char lexbuf 1);
+          string lexbuf
+        end
+      }
+  | newline
+      { if not (in_comment ()) then
+          Location.prerr_warning (Location.curr lexbuf) Warnings.Eol_in_string;
+        update_loc lexbuf None 1 false 0;
+        store_lexeme lexbuf;
+        string lexbuf
+      }
+  | eof
+      { is_in_string := false;
+        raise (Error (Unterminated_string, !string_start_loc)) }
+  | _
+      { store_string_char(Lexing.lexeme_char lexbuf 0);
+        string lexbuf }
+
+and skip_sharp_bang = parse
+  | "#!" [^ '\n']* '\n' [^ '\n']* "\n!#\n"
+       { update_loc lexbuf None 3 false 0 }
+  | "#!" [^ '\n']* '\n'
+       { update_loc lexbuf None 1 false 0 }
+  | "" { () }
+
+and p4_quotation = parse
+  | '<' (':' ident)? ('@' locname)? '<' 
+      { 
+        let loc = Location.curr lexbuf in
+        p4_quotation' loc (Buffer.create 10) lexbuf
+      }
+
+and p4_quotation' loc buf = parse
+  | ">>"
+      { Buffer.add_string buf (Lexing.lexeme lexbuf);
+        P4QUOTE (Buffer.contents buf)
+      }
+  | eof 
+      { raise (Error (Unterminated_p4_quotation, loc)) }
+  | newline 
+      { update_loc lexbuf None 1 false 0; 
+        Buffer.add_string buf (Lexing.lexeme lexbuf);
+        p4_quotation' loc buf lexbuf 
+      }
+  | _ { Buffer.add_string buf (Lexing.lexeme lexbuf);
+        p4_quotation' loc buf lexbuf }
+
+{
+  let token_with_comments = token
+
+  let last_comments = ref []
+  let rec token lexbuf =
+    match token_with_comments lexbuf with
+        COMMENT (s, comment_loc) ->
+          last_comments := (s, comment_loc) :: !last_comments;
+          token lexbuf
+      | tok -> tok
+  let comments () = List.rev !last_comments
+  let init () =
+    is_in_string := false;
+    last_comments := [];
+    comment_start_loc := []
+
+}
   let sexp_of_t _t = Sexplib.Sexp.List []
 end
 
-type token = Parser.token =
+type token =
   | AMPERAMPER
   | AMPERSAND
   | AND
   | WHILE
   | WITH
   | COMMENT of (string * Location.t)
+  | P4QUOTE of string
 
 with sexp_of