Source

ocaml-lib / syntax.ml

Full commit
(* analyse lexicale générique *)

type t_stream = Token.t Stream.t

type t_list = Token.t list

open Token

(* forbidden idents in syntax of logics *)
let keywords = ["and";"or";"not"]

let rec get_terms : t_list -> string list =
  function
      [] -> []
    | Term name::l -> name::get_terms l
    | _::l -> get_terms l

let from_channel : in_channel -> t_stream =
  fun ch ->
    let lexbuf = Lexing.from_channel ch in
    Stream.from (fun _ ->
	match Lexer.token lexbuf with
	  EOL -> None
	| tok -> Some tok)

let from_string : string -> t_stream =
  fun s ->
    let lexbuf = Lexing.from_string s in
    Stream.from (fun _ ->
	match Lexer.token lexbuf with
	  EOL -> None
	| tok -> Some tok)

let rec list_of_stream : t_stream -> t_list = parser
    [<'tok; toks = list_of_stream>] -> tok::toks
  | [<>] -> []

let rec of_list : t_list -> t_stream =
  fun toks -> Stream.of_list
      (List.filter
	 (function
	     PP_tilda | PP_space | PP_cut | PP_break (_,_) -> false
	   | _ -> true
	 ) toks
      )

(* get the string representation of the token *)
type space_of_token = Sep | PonctL | PonctR | Op | Word

(*
let string_of_token : Token.t -> string * space_of_token =
  function
    | EOL -> "\n", Sep
    | BackQuote -> "`", Sep
    | Tilda -> "~", Sep
    | Exclam -> "!", PonctR
    | At -> "@", Sep
    | Sharp -> "#", Sep
    | Dollar -> "$", Sep
    | Percent -> "%", Sep
    | Hat -> "^", Op
    | Et -> "&", Op
    | Star -> "*", Op
    | LeftPar -> "(", PonctL
    | RightPar -> ")", Sep
    | Minus -> "-", Op
    | Plus -> "+", Op
    | Equal -> "=", Op
    | LeftAcc -> "{", PonctL
    | RightAcc -> "}", Sep
    | LeftBra -> "[", PonctL
    | RightBra -> "]", Sep
    | Pipe -> "|", Op
    | BackSlash -> "\\", Sep
    | Slash -> "/", Sep
    | Interro -> "?", PonctR
    | LT -> "<", Op
    | GT -> ">", Op
    | Comma -> ",", PonctR
    | DotDot -> "..", Sep
    | Dot -> ".", PonctR
    | Colon -> ":", PonctR
    | SemiColon -> ";", PonctR
    | DoubleQuote -> "\"", Sep
    | Quote -> "'", Sep
    | Ident s -> s, Word
    | Int n -> string_of_int n, Word
    | Float f -> string_of_float f, Word
    | String s -> "\"" ^ String.escaped s ^ "\"", Word
    | Term s ->
	let b = ref true in
	(match s.[0] with 'A'..'Z' | '_' -> () | _ -> b:= false);
	if !b then String.iter (function 'a'..'z' | 'A'..'Z' | '_' | '0'..'9' -> () | _ -> b:= false) s;
	if !b
	then s, Word
	else "'" ^ String.escaped s ^ "'", Word
    | Char c -> "`" ^ Char.escaped c ^ "`", Word
    | PP_tilda -> " ", Sep
    | PP_space -> " ", Sep
    | PP_cut -> "", Sep
    | PP_break (spaces,offset) -> " ", Sep

let rec stringizer : t_list -> string =
  function
      [] -> ""
    | tok::toks ->
	let buf = Buffer.create 100 in
	let s, t = string_of_token tok in
	Buffer.add_string buf s;
	stringizer2 buf t toks
and stringizer2 buf t0 = function
    [] -> Buffer.contents buf
  | tok::toks ->
      let s, t = string_of_token tok in
      ( match t0, t with
      |	Sep, Word -> ()
      |	t1, Sep when t1 <> PonctR -> ()
      |	t1, PonctR when t1 <> PonctR -> ()
      |	PonctL, t1 when t1 <> PonctL -> ()
      |	_, _ -> Buffer.add_char buf ' ');
      Buffer.add_string buf s;
      stringizer2 buf t toks
*)

open Format

let pp_print_token : formatter -> Token.t -> unit =
  fun ff -> function
    | EOL -> pp_print_newline ff ()
    | BackQuote -> pp_print_string ff "`"
    | Tilda -> pp_print_string ff "~"
    | Exclam -> pp_print_string ff "!"
    | At -> pp_print_string ff "@"
    | Sharp -> pp_print_string ff "#"
    | Dollar -> pp_print_string ff "$"
    | Percent -> pp_print_string ff "%"
    | Hat -> pp_print_string ff "^"
    | Et -> pp_print_string ff "&"
    | Star -> pp_print_string ff "*"
    | LeftPar -> pp_print_string ff "("
    | RightPar -> pp_print_string ff ")"
    | Minus -> pp_print_string ff "-"
    | Plus -> pp_print_string ff "+"
    | Equal -> pp_print_string ff "="
    | LeftAcc ->pp_print_string ff "{"
    | RightAcc ->pp_print_string ff "}"
    | LeftBra ->pp_print_string ff "["
    | RightBra ->pp_print_string ff "]"
    | Pipe ->pp_print_string ff "|"
    | BackSlash ->pp_print_string ff "\\"
    | Slash ->pp_print_string ff "/"
    | Interro ->pp_print_string ff "?"
    | LT ->pp_print_string ff "<"
    | GT ->pp_print_string ff ">"
    | Comma ->pp_print_string ff ","
    | DotDot ->pp_print_string ff ".."
    | Dot ->pp_print_string ff "."
    | Colon ->pp_print_string ff ":"
    | SemiColon ->pp_print_string ff ";"
    | DoubleQuote ->pp_print_string ff "\""
    | Quote ->pp_print_string ff "'"
    | Ident s -> pp_print_string ff s
    | Int n -> pp_print_int ff n
    | Float f -> pp_print_float ff f
    | String s -> pp_print_string ff ("\"" ^ String.escaped s ^ "\"")
    | Term s ->
	let b = ref true in
	(match s.[0] with 'A'..'Z' | '_' -> () | _ -> b:= false);
	if !b then String.iter (function 'a'..'z' | 'A'..'Z' | '_' | '0'..'9' -> () | _ -> b:= false) s;
	if !b
	then pp_print_string ff s
	else pp_print_string ff ("'" ^ String.escaped s ^ "'")
    | Char c -> pp_print_string ff ("`" ^ Char.escaped c ^ "`")
    | PP_tilda -> pp_print_string ff " "
    | PP_space -> pp_print_space ff ()
    | PP_cut -> pp_print_cut ff ()
    | PP_break (spaces,offset) -> pp_print_break ff spaces offset


let rec pp_print_tokens : formatter -> t_list -> unit =
  fun ff tokens -> List.iter (pp_print_token ff) tokens
(*
  fun ff -> function
      [] -> ()
    | tok::toks ->
	let s, t = string_of_token tok in
	Format.pp_print_string ff s;
	pp_print_tokens2 ff t toks
and pp_print_tokens2 ff t0 = function
    [] -> ()
  | tok::toks ->
      let s, t = string_of_token tok in
      ( match t0, t with
      | Sep, Word -> ()
      | t1, Sep when t1 <> PonctR -> ()
      | t1, PonctR when t1 <> PonctR -> ()
      | PonctL, t1 when t1 <> PonctL -> ()
      | _, _ -> Format.pp_print_space ff ());
      Format.pp_print_string ff s;
      pp_print_tokens2 ff t toks
*)

let stringizer : t_list -> string =
  fun toks ->
    pp_print_tokens Format.str_formatter toks;
    Format.flush_str_formatter ()

let print_tokens : t_list -> unit =
  fun toks ->
    pp_print_tokens Format.std_formatter toks

(* fonctions generiques pour le parsing *)


(* fonctions generiques pour le printing *)
(* ------------------------------------- *)

(* add parenthesis around the given t_list if op (precedence) is above the ctx (precedence) *)

let add_par : int -> int -> t_list -> t_list =
  fun ctx op l -> if op > ctx then LeftPar::(l @ [RightPar]) else l


(* messages d'erreur syntaxique *)

let error_RightPar = "Syntax error: a closing parenthesis is missing."
let error_RightBra = "Syntax error: a closing bracket is missing."