ocaml-lib / syntax.ml

(* analyse lexicale générique *)

type t_stream = Token.t Stream.t

type t_list = Token.t list

open Token

(* forbidden idents in syntax of logics *)
let keywords = ["and";"or";"not"]

let rec get_terms : t_list -> string list =
  function
      [] -> []
    | Term name::l -> name::get_terms l
    | _::l -> get_terms l

let from_channel : in_channel -> t_stream =
  fun ch ->
    let lexbuf = Lexing.from_channel ch in
    Stream.from (fun _ ->
      try Some (Lexer.token lexbuf)
      with Eof -> None)
(*
	try match Lexer.token lexbuf with
	  EOL -> None
	| tok -> Some tok
	with Eof -> None)
*)

let from_string : string -> t_stream =
  fun s ->
    let lexbuf = Lexing.from_string s in
    Stream.from (fun _ ->
      try Some (Lexer.token lexbuf)
      with Eof -> None)

let rec list_of_stream : t_stream -> t_list = parser
    [<'tok; toks = list_of_stream>] -> tok::toks
  | [<>] -> []

let rec of_list : t_list -> t_stream =
  fun toks -> Stream.of_list
      (List.filter
	 (function
	     PP_tilda | PP_space | PP_cut | PP_break (_,_) | PP_newline -> false
	   | _ -> true
	 ) toks
      )

(* get the string representation of the token *)
type space_of_token = Sep | PonctL | PonctR | Op | Word

(*
let string_of_token : Token.t -> string * space_of_token =
  function
    | EOL -> "\n", Sep
    | BackQuote -> "`", Sep
    | Tilda -> "~", Sep
    | Exclam -> "!", PonctR
    | At -> "@", Sep
    | Sharp -> "#", Sep
    | Dollar -> "$", Sep
    | Percent -> "%", Sep
    | Hat -> "^", Op
    | Et -> "&", Op
    | Star -> "*", Op
    | LeftPar -> "(", PonctL
    | RightPar -> ")", Sep
    | Minus -> "-", Op
    | Plus -> "+", Op
    | Equal -> "=", Op
    | LeftAcc -> "{", PonctL
    | RightAcc -> "}", Sep
    | LeftBra -> "[", PonctL
    | RightBra -> "]", Sep
    | Pipe -> "|", Op
    | BackSlash -> "\\", Sep
    | Slash -> "/", Sep
    | Interro -> "?", PonctR
    | LT -> "<", Op
    | GT -> ">", Op
    | Comma -> ",", PonctR
    | DotDot -> "..", Sep
    | Dot -> ".", PonctR
    | Colon -> ":", PonctR
    | SemiColon -> ";", PonctR
    | DoubleQuote -> "\"", Sep
    | Quote -> "'", Sep
    | Ident s -> s, Word
    | Int n -> string_of_int n, Word
    | Float (f,p) -> string_of_float f, Word
        let s = string_of_float f in
        let l = String.length s in
        let prec = prec_of_sfloat s in
        if prec >= p
        then s
        else
          let zeros = String.make (p-prec) '0' in
          try
            let i_e = String.index s 'e' in
            String.sub s 0 i_e ^ zeros ^ String.sub s i_e (l-i_e) 
          with Not_found -> s ^ zeros,
        Word
    | String s -> "\"" ^ String.escaped s ^ "\"", Word
    | Term s ->
	let b = ref true in
	(match s.[0] with 'A'..'Z' | '_' -> () | _ -> b:= false);
	if !b then String.iter (function 'a'..'z' | 'A'..'Z' | '_' | '0'..'9' -> () | _ -> b:= false) s;
	if !b
	then s, Word
	else "'" ^ String.escaped s ^ "'", Word
    | Char c -> "`" ^ Char.escaped c ^ "`", Word
    | PP_tilda -> " ", Sep
    | PP_space -> " ", Sep
    | PP_cut -> "", Sep
    | PP_break (spaces,offset) -> " ", Sep

let rec stringizer : t_list -> string =
  function
      [] -> ""
    | tok::toks ->
	let buf = Buffer.create 100 in
	let s, t = string_of_token tok in
	Buffer.add_string buf s;
	stringizer2 buf t toks
and stringizer2 buf t0 = function
    [] -> Buffer.contents buf
  | tok::toks ->
      let s, t = string_of_token tok in
      ( match t0, t with
      |	Sep, Word -> ()
      |	t1, Sep when t1 <> PonctR -> ()
      |	t1, PonctR when t1 <> PonctR -> ()
      |	PonctL, t1 when t1 <> PonctL -> ()
      |	_, _ -> Buffer.add_char buf ' ');
      Buffer.add_string buf s;
      stringizer2 buf t toks
*)

open Format

let rec power10 : int -> float =
      function
      | 0 -> 1.
      | p -> if p > 0
             then power10 (p-1) *. 10.
             else power10 (p+1) /. 10.

let pp_print_token : formatter -> Token.t -> Token.t -> unit =
  fun ff pred -> function
    | BackQuote -> pp_print_string ff "`"
    | Tilda -> pp_print_string ff "~"
    | Exclam -> pp_print_string ff "!"
    | At -> pp_print_string ff "@"
    | Sharp -> pp_print_string ff "#"
    | Dollar -> pp_print_string ff "$"
    | Percent -> pp_print_string ff "%"
    | Hat -> pp_print_string ff "^"
    | Et -> pp_print_string ff "&"
    | Star -> pp_print_string ff "*"
    | LeftPar -> pp_print_string ff "("
    | RightPar -> pp_print_string ff ")"
    | Minus ->
	( match pred with
	| Ident _ | Term _ -> pp_print_string ff " "
	| _ -> ());
	pp_print_string ff "-"
    | Plus ->
	( match pred with
	| Ident _ | Term _ -> pp_print_string ff " "
	| _ -> ());
	pp_print_string ff "+"
    | Equal -> pp_print_string ff "="
    | LeftAcc -> pp_print_string ff "{"
    | RightAcc -> pp_print_string ff "}"
    | LeftBra -> pp_print_string ff "["
    | RightBra -> pp_print_string ff "]"
    | Pipe -> pp_print_string ff "|"
    | BackSlash -> pp_print_string ff "\\"
    | Slash -> pp_print_string ff "/"
    | Interro -> pp_print_string ff "?"
    | LT -> pp_print_string ff "<"
    | GT -> pp_print_string ff ">"
    | Comma -> pp_print_string ff ","
    | DotDot -> pp_print_string ff ".."
    | Dot ->
	( match pred with
	| Int _ | Float _ -> pp_print_string ff " "
	| _ -> ());
	pp_print_string ff "."
    | Colon -> pp_print_string ff ":"
    | SemiColon -> pp_print_string ff ";"
    | DoubleQuote -> pp_print_string ff "\""
    | Quote -> pp_print_string ff "'"
    | Ident s ->
	( match pred with
	| Ident _ | Term _ -> pp_print_string ff " "
	| _ -> ());
	pp_print_string ff s
    | Int n ->
	( match pred with
	| Plus | Minus | Int _ | Float _ | Ident _ | Term _ -> pp_print_string ff " "
	| _ -> ());
	pp_print_int ff n
    | Float (f,p) ->
	( match pred with
	| Plus | Minus | Int _ | Float _ | Ident _ | Term _ -> pp_print_string ff " "
	| _ -> ());
        let sm = if f=0. then "" else string_of_int (int_of_float ((abs_float f) *. (power10 (-p)))) in
	let l = String.length sm in
        let e = let x = (p+l) mod 3 in if x >= 0 then x else x+3 in
        let exp e = if e = 0 then "" else "e" ^ string_of_int e in
	let s =
          (if f < 0. then "-" else "") ^
          if e = 1 then String.sub sm 0 1 ^ "." ^ String.sub sm 1 (l-1) ^ exp (p+l-1)
          else if e = 2 then
            if l >= 2 & p+l+1 <> 0 then String.sub sm 0 2 ^ "." ^ String.sub sm 1 (l-2) ^ exp (p+l-2)
            else "0.0" ^ sm ^ exp (p + l + 1)
          else
            if l >= 3 & p+l <> 0 then String.sub sm 0 3 ^ "." ^ String.sub sm 1 (l-3) ^ exp (p+l-3)
            else "0." ^ sm ^ exp (p + l) in
	pp_print_string ff s
    | String s -> pp_print_string ff ("\"" ^ String.escaped s ^ "\"")
    | Term s ->
	( match pred with
	| Ident _ | Term _ -> pp_print_string ff " "
	| _ -> ());
	let b = ref true in
	(match s.[0] with 'A'..'Z' | '_' -> () | _ -> b:= false);
	if !b then String.iter (function 'a'..'z' | 'A'..'Z' | '_' | '0'..'9' -> () | _ -> b:= false) s;
	if !b
	then pp_print_string ff s
	else pp_print_string ff ("'" ^ String.escaped s ^ "'")
    | Char c -> pp_print_string ff ("`" ^ Char.escaped c ^ "`")
    | PP_tilda -> pp_print_string ff " "
    | PP_space -> pp_print_space ff ()
    | PP_cut -> pp_print_cut ff ()
    | PP_break (spaces,offset) -> pp_print_break ff spaces offset
    | PP_newline -> pp_print_newline ff ()


let rec pp_print_tokens : formatter -> t_list -> unit =
  fun ff toks -> pp_print_tokens2 ff PP_newline toks
and pp_print_tokens2 ff pred =
  function
    | [] -> ()
    | tok::toks ->
	pp_print_token ff pred tok;
	pp_print_tokens2 ff tok toks

(*
  fun ff -> function
      [] -> ()
    | tok::toks ->
	let s, t = string_of_token tok in
	Format.pp_print_string ff s;
	pp_print_tokens2 ff t toks
and pp_print_tokens2 ff t0 = function
    [] -> ()
  | tok::toks ->
      let s, t = string_of_token tok in
      ( match t0, t with
      | Sep, Word -> ()
      | t1, Sep when t1 <> PonctR -> ()
      | t1, PonctR when t1 <> PonctR -> ()
      | PonctL, t1 when t1 <> PonctL -> ()
      | _, _ -> Format.pp_print_space ff ());
      Format.pp_print_string ff s;
      pp_print_tokens2 ff t toks
*)

let stringizer : t_list -> string =
  fun toks ->
    pp_print_tokens Format.str_formatter toks;
    Format.flush_str_formatter ()

let print_tokens : t_list -> unit =
  fun toks ->
    pp_print_tokens Format.std_formatter toks

(* fonctions generiques pour le printing *)
(* ------------------------------------- *)

(* messages d'erreur syntaxique *)

let error_RightPar = "Syntax error: a closing parenthesis is missing."
let error_RightBra = "Syntax error: a closing bracket is missing."

(* add parenthesis around the given t_list if op (precedence) is above the ctx (precedence) *)

let add_par : int -> int -> t_list -> t_list =
  fun ctx op l -> if op > ctx then LeftPar::(l @ [RightPar]) else l

(* fonctions generiques pour le parsing *)
(* ------------------------------------ *)

(* parsing of proposition-like language, where operations and atoms are parameterized *)

type 'a spec_prop = { 
    all : string; none : string; a : string; an : string; o : string; n :  string;
    taut : 'a; cont : 'a; neg : 'a -> 'a; conj : 'a -> 'a -> 'a; disj : 'a -> 'a -> 'a;
      atom : t_stream -> 'a
  } 

let rec parse_prop spec = parser
  | [<'Token.Ident s when s = spec.all >] -> spec.taut
  | [<'Token.Ident s when s = spec.none>] -> spec.cont
  | [<q = parse_term spec; f = parse_suite spec>] -> f q
and parse_suite spec = parser
    [<'Token.Ident s when s = spec.a; q = parse_prop spec>] -> (fun q' -> spec.conj q' q)
  | [<'Token.Ident s when s = spec.an; q = parse_fact spec>] -> (fun q' -> spec.conj q' (spec.neg q))
  | [<>] -> (fun q' -> q')
and parse_term spec = parser
  | [<q = parse_fact spec; f = parse_term_suite spec>] -> f q
and parse_term_suite spec = parser
    [<'Token.Ident s when s = spec.o; q = parse_term spec>] -> (fun q' -> spec.disj q' q)
  | [<>] -> (fun q' -> q')
and parse_fact spec = parser
  | [<'Token.LeftPar; q = parse_prop spec; 'Token.RightPar ?? error_RightPar>] -> q
  | [<'Token.Ident s when s = spec.n; q = parse_fact spec>] -> spec.neg q
  | [<a = spec.atom>] -> a

(* generic functions about strings *)
(* ------------------------------- *)

let rec split (normalize, separator, stopword) (s : string) = 
      split2 (normalize, separator, stopword) s 0 "" []
and split2 (normalize, separator, stopword) s i w ws =
      if i>=String.length s then addword (normalize, separator, stopword) w ws
      else if separator s.[i] then split2 (normalize, separator, stopword) s (i+1) "" (addword (normalize, separator, stopword) w ws)
      else split2 (normalize, separator, stopword) s (i+1) (w ^ String.make 1 s.[i]) ws
and addword (normalize, separator, stopword) w ws =
      if w = "" or stopword (normalize w) then ws else w::ws
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.