Source

OCamlRuby / parser.ml

Full commit
open Util
open Llist
open Ast
open ParserMonad

module P = ParserMonad.Make (struct
  type ts = token llist
  type error = pos * string
  let eplus (p1, s1) (p2, s2) =
    match pos_compare p1 p2 with
    | 0 -> (p1, s1 ^"\n"^ s2)
    | 1 -> (p1, s1)
    | _ -> (p2, s2)
end)

open P
type 'a parser = 'a m
exception ParseError of error

(* for debug ===========*)
 let mode = 0
 let pts ts = match mode with
 | 1 -> p " - %s\n" (sllist ", " s_token !$ts); flush stderr
(* | 2 ->
    let s = match ts with
    | t1::t2::t3::t4::tail ->  slist "" (const ".") (t4::tail) ^ slist ";\t" stoken [t3;t2;t1]
    | _ -> (slist "; " stoken ts)
    in p " - %s\n" s*)
 | _ -> ()
(* =================== *)

let unit_parser f : 'a parser = function
  | Cons (t1, ts) ->
      begin match f t1 with
      | `Val v -> pts ts; Inl (v, !$ts)
      | `Err s -> Inr s
      end
  | Nil -> Inr (Ast.null_pos, "unit_parser")

let kwd k = unit_parser (function | (pos, Genlex.Kwd ki) when k = ki -> `Val (pos,k) | (pos,t) -> `Err (pos,"token'"^s_token0 t^"' is not kwd'"^ k ^"'"))
let ident0 = unit_parser (function | (pos, Genlex.Ident s) -> `Val s | (pos,t) -> `Err (pos, "token'"^s_token0 t^"' is not identifier."))
let ident = (kwd "@" >> ident0 >>= (return $ (^) "@")) <|> (kwd "&" >> ident0)  <|> ident0

let literal = unit_parser (function
  | pos, Genlex.String s -> `Val (LString s)
  | pos, Genlex.Int i -> `Val (LInt i)
  | pos, Genlex.Kwd "true" -> `Val (LBool true)
  | pos, Genlex.Kwd "false" -> `Val (LBool false)
  | pos, _ -> `Err (pos, "literal"))


let sep_many delim (p: 'a parser) : 'a list parser =
  (p >>= fun x1 -> many (kwd delim >> p) >>= fun xs -> return (x1::xs))
  <|>
  (return [])

let arguments_as : 'a parser -> 'a list parser = fun p ->
  (kwd "(" >> sep_many "," p >>= fun args -> kwd ")" >> return args)

let make_block(pos, argnames, body) =
  Call (Obj (ClassDef (pos, "__BLOCK", FunDef ("call", argnames, body))), "new", [])


let rec expr code =
  begin
  lterm() >>= fun tm -> (expr >>= fun e -> return @@ Ignore (tm, e))
                    <|> (return tm)
  end code
and lterm () =
  rterm() >>= fun t1 -> (kwd "&&" >> lterm() >>= fun tm -> return @@ Call (Obj t1, "&&", [tm]))
                    <|> (kwd "||" >> lterm() >>= fun tm -> return @@ Call (Obj t1, "||", [tm]))
		    <|> (return t1)
and rterm () =
  aterm() >>= fun t1 -> (kwd "==" >> rterm() >>= fun tm -> return @@ Call (Obj t1, "==", [tm]))
                    <|> (kwd "!=" >> rterm() >>= fun tm -> return @@ Call (Obj t1, "!=", [tm]))
                    <|> (kwd "<=" >> rterm() >>= fun tm -> return @@ Call (Obj t1, "<=", [tm]))
                    <|> (kwd "<"  >> rterm() >>= fun tm -> return @@ Call (Obj t1, "<",  [tm]))
                    <|> (kwd ">=" >> rterm() >>= fun tm -> return @@ Call (Obj t1, ">=", [tm]))
                    <|> (kwd ">"  >> rterm() >>= fun tm -> return @@ Call (Obj t1, ">",  [tm]))
		    <|> (return t1)
and aterm () =
  mterm() >>= fun t1 -> (kwd "+" >> aterm() >>= fun tm -> return @@ Call (Obj t1, "+", [tm]))
                    <|> (kwd "-" >> aterm() >>= fun tm -> return @@ Call (Obj t1, "-", [tm]))
		    <|> (return t1)
and mterm () =
  factor() >>= fun f1 -> (kwd "*" >> mterm() >>= fun tm -> return @@ Call (Obj f1, "*", [tm]))
                     <|> (kwd "/" >> mterm() >>= fun tm -> return @@ Call (Obj f1, "/", [tm]))
		     <|> (return f1)

and factor () =
  factor1() >>= fun fac -> factor_next fac
and factor1 () =
  (kwd "@" >> ident >>= fun vname -> kwd "=" >> lterm() >>= fun tm -> return (Assign ("@"^vname, tm)))
  <|>
  (ident >>= fun vname -> kwd "=" >> lterm() >>= fun tm -> return (VarDef (vname, tm)))
  <|>
  (ident >>= fun fname -> arguments_as (expr) >>= fun args -> return (Call (Self, fname, args)))
  <|>
  (ident >>= fun fname -> block() >>= fun b -> return @@ Call (Self, fname, [b]))
  <|>
  (kwd "def" >> ident >>= fun fname -> arguments_as ident >>= fun argnames -> expr >>= fun body -> kwd "end" >>
    return (FunDef (fname, argnames, body)))
  <|>
  (kwd "def" >> ident >>= fun fname -> expr >>= fun body -> kwd "end" >>
    return (FunDef (fname, [], body)))
  <|>
  (kwd "class" >>= fun (pos,_) -> ident >>= fun cname -> expr >>= fun flds -> kwd "end" >> return (ClassDef (pos, cname, flds)))
  <|>
  (kwd "class" >>= fun (pos,_) -> expr >>= fun flds -> kwd "end" >> return (ClassDef (pos, "__UNNAMEDCLASS", flds)))
  <|>
  (block())
  <|>
  (kwd "if" >> expr >>= fun cond -> kwd "then" >> expr >>= fun e1 -> kwd "else" >> expr >>= fun e2 -> kwd"end" >>
    return (If (cond, e1, e2)))
  <|>
  (kwd "(" >> expr >>= fun e -> kwd ")" >> return e)
  <|>
  (literal >>= fun l -> return @@ Literal l)
  <|>
  (kwd "@" >> ident >>= fun x -> return @@ InstanceVar ("@"^x))
  <|>
  (ident >>= fun x -> return @@ Var x)
and factor_next fac =
  (kwd "." >> ident >>= fun fname -> arguments_as expr >>= fun args -> factor_next @@ Call (Obj fac, fname, args))
  <|>
  (kwd "." >> ident >>= fun fname -> block() >>= fun b -> factor_next @@ Call (Obj fac, fname, [b]))
  <|>
  (kwd "." >> ident >>= fun fname -> factor_next @@ Call (Obj fac, fname, []))
  <|>
  (return fac)

and block () =
  (kwd "{"  >>= fun (pos,_) -> kwd "|" >> sep_many "," ident >>= fun argnames -> kwd "|" >> expr >>= fun body -> kwd "}" >> return @@ make_block(pos, argnames, body))
  <|>
  (kwd "do" >>= fun (pos,_) -> kwd "|" >> sep_many "," ident >>= fun argnames -> kwd "|" >> expr >>= fun body -> kwd "end" >> return @@ make_block(pos, argnames, body))




let rec set_position l i = function
  | Cons (Genlex.Kwd "`", tl) -> set_position (l+1) 1 !$tl
  | Cons (t, tl) -> Cons (({ line=l; idx=i }, t), lazy (set_position l (i+1) !$tl))
  | Nil -> Nil

let parse str =
  match expr @@ set_position 1 1 @@ Llist.of_stream str with
  | Inl (states, Nil) ->
      states
  | Inl (states, Cons((pos,t),ts)) ->
      begin match lterm() (Cons ((pos,t),ts)) with
      | Inr e -> raise (ParseError e)
      | _ -> raise (ParseError (pos, "unread token: '"^s_token0 t^"'."))
      end
  | Inr e ->
      raise (ParseError e)