Source

amall / src / urilex.mll

{
open Uri_type;;

let failwith fmt = Printf.ksprintf failwith fmt
}

(* common part from rfc2616 *)

let    OCTET          = _
let    CHAR           = [ '\000' - '\127' ]
let    UPALPHA        = [ 'A' - 'Z' ]
let    LOALPHA        = [ 'a' - 'z' ]
let    ALPHA          = UPALPHA | LOALPHA
let    DIGIT          = [ '0' - '9' ]
let    CTL            = [ '\000' - '\031' '\127' ]
let    CR             = '\013'
let    LF             = '\010'
let    SP             = '\032'
let    HT             = '\009'

(* addon *)

let    DBLQ           = '\034'
let    HEXDIG         = DIGIT | ['a'-'f' 'A'-'F']

let    none           = ( _ # _ )?

(* uri *)

let scheme        = ALPHA ( ALPHA | DIGIT | '+' | '-' | '.' )*
let pct_encoded   = '%' HEXDIG HEXDIG
let unreserved    = ALPHA | DIGIT | '-' | '.' | '_' | '~'
let sub_delims    = [ '!' '$' '&' '\'' '(' ')'
                      '*' '+' ',' ';' '=' ]
let pchar         = unreserved | pct_encoded | sub_delims | ':' | '@'
let query         = ( pchar | '/' | '?' )*
let fragment      = ( pchar | '/' | '?' )*

let segment       = pchar*
let segment_nz    = pchar+
let segment_nz_nc = ( unreserved | pct_encoded | sub_delims | '@' )+

let path_abempty  = ( '/' segment )*
let path_absolute = '/' ( segment_nz ( '/' segment )* )?
let path_noscheme = segment_nz_nc ( '/' segment )*
let path_rootless = segment_nz ( '/' segment )*
let path_empty    = none  (* <0>pchar *)

let userinfo      = ( unreserved | pct_encoded | sub_delims | ':' )*
let port          = DIGIT*

let reg_name      = ( unreserved | pct_encoded | sub_delims )*

let dec_octet     =   DIGIT                (* 0-9 *)
                    | ['1'-'9'] DIGIT      (* 10-99 *)
                    | '1' DIGIT DIGIT      (* 100-199 *)
                    | '2' ['0'-'4'] DIGIT  (* 200-249 *)
                    | "25" ['0'-'5'] DIGIT (* 250-255 *)

let IPv4address   = dec_octet '.' dec_octet '.' dec_octet '.' dec_octet
let IPvFuture     = 'v' HEXDIG+ '.' ( unreserved | sub_delims | ':' )+

let h16           = HEXDIG HEXDIG? HEXDIG? HEXDIG?
let hc            = h16 ':'
let ls32          = ( hc h16 ) | IPv4address
let IPv6address   =
                                     hc hc hc hc hc hc ls32
  |                            "::"     hc hc hc hc hc ls32
  | (                   h16 )? "::"        hc hc hc hc ls32
  | ( hc                h16 )? "::"           hc hc hc ls32
  | ( hc hc             h16 )? "::"              hc hc ls32
  | ( hc hc hc          h16 )? "::"                 hc ls32
  | ( hc hc hc hc       h16 )? "::"                    ls32
  | ( hc hc hc hc hc    h16 )? "::"                         h16
  | ( hc hc hc hc hc hc h16 )? "::"

let IP_literal    = "[" ( IPv6address | IPvFuture  ) "]"


(* not an uri, but http *)

let g_space = ['\x20' '\x09']

let g_ctl = ['\x00' - '\x1F' '\x7F']

let g_separator =
  [ '(' ')' '<' '>' '@'
    ',' ';' ':' '\\' '<' '>'
    '/' '[' ']' '?' '='
    '{' '}' '\x20' '\x09'
  ]

let token = (_ # g_ctl # g_separator)+

let g_text = (_ # g_ctl)

let g_char = ['\x00' - '\x7F']


rule uri = parse
  (scheme as scheme) ":"
    {
(* let () = print_string "1\n" in *)
      let (authority_opt, (path, (query, fragment))) =
        hier_part_qf lexbuf in
      { scheme = Some scheme
      ; authority = authority_opt
      ; path = path
      ; query = query
      ; fragment = fragment
      }
    }
| none
    {
      let (path, (query, fragment)) =
        path_query_fragment lexbuf
      in
      { scheme = None
      ; authority = None
      ; path = path
      ; query = query
      ; fragment = fragment
      }
    }

and hier_part_qf = parse
  "//"
    {
(* let () = print_string "2a\n" in *)
      authority_path_abempty_qf lexbuf
    }
| none
    {
      (None, path_query_fragment lexbuf)
    }

and path_query_fragment = parse
  (path_absolute | path_rootless | path_empty) as path
    { (path, query_fragment lexbuf)
    }

and host_portopt = parse
  ( ( IP_literal  (none as is_lit)
    | IPv4address (none as is_ipv4)
    | reg_name    (none as is_reg_name)
    )
      as host
  )
  ( ( ":" (port as port_opt_txt) ) ?)
    {
(* let () = print_string "3\n" in *)
      let host_kind =
        (match (is_lit, is_ipv4, is_reg_name) with
           Some _, None, None -> IP_literal
         | None, Some _, None -> IPv4address
         | None, None, Some _ -> Reg_name
         | _ -> assert false
        )
      in
      let port_opt =
        (match port_opt_txt with
           None -> None
         | Some txt ->
             if txt = ""
             then None
             else (try Some (int_of_string txt) with _ -> None)
        )
      in
        (host_kind, host, port_opt)
    }

and authority_path_abempty_qf = parse
  ((userinfo as userinfo_opt) '@')?
    {
      let (host_kind, host, port_opt) = host_portopt lexbuf in
      let path = path_abempty_qf lexbuf in
      ( Some
        { host_kind = host_kind
        ; host = host
        ; port = port_opt
        ; userinfo = userinfo_opt
        }
      , path
      )
    }

     
and path_abempty_qf = parse
  path_abempty
    { let path = Lexing.lexeme lexbuf in
      (path, query_fragment lexbuf)
    }

and query_fragment = parse
  ('?' (query as query_opt))?
    { (query_opt, fragment lexbuf) }

and fragment = parse
  ('#' (fragment as fragment_opt))?
    { fragment_opt }


(*
   hier-part     = "//" authority path-abempty
                 / path-absolute
                 / path-rootless
                 / path-empty

   URI-reference = URI / relative-ref

   absolute-URI  = scheme ":" hier-part [ "?" query ]

   relative-ref  = relative-part [ "?" query ] [ "#" fragment ]

   relative-part = "//" authority path-abempty
                 / path-absolute
                 / path-noscheme
                 / path-empty



   path          = path-abempty    ; begins with "/" or is empty
                 / path-absolute   ; begins with "/" but not "//"
                 / path-noscheme   ; begins with a non-colon segment
                 / path-rootless   ; begins with a segment
                 / path-empty      ; zero characters


   reserved      = gen-delims / sub-delims
   gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"




*)


(* not an uri, but http *)

and content_type = parse
  g_space+
    { content_type lexbuf }
| (token as mtype) '/' (token as msubtype) g_space*
    { try
        let pars = parameters [] lexbuf in
        `Ok (mtype, msubtype, pars)
      with
      | Failure msg -> `Error msg
      | e -> `Error (Printexc.to_string e)
    }

| ((_ *) as txt)
    { `Error (Printf.sprintf "bad Content-Type: %S" txt) }

and parameters acc = parse
  eof 
    { List.rev acc }
| ""
    { let p = parameter lexbuf in
      parameters (p :: acc) lexbuf
    }

and parameter = parse
  ';' g_space* (token as par_name) '='
    { let v = parameter_value lexbuf in
      (par_name, v)
    }

| ((_ *) as txt)
    { failwith "expected parameter, found %S" txt }

and parameter_value = parse
  (token as t) g_space*
    { t }
| '"'
    { inside_quoted_string (Buffer.create 40) lexbuf }
| ((_ *) as txt)
    { failwith "expected parameter's value, found %S" txt }

and inside_quoted_string buf = parse
  '"' g_space*
    { Buffer.contents buf }
| '\\'
    { let c = escaped_char lexbuf in
      let () = Buffer.add_char buf c in
      inside_quoted_string buf lexbuf
    }
| ((g_text # [^ '"' '\\'])+ as txt)
    { let () = Buffer.add_string buf txt in
      inside_quoted_string buf lexbuf
    }
| ((_ *) as txt)
    { failwith "expected HTTP's TEXT, found %S" txt }

and escaped_char = parse
  g_char as c
    { c }
| (_ as c)
    { failwith "expected \\x00..\\x7F after backslash, found %C" c }
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.