(* Source: fastq_bench / fastqbench.ml *)

(*

ocamlfind ocamlopt -thread -package lwt.unix,biocaml,core -linkpkg fastqbench.ml -o fastbench

*)

open Core.Std
open Lwt

(* [failf fmt arg1 ... argN] formats a message printf-style and returns an
   Lwt thread that has failed with [Failure message]. *)
let failf fmt =
  let fail_with_message message = Lwt.fail (Failure message) in
  ksprintf fail_with_message fmt
  
(* [count_next_reads parser] pulls records out of [parser] with
   [Biocaml_fastq.next] until it answers [`not_ready], and returns (as an Lwt
   thread) how many records were obtained by this call.

   If the parser reports an error, the thread fails with a [Failure] whose
   message contains the position rendered by [Biocaml_pos.to_string]. *)
let count_next_reads parser =
  let where = Biocaml_pos.to_string in
  let rec drain seen =
    match Biocaml_fastq.next parser with
    | `record _ ->
      (* The content of the record is irrelevant here: only count it. *)
      drain (seen + 1)
    | `not_ready -> return seen
    | `error problem ->
      (match problem with
       | `sequence_and_qualities_do_not_match (pos, bases, scores) ->
         failf "Error  %s: %d bp Vs %d q-scores\n" (where pos)
           (String.length bases) (String.length scores)
       | `wrong_comment_line (pos, _) ->
         failf "Syntax error %s: (comment line)\n" (where pos)
       | `wrong_name_line (pos, _) ->
         failf "Syntax error %s: (name line)\n" (where pos))
  in
  drain 0

(* [count_reads ~buffer_size file] opens [file] for reading through [Lwt_io]
   and returns (as an Lwt thread) the number of records counted in it.

   The file is read in chunks of at most [buffer_size] characters; each chunk
   is fed to a line-oriented parser and the records that become available are
   counted with [count_next_reads]. Reading stops at the first empty chunk
   (presumably the end of the input -- confirm against the [Lwt_io.read]
   documentation).

   The thread fails if [count_next_reads] fails. *)
let count_reads ~buffer_size file =
  let parser = Biocaml_transform.Line_oriented.parser ~filename:file () in
  (* [consume input total] carries the running count as an argument. *)
  let rec consume input total =
    Lwt_io.read ~count:buffer_size input >>= fun chunk ->
    match chunk with
    | "" -> return total
    | _ ->
      Biocaml_transform.Line_oriented.feed_string parser chunk;
      count_next_reads parser >>= fun found ->
      consume input (total + found)
  in
  Lwt_io.with_file ~buffer_size ~mode:Lwt_io.input file
    (fun input -> consume input 0)

(* [do_bench repetitions buffer_sizes files] runs the read-counting benchmark
   and prints a report on standard output with [Lwt_io.printf].

   For every file, and for every buffer size, the file is parsed
   [repetitions] times with [count_reads]. The report contains one table per
   file; each row gives the buffer size, the number of reads returned by the
   last pass, the total elapsed time (as converted by [Core.Span.to_float])
   and, in parentheses, that time divided by [repetitions].

   A non-positive [repetitions] performs no pass at all: the row then shows
   0 reads and an average of 0 instead of recursing forever (negative count)
   or dividing by zero.

   The returned thread fails if [count_reads] fails. *)
let do_bench repetitions buffer_sizes files =
  Lwt_io.printf "{section|Benchmark}\n\
                 {b|Started On %s}\n" Time.(now () |! to_string)
  >>= fun () ->
  Lwt_io.printf "Info: {list|\n\
                 {*} Repetitions: %d\n\
                 {*} Buffer-sizes: %s\n\
                 {*} Files: {br} %s\n\
                 }{p}\n"
    repetitions (String.concat ~sep:", " (List.map buffer_sizes (sprintf "%d")))
    (String.concat ~sep:"{br} " (List.map files (sprintf "{t|%s}")))
  >>= fun () ->
  Lwt_list.iter_s (fun file ->
    Lwt_io.printf "{b|File:} {t|%s}\n\
                   {begin table 3}\n\
                   {c h|Buf-size}{c h|{#} Reads} {c h|Time (Avg.)}\n" file
    >>= fun () ->
    (* Measurements are taken sequentially so that they do not disturb each
       other; printing happens only once all of them are done. *)
    Lwt_list.map_s (fun buffer_size ->
      let reads = ref 0 in
      let rec iteration n =
        (* [n <= 0] rather than [n = 0]: a negative count must terminate. *)
        if n <= 0 then
          return ()
        else (
          count_reads ~buffer_size file >>= fun r ->
          reads := r;
          iteration (n - 1))
      in
      let start = Time.now () in
      iteration repetitions >>= fun () ->
      let time = Time.(diff (now ()) start) in
      return (buffer_size, !reads, Core.Span.to_float time)
    ) buffer_sizes
    >>= fun results ->
    Lwt_list.iter_s (fun (bufs, reads, time) ->
      (* Guard the division: no pass was run when [repetitions <= 0]. *)
      let average =
        if repetitions > 0 then time /. float repetitions else 0. in
      Lwt_io.printf "{c|%d} {c|%d} {c|%f (%f)}\n"
        bufs reads time average)
      results
    >>= fun () ->
    Lwt_io.printf "{end}{p}\n"
  ) files
  >>= fun () ->
  Lwt_io.printf "{p}{b|Ended On %s}\n" Time.(now () |! to_string)
(* Program entry point: parses the command line and runs [do_bench].

   Options:
   - [-buffer-sizes <sizes>]: comma-separated list of buffer sizes
     (default: 1024);
   - [-repetitions <nb>]: number of passes per buffer size and per file
     (default: 1, must be at least 1).

   Anonymous arguments are the files to benchmark; they are processed in the
   order given on the command line. Without any file the program only prints
   "Nothing to do" on standard error. *)
let () =
  let repetitions = ref 1 in
  let buffer_sizes = ref [1024] in
  let filenames = ref [] in
  (* Turn a malformed size list into [Arg.Bad] so that [Arg.parse] reports it
     together with the usage message instead of the program dying on an
     uncaught exception. *)
  let set_buffer_sizes s =
    try
      buffer_sizes := List.map (String.split ~on:',' s) Int.of_string
    with Failure _ ->
      raise (Arg.Bad (sprintf "invalid buffer-size list: %S" s))
  in
  let options = [
    ("-buffer-sizes", Arg.String set_buffer_sizes,
     "<sizes>\n\tComma-separated list of buffer sizes");
    ("-repetitions", Arg.Set_int repetitions,
     "<nb>\n\tNumber of repetitions (per buf-size and per file)");
  ] in
  let anon s = filenames := s :: !filenames in
  Arg.parse options anon "Usage: see -help";
  (* [anon] conses each file in front of the list, so the accumulated list is
     reversed; restore the command-line order before benchmarking. *)
  match List.rev !filenames with
  | [] ->
    eprintf "Nothing to do\n"
  | _ when !repetitions < 1 ->
    eprintf "-repetitions must be at least 1 (got %d)\n" !repetitions;
    exit 2
  | files ->
    Lwt_main.run (do_bench !repetitions !buffer_sizes files)
(*
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.
*)