Commits

Sebastien Mondet committed c854f41

Import first version

Comments (0)

Files changed (2)

+@SEQ_ID
+GATTTGGGGTTCAAAGCAGTATCGATCAAATAGTAAATCCATTTGTTCAACTCACAGTTT
++
+!''*((((***+))%%%++)(%%%%).1***-+*''))**55CCF>>>>>>CCCCCCC65
+@SEQ_ID
+GATTTGGGGTTCAAAGCAGTATCGATCAAATAGTAAATCCATTTGTTCAACTCACAGTTT
++
+!''*((((***+))%%%++)(%%%%).1***-+*''))**55CCF>>>>>>CCCCCCC65
+@SEQ_ID
+GATTTGGGGTTCAAAGCAGTATCGATCAAATAGTAAATCCATTTGTTCAACTCACAGTTT
++
+!''*((((***+))%%%++)(%%%%).1***-+*''))**55CCF>>>>>>CCCCCCC65
+@SEQ_ID
+GATTTGGGGTTCAAAGCAGTATCGATCAAATAGTAAATCCATTTGTTCAACTCACAGTTT
++
+!''*((((***+))%%%++)(%%%%).1***-+*''))**55CCF>>>>>>CCCCCCC65
+@SEQ_ID
+GATTTGGGGTTCAAAGCAGTATCGATCAAATAGTAAATCCATTTGTTCAACTCACAGTTT
++
+!''*((((***+))%%%++)(%%%%).1***-+*''))**55CCF>>>>>>CCCCCCC65
+(*
+
+ocamlfind ocamlopt -thread -package lwt.unix,biocaml,core -linkpkg fastqbench.ml -o fastbench
+
+*)
+
+open Core.Std
+open Lwt
+
+(* [failf fmt ...] formats a message printf-style and returns an Lwt thread
+   that has failed with [Failure message]. *)
+let failf fmt =
+  let fail_with message = Lwt.fail (Failure message) in
+  ksprintf fail_with fmt
+  
+(* [count_next_reads parser] repeatedly calls [Biocaml_fastq.next] on [parser]
+   until it answers [`nothing_ready], and returns (in Lwt) the number of
+   records obtained so far.  Any [`error] answer ends the count with a thread
+   failed by [Failure], whose message carries the line number reported in the
+   error. *)
+let count_next_reads parser =
+  let rec drain counted =
+    match Biocaml_fastq.next parser with
+    | `record _ -> drain (counted + 1)
+    | `nothing_ready -> return counted
+    | `error (`wrong_name_line (line, _)) ->
+      failf "Syntax error (name line) line: %d\n" line
+    | `error (`wrong_comment_line (line, _)) ->
+      failf "Syntax error (comment line) line: %d\n" line
+    | `error (`sequence_and_qualities_do_not_match (line, bases, scores)) ->
+      failf "Error line %d: %d bp Vs %d q-scores\n" line
+        (String.length bases) (String.length scores)
+  in
+  drain 0
+
+(* [count_reads ~buffer_size file] opens [file] for reading, creates a fresh
+   FASTQ parser, and feeds it the file in chunks of at most [buffer_size]
+   characters.  After each chunk the records made available are counted with
+   [count_next_reads]; the running total is returned (in Lwt) once
+   [Lwt_io.read] yields an empty string, which is treated as end of input. *)
+let count_reads ~buffer_size file =
+  let parser = Biocaml_fastq.parser () in
+  let rec consume channel total =
+    Lwt_io.read ~count:buffer_size channel >>= function
+    | "" -> return total
+    | chunk ->
+      Biocaml_fastq.feed_string parser chunk;
+      count_next_reads parser >>= fun found ->
+      consume channel (total + found)
+  in
+  Lwt_io.with_file ~buffer_size ~mode:Lwt_io.input file
+    (fun channel -> consume channel 0)
+
+(* [do_bench repetitions buffer_sizes files] runs the benchmark and prints a
+   report on standard output.
+
+   The report starts with a header (start time, repetitions, buffer sizes,
+   files).  Then, for each file in [files], one table row is printed per entry
+   of [buffer_sizes], giving the buffer size, the number of reads returned by
+   the last [count_reads] run, the total elapsed time over all repetitions and
+   that time divided by [repetitions].  Files, buffer sizes and repetitions
+   are all processed sequentially.
+
+   A non-positive [repetitions] performs no run at all: the read count is
+   then reported as 0. *)
+let do_bench repetitions buffer_sizes files =
+  Lwt_io.printf "{section|Benchmark}\n\
+                 {b|Started On %s}\n" Time.(now () |! to_string)
+  >>= fun () ->
+  Lwt_io.printf "Info: {list|\n\
+                 {*} Repetitions: %d\n\
+                 {*} Buffer-sizes: %s\n\
+                 {*} Files: {br} %s\n\
+                 }{p}\n"
+    repetitions (String.concat ~sep:", " (List.map buffer_sizes (sprintf "%d")))
+    (String.concat ~sep:"{br} " (List.map files (sprintf "{t|%s}")))
+  >>= fun () ->
+  Lwt_list.iter_s (fun file ->
+    Lwt_io.printf "{b|File:} {t|%s}\n\
+                   {begin table 3}\n\
+                   {c h|Buf-size}{c h|{#} Reads} {c h|Time (Avg.)}\n" file
+    >>= fun () ->
+    Lwt_list.map_s (fun buffer_size ->
+      (* Holds the count returned by the most recent run. *)
+      let reads = ref 0 in
+      (* Stop on [n <= 0] rather than on [n = 0] only: a negative number of
+         repetitions must not send the countdown below zero, where it would
+         keep running practically forever. *)
+      let rec iteration n =
+        if n <= 0 then return ()
+        else (
+          count_reads ~buffer_size file >>= fun r ->
+          reads := r;
+          iteration (n - 1))
+      in
+      let start = Time.now () in
+      iteration repetitions >>= fun () ->
+      let time = Time.(diff (now ()) start) in
+      return (buffer_size, !reads, Core.Span.to_float time)
+    ) buffer_sizes
+    >>= fun results ->
+    (* Rows are printed only after every buffer size has been measured, so
+       that printing does not interleave with the timed section. *)
+    Lwt_list.iter_s (fun (bufs, reads, time) ->
+      Lwt_io.printf "{c|%d} {c|%d} {c|%f (%f)}\n"
+        bufs reads time (time /. float repetitions))
+      results
+    >>= fun () ->
+    Lwt_io.printf "{end}{p}\n"
+  ) files
+  >>= fun () ->
+  Lwt_io.printf "{p}{b|Ended On %s}\n" Time.(now () |! to_string)
+(* Entry point: parses the command line, then runs [do_bench] on the
+   anonymous arguments (the files to benchmark), in command-line order.
+   - [-buffer-sizes] takes a comma-separated list of integers (default: 1024);
+   - [-repetitions] takes the number of runs per file and per buffer size
+     (default: 1).
+   When no file is given, a short notice is printed on stderr and nothing is
+   run. *)
+let () =
+  let repetitions = ref 1 in
+  let buffer_sizes = ref [1024] in
+  let filenames = ref [] in
+  let parse_sizes s =
+    (* Report a malformed size through [Arg.Bad] so that [Arg.parse] prints
+       the usage message instead of dying on an uncaught exception. *)
+    try List.map (String.split ~on:',' s) Int.of_string
+    with Failure _ ->
+      raise (Arg.Bad (sprintf "invalid buffer sizes: %S" s))
+  in
+  let options = [
+    ("-buffer-sizes", Arg.String (fun s -> buffer_sizes := parse_sizes s),
+     "<sizes>\n\tComma-separated list of buffer sizes");
+    ("-repetitions", Arg.Set_int repetitions,
+     "<nb>\n\tNumber of repetitions (per buf-size and per file)");
+  ] in
+  let anon s = filenames := s :: !filenames in
+  Arg.parse options anon "Usage: see -help";
+  (* [anon] conses each file onto the list, so reverse it to get the files
+     back in the order they were given. *)
+  match List.rev !filenames with
+  | [] -> eprintf "Nothing to do\n"
+  | files -> Lwt_main.run (do_bench !repetitions !buffer_sizes files)
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.