Commits

Kota UENISHI committed 0ac43e2

add importer

Comments (0)

Files changed (6)

      resp1 <- exchange conn $ put (BS.pack "foo") (BS.pack "bar") Nothing (binary $ BS.pack  "content") RT.Default RT.Default False
      resp2 <- exchangeMaybe conn $ get (BS.pack "foo") (BS.pack "bar") RT.Default;
      disconnect conn
-     case (Res.get resp2) of
-         Just (seq_content, vclock) -> putStrLn $ show seq_content
-         Nothing -> putStrLn "nothing"
+     putStrLn $ show $ Res.get resp2
+--         Just (seq_content, vclock) -> putStrLn $ show seq_content
+--         Nothing -> putStrLn "nothing"
 
+import edu.cmu.sphinx.frontend.util.Microphone;
+import edu.cmu.sphinx.recognizer.Recognizer;
+import edu.cmu.sphinx.result.Result;
+import edu.cmu.sphinx.util.props.ConfigurationManager;
+
+/**
+ * A simple HelloWorld demo showing a simple speech application built using Sphinx-4. This application uses the Sphinx-4
+ * endpointer, which automatically segments incoming audio into utterances and silences.
+ */
+public class HelloWorld {
+    
+    public static void main(String[] args) {
+        ConfigurationManager cm;
+        
+        if (args.length > 0) {
+            cm = new ConfigurationManager(args[0]);
+        } else {
+            cm = new ConfigurationManager(HelloWorld.class.getResource("helloworld.config.xml"));
+        }
+
+        Recognizer recognizer = (Recognizer) cm.lookup("recognizer");
+        recognizer.allocate();
+
+        // start the microphone or exit if the programm if this is not possible
+        Microphone microphone = (Microphone) cm.lookup("microphone");
+        if (!microphone.startRecording()) {
+            System.out.println("Cannot start microphone.");
+            recognizer.deallocate();
+            System.exit(1);
+        }
+
+        System.out.println("Say: (Good morning | Hello) ( Bhiksha | Evandro | Paul | Philip | Rita | Will )");
+
+        // loop the recognition until the programm exits.
+        while (true) {
+            System.out.println("Start speaking. Press Ctrl-C to quit.\n");
+
+            Result result = recognizer.recognize();
+
+            if (result != null) {
+                String resultText = result.getBestFinalResultNoFiller();
+                System.out.println("You said: " + resultText + '\n');
+            } else {
+                System.out.println("I can't hear what you said.\n");
+            }
+        }
+    }
+}

ocaml/importer/.gitignore

+*.o
+*~
+.omakedb*
+O*.omc

ocaml/importer/OMakefile

+.PHONY: all install clean test
+
+# .SUBDIRS:
+
+USE_OCAMLFIND = true
+
+OCAMLPACKS[] =
+    bz2
+    expat
+    json-wheel
+    riak
+
+# Abort the build early when ocamlfind is unavailable (USE_OCAMLFIND is set above).
+if $(not $(OCAMLFIND_EXISTS))
+   eprintln(This project requires ocamlfind\, but it was not found.)
+   eprintln(You need to install ocamlfind and run "omake --configure".)
+   exit 1
+
+# OCAMLINCLUDES +=
+
+# NATIVE_ENABLED = $(OCAMLOPT_EXISTS)
+# BYTE_ENABLED = $(not $(OCAMLOPT_EXISTS))
+
+# OCAMLFLAGS    +=
+# OCAMLCFLAGS   +=
+# OCAMLOPTFLAGS +=
+# OCAML_LINK_FLAGS +=
+# OCAML_BYTE_LINK_FLAGS +=
+# OCAML_NATIVE_LINK_FLAGS +=
+
+FILES[] =
+   importer
+
+PROGRAM = importer
+# OCAML_LIBS +=
+# OCAML_CLIBS +=
+# OCAML_OTHER_LIBS +=
+# OCAML_LIB_FLAGS +=
+
+.DEFAULT: $(OCamlProgram $(PROGRAM), $(FILES))
+
+test: $(PROGRAM)
+	./$(PROGRAM)

ocaml/importer/OMakeroot

+open build/C
+open build/OCaml
+open build/LaTeX
+
+#
+# The command-line variables are defined *after* the
+# standard configuration has been loaded.
+#
+DefineCommandVars()
+
+#
+# Include the OMakefile in this directory.
+#
+.SUBDIRS: .

ocaml/importer/importer.ml

+(* see http://mjambon.com/json-wheel-doc/ for Json doc *)
+(* see http://mmzeeman.home.xs4all.nl/ocaml/expat-doc/Expat.html for Xml processing *)
+
+(* "../enwiki-20121101-pages-meta-current1.xml-p000000010p000010000.bz2" *)
+
+(* Fail with a usage message instead of an index-out-of-bounds exception
+   when the two required command-line arguments are missing. *)
+let () =
+  if Array.length Sys.argv < 3 then begin
+    prerr_endline
+      ("usage: " ^ Sys.argv.(0) ^ " <bucket_name> <input_filename>");
+    exit 2
+  end
+
+(* argv.(1): bucket handed to Riak.riak_put;
+   argv.(2): bzip2-compressed file opened by the main block *)
+let bucket_name = Sys.argv.(1)
+let input_filename = Sys.argv.(2)
+
+(* number of bytes requested from Bz2.read per call *)
+let buflen = 65536
+(* stack of JSON values, one per element currently open inside a page *)
+let s = Stack.create()
+(* true between the start and end tags of a "page" element *)
+let in_page = ref false
+(* number of pages handed to process_page so far *)
+let count = ref 0
+
+(* connection cached by riak_client; None until the first use *)
+let riakc = ref None
+
+(* Serialises a JSON value to a non-compact string. *)
+let string_of_json json =
+  Json_io.string_of_json ~allow_nan:true ~compact:false ~recursive:true json;;
+
+(* Debug helper: prints the serialised JSON value on stdout, followed by
+   a newline. Shares the serialisation options of string_of_json instead
+   of repeating them. *)
+let pj json =
+  print_endline (string_of_json json);;
+
+(* Reads the whole bzip2 stream [bzp] in chunks of [buflen] bytes, feeding
+   each chunk to the Expat parser [psr]. Returns [pos] plus the number of
+   decompressed bytes read.
+
+   Only Bz2.read is guarded against End_of_file: an End_of_file escaping
+   from a parser callback must not be mistaken for the end of the input.
+   Keeping the recursive call outside the try also makes the loop
+   tail-recursive, so the stack does not grow with the input size. *)
+let read_all_bytes psr bzp pos =
+  (* one buffer is enough: String.sub copies the bytes given to Expat *)
+  let buf = String.create buflen in
+  let rec loop pos =
+    let bytes_read =
+      try Some (Bz2.read bzp buf 0 buflen) with End_of_file -> None
+    in
+    match bytes_read with
+    | None -> pos
+    | Some n ->
+      Expat.parse psr (String.sub buf 0 n);
+      loop (pos + n)
+  in
+  loop pos;;
+
+(* Start-element callback. A "page" tag switches page mode on; every
+   element opened while in page mode (the page itself included) gets a
+   fresh empty JSON object pushed on the stack. Elements outside a page
+   are ignored. The XML attributes are deliberately not preserved. *)
+let elem_handler tag attrs =
+  if tag = "page" then in_page := true;
+  if !in_page then Stack.push (Json_type.Object []) s;;
+
+exception Sucks (* raised by get_title when no string "title" field can be found *)
+
+(* Returns the cached Riak connection, connecting to 127.0.0.1:8087 and
+   caching the result in [riakc] on first use. *)
+let riak_client () =
+  match !riakc with
+  | Some conn -> conn
+  | None ->
+    let conn = Riak.riak_connect_with_defaults "127.0.0.1" 8087 in
+    riakc := Some conn;
+    conn;;
+
+(* Disconnects the cached Riak connection, if any. The cache is cleared
+   afterwards so that a later riak_client () opens a new connection
+   instead of handing out the closed one. *)
+let close_riak_client () = match (!riakc) with
+  | None -> ()
+  | Some(c) ->
+    Riak.riak_disconnect c;
+    riakc := None;;
+
+(* Returns the value of the first "title" field of [json_obj] that holds
+   a JSON string. Raises Sucks when [json_obj] is not an object or has no
+   such field. *)
+let get_title json_obj =
+  let is_title = function
+    | ("title", Json_type.String _) -> true
+    | _ -> false
+  in
+  let fields = match json_obj with
+    | Json_type.Object pairs -> pairs
+    | _ -> raise Sucks
+  in
+  match List.filter is_title fields with
+  | (_, Json_type.String title) :: _ -> title
+  | _ -> raise Sucks;;
+
+(* Stores one page in Riak: the page title is the key, the serialised
+   JSON is the value, and the bucket is [bucket_name]. The value returned
+   by Riak.riak_put is discarded.
+   For reference:
+   val riak_put : riak_connection -> riak_bucket -> riak_key option ->
+     string -> riak_put_option list -> riak_object list *)
+let process_page p =
+  let key = Some (get_title p) in
+  let conn = riak_client () in
+  let body = string_of_json p in
+  ignore (Riak.riak_put conn bucket_name key body []);;
+
+(* Filter predicate for object fields: false only for a "__text" field
+   whose string value is empty once trimmed, true for everything else. *)
+let not_empty (key, value) =
+  match key, value with
+  | "__text", Json_type.String text -> String.trim text <> ""
+  | _ -> true;;
+
+(* Simplifies a JSON object after dropping its blank "__text" fields:
+   nothing left becomes Null, a lone "__text" field becomes its value,
+   anything else stays an object of the remaining fields. Values that
+   are not objects are returned unchanged. *)
+let unwrap json =
+  let collapse = function
+    | [] -> Json_type.Null
+    | [("__text", text)] -> text
+    | kept -> Json_type.Object kept
+  in
+  match json with
+  | Json_type.Object pairs -> collapse (List.filter not_empty pairs)
+  | _ -> json;;
+
+(* End-element callback.
+   - "page": leaves page mode, pops the finished page, sends it through
+     process_page, bumps [count] and prints a progress dot every 100
+     pages.
+   - any other tag while in page mode: pops the finished child and its
+     parent; a non-Null child is prepended to an object parent under
+     [tag]; the (possibly updated) parent is pushed back.
+   - anything else is ignored. *)
+let elem_ehandler tag =
+  if tag = "page" then begin
+    in_page := false;
+    let page = unwrap (Stack.pop s) in
+    process_page page;
+    incr count;
+    if !count mod 100 = 0 then begin
+      print_string ".";
+      flush_all ()
+    end
+  end else if !in_page then begin
+    let child = unwrap (Stack.pop s) in
+    let parent = Stack.pop s in
+    let updated =
+      match parent with
+      | Json_type.Object fields when child <> Json_type.Null ->
+        Json_type.Object ((tag, child) :: fields)
+      | _ -> parent
+    in
+    Stack.push updated s
+  end;;
+
+(* Returns a JSON string made of [json_str]'s text followed by [str];
+   when [json_str] is not a JSON string the result is just [str]. *)
+let append_json_string json_str str =
+  let combined = match json_str with
+    | Json_type.String txt -> txt ^ str
+    | _ -> str
+  in
+  Json_type.String combined;;
+
+(* Character-data callback; does nothing outside a page. Inside a page
+   the text goes into the object on top of the stack: appended to a
+   leading "__text" field when there is one, otherwise added as a new
+   leading "__text" field. A top value that is not an object is left
+   untouched. *)
+let data_handler txt =
+  if !in_page then begin
+    let updated =
+      match Stack.pop s with
+      | Json_type.Object (("__text", prev) :: rest) ->
+        Json_type.Object (("__text", append_json_string prev txt) :: rest)
+      | Json_type.Object fields ->
+        Json_type.Object (("__text", Json_type.String txt) :: fields)
+      | other -> other
+    in
+    Stack.push updated s
+  end;;
+
+(* Program entry point: opens [input_filename] as a bzip2 stream, parses
+   it with an Expat parser wired to the element and character-data
+   handlers, then releases the resources and reports the totals. *)
+let _ =
+  print_endline input_filename;
+  (* Recording has to be on, otherwise Printexc.get_backtrace below has
+     nothing to report. *)
+  Printexc.record_backtrace true;
+
+  let fp = open_in input_filename in
+  let bzp = Bz2.open_in fp in (* http://camlbz2.forge.ocamlcore.org/api/Bz2.html *)
+  let psr = Expat.parser_create (None) in
+  Expat.set_character_data_handler psr data_handler;
+  Expat.set_start_element_handler psr elem_handler;
+  Expat.set_end_element_handler psr elem_ehandler;
+  (* begin/end delimits the handler: without it every statement after
+     "with" belongs to the handler and is skipped on success. *)
+  begin
+    try
+      let size = read_all_bytes psr bzp 0 in
+      print_int size;
+      print_endline " bytes read!"
+    with e ->
+      (* report what went wrong, then fall through to the cleanup *)
+      print_endline (Printexc.to_string e);
+      print_endline (Printexc.get_backtrace())
+  end;
+  Bz2.close_in bzp;
+  close_in fp;
+  close_riak_client();
+  Printf.printf "%d pages!\n" (!count);;