Oliver Gu avatar Oliver Gu committed 76014ab

Added sparse svm node representation for problems that don't use a precomputed kernel.

Comments (0)

Files changed (8)

+2013-02-22: Added a sparse representation of svm node arrays for problems
+            (training sets) without a precomputed kernel.
+
 2013-02-20: Renamed library to svm to resolve linker problem.
             Thanks to Markus Mottl <markus.mottl@gmail.com> for this suggestion.
 
 OASISFormat:       0.3
 Name:              libsvm-ocaml
-Version:           0.8.1
+Version:           0.8.2
 Synopsis:          libsvm-ocaml - OCaml bindings to the LIBSVM library
 Description:       libsvm-ocaml offers an OCaml-interface to the LIBSVM library
 Authors:           Oliver Gu <odietric@gmail.com>
 # OASIS_START
-# DO NOT EDIT (digest: e5cadc18e631c2fdf0b94d729cd263a7)
-version = "0.8"
+# DO NOT EDIT (digest: 28061f77c5ec5c3f4a2e296ef62d5f1f)
+version = "0.8.2"
 description = "libsvm-ocaml - OCaml bindings to the LIBSVM library"
 requires = "core lacaml"
 archive(byte) = "svm.cma"
     external svm_load_model : string -> model = "svm_load_model_stub"
 
     external svm_get_svm_type : model -> svm_type = "svm_get_svm_type_stub"
+    external svm_get_kernel_type : model -> kernel_type = "svm_get_kernel_type_stub"
     external svm_get_nr_class : model -> int = "svm_get_nr_class_stub"
     external svm_get_labels : model -> int list = "svm_get_labels_stub"
     external svm_get_svr_probability :
       model -> svm_node_array -> float * float array = "svm_predict_probability_stub"
   end
 
-  (* Note: This function does not create a sparse svm node, i.e. we also add
-     zero values to the node. The reason is that learning with precomputed
-     kernels requires a non-sparse node representation. *)
+  (* [sparse_svm_node_array_of_vec v] builds a sparse svm node array from the
+     vector [v]: entries equal to zero are skipped, so the array holds one
+     node per non-zero entry plus one closing node.
+
+     The vector is traversed twice, first to count the non-zero entries
+     (this fixes the allocation size) and then to copy them. Each node keeps
+     the index handed out by [Vec.iteri] unchanged, together with the value
+     found at that index. The last node is set to index [-1] and value [0.],
+     the same closing entry written by the other node array builders in
+     this file. *)
+  let sparse_svm_node_array_of_vec v =
+    let count_nonzeros v = Vec.fold (fun count x ->
+      count + if x <> 0. then 1 else 0) 0 v
+    in
+    let size = count_nonzeros v + 1 in
+    let nodes = Stub.svm_node_array_create size in
+    let pos = ref 0 in
+    Vec.iteri (fun index value ->
+      if value <> 0. then begin
+        Stub.svm_node_array_set nodes !pos index value;
+        incr pos
+      end) v;
+    Stub.svm_node_array_set nodes !pos (-1) 0.;
+    nodes
+
   let svm_node_array_of_vec v =
     let n = Vec.dim v in
-    let node = Stub.svm_node_array_create (n+1) in
+    let nodes = Stub.svm_node_array_create (n+1) in
     Vec.iteri (fun index value ->
       let pos = index-1 in
-      Stub.svm_node_array_set node pos pos value) v;
-    Stub.svm_node_array_set node n (-1) 0.;
-    node
+      Stub.svm_node_array_set nodes pos pos value) v;
+    Stub.svm_node_array_set nodes n (-1) 0.;
+    nodes
 
   let svm_node_array_of_list l ~len =
     let size = len + 1 in
-    let node = Stub.svm_node_array_create size in
+    let nodes = Stub.svm_node_array_create size in
     List.iteri l ~f:(fun pos (index, value) ->
-      Stub.svm_node_array_set node pos index value);
-    Stub.svm_node_array_set node len (-1) 0.;
-    node
+      Stub.svm_node_array_set nodes pos index value);
+    Stub.svm_node_array_set nodes len (-1) 0.;
+    nodes
 
   let count_lines file =
     In_channel.with_file file ~f:(fun ic ->
     let get_n_samples t = t.n_samples
     let get_n_feats t = t.n_feats
 
-    let create ~x ~y =
+    let create_gen x y ~f =
       let n_samples = Mat.dim1 x in
       let n_feats = Mat.dim2 x in
       let x' = Mat.transpose x in
       let v = Stub.double_array_create n_samples in
       for i = 1 to n_samples do
         let x_row = Mat.col x' i in
-        Stub.svm_node_matrix_set m (i-1) (svm_node_array_of_vec x_row);
+        Stub.svm_node_matrix_set m (i-1) (f x_row);
         Stub.double_array_set v (i-1) y.{i}
       done;
       let prob = Stub.svm_problem_create () in
         prob;
       }
 
+    let create ~x ~y = create_gen x y ~f:sparse_svm_node_array_of_vec
+    (* [create] above converts every row of [x] with
+       [sparse_svm_node_array_of_vec], which drops zero entries. [create_k]
+       below converts every row of [k] with [svm_node_array_of_vec] instead,
+       which keeps all entries: learning with a precomputed kernel requires
+       the non-sparse node representation. *)
+    let create_k ~k ~y = create_gen k y ~f:svm_node_array_of_vec
+
     let load file =
       let n_samples = count_lines file in
       let n_feats = ref 0 in
       let y = Stub.double_array_create n_samples in
       for i = 0 to n_samples-1 do
         let width = Stub.svm_problem_width t.prob i in
-        let node = Stub.svm_node_array_create (width+1) in
+        let nodes = Stub.svm_node_array_create (width+1) in
         for j = 0 to width-1 do
           let index, value = Stub.svm_problem_x_get t.prob i j in
           if Float.(=.) value min_feats.{index} then
-            Stub.svm_node_array_set node j index lower
+            Stub.svm_node_array_set nodes j index lower
           else if Float.(=.) value max_feats.{index} then
-            Stub.svm_node_array_set node j index upper
+            Stub.svm_node_array_set nodes j index upper
           else
             let new_value = lower +. (upper-.lower) *.
               (value-.min_feats.{index}) /.
               (max_feats.{index}-.min_feats.{index})
             in
-            Stub.svm_node_array_set node j index new_value
+            Stub.svm_node_array_set nodes j index new_value
         done;
-        Stub.svm_node_array_set node width (-1) 0.;
-        Stub.svm_node_matrix_set x i node;
+        Stub.svm_node_array_set nodes width (-1) 0.;
+        Stub.svm_node_matrix_set x i nodes;
         Stub.double_array_set y i (Stub.svm_problem_y_get t.prob i);
       done;
       let scaled_prob = Stub.svm_problem_create () in
     if not verbose then Stub.svm_set_quiet_mode () else ();
     Stub.svm_cross_validation problem.Problem.prob params n_folds
 
-  let predict_one model ~x = Stub.svm_predict model (svm_node_array_of_vec x)
+  let predict_one model ~x =
+    let nodes = match Stub.svm_get_kernel_type model with
+    | PRECOMPUTED -> svm_node_array_of_vec x
+    | _ -> sparse_svm_node_array_of_vec x
+    in
+    Stub.svm_predict model nodes
 
   let predict model ~x =
     let n = Mat.dim1 x in
     y
 
   let predict_values model ~x =
-    let dec_vals = Stub.svm_predict_values model (svm_node_array_of_vec x) in
+    let nodes = match Stub.svm_get_kernel_type model with
+      | PRECOMPUTED -> svm_node_array_of_vec x
+      | _ -> sparse_svm_node_array_of_vec x
+    in
+    let dec_vals = Stub.svm_predict_values model nodes in
     match Stub.svm_get_svm_type model with
     | EPSILON_SVR | NU_SVR | ONE_CLASS ->
       Array.make_matrix 1 1 dec_vals.(0)
       invalid_arg "One-class problems do not support probability estimates."
     | C_SVC | NU_SVC ->
       if Stub.svm_check_probability_model model then
-        Stub.svm_predict_probability model (svm_node_array_of_vec x)
+        let nodes = match Stub.svm_get_kernel_type model with
+        | PRECOMPUTED -> svm_node_array_of_vec x
+        | _ -> sparse_svm_node_array_of_vec x
+        in
+        Stub.svm_predict_probability model nodes
       else
         invalid_arg "Model does not support probability estimates."
 
         | Some line ->
           let target, feats = parse_line line ~pos:i in
           expected.{i} <- target;
-          let node = svm_node_array_of_list feats ~len:(List.length feats) in
-          predicted.{i} <- Stub.svm_predict model node;
+          let nodes = svm_node_array_of_list feats ~len:(List.length feats) in
+          predicted.{i} <- Stub.svm_predict model nodes;
           loop (i+1)
       in
       loop 1)
         instance. *)
     val create : x:mat -> y:vec -> t
 
+    (** [create_k ~k ~y] constructs a problem from a precomputed kernel
+        matrix [k] and target vector [y]. The matrix [k] has to be of the
+        following form:
+
+        1 K(x1,x1) K(x1,x2) ... K(x1,xL)
+
+        2 K(x2,x1) K(x2,x2) ... K(x2,xL)
+
+        ...
+
+        L K(xL,x1) K(xL,x2) ... K(xL,xL)
+
+        where L denotes the number of training instances and K(x,y) is the
+        precomputed kernel value of the two training instances x and y.
+        The first column of each row holds the number (1 to L) of the
+        training instance that the row belongs to. *)
+    val create_k : k:mat -> y:vec -> t
+
     (** [get_n_samples prob] @return the number of training samples. *)
     val get_n_samples : t -> int
 

lib/libsvm_stubs.c

   CAMLreturn(v_type);
 }
 
+CAMLprim value svm_get_kernel_type_stub(value v_model) /* Returns the
+   kernel_type field of the parameters stored in the model, converted to an
+   OCaml integer value with Val_long.
+   NOTE(review): the OCaml external declares the result as kernel_type, so
+   this presumably relies on the OCaml constructors being declared in the
+   same order as the C enum values -- confirm against both definitions. */
+{
+  CAMLparam1(v_model);
+  CAMLlocal1(v_kernel);
+  v_kernel = Val_long(Svm_model_val(v_model)->param.kernel_type);
+  CAMLreturn(v_kernel);
+}
+
 CAMLprim value svm_get_nr_class_stub(value v_model)
 {
   CAMLparam1(v_model);
 (* setup.ml generated for the first time by OASIS v0.2.0 *)
 
 (* OASIS_START *)
-(* DO NOT EDIT (digest: 2067b6f2b07ddc80c37903c0b7ae4686) *)
+(* DO NOT EDIT (digest: fc0701cc2c8fa07c3173bbbee1259fcc) *)
 (*
    Regenerated by OASIS v0.3.0
    Visit http://oasis.forge.ocamlcore.org for more information and
           ocaml_version = Some (OASISVersion.VGreaterEqual "3.12");
           findlib_version = Some (OASISVersion.VGreaterEqual "1.3.1");
           name = "libsvm-ocaml";
-          version = "0.8";
+          version = "0.8.2";
           license =
             OASISLicense.DEP5License
               (OASISLicense.DEP5Unit
           };
      oasis_fn = Some "_oasis";
      oasis_version = "0.3.0";
-     oasis_digest = Some "\133\239\132p\005\018\147'^K^\234@h\\\157";
+     oasis_digest = Some ">\128\233\017H\170\192A\131.T\246\140~f\191";
      oasis_exec = None;
      oasis_setup_args = [];
      setup_update = false;
     [|1.; 0.; 0.; 0.; 0.|]; [|2.; 0.; 1.; 0.; 1.|];
     [|3.; 0.; 0.; 1.; 1.|]; [|4.; 0.; 1.; 1.; 2.|];
   |] in
-  let problem = Svm.Problem.create ~x:k ~y:targets in
+  let problem = Svm.Problem.create_k ~k ~y:targets in
   let model = Svm.train ~kernel:`PRECOMPUTED ~c:10.
     ~weights:[(1,10.);(0,1.)] problem
   in
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.