Markus Mottl avatar Markus Mottl committed 1b9b9b6 Merge

Merged sort branch

Comments (0)

Files changed (20)

 .PHONY:	clean
 clean: setup.ml
 	ocaml setup.ml -clean
-	$(RM) $(TARBALL)
+	$(RM) $(wildcard $(TARBALL))
 	-touch setup.ml # force reconfigure
 
 distclean: setup.ml
 (** Complex matrices (precision: complexxx). *)
 
 type trans3 = [ `C | `N | `T ]
+(** Transpose parameter (conjugate transposed, normal, or transposed). *)
+
 val prec : (Complex.t, complexxx_elt) Bigarray.kind
 (** Precision for this submodule {!CPREC}.  Allows to write precision
     independent code. *)
 (** Matrices (precision: floatxx). *)
 
 type trans3 = [ `N | `T ]
+(** Transpose parameter (normal or transposed).  For complex matrices,
+    conjugate transpose is also offered, hence the name. *)
 
 val prec : (float, floatxx_elt) Bigarray.kind
 (** Precision for this submodule {!FPREC}.  Allows to write precision
 
 exception InternalError of string
 
-type int_vec = (int32, int32_elt, fortran_layout) Array1.t
+type int_vec = (int, int_elt, fortran_layout) Array1.t
 
-let create_int_vec n = Array1.create int32 fortran_layout n
+let create_int_vec n = Array1.create int fortran_layout n
+
+type int32_vec = (int32, int32_elt, fortran_layout) Array1.t
+
+let create_int32_vec n = Array1.create int32 fortran_layout n
 
 let mat_from_vec v =
   array2_of_genarray (reshape (genarray_of_array1 v) [| Array1.dim v; 1 |])
 (** [InternalError msg] gets raised when BLAS or LAPACK exhibit undefined
     behaviour. *)
 
-type int_vec = (int32, int32_elt, fortran_layout) Array1.t
-(** Type of 32bit Fortran integer vectors. *)
+type int_vec = (int, int_elt, fortran_layout) Array1.t
+(** Type of OCaml integer vectors. *)
 
 val create_int_vec : int -> int_vec
 (** [create_int_vec n] @return an int-vector with [n] rows. *)
 
+type int32_vec = (int32, int32_elt, fortran_layout) Array1.t
+(** Type of 32bit Fortran integer vectors. *)
+
+val create_int32_vec : int -> int32_vec
+(** [create_int32_vec n] @return an int32-vector with [n] rows. *)
+
 val mat_from_vec : ('a, 'b, 'c) Array1.t -> ('a, 'b, 'c) Array2.t
 (** [mat_from_vec a] converts the vector [a] into a matrix with [Array1.dim a]
     rows and 1 column.  The data is shared between the two matrices. *)
   ar : int ->
   ac : int ->
   a : mat ->
-  ipiv : int_vec ->
+  ipiv : int32_vec ->
   work : vec ->
   anorm : float ->
   int * float = "lacaml_CPRECsycon_stub_bc" "lacaml_CPRECsycon_stub"
 val sycon :
     ?n : int ->
     ?up : bool ->
-    ?ipiv : int_vec ->
+    ?ipiv : int32_vec ->
     ?anorm : float ->
     ?work : vec ->
     ?ar : int ->
   ac : int ->
   a : mat ->
   work : vec ->
-  iwork : int_vec ->
+  iwork : int32_vec ->
   norm : char ->
   anorm : float ->
   int * float = "lacaml_FPRECgecon_stub_bc" "lacaml_FPRECgecon_stub"
   in
   let iwork, _liwork =
     get_work
-      loc Common.create_int_vec iwork
+      loc Common.create_int32_vec iwork
       (gecon_min_liwork n) (gecon_min_liwork n) liwork_str in
   let anorm =
     match anorm with
   ar : int ->
   ac : int ->
   a : mat ->
-  ipiv : int_vec ->
+  ipiv : int32_vec ->
   work : vec ->
-  iwork : int_vec ->
+  iwork : int32_vec ->
   anorm : float ->
   int * float = "lacaml_FPRECsycon_stub_bc" "lacaml_FPRECsycon_stub"
 
       loc Vec.create work (sycon_min_lwork n) (sycon_min_lwork n) lwork_str in
   let iwork, _liwork =
     get_work
-      loc Common.create_int_vec iwork
+      loc Common.create_int32_vec iwork
       (sycon_min_liwork n) (sycon_min_liwork n) liwork_str in
   let ipiv =
     if ipiv = None then sytrf ~n ~up ~work ~ar ~ac a
   ac : int ->
   a : mat ->
   work : vec ->
-  iwork : int_vec ->
+  iwork : int32_vec ->
   anorm : float ->
   int * float = "lacaml_FPRECpocon_stub_bc" "lacaml_FPRECpocon_stub"
 
       loc Vec.create work (pocon_min_lwork n) (pocon_min_lwork n) lwork_str in
   let iwork, _liwork =
     get_work
-      loc Common.create_int_vec iwork
+      loc Common.create_int32_vec iwork
       (pocon_min_liwork n) (pocon_min_liwork n) liwork_str in
   let anorm =
     match anorm with
   a : mat ->
   m : int ->
   n : int ->
-  jpvt : int_vec ->
+  jpvt : int32_vec ->
   rcond : float ->
   work : vec ->
   lwork : int ->
   let work = Vec.create 1 in
   let info, _ =
     direct_gelsy
-      ~ar ~ac ~a ~m ~n ~jpvt:empty_int_vec
+      ~ar ~ac ~a ~m ~n ~jpvt:empty_int32_vec
       ~rcond:(-1.0) ~work ~lwork:~-1 ~nrhs ~br ~bc ~b
   in
   if info = 0 then int_of_float work.{1}
           invalid_arg (sprintf "%s: jpvt: valid=[%d..[ got=%d" loc n dim_jpvt)
         else jpvt
     | None ->
-        let jpvt = create_int_vec n in
+        let jpvt = create_int32_vec n in
         Array1.fill jpvt Int32.zero;
         jpvt in
 
   vt : mat ->
   work : vec ->
   lwork : int ->
-  iwork : int_vec
+  iwork : int32_vec
   -> int = "lacaml_FPRECgesdd_stub_bc" "lacaml_FPRECgesdd_stub"
 
 let gesdd_min_lwork ?(jobz = `A) ~m ~n () =
         invalid_arg
           (sprintf "%s: liwork: valid=[%d..[ got=%d" loc min_liwork liwork);
       iwork
-  | None -> create_int_vec min_liwork
+  | None -> create_int32_vec min_liwork
 
 let gesdd_min_lwork_char jobz ~m ~n =
   let jobz =
   w : vec ->
   work : vec ->
   lwork : int ->
-  iwork : int_vec ->
+  iwork : int32_vec ->
   liwork : int
   -> int = "lacaml_FPRECsyevd_stub_bc" "lacaml_FPRECsyevd_stub"
 
 
 let syevd_get_opt_l_li_work loc ar ac a n vectors jobz uplo =
   let work = Vec.create 1 in
-  let iwork = Common.create_int_vec 1 in
+  let iwork = Common.create_int32_vec 1 in
   let info =
     direct_syevd
       ~ar ~ac ~a ~n ~jobz ~uplo ~ofsw:1 ~w:Vec.empty
     | Some work, None ->
         let lwork = Array1.dim work in
         let liwork = syevd_get_opt_liwork loc ar ac a n vectors jobz uplo in
-        let iwork = Common.create_int_vec liwork in
+        let iwork = Common.create_int32_vec liwork in
         work, iwork, lwork, liwork
     | None, Some iwork ->
         let lwork = syevd_get_opt_lwork loc ar ac a n vectors jobz uplo in
         let lwork, liwork =
           syevd_get_opt_l_li_work loc ar ac a n vectors jobz uplo in
         let work = Vec.create lwork in
-        let iwork = Common.create_int_vec liwork in
+        let iwork = Common.create_int32_vec liwork in
         work, iwork, lwork, liwork in
   let info =
     direct_syevd ~ar ~ac ~a ~n ~jobz ~uplo ~ofsw ~w ~work ~lwork ~iwork ~liwork
   zr : int ->
   zc : int ->
   z : mat ->
-  isuppz : int_vec ->
+  isuppz : int32_vec ->
   work : vec ->
   lwork : int ->
-  iwork : int_vec ->
+  iwork : int32_vec ->
   liwork : int
   -> int * int = "lacaml_FPRECsyevr_stub_bc" "lacaml_FPRECsyevr_stub"
 
 let syevr_get_opt_l_li_work
       loc ar ac a n jobz range uplo vl vu il iu abstol ofsw w zr zc z isuppz =
   let work = Vec.create 1 in
-  let iwork = Common.create_int_vec 1 in
+  let iwork = Common.create_int32_vec 1 in
   let info, _ =
     direct_syevr
       ~ar ~ac ~a ~n
   let z = Mat.create n 0 in
   let ofsw = 1 in
   let w = Vec.empty in
-  let isuppz = empty_int_vec in
+  let isuppz = empty_int32_vec in
   syevr_get_opt_l_li_work
     loc ar ac a n jobz range uplo vl vu il iu abstol ofsw w zr zc z isuppz
 
     let min_lisuppz_1 = max 1 m in
     let min_lisuppz = min_lisuppz_1 + min_lisuppz_1 in
     match isuppz with
-    | None -> create_int_vec min_lisuppz
+    | None -> create_int32_vec min_lisuppz
     | Some isuppz ->
         let lisuppz = Array1.dim isuppz in
         if lisuppz < min_lisuppz then
           syevr_get_opt_liwork
             loc ar ac a n
             jobz range uplo vl vu il iu abstol ofsw w zr zc z isuppz in
-        let iwork = Common.create_int_vec liwork in
+        let iwork = Common.create_int32_vec liwork in
         work, iwork, lwork, liwork
     | None, Some iwork ->
         let lwork =
             loc ar ac a n
             jobz range uplo vl vu il iu abstol ofsw w zr zc z isuppz in
         let work = Vec.create lwork in
-        let iwork = Common.create_int_vec liwork in
+        let iwork = Common.create_int32_vec liwork in
         work, iwork, lwork, liwork in
   let info, m =
     direct_syevr
   ?norm : norm2 ->
   ?anorm : float ->
   ?work : vec ->
-  ?iwork : int_vec ->
+  ?iwork : int32_vec ->
   ?ar : int ->
   ?ac : int ->
   mat
 val sycon :
     ?n : int ->
     ?up : bool ->
-    ?ipiv : int_vec ->
+    ?ipiv : int32_vec ->
     ?anorm : float ->
     ?work : vec ->
-    ?iwork : int_vec ->
+    ?iwork : int32_vec ->
     ?ar : int ->
     ?ac : int ->
     mat
     ?up : bool ->
     ?anorm : float ->
     ?work : vec ->
-    ?iwork : int_vec ->
+    ?iwork : int32_vec ->
     ?ar : int ->
     ?ac : int ->
     mat
   ?ac : int ->
   mat ->
   ?rcond : float ->
-  ?jpvt : int_vec ->
+  ?jpvt : int32_vec ->
   ?work : vec ->
   ?nrhs : int ->
   ?br : int ->
   ?s : vec ->
   ?ur : int -> ?uc : int -> ?u : mat ->
   ?vtr : int -> ?vtc : int -> ?vt : mat ->
-  ?iwork : int_vec ->
+  ?iwork : int32_vec ->
   ?ar : int -> ?ac : int -> mat
   -> int
 
   ?ur : int -> ?uc : int -> ?u : mat ->
   ?vtr : int -> ?vtc : int -> ?vt : mat ->
   ?work : vec ->
-  ?iwork : int_vec ->
+  ?iwork : int32_vec ->
   ?ar : int -> ?ac : int -> mat
   -> vec * mat * mat
 
   ?vectors : bool ->
   ?up : bool ->
   ?work : vec ->
-  ?iwork : int_vec ->
+  ?iwork : int32_vec ->
   ?ofsw : int ->
   ?w : vec ->
   ?ar : int ->
     @param vectors default = false i.e, eigenvectors are not computed
     @param up default = true i.e., upper triangle of [a] is stored
     @param work default = vec of optimum length (-> {!syev_opt_lwork})
-    @param iwork default = int_vec of optimum length (-> {!syevd_opt_liwork})
+    @param iwork default = int32_vec of optimum length (-> {!syevd_opt_liwork})
     @param ofsw default = 1 or ignored if [w] is not given
     @param w default = vec of length [n] *)
 
   ?up : bool ->
   ?abstol : float ->
   ?work : vec ->
-  ?iwork : int_vec ->
+  ?iwork : int32_vec ->
   ?ofsw : int ->
   ?w : vec ->
   ?zr : int ->
   ?zc : int ->
   ?z : mat ->
-  ?isuppz : int_vec ->
+  ?isuppz : int32_vec ->
   ?ar : int ->
   ?ac : int ->
   mat
-  -> int * vec * mat * int_vec
+  -> int * vec * mat * int32_vec
 (** [syevr
       ?n ?vectors ?range ?up ?abstol ?work ?iwork
       ?ofsw ?w ?zr ?zc ?z ?isuppz ?ar ?ac a]
     @param up default = true i.e., upper triangle of [a] is stored
     @param abstol default = result of calling [lamch `S]
     @param work default = vec of optimum length (-> {!syev_opt_lwork})
-    @param iwork default = int_vec of optimum length (-> {!syevr_opt_liwork})
+    @param iwork default = int32_vec of optimum length (-> {!syevr_opt_liwork})
     @param ofsw default = 1 or ignored if [w] is not given
     @param w default = vec of length [n]
     @param zr default = 1
     @param zc default = 1
     @param z default = matrix with minimal required dimension
-    @param isuppz default = [int_vec] with minimal required dimension
+    @param isuppz default = [int32_vec] with minimal required dimension
     @param ar default = 1
     @param ac default = 1 *)
 
 external direct_larnv :
   idist : int ->
   ofsiseed : int ->
-  iseed : int_vec ->
+  iseed : int32_vec ->
   n : int ->
   ofsx : int ->
   x : vec ->
   let iseed =
     match iseed with
     | None ->
-        let iseed = create_int_vec (ofsiseed + 3) in
+        let iseed = create_int32_vec (ofsiseed + 3) in
         for i = ofsiseed to ofsiseed + 3 do iseed.{i} <- Int32.one done;
         iseed
     | Some iseed ->
   ar : int ->
   ac : int ->
   a : mat ->
-  ipiv : int_vec ->
+  ipiv : int32_vec ->
   int = "lacaml_NPRECgetrf_stub_bc" "lacaml_NPRECgetrf_stub"
 
 let getrf ?m ?n ?ipiv ?(ar = 1) ?(ac = 1) a =
   br : int ->
   bc : int ->
   b : mat ->
-  ipiv : int_vec ->
+  ipiv : int32_vec ->
   int = "lacaml_NPRECgetrs_stub_bc" "lacaml_NPRECgetrs_stub"
 
 let getrs
   ar : int ->
   ac : int ->
   a : mat ->
-  ipiv : int_vec ->
+  ipiv : int32_vec ->
   work : vec ->
   lwork : int ->
   int = "lacaml_NPRECgetri_stub_bc" "lacaml_NPRECgetri_stub"
 
 let getri_get_opt_lwork loc n ar ac a =
   let work = Vec.create 1 in
-  let info = direct_getri ~n ~ar ~ac ~a ~ipiv:empty_int_vec ~work ~lwork:~-1 in
+  let info = direct_getri ~n ~ar ~ac ~a ~ipiv:empty_int32_vec ~work ~lwork:~-1 in
   if info = 0 then int_of_numberxx work.{1}
   else getri_err loc getri_min_lwork n a 1 info
 
   ar : int ->
   ac : int ->
   a : mat ->
-  ipiv : int_vec ->
+  ipiv : int32_vec ->
   work : vec ->
   lwork : int ->
   int = "lacaml_NPRECsytrf_stub_bc" "lacaml_NPRECsytrf_stub"
 let sytrf_get_opt_lwork loc uplo n ar ac a =
   let work = Vec.create 1 in
   let info =
-    direct_sytrf ~uplo ~n ~ar ~ac ~a ~ipiv:empty_int_vec ~work ~lwork:~-1
+    direct_sytrf ~uplo ~n ~ar ~ac ~a ~ipiv:empty_int32_vec ~work ~lwork:~-1
   in
   if info = 0 then int_of_numberxx work.{1}
   else sytrf_err loc n a info
   br : int ->
   bc : int ->
   b : mat ->
-  ipiv : int_vec ->
+  ipiv : int32_vec ->
   int = "lacaml_NPRECsytrs_stub_bc" "lacaml_NPRECsytrs_stub"
 
 let sytrs
   ar : int ->
   ac : int ->
   a : mat ->
-  ipiv : int_vec ->
+  ipiv : int32_vec ->
   work : vec ->
   int = "lacaml_NPRECsytri_stub_bc" "lacaml_NPRECsytri_stub"
 
   ac : int ->
   a : mat ->
   n : int ->
-  ipiv : int_vec ->
+  ipiv : int32_vec ->
   nrhs : int ->
   br : int ->
   bc : int ->
   n : int ->
   kl : int ->
   ku : int ->
-  ipiv : int_vec ->
+  ipiv : int32_vec ->
   nrhs : int ->
   br : int ->
   bc : int ->
   a : mat ->
   n : int ->
   uplo : char ->
-  ipiv : int_vec ->
+  ipiv : int32_vec ->
   work : vec ->
   lwork : int ->
   nrhs : int ->
   let info =
     direct_sysv
       ~ar ~ac ~a ~n ~uplo
-      ~ipiv:empty_int_vec ~work ~lwork:~-1 ~nrhs ~br ~bc ~b
+      ~ipiv:empty_int32_vec ~work ~lwork:~-1 ~nrhs ~br ~bc ~b
   in
   if info = 0 then int_of_numberxx work.{1}
   else xxsv_err loc n nrhs b (info + 1)
   ap : vec ->
   n : int ->
   uplo : char ->
-  ipiv : int_vec ->
+  ipiv : int32_vec ->
   nrhs : int ->
   br : int ->
   bc : int ->

lib/impl_SDCZ.mli

 val larnv :
   ?idist : [ `Uniform0 | `Uniform1 | `Normal ] ->
   ?ofsiseed : int ->
-  ?iseed : int_vec ->
+  ?iseed : int32_vec ->
   ?n : int ->
   ?ofsx : int ->
   ?x : vec ->
 val getrf :
   ?m : int ->
   ?n : int ->
-  ?ipiv : int_vec ->
+  ?ipiv : int32_vec ->
   ?ar : int ->
   ?ac : int ->
   mat ->
-  int_vec
+  int32_vec
 (** [getrf ?m ?n ?ipiv ?ar ?ac a] computes an LU factorization of a
     general [m]-by-[n] matrix [a] using partial pivoting with row
     interchanges.  See LAPACK documentation.
 
 val getrs :
   ?n : int ->
-  ?ipiv : int_vec ->
+  ?ipiv : int32_vec ->
   ?trans : trans3 ->
   ?ar : int ->
   ?ac : int ->
 
 val getri :
   ?n : int ->
-  ?ipiv : int_vec ->
+  ?ipiv : int32_vec ->
   ?work : vec ->
   ?ar : int ->
   ?ac : int ->
 val sytrf :
   ?n : int ->
   ?up : bool ->
-  ?ipiv : int_vec ->
+  ?ipiv : int32_vec ->
   ?work : vec ->
   ?ar : int ->
   ?ac : int ->
   mat ->
-  int_vec
+  int32_vec
 (** [sytrf ?n ?up ?ipiv ?work ?ar ?ac a] computes the factorization of
     the real symmetric matrix [a] using the Bunch-Kaufman diagonal
     pivoting method.
 val sytrs :
   ?n : int ->
   ?up : bool ->
-  ?ipiv : int_vec ->
+  ?ipiv : int32_vec ->
   ?ar : int ->
   ?ac : int ->
   mat ->
 val sytri :
   ?n : int ->
   ?up : bool ->
-  ?ipiv : int_vec ->
+  ?ipiv : int32_vec ->
   ?work : vec ->
   ?ar : int ->
   ?ac : int ->
 
 val gesv :
   ?n : int ->
-  ?ipiv : int_vec ->
+  ?ipiv : int32_vec ->
   ?ar : int ->
   ?ac : int ->
   mat ->
 
 val gbsv :
   ?n : int ->
-  ?ipiv : int_vec ->
+  ?ipiv : int32_vec ->
   ?abr : int ->
   ?abc : int ->
   mat ->
 val sysv :
   ?n : int ->
   ?up : bool ->
-  ?ipiv : int_vec ->
+  ?ipiv : int32_vec ->
   ?work : vec ->
   ?ar : int ->
   ?ac : int ->
 val spsv :
   ?n : int ->
   ?up : bool ->
-  ?ipiv : int_vec ->
+  ?ipiv : int32_vec ->
   ?ofsap : int ->
   vec ->
   ?nrhs : int ->
 open Common
 
 (* Zero-sized dummy vector (int) *)
-let empty_int_vec = create_int_vec 0
+let empty_int32_vec = create_int32_vec 0
 
 (* Char indicating if matrix is unit triangular *)
 let get_diag_char = function true -> 'U' | false -> 'N'
 
 let getrf_get_ipiv loc ipiv m n =
   match ipiv with
-  | None -> create_int_vec (min m n)
+  | None -> create_int32_vec (min m n)
   | Some ipiv ->
       check_vec loc ipiv_str ipiv (min m n);
       ipiv
 
 let sytrf_get_ipiv loc ipiv n =
   match ipiv with
-  | None -> create_int_vec n
+  | None -> create_int32_vec n
   | Some ipiv ->
       check_vec loc ipiv_str ipiv n;
       ipiv
 
 let xxsv_get_ipiv loc ipiv n =
   match ipiv with
-  | None -> create_int_vec n
+  | None -> create_int32_vec n
   | Some ipiv ->
       check_vec loc ipiv_str ipiv n;
       ipiv
   acc.r += x.r*x.r - y.i*y.i; \
   acc.i += x.r*y.i + x.i*y.r
 #include "fold2_col.c"
+
+/* Since executing the (small) callback may dominate the running time,
+ * specialize the function when the order is the lexicographical one
+ * on complex numbers.  In this case the callback is not used. */
+
+/* NaN are put last (greater than anything) to ensure the algo termination.
+   If both a and b are NaN, return false (consider NaN equal for this). */
+#define ANY_NAN(x) (isnan(x.r) || isnan(x.i))
+#define NAN_LAST(a, b, SORT)                                            \
+  (ANY_NAN(b) ? (!ANY_NAN(a)) : (!ANY_NAN(a) && (SORT)))
+
+#define NAME LFUN(sort_incr)
+#define NAME_PERM LFUN(sort_incr_perm)
+#define BC_NAME_PERM LFUN(sort_incr_perm_bc)
+#define OCAML_SORT_LT(a, b) \
+  NAN_LAST(a, b, a.r < b.r || (a.r == b.r && a.i < b.i))
+#include "vec_sort.c"
+
+#define NAME LFUN(sort_decr)
+#define NAME_PERM LFUN(sort_decr_perm)
+#define BC_NAME_PERM LFUN(sort_decr_perm_bc)
+#define OCAML_SORT_LT(a, b) \
+  NAN_LAST(a, b, a.r > b.r || (a.r == b.r && a.i > b.i))
+#include "vec_sort.c"
+
+#define NAME LFUN(sort)
+#define NAME_PERM LFUN(sort_perm)
+#define BC_NAME_PERM LFUN(sort_perm_bc)
+#define OCAML_SORT_LT(a, b)                                     \
+  NAN_LAST(a, b, (va = copy_two_doubles(a.r, a.i),              \
+                  vb = copy_two_doubles(b.r, b.i),              \
+                  Int_val(caml_callback2(vCMP, va, vb)) < 0))
+#define OCAML_SORT_CALLBACK
+#include "vec_sort.c"
+#undef OCAML_SORT_CALLBACK
 
      Christophe Troestler
      email: Christophe.Troestler@umons.ac.be
-     WWW: http://math.umh.ac.be/an/
+     WWW: http://math.umons.ac.be/anum/
 
    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public

File contents unchanged.

    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *)
 
+open Printf
 open Bigarray
-open Printf
+open Numberxx
+open Common
 open Utils
-open Numberxx
 
 (* Creation of vectors and dimension accessor *)
 
 
 (* SORT *)
 
+external direct_sort_incr :
+  cmp : (num_type -> num_type -> int) -> (* not used, ususal order *)
+  n : int ->
+  ofsx : int ->
+  incx : int ->
+  x : vec ->
+  unit = "lacaml_NPRECsort_incr"
+
+external direct_sort_incr_perm :
+  cmp : (num_type -> num_type -> int) -> (* not used, ususal order *)
+  n : int ->
+  ofsp : int ->
+  incp : int ->
+  p : int_vec ->
+  ofsx : int ->
+  incx : int ->
+  x : vec ->
+  unit = "lacaml_NPRECsort_incr_perm_bc" "lacaml_NPRECsort_incr_perm"
+
+external direct_sort_decr :
+  cmp : (num_type -> num_type -> int) -> (* not used, usual decreasing order *)
+  n : int ->
+  ofsx : int ->
+  incx : int ->
+  x : vec ->
+  unit = "lacaml_NPRECsort_decr"
+
+external direct_sort_decr_perm :
+  cmp : (num_type -> num_type -> int) -> (* not used, ususal order *)
+  n : int ->
+  ofsp : int ->
+  incp : int ->
+  p : int_vec ->
+  ofsx : int ->
+  incx : int ->
+  x : vec ->
+  unit = "lacaml_NPRECsort_decr_perm_bc" "lacaml_NPRECsort_decr_perm"
+
+external direct_sort :
+  cmp : (num_type -> num_type -> int) ->
+  n : int ->
+  ofsx : int ->
+  incx : int ->
+  x : vec ->
+  unit = "lacaml_NPRECsort"
+
+external direct_sort_perm :
+  cmp : (num_type -> num_type -> int) -> (* not used, ususal order *)
+  n : int ->
+  ofsp : int ->
+  incp : int ->
+  p : int_vec ->
+  ofsx : int ->
+  incx : int ->
+  x : vec ->
+  unit = "lacaml_NPRECsort_perm_bc" "lacaml_NPRECsort_perm"
+
+let vec_sort_str = "Vec.sort"
+let p_str = "p"
+let dummy_cmp _ _ = 0
+
+let sort ?cmp ?(decr = false) ?n ?ofsp ?incp ?p ?ofsx ?incx x =
+  let ofsx, incx = get_vec_geom vec_sort_str x_str ofsx incx in
+  let n = get_dim_vec vec_sort_str x_str ofsx incx x n_str n in
+  match p with
+  | None ->
+      (match cmp with
+       | None ->
+           if decr then direct_sort_decr ~cmp:dummy_cmp ~n ~ofsx ~incx ~x
+           else direct_sort_incr ~cmp:dummy_cmp ~n ~ofsx ~incx ~x
+       | Some cmp ->
+           let cmp = if decr then (fun x1 x2 -> cmp x2 x1) else cmp in
+           direct_sort ~cmp ~n ~ofsx ~incx ~x
+      )
+  | Some p ->
+      let ofsp, incp = get_vec_geom vec_sort_str p_str ofsp incp in
+      check_vec vec_sort_str p_str p n;
+      (match cmp with
+       | None ->
+           if decr then direct_sort_decr_perm ~cmp:dummy_cmp ~n
+                                              ~ofsp ~incp ~p ~ofsx ~incx ~x
+           else direct_sort_incr_perm ~cmp:dummy_cmp ~n
+                                      ~ofsp ~incp ~p ~ofsx ~incx ~x
+       | Some cmp ->
+           let cmp = if decr then (fun x1 x2 -> cmp x2 x1) else cmp in
+           direct_sort_perm ~cmp ~n ~ofsp ~incp ~p ~ofsx ~incx ~x
+      )
+
 
 (* NEG *)
 
 *)
 
 open Bigarray
+open Common
 open Numberxx
 
 (** {6 Creation/conversion of vectors and dimension accessor} *)
     @param incx default = 1
 *)
 
+val sort :
+  ?cmp : (num_type -> num_type -> int) ->
+  ?decr : bool ->
+  ?n : int ->
+  ?ofsp : int ->
+  ?incp : int ->
+  ?p : int_vec ->
+  ?ofsx : int ->
+  ?incx : int ->
+  vec
+  -> unit
+(** [sort ?cmp ?n ?ofsx ?incx x] sorts the array [x] in increasing
+    order according to the comparison function [cmp].
+
+    @param cmp a function such that [cmp a b < 0] if [a] is less than
+      [b], [cmp a b = 0] if [a] equal [b] and [cmp a b > 0] if [a] is
+      greater than [b] for the desired order.  Default: the usual
+      order on floating point values or the lexicographic order on
+      complex ones (a special routine makes it fast).  Whatever the
+      order you choose, NaNs (in any component for complex numbers)
+      are considered larger than any other value (so they will be
+      last, in no specified order, in the sorted vector).  Therefore,
+      NaN are never passed to [cmp].
+
+    @param p if you pass a vector of size [ofsp+(n - 1)(abs incp)],
+      the vector [x] will be unchanged and the permutation to sort it
+      will be stored in [p].  Thus [x.{p.{ofsp + (i-1) * incp}}] will
+      give the elements of [x] in increasing order.  Default: no
+      vector is provided.
+
+    @param decr sort in decreasing order (stays fast for the default [cmp]).
+    @param n default = greater [n] s.t. [ofsx+(n-1)(abs incx) <= dim x]
+    @param ofsp default = 1
+    @param incp default = 1
+    @param ofsx default = 1
+    @param incx default = 1
+ *)
+
 
 (** {6 Operations on two vectors} *)
 
 #define INIT 0.0
 #define FUNC(acc, x, y) x -= y; x *= x; acc += x
 #include "fold2_col.c"
+
+/* Since executing the (small) callback may dominate the running time,
+ * specialize the function when the order is the usual one on floats.
+ * In this case the callback is not used. */
+
+/* NaN are put last (greater than anything) to ensure the algo termination.
+   If both a and b are NaN, return false (consider NaN equal for this). */
+#define NAN_LAST(a, b, SORT)                            \
+  (isnan(b) ? (!isnan(a)) : (!isnan(a) && (SORT)))
+
+#define NAME LFUN(sort_incr)
+#define NAME_PERM LFUN(sort_incr_perm)
+#define BC_NAME_PERM LFUN(sort_incr_perm_bc)
+#define OCAML_SORT_LT(a, b) NAN_LAST(a, b, a < b)
+#include "vec_sort.c"
+
+#define NAME LFUN(sort_decr)
+#define NAME_PERM LFUN(sort_decr_perm)
+#define BC_NAME_PERM LFUN(sort_decr_perm_bc)
+#define OCAML_SORT_LT(a, b) NAN_LAST(a, b, a > b)
+#include "vec_sort.c"
+
+#define NAME LFUN(sort)
+#define NAME_PERM LFUN(sort_perm)
+#define BC_NAME_PERM LFUN(sort_perm_bc)
+#define OCAML_SORT_LT(a, b)                                     \
+  NAN_LAST(a, b, (va = caml_copy_double(a),                     \
+                  vb = caml_copy_double(b),                     \
+                  Int_val(caml_callback2(vCMP, va, vb)) < 0))
+#define OCAML_SORT_CALLBACK
+#include "vec_sort.c"
+#undef OCAML_SORT_CALLBACK
+/* File: vec_sort.c
+
+   Copyright (C) 2012-
+
+     Christophe Troestler
+     email: Christophe.Troestler@umons.ac.be
+     WWW: http://math.umons.ac.be/anum/
+
+   This library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2 of the License, or (at your option) any later version.
+
+   This library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with this library; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+/* Implementation of quicksort based on the glibc one. */
+
+#ifndef _VEC_SORT
+#define _VEC_SORT
+
+#define SWAP(TY, a, b)                          \
+  do {                                          \
+    TY tmp = *a;                                \
+    *a = *b;                                    \
+    *b = tmp;                                   \
+  } while(0)
+
+/* Discontinue quicksort algorithm when partition gets below this size.
+   This particular magic number was chosen to work best on a Sun 4/260. */
+#define MAX_THRESH 4
+
+/* The next 4 #defines implement a very fast in-line stack abstraction
+   to store unfulfilled partition obligations.  The stack node
+   declaration is performed in the body of the function below.
+
+     typedef struct
+       {
+         NUMBER *lo;
+         NUMBER *hi;
+       } stack_node;
+*/
+/* The stack needs log (total_elements) entries (we could even subtract
+   log(MAX_THRESH)).  Since total_elements has type size_t, we get as
+   upper bound for log (total_elements):
+   bits per byte (CHAR_BIT) * sizeof(size_t).  */
+#define STACK_SIZE      (8 * sizeof(size_t))
+#define PUSH(low, high) ((void) ((top->lo = (low)), (top->hi = (high)), ++top))
+#define POP(low, high)  ((void) (--top, (low = top->lo), (high = top->hi)))
+#define STACK_NOT_EMPTY (stack < top)
+
+#endif /* _VEC_SORT */
+
+#define QUICKSORT(TY, base_ptr, INCX, max_thresh)                       \
+  if (N > MAX_THRESH)                                                   \
+    {                                                                   \
+      TY *lo = base_ptr;                                                \
+      TY *hi = &lo[(N - 1) * INCX];                                     \
+      struct {                                                          \
+        TY *lo;                                                         \
+        TY *hi;                                                         \
+      } stack[STACK_SIZE], *top = stack;                                \
+                                                                        \
+      PUSH (NULL, NULL);                                                \
+                                                                        \
+      while (STACK_NOT_EMPTY)                                           \
+        {                                                               \
+          TY *left_ptr;                                                 \
+          TY *right_ptr;                                                \
+                                                                        \
+          /* Select median value from among LO, MID, and HI. Rearrange  \
+             LO and HI so the three values are sorted. This lowers the  \
+             probability of picking a pathological pivot value and      \
+             skips a comparison for both the LEFT_PTR and RIGHT_PTR in  \
+             the while loops. */                                        \
+                                                                        \
+          TY *mid = lo + INCX * ((hi - lo) / INCX >> 1);                \
+                                                                        \
+          if (QUICKSORT_LT(mid, lo)) {                                  \
+            SWAP (TY, mid, lo);                                         \
+          }                                                             \
+          if (QUICKSORT_LT(hi, mid)) {                                  \
+            SWAP (TY, mid, hi);                                         \
+          }                                                             \
+          else                                                          \
+            goto jump_over;                                             \
+          if (QUICKSORT_LT(mid, lo)) {                                  \
+            SWAP (TY, mid, lo);                                         \
+          }                                                             \
+          jump_over:;                                                   \
+                                                                        \
+          left_ptr  = lo + INCX;                                        \
+          right_ptr = hi - INCX;                                        \
+                                                                        \
+          /* Here's the famous ``collapse the walls'' section of quicksort. \
+             Gotta like those tight inner loops!  They are the main reason \
+             that this algorithm runs much faster than others. */       \
+          do                                                            \
+            {                                                           \
+              while (QUICKSORT_LT(left_ptr, mid))                       \
+                left_ptr += INCX;                                       \
+                                                                        \
+              while (QUICKSORT_LT(mid, right_ptr))                      \
+                right_ptr -= INCX;                                      \
+                                                                        \
+              if (left_ptr < right_ptr)                                 \
+                {                                                       \
+                  SWAP (TY, left_ptr, right_ptr);                       \
+                  if (mid == left_ptr)                                  \
+                    mid = right_ptr;                                    \
+                  else if (mid == right_ptr)                            \
+                    mid = left_ptr;                                     \
+                  left_ptr += INCX;                                     \
+                  right_ptr -= INCX;                                    \
+                }                                                       \
+              else if (left_ptr == right_ptr)                           \
+                {                                                       \
+                  left_ptr += INCX;                                     \
+                  right_ptr -= INCX;                                    \
+                  break;                                                \
+                }                                                       \
+            }                                                           \
+          while (left_ptr <= right_ptr);                                \
+                                                                        \
+          /* Set up pointers for next iteration.  First determine whether \
+             left and right partitions are below the threshold size.  If so, \
+             ignore one or both.  Otherwise, push the larger partition's \
+             bounds on the stack and continue sorting the smaller one. */ \
+                                                                        \
+          if ((size_t) (right_ptr - lo) <= max_thresh)                  \
+            {                                                           \
+              if ((size_t) (hi - left_ptr) <= max_thresh)               \
+                /* Ignore both small partitions. */                     \
+                POP (lo, hi);                                           \
+              else                                                      \
+                /* Ignore small left partition. */                      \
+                lo = left_ptr;                                          \
+            }                                                           \
+          else if ((size_t) (hi - left_ptr) <= max_thresh)              \
+            /* Ignore small right partition. */                         \
+            hi = right_ptr;                                             \
+          else if ((right_ptr - lo) > (hi - left_ptr))                  \
+            {                                                           \
+              /* Push larger left partition indices. */                 \
+              PUSH (lo, right_ptr);                                     \
+              lo = left_ptr;                                            \
+            }                                                           \
+          else                                                          \
+            {                                                           \
+              /* Push larger right partition indices. */                \
+              PUSH (left_ptr, hi);                                      \
+              hi = right_ptr;                                           \
+            }                                                           \
+        }                                                               \
+    }                                                                   \
+                                                                        \
+  /* Once the BASE_PTR array is partially sorted by quicksort the rest  \
+     is completely sorted using insertion sort, since this is efficient \
+     for partitions below MAX_THRESH size. BASE_PTR points to the beginning \
+     of the array to sort, and END_PTR points at the very last element in \
+     the array (*not* one beyond it!). */                               \
+  {                                                                     \
+    TY *const end_ptr = &base_ptr[(N - 1) * INCX];                      \
+    TY *tmp_ptr = base_ptr;                                             \
+    TY *thresh = /* min(end_ptr, base_ptr + max_thresh) */              \
+      end_ptr < (base_ptr + max_thresh) ? end_ptr : (base_ptr + max_thresh); \
+    register TY *run_ptr;                                               \
+                                                                        \
+    /* Find smallest element in first threshold and place it at the     \
+       array's beginning.  This is the smallest array element,          \
+       and the operation speeds up insertion sort's inner loop. */      \
+                                                                        \
+    for (run_ptr = tmp_ptr + INCX; run_ptr <= thresh; run_ptr += INCX)  \
+      if (QUICKSORT_LT(run_ptr, tmp_ptr))                               \
+        tmp_ptr = run_ptr;                                              \
+                                                                        \
+    if (tmp_ptr != base_ptr) {                                          \
+      SWAP (TY, tmp_ptr, base_ptr);                                     \
+    }                                                                   \
+                                                                        \
+    /* Insertion sort, running from left-hand-side up to right-hand-side.  */ \
+                                                                        \
+    run_ptr = base_ptr + INCX;                                          \
+    while ((run_ptr += INCX) <= end_ptr)                                \
+      {                                                                 \
+        tmp_ptr = run_ptr - INCX;                                       \
+        while (QUICKSORT_LT(run_ptr, tmp_ptr))                          \
+          tmp_ptr -= INCX;                                              \
+                                                                        \
+        tmp_ptr += INCX;                                                \
+        if (tmp_ptr != run_ptr)                                         \
+          {                                                             \
+            TY *trav;                                                   \
+                                                                        \
+            trav = run_ptr + INCX;                                      \
+            while (--trav >= run_ptr)                                   \
+              {                                                         \
+                TY c = *trav;                                           \
+                TY *hi, *lo;                                            \
+                                                                        \
+                for (hi = lo = trav; (lo -= INCX) >= tmp_ptr; hi = lo)  \
+                  *hi = *lo;                                            \
+                *hi = c;                                                \
+              }                                                         \
+          }                                                             \
+      }                                                                 \
+  }
+
+
+CAMLprim value NAME(value vCMP, value vN,
+                    value vOFSX, value vINCX, value vX)
+{
+  CAMLparam2(vCMP, vX);
+#if defined(OCAML_SORT_CALLBACK)
+  CAMLlocal2(va, vb);
+#endif
+  const size_t GET_INT(N);
+  int GET_INT(INCX);
+  VEC_PARAMS(X);
+
+  NUMBER *const base_ptr = X_data;
+  const size_t max_thresh = MAX_THRESH * sizeof(NUMBER) * INCX;
+
+  if (N == 0) CAMLreturn(Val_unit);
+
+#ifndef OCAML_SORT_CALLBACK
+  caml_enter_blocking_section();  /* Allow other threads */
+#endif
+
+#define QUICKSORT_LT(a, b) OCAML_SORT_LT((*a), (*b))
+  QUICKSORT(NUMBER, base_ptr, INCX, max_thresh);
+#undef QUICKSORT_LT
+
+#ifndef OCAML_SORT_CALLBACK
+  caml_leave_blocking_section();  /* Disallow other threads */
+#endif
+
+  CAMLreturn(Val_unit);
+}
+
+
+CAMLprim value NAME_PERM(value vCMP, value vN,
+                         value vOFSP, value vINCP, value vP,
+                         value vOFSX, value vINCX, value vX)
+{
+  CAMLparam3(vCMP, vP, vX);
+#if defined(OCAML_SORT_CALLBACK)
+  CAMLlocal2(va, vb);
+#endif
+  const size_t GET_INT(N);
+  int GET_INT(INCX),
+      GET_INT(INCP);
+  VEC_PARAMS(X);
+  intnat OFSX = Long_val(vOFSX);
+  intnat *P_data = ((intnat *) Caml_ba_data_val(vP)) + (Long_val(vOFSP) - 1);
+  int i;
+
+  NUMBER *const X = X_data - OFSX;  /* so P values are FORTRAN indices */
+  intnat *const base_ptr = P_data;
+  const size_t max_thresh = MAX_THRESH * sizeof(intnat) * INCP;
+
+  if (N == 0) CAMLreturn(Val_unit);
+
+#ifndef OCAML_SORT_CALLBACK
+  caml_enter_blocking_section();  /* Allow other threads */
+#endif
+
+  /* Initialize the permutation to the "identity". */
+  for(i = 0; i < N; i += 1)
+    P_data[i * INCP] = OFSX + i * INCX;
+#define QUICKSORT_LT(a, b) OCAML_SORT_LT((X[*a]), (X[*b]))
+  QUICKSORT(intnat, base_ptr, INCP, max_thresh);
+#undef QUICKSORT_LT
+
+#ifndef OCAML_SORT_CALLBACK
+  caml_leave_blocking_section();  /* Disallow other threads */
+#endif
+
+  CAMLreturn(Val_unit);
+}
+
+
+CAMLprim value BC_NAME_PERM(value *argv, int argn)
+{
+  return NAME_PERM(argv[0], argv[1], argv[2], argv[3], argv[4],
+                   argv[5], argv[6], argv[7]);
+}
+
+#undef NAME
+#undef NAME_PERM
+#undef BC_NAME_PERM
+#undef OCAML_SORT_LT
       (* Files included, tailored with macros. *)
       dep ["compile"; "c"]
           ["lib"/"fold_col.c"; "lib"/"fold2_col.c";
-           "lib"/"vec_map.c"; "lib"/"vec_combine.c"];
+           "lib"/"vec_map.c"; "lib"/"vec_combine.c"; "lib"/"vec_sort.c"];
 
       (* Special rules for precision dependent C code. *)
       let lacaml_cc desc ~prod ~dep flags =
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.