Markus Mottl avatar Markus Mottl committed c78686d

Major overhaul

Comments (0)

Files changed (31)

+2009-05-30:  Major overhaul:
+
+             Fixed too liberal transpose parameters in several
+             functions.  The conjugate transpose was not available
+             in some.
+
+             Make convenience definitions for pp_num, pp_vec, pp_mat
+             available on opening Lacaml.Impl.{S,D,C,Z} to be able
+             to print more generically.
+
+             Added new functions:
+
+               * dotu  (only for complex numbers)
+               * dotc  (only for complex numbers)
+               * ger   (all numbers)
+               * lassq
+               * Mat.syrk_trace
+               * Mat.symm2_trace
+
+             Fixed several functions to allow for complex multipliers:
+             gemv, gbmv, symv, gemm, symm, trmm, syrk, syr2k.
+
+             Added new "stable" flag to Vec.sqr_nrm2 if over/underflow
+             protection is required.
+
+             Numerous internal improvements to make adding functionality
+             even easier and to improve performance and numeric
+             stability.
+
 2009-05-16:  Added new function:
 
                * Mat.syrk_diag
 
-	     Renamed functions and changed API:
+             Renamed functions and changed API:
 
                * Mat.prod_diag -> Mat.gemm_diag
                * Mat.prod_trace -> Mat.gemm_trace
 name="lacaml"
-version="5.1.0"
+version="5.2.0"
 description="LACAML - BLAS/LAPACK-interface for OCaml"
 
 requires="lacaml.core"
 
 OCAMLFLAGS := -w A -warn-error A -for-pack Lacaml
 LIB_PACK_NAME := lacaml
-#CFLAGS	:= -fPIC -DPIC -Wall -Werror -pedantic -Wno-unused -Wno-long-long
+#CFLAGS	:= -fPIC -DPIC -Wall -Werror -Wno-unused -Wno-long-long
 CFLAGS := -O2 -fPIC -DPIC
 CLIBS := $(LAPACK) $(BLAS)
 RESULT := lacaml
 let mat_of_vec v =
   array2_of_genarray (reshape (genarray_of_array1 v) [| Array1.dim v; 1 |])
 
+type trans2 = [ `N | `T ]
 type side = [ `L | `R ]
 type diag = [ `U | `N ]
 type norm2 = [ `O | `I ]
 
 open Bigarray
 
+type trans2 = [ `N | `T ]
+(** Transpose parameter (normal or transposed) *)
+
 type side = [ `L | `R ]
 (** Side parameter (left or right) *)
 
 type rvec = (float, float32_elt, fortran_layout) Array1.t
 type mat = (Complex.t, prec, fortran_layout) Array2.t
 
-type trans2 = [ `N | `C ]
 type trans3 = [ `N | `T | `C ]
 
 let prec = complex32
 type rvec = (float, float64_elt, fortran_layout) Array1.t
 type mat = (Complex.t, prec, fortran_layout) Array2.t
 
-type trans2 = [ `N | `C ]
 type trans3 = [ `N | `T | `C ]
 
 let prec = complex64
 type rvec = (float, float32_elt, fortran_layout) Array1.t
 type mat = (float, prec, fortran_layout) Array2.t
 
-type trans2 = [ `N | `T ]
 type trans3 = [ `N | `T ]
 
 let prec = float32
 type rvec = (float, float64_elt, fortran_layout) Array1.t
 type mat = (float, prec, fortran_layout) Array2.t
 
-type trans2 = [ `N | `T ]
 type trans3 = [ `N | `T ]
 
 let prec = float64
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
 
-#include "f2c.h"
 #include "lacaml_macros.h"
-#include "utils_c.h"
 
 CAMLprim value NAME(value vN, value vOFSX, value vINCX, value vX)
 {
 (** Modules with functions specialized for (S)ingle and (D)ouble
     precision, and for (C)omplex and double complex (Z) numbers. *)
 
+open Io
+
+module Real_io = struct
+  let pp_num ppf n = !pp_float_el_default ppf n
+  let pp_vec = pp_fvec
+  let pp_mat = pp_fmat
+end
+
+module Complex_io = struct
+  let pp_num ppf n = !pp_complex_el_default ppf n
+  let pp_vec = pp_cvec
+  let pp_mat = pp_cmat
+end
+
 module S = struct
   include Float32
 
   include Impl2_S
   include Impl4_S
 
+  include Real_io
+
   module Vec = struct
     include Vec2_S
     include Vec4_S
   include Impl2_D
   include Impl4_D
 
+  include Real_io
+
   module Vec = struct
     include Vec2_D
     include Vec4_D
   include Impl2_C
   include Impl4_C
 
+  include Complex_io
+
   module Vec = struct
     include Vec2_C
     include Vec4_C
 module Z = struct
   include Complex64
 
+  include Complex_io
+
   include Impl2_Z
   include Impl4_Z
 
 
 (* BLAS-1 interface *)
 
-(* NRM2 *)
-
-external direct_nrm2 :
+external direct_dotu :
   n : int ->
+  ofsy : int ->
+  incy : int ->
+  y : vec ->
   ofsx : int ->
   incx : int ->
   x : vec ->
-  float = "lacaml_CPRECnrm2_stub"
+  num_type = "lacaml_CPRECdotu_stub_bc" "lacaml_CPRECdotu_stub"
 
-let nrm2 ?n ?ofsx ?incx x =
-  let loc = "Lacaml.Impl.CPREC.nrm2" in
+external direct_dotc :
+  n : int ->
+  ofsy : int ->
+  incy : int ->
+  y : vec ->
+  ofsx : int ->
+  incx : int ->
+  x : vec ->
+  num_type = "lacaml_CPRECdotc_stub_bc" "lacaml_CPRECdotc_stub"
+
+let gen_dot loc dot_fun ?n ?ofsx ?incx ~x ?ofsy ?incy y =
   let ofsx, incx = get_vec_geom loc x_str ofsx incx in
+  let ofsy, incy = get_vec_geom loc y_str ofsy incy in
   let n = get_dim_vec loc x_str ofsx incx x n_str n in
-  direct_nrm2 ~n ~ofsx ~incx ~x
+  check_vec loc y_str y (ofsy + (n - 1) * abs incy);
+  dot_fun ~n ~ofsy ~incy ~y ~ofsx ~incx ~x
+
+let dotu = gen_dot "Lacaml.Impl.CPREC.dotu" direct_dotu
+let dotc = gen_dot "Lacaml.Impl.CPREC.dotc" direct_dotc
 
 
 (* Auxiliary routines *)
 
 (** {6 BLAS-1 interface} *)
 
-(*
-val nrm2 : ?n : int -> ?ofsx : int -> ?incx : int -> vec -> float
-(** [nrm2 ?n ?ofsx ?incx x] see BLAS documentation!
+val dotu :
+  ?n : int ->
+  ?ofsx : int ->
+  ?incx : int ->
+  x : vec ->
+  ?ofsy : int ->
+  ?incy : int ->
+  vec
+  -> num_type
+(** [dotu ?n ?ofsy ?incy y ?ofsx ?incx x] see BLAS documentation!
+
     @param n default = greater n s.t. [ofsx+(n-1)(abs incx) <= dim x]
+    @param ofsy default = 1
+    @param incy default = 1
     @param ofsx default = 1
-    @param incx default = 1 *)
+    @param incx default = 1
+*)
+
+val dotc :
+  ?n : int ->
+  ?ofsx : int ->
+  ?incx : int ->
+  x : vec ->
+  ?ofsy : int ->
+  ?incy : int ->
+  vec
+  -> num_type
+(** [dotc ?n ?ofsy ?incy y ?ofsx ?incx x] see BLAS documentation!
+
+    @param n default = greater n s.t. [ofsx+(n-1)(abs incx) <= dim x]
+    @param ofsy default = 1
+    @param incy default = 1
+    @param ofsx default = 1
+    @param incx default = 1
 *)
 
 
 
 /*** BLAS-1 */
 
-/** NRM2 */
+/** DOTU */
 
-extern real scnrm2_(integer *N, complex *X, integer *INCX);
-extern doublereal dznrm2_(integer *N, doublecomplex *X, integer *INCX);
+extern COMPLEX FUN(dotu)(
+  integer *N,
+  COMPLEX *X, integer *INCX,
+  COMPLEX *Y, integer *INCY);
 
-CAMLprim value LFUN(nrm2_stub)(value vN, value vOFSX, value vINCX, value vX)
+CAMLprim value LFUN(dotu_stub)(
+  value vN,
+  value vOFSY, value vINCY, value vY,
+  value vOFSX, value vINCX, value vX)
 {
-  CAMLparam1(vX);
+  CAMLparam2(vX, vY);
 
-  int GET_INT(N),
-      GET_INT(INCX);
+  integer GET_INT(N),
+          GET_INT(INCX),
+          GET_INT(INCY);
 
-  REAL res;
+  COMPLEX res;
 
   VEC_PARAMS(X);
+  VEC_PARAMS(Y);
 
   caml_enter_blocking_section();  /* Allow other threads */
-#ifndef LACAML_DOUBLE
-  res = scnrm2_(&N, X_data, &INCX);
-#else
-  res = dznrm2_(&N, X_data, &INCX);
-#endif
+  res =
+    FUN(dotu)(
+      &N,
+      X_data, &INCX,
+      Y_data, &INCY);
   caml_leave_blocking_section();  /* Disallow other threads */
 
-  CAMLreturn(caml_copy_double(res));
+  CAMLreturn(copy_two_doubles(res.r, res.i));
+}
+
+CAMLprim value LFUN(dotu_stub_bc)(value *argv, int argn)
+{
+  return LFUN(dotu_stub)(argv[0], argv[1], argv[2], argv[3],
+                         argv[4], argv[5], argv[6]);
+}
+
+
+/** DOTC */
+
+extern COMPLEX FUN(dotc)(
+  integer *N,
+  COMPLEX *X, integer *INCX,
+  COMPLEX *Y, integer *INCY);
+
+CAMLprim value LFUN(dotc_stub)(
+  value vN,
+  value vOFSY, value vINCY, value vY,
+  value vOFSX, value vINCX, value vX)
+{
+  CAMLparam2(vX, vY);
+
+  integer GET_INT(N),
+          GET_INT(INCX),
+          GET_INT(INCY);
+
+  COMPLEX res;
+
+  VEC_PARAMS(X);
+  VEC_PARAMS(Y);
+
+  caml_enter_blocking_section();  /* Allow other threads */
+  res =
+    FUN(dotc)(
+      &N,
+      X_data, &INCX,
+      Y_data, &INCY);
+  caml_leave_blocking_section();  /* Disallow other threads */
+
+  CAMLreturn(copy_two_doubles(res.r, res.i));
+}
+
+CAMLprim value LFUN(dotc_stub_bc)(value *argv, int argn)
+{
+  return LFUN(dotc_stub)(argv[0], argv[1], argv[2], argv[3],
+                         argv[4], argv[5], argv[6]);
 }
 
 
 
   v_rcond = caml_copy_double(RCOND);
   v_res = caml_alloc_small(2, 0);
-  Field(v_res, 0) = Val_int(INFO);
+  Field(v_res, 0) = Val_long(INFO);
   Field(v_res, 1) = v_rcond;
 
   CAMLreturn(v_res);
 
   v_rcond = caml_copy_double(RCOND);
   v_res = caml_alloc_small(2, 0);
-  Field(v_res, 0) = Val_int(INFO);
+  Field(v_res, 0) = Val_long(INFO);
   Field(v_res, 1) = v_rcond;
 
   CAMLreturn(v_res);
 
   v_rcond = caml_copy_double(RCOND);
   v_res = caml_alloc_small(2, 0);
-  Field(v_res, 0) = Val_int(INFO);
+  Field(v_res, 0) = Val_long(INFO);
   Field(v_res, 1) = v_rcond;
 
   CAMLreturn(v_res);
     &INFO);
   caml_leave_blocking_section(); /* Disallow other threads */
 
-  CAMLreturn(Val_int(INFO));
+  CAMLreturn(Val_long(INFO));
 }
 
 CAMLprim value LFUN(gesvd_stub_bc)(value *argv, int argn)
     &INFO);
   caml_leave_blocking_section(); /* Disallow other threads */
 
-  CAMLreturn(Val_int(INFO));
+  CAMLreturn(Val_long(INFO));
 }
 
 CAMLprim value LFUN(geev_stub_bc)(value *argv, int argn)
   direct_dot ~n ~ofsy ~incy ~y ~ofsx ~incx ~x
 
 
-(* NRM2 *)
-
-external direct_nrm2 :
-  n : int ->
-  ofsx : int ->
-  incx : int ->
-  x : vec
-  -> float = "lacaml_FPRECnrm2_stub"
-
-let nrm2 ?n ?ofsx ?incx x =
-  let loc = "Lacaml.Impl.FPREC.nrm2" in
-  let ofsx, incx = get_vec_geom loc x_str ofsx incx in
-  let n = get_dim_vec loc x_str ofsx incx x n_str n in
-  direct_nrm2 ~n ~ofsx ~incx ~x
-
-
 (* ASUM *)
 
 external direct_asum :
   x : vec
   -> unit = "lacaml_FPRECsbmv_stub_bc" "lacaml_FPRECsbmv_stub"
 
-let sbmv ?ofsy ?incy ?y ?(ar = 1) ?(ac = 1) a ?n ?k ?(up = true) ?(alpha = 1.0)
+let sbmv ?n ?k ?ofsy ?incy ?y ?(ar = 1) ?(ac = 1) a ?(up = true) ?(alpha = 1.0)
     ?(beta = 0.0) ?ofsx ?incx x =
   let loc = "Lacaml.Impl.FPREC.sbmv" in
   (* [a] is a band matrix of size [k+1]*[n]. *)
     ~alpha ~beta ~ofsx ~incx ~x;
   y
 
+(* GER *)
+
+external direct_ger :
+  m : int ->
+  n : int ->
+  alpha : float ->
+  ofsx : int ->
+  incx : int ->
+  x : vec ->
+  ofsy : int ->
+  incy : int ->
+  y : vec ->
+  ar : int ->
+  ac : int ->
+  a : mat
+  -> unit = "lacaml_FPRECger_stub_bc" "lacaml_FPRECger_stub"
+
+let ger ?m ?n ?(alpha = 1.0) ?ofsx ?incx x ?ofsy ?incy y ?(ar = 1) ?(ac = 1) a =
+  let loc = "Lacaml.Impl.FPREC.ger" in
+  let m = get_dim1_mat loc a_str a ar m_str m in
+  let n = get_dim2_mat loc a_str a ac n_str n in
+  let ofsx, incx = get_vec_geom loc x_str ofsx incx in
+  check_vec loc x_str x (ofsx + (m - 1) * abs incx);
+  let ofsy, incy = get_vec_geom loc y_str ofsy incy in
+  check_vec loc y_str y (ofsy + (n - 1) * abs incy);
+  direct_ger ~m ~n ~alpha ~ofsx ~incx ~x ~ofsy ~incy ~y ~ar ~ac ~a;
+  a
+
 (* SYR *)
 
 external direct_syr :
   a : mat
   -> unit = "lacaml_FPRECsyr_stub_bc" "lacaml_FPRECsyr_stub"
 
-let syr ?(alpha = 1.0) ?(up = true) ?ofsx ?incx x ?n ?(ar = 1) ?(ac = 1) a =
+let syr ?n ?(alpha = 1.0) ?(up = true) ?ofsx ?incx x ?(ar = 1) ?(ac = 1) a =
   let loc = "Lacaml.Impl.FPREC.syr" in
   let n = get_n_of_a loc ar ac a n in
   let ofsx, incx = get_vec_geom loc x_str ofsx incx in
   vec
   -> float
 (** [dot ?n ?ofsy ?incy y ?ofsx ?incx x] see BLAS documentation!
+
     @param n default = greater n s.t. [ofsx+(n-1)(abs incx) <= dim x]
     @param ofsy default = 1
     @param incy default = 1
     @param ofsx default = 1
-    @param incx default = 1 *)
-
-val nrm2 : ?n : int -> ?ofsx : int -> ?incx : int -> vec -> float
-(** [nrm2 ?n ?ofsx ?incx x] see BLAS documentation!
-    @param n default = greater n s.t. [ofsx+(n-1)(abs incx) <= dim x]
-    @param ofsx default = 1
-    @param incx default = 1 *)
+    @param incx default = 1
+*)
 
 val asum : ?n : int -> ?ofsx : int -> ?incx : int -> vec -> float
 (** [asum ?n ?ofsx ?incx x] see BLAS documentation!
 (** {6 BLAS-2 interface} *)
 
 val sbmv :
+  ?n : int ->
+  ?k : int ->
   ?ofsy : int ->
   ?incy : int ->
   ?y : vec ->
   ?ar : int ->
   ?ac : int ->
   mat ->
-  ?n : int ->
-  ?k : int ->
   ?up : bool ->
   ?alpha : float ->
   ?beta : float ->
   ?incx : int ->
   vec
   -> vec
-(** [sbmv ?ofsy ?incy ?y ?ar ?ac a ?n ?k ?up ?alpha ?beta ?ofsx ?incx x] see
+(** [sbmv ?n ?k ?ofsy ?incy ?y ?ar ?ac a ?up ?alpha ?beta ?ofsx ?incx x] see
     BLAS documentation!
+
     @return vector [y], which is overwritten.
+
+    @param n default = number of available columns to the right of [ac].
+    @param k default = number of available rows in matrix [a] - 1
     @param ofsy default = 1
     @param incy default = 1
     @param ar default = 1
     @param ac default = 1
     @param y default = uninitialized vector of minimal length (see BLAS)
-    @param n default = number of available columns to the right of [ac].
-    @param k default = number of available rows in matrix [a] - 1
     @param up default = true i.e., upper band of [a] is supplied
     @param alpha default = 1.0
     @param beta default = 0.0
     @param ofsx default = 1
-    @param incx default = 1 *)
+    @param incx default = 1
+*)
+
+val ger :
+  ?m : int ->
+  ?n : int ->
+  ?alpha : float ->
+  ?ofsx : int ->
+  ?incx : int ->
+  vec ->
+  ?ofsy : int ->
+  ?incy : int ->
+  vec ->
+  ?ar : int ->
+  ?ac : int ->
+  mat
+  -> mat
+(** [ger ?m ?n ?alpha ?ofsx ?incx x ?ofsy ?incy y n ?ar ?ac a] see
+    BLAS documentation!
+
+    @return vector [a], which is overwritten
+
+    @param m default = number of rows of [a]
+    @param n default = number of columns of [a]
+    @param alpha default = 1.0
+    @param ofsx default = 1
+    @param incx default = 1
+    @param ofsy default = 1
+    @param incy default = 1
+    @param ar default = 1
+    @param ac default = 1
+*)
 
 val syr :
+  ?n : int ->
   ?alpha : float ->
   ?up : bool ->
   ?ofsx : int ->
   ?incx : int ->
   vec ->
-  ?n : int ->
   ?ar : int ->
   ?ac : int ->
   mat
   -> mat
-(** [syr ?alpha ?up ?ofsx ?incx x ?n ?ar ?ac a] see BLAS documentation!
+(** [syr ?n ?alpha ?up ?ofsx ?incx x ?ar ?ac a] see BLAS documentation!
+
     @return vector [a], which is overwritten
+
+    @param n default = number of rows of [a]
     @param alpha default = 1.0
     @param up default = true i.e., upper triangle of [a] is supplied
     @param ofsx default = 1
     @param incx default = 1
     @param ar default = 1
     @param ac default = 1
-    @param n default = number of rows of [a] *)
+*)
 
 (** {6 LAPACK interface} *)
 
   y
 
 
+(* NRM2 *)
+
+external direct_nrm2 :
+  n : int ->
+  ofsx : int ->
+  incx : int ->
+  x : vec
+  -> float = "lacaml_NPRECnrm2_stub"
+
+let nrm2 ?n ?ofsx ?incx x =
+  let loc = "Lacaml.Impl.NPREC.nrm2" in
+  let ofsx, incx = get_vec_geom loc x_str ofsx incx in
+  let n = get_dim_vec loc x_str ofsx incx x n_str n in
+  direct_nrm2 ~n ~ofsx ~incx ~x
+
+
 (* AXPY *)
 
 external direct_axpy :
   m : int ->
   n : int ->
   trans : char ->
-  alpha : float ->
-  beta : float ->
+  alpha : num_type ->
+  beta : num_type ->
   ofsx : int ->
   incx : int ->
   x : vec ->
   unit = "lacaml_NPRECgemv_stub_bc" "lacaml_NPRECgemv_stub"
 
-let gemv ?m ?n ?(beta = 0.0) ?ofsy ?incy ?y ?(trans = `N) ?(alpha = 1.0)
+let gemv ?m ?n ?(beta = zero) ?ofsy ?incy ?y ?(trans = `N) ?(alpha = one)
       ?(ar = 1) ?(ac = 1) a ?ofsx ?incx x =
   let loc = "Lacaml.Impl.NPREC.gemv" in
   let m, n, ofsx, incx, ofsy, incy, y, trans =
   kl : int ->
   ku : int ->
   trans : char ->
-  alpha : float ->
-  beta : float ->
+  alpha : num_type ->
+  beta : num_type ->
   ofsx : int ->
   incx : int ->
   x : vec ->
   unit = "lacaml_NPRECgbmv_stub_bc" "lacaml_NPRECgbmv_stub"
 
-let gbmv ?m ?n ?(beta = 0.0) ?ofsy ?incy ?y ?(trans = `N) ?(alpha = 1.0)
+let gbmv ?m ?n ?(beta = zero) ?ofsy ?incy ?y ?(trans = `N) ?(alpha = one)
       ?(ar = 1) ?(ac = 1) a kl ku ?ofsx ?incx x =
   let loc = "Lacaml.Impl.NPREC.gbmv" in
   check_var_ltz loc kl_str kl;
   a : mat ->
   n : int ->
   uplo : char ->
-  alpha : float ->
-  beta : float ->
+  alpha : num_type ->
+  beta : num_type ->
   ofsx : int ->
   incx : int ->
   x : vec ->
   unit = "lacaml_NPRECsymv_stub_bc" "lacaml_NPRECsymv_stub"
 
-let symv ?n ?(beta = 0.0) ?ofsy ?incy ?y ?(up = true) ?(alpha = 1.0)
+let symv ?n ?(beta = zero) ?ofsy ?incy ?y ?(up = true) ?(alpha = one)
       ?(ar = 1) ?(ac = 1) a ?ofsx ?incx x =
   let loc = "Lacaml.Impl.NPREC.symv" in
   let n, ofsx, incx, ofsy, incy, y, uplo =
   cr : int ->
   cc : int ->
   c : mat ->
-  alpha : float ->
-  beta : float ->
+  alpha : num_type ->
+  beta : num_type ->
   unit = "lacaml_NPRECgemm_stub_bc" "lacaml_NPRECgemm_stub"
 
-let gemm ?m ?n ?k ?(beta = 0.0) ?(cr = 1) ?(cc = 1) ?c
-      ?(transa = `N) ?(alpha = 1.0) ?(ar = 1) ?(ac = 1) a
+let gemm ?m ?n ?k ?(beta = zero) ?(cr = 1) ?(cc = 1) ?c
+      ?(transa = `N) ?(alpha = one) ?(ar = 1) ?(ac = 1) a
       ?(transb = `N) ?(br = 1) ?(bc = 1) b =
   let loc = "Lacaml.Impl.NPREC.gemm" in
   let m, n, k, transa, transb, c =
   cr : int ->
   cc : int ->
   c : mat ->
-  alpha : float ->
-  beta : float ->
+  alpha : num_type ->
+  beta : num_type ->
   unit = "lacaml_NPRECsymm_stub_bc" "lacaml_NPRECsymm_stub"
 
 let symm ?m ?n ?(side = `L) ?(up = true)
-      ?(beta = 0.0) ?(cr = 1) ?(cc = 1) ?c
-      ?(alpha = 1.0) ?(ar = 1) ?(ac = 1) a ?(br = 1) ?(bc = 1) b =
+      ?(beta = zero) ?(cr = 1) ?(cc = 1) ?c
+      ?(alpha = one) ?(ar = 1) ?(ac = 1) a ?(br = 1) ?(bc = 1) b =
   let loc = "Lacaml.Impl.NPREC.symm" in
   let m, n, side, uplo, c =
     symm_get_params loc Mat.create ar ac a br bc b cr cc c m n side up in
   br : int ->
   bc : int ->
   b : mat ->
-  alpha : float ->
+  alpha : num_type ->
   unit = "lacaml_NPRECtrmm_stub_bc" "lacaml_NPRECtrmm_stub"
 
 let trmm ?m ?n ?(side = `L) ?(up = true) ?(trans = `N) ?(diag = `N)
-      ?(br = 1) ?(bc = 1) ~b ?(alpha = 1.0) ?(ar = 1) ?(ac = 1) a =
+      ?(br = 1) ?(bc = 1) ~b ?(alpha = one) ?(ar = 1) ?(ac = 1) a =
   let loc = "Lacaml.Impl.NPREC.trmm" in
   let m, n, side, uplo, trans, diag =
     trmm_get_params loc ar ac a br bc b m n side up trans diag
   cr : int ->
   cc : int ->
   c : mat ->
-  alpha : float ->
-  beta : float ->
+  alpha : num_type ->
+  beta : num_type ->
   unit = "lacaml_NPRECsyrk_stub_bc" "lacaml_NPRECsyrk_stub"
 
-let syrk ?n ?k ?(up = true) ?(beta = 0.0) ?(cr = 1) ?(cc = 1) ?c
-      ?(trans = `N) ?(alpha = 1.0) ?(ar = 1) ?(ac = 1) a =
+let syrk ?n ?k ?(up = true) ?(beta = zero) ?(cr = 1) ?(cc = 1) ?c
+      ?(trans = `N) ?(alpha = one) ?(ar = 1) ?(ac = 1) a =
   let loc = "Lacaml.Impl.NPREC.syrk" in
   let n, k, uplo, trans, c =
     syrk_get_params loc Mat.create ar ac a cr cc c n k up trans in
   cr : int ->
   cc : int ->
   c : mat ->
-  alpha : float ->
-  beta : float ->
+  alpha : num_type ->
+  beta : num_type ->
   unit = "lacaml_NPRECsyr2k_stub_bc" "lacaml_NPRECsyr2k_stub"
 
-let syr2k ?n ?k ?(up = true) ?(beta = 0.0) ?(cr = 1) ?(cc = 1) ?c
-      ?(trans = `N) ?(alpha = 1.0) ?(ar = 1) ?(ac = 1) a ?(br = 1) ?(bc = 1) b =
+let syr2k ?n ?k ?(up = true) ?(beta = zero) ?(cr = 1) ?(cc = 1) ?c
+      ?(trans = `N) ?(alpha = one) ?(ar = 1) ?(ac = 1) a ?(br = 1) ?(bc = 1) b =
   let loc = "Lacaml.Impl.NPREC.syr2k" in
   let n, k, uplo, trans, c =
     syr2k_get_params loc Mat.create ar ac a br bc b cr cc c n k up trans
   b
 
 
+(* LASSQ *)
+
+external direct_lassq :
+  n : int ->
+  ofsx : int ->
+  incx : int ->
+  x : vec ->
+  scale : float ->
+  sumsq : float ->
+  float * float = "lacaml_NPREClassq_stub_bc" "lacaml_NPREClassq_stub"
+
+let lassq ?n ?(scale = 0.) ?(sumsq = 1.) ?ofsx ?incx x =
+  let loc = "Lacaml.Impl.NPREC.lassq" in
+  let ofsx, incx = get_vec_geom loc x_str ofsx incx in
+  let n = get_dim_vec loc x_str ofsx incx x n_str n in
+  direct_lassq ~n ~ofsx ~incx ~x ~scale ~sumsq
+
+
 (* LARNV *)
 
 external direct_larnv :

lib/impl_SDCZ.mli

   vec ->
   unit
 (** [swap ?n ?ofsx ?incx ~x ?ofsy ?incy y] see BLAS documentation!
-    @param n default = 1
+    @param n default = greater n s.t. [ofsx+(n-1)(abs incx) <= dim x]
     @param ofsx default = 1
     @param incx default = 1
     @param ofsy default = 1
     @param ofsx default = 1
     @param incx default = 1 *)
 
+val nrm2 : ?n : int -> ?ofsx : int -> ?incx : int -> vec -> float
+(** [nrm2 ?n ?ofsx ?incx x] see BLAS documentation!
+    @param n default = greater n s.t. [ofsx+(n-1)(abs incx) <= dim x]
+    @param ofsx default = 1
+    @param incx default = 1
+*)
+
 val axpy :
   ?n : int ->
   ?alpha : num_type ->
   unit
 (** [axpy ?n ?alpha ?ofsx ?incx ~x ?ofsy ?incy y] see BLAS documentation!
     @param n default = greater n s.t. [ofsx+(n-1)(abs incx) <= dim x]
-    @param alpha default = 1.0
+    @param alpha default = [{ re = 1.; im = 0. }]
     @param ofsx default = 1
     @param incx default = 1
     @param ofsy default = 1
 val gemv :
   ?m : int ->
   ?n : int ->
-  ?beta : float  ->
+  ?beta : num_type  ->
   ?ofsy : int ->
   ?incy : int ->
   ?y : vec ->
   ?trans : trans3 ->
-  ?alpha : float ->
+  ?alpha : num_type ->
   ?ar : int ->
   ?ac : int ->
   mat ->
     @return vector [y], which is overwritten.
     @param m default = number of available rows in matrix [a]
     @param n default = available columns in matrix [a]
-    @param beta default = 0.0
+    @param beta default = [{ re = 0.; im = 0. }]
     @param ofsy default = 1
     @param incy default = 1
     @param y default = vector with minimal required length (see BLAS)
     @param trans default = `N
-    @param alpha default = 1.0
+    @param alpha default = [{ re = 1.; im = 0. }]
     @param ar default = 1
     @param ac default = 1
     @param ofsx default = 1
 val gbmv :
   ?m : int ->
   ?n : int ->
-  ?beta : float ->
+  ?beta : num_type ->
   ?ofsy : int ->
   ?incy : int ->
   ?y : vec ->
   ?trans : trans3 ->
-  ?alpha : float ->
+  ?alpha : num_type ->
   ?ar : int ->
   ?ac : int ->
   mat ->
     @return vector [y], which is overwritten.
     @param m default = same as [n] (i.e., [a] is a square matrix)
     @param n default = available number of columns in matrix [a]
-    @param beta default = 0.0
+    @param beta default = [{ re = 0.; im = 0. }]
     @param ofsy default = 1
     @param incy default = 1
     @param y default = vector with minimal required length (see BLAS)
     @param trans default = `N
-    @param alpha default = 1.0
+    @param alpha default = [{ re = 1.; im = 0. }]
     @param ar default = 1
     @param ac default = 1
     @param ofsx default = 1
 
 val symv :
   ?n : int ->
-  ?beta : float ->
+  ?beta : num_type ->
   ?ofsy : int ->
   ?incy : int ->
   ?y : vec ->
   ?up : bool ->
-  ?alpha : float ->
+  ?alpha : num_type ->
   ?ar : int ->
   ?ac : int ->
   mat ->
     see BLAS documentation!
     @return vector [y], which is overwritten.
     @param n default = dimension of symmetric matrix [a]
-    @param beta default = 0.0
+    @param beta default = [{ re = 0.; im = 0. }]
     @param ofsy default = 1
     @param incy default = 1
     @param y default = vector with minimal required length (see BLAS)
     @param up default = true (upper triangular portion of [a] is accessed)
-    @param alpha default = 1.0
+    @param alpha default = [{ re = 1.; im = 0. }]
     @param ar default = 1
     @param ac default = 1
     @param ofsx default = 1
   ?m : int ->
   ?n : int ->
   ?k : int ->
-  ?beta : float ->
+  ?beta : num_type ->
   ?cr : int ->
   ?cc : int ->
   ?c : mat ->
   ?transa : trans3 ->
-  ?alpha : float ->
+  ?alpha : num_type ->
   ?ar : int ->
   ?ac : int ->
   mat ->
     @param n default = number of columns of [b] (or tr [b]) and [c]
     @param k default = number of columns of [a] (or tr [a]) and
                        number of rows of [b] (or tr [b])
-    @param beta default = 0.0
+    @param beta default = [{ re = 0.; im = 0. }]
     @param cr default = 1
     @param cc default = 1
     @param c default = matrix with minimal required dimension
     @param transa default = `N
-    @param alpha default = 1.0
+    @param alpha default = [{ re = 1.; im = 0. }]
     @param ar default = 1
     @param ac default = 1
     @param transb default = `N
   ?n : int ->
   ?side : side ->
   ?up : bool ->
-  ?beta : float ->
+  ?beta : num_type ->
   ?cr : int ->
   ?cc : int ->
   ?c : mat ->
-  ?alpha : float ->
+  ?alpha : num_type ->
   ?ar : int ->
   ?ac : int ->
   mat ->
     @param n default = number of columns of [c]
     @param side default = `L (left - multiplication is [a][b])
     @param up default = true (upper triangular portion of [a] is accessed)
-    @param beta default = 0.0
+    @param beta default = [{ re = 0.; im = 0. }]
     @param cr default = 1
     @param cc default = 1
     @param c default = matrix with minimal required dimension
-    @param alpha default = 1.0
+    @param alpha default = [{ re = 1.; im = 0. }]
     @param ar default = 1
     @param ac default = 1
     @param br default = 1
   ?br : int ->
   ?bc : int ->
   b : mat ->
-  ?alpha : float ->
+  ?alpha : num_type ->
   ?ar : int ->
   ?ac : int ->
   mat ->
     @param up default = true (upper triangular portion of [a] is accessed)
     @param trans default = `N
     @param diag default = `N (non-unit)
-    @param alpha default = 1.0
+    @param alpha default = [{ re = 1.; im = 0. }]
     @param ar default = 1
     @param ac default = 1
     @param br default = 1
   ?n : int ->
   ?k : int ->
   ?up : bool ->
-  ?beta : float ->
+  ?beta : num_type ->
   ?cr : int ->
   ?cc : int ->
   ?c : mat ->
-  ?trans : trans3 ->
-  ?alpha : float ->
+  ?trans : trans2 ->
+  ?alpha : num_type ->
   ?ar : int ->
   ?ac : int ->
   mat ->
     @param n default = number of rows of [a] (or [a]'), [c]
     @param k default = number of columns of [a] (or [a]')
     @param up default = true (upper triangular portion of [c] is accessed)
-    @param beta default = 0.0
+    @param beta default = [{ re = 0.; im = 0. }]
     @param cr default = 1
     @param cc default = 1
     @param c default = matrix with minimal required dimension
     @param trans default = `N
-    @param alpha default = 1.0
+    @param alpha default = [{ re = 1.; im = 0. }]
     @param ar default = 1
     @param ac default = 1 *)
 
   ?n : int ->
   ?k : int ->
   ?up : bool ->
-  ?beta : float ->
+  ?beta : num_type ->
   ?cr : int ->
   ?cc : int ->
   ?c : mat ->
-  ?trans : trans3 ->
-  ?alpha : float ->
+  ?trans : trans2 ->
+  ?alpha : num_type ->
   ?ar : int ->
   ?ac : int ->
   mat ->
     @param n default = number of rows of [a] (or [a]'), [c]
     @param k default = number of columns of [a] (or [a]')
     @param up default = true (upper triangular portion of [c] is accessed)
-    @param beta default = 0.0
+    @param beta default = [{ re = 0.; im = 0. }]
     @param cr default = 1
     @param cc default = 1
     @param c default = matrix with minimal required dimension
     @param trans default = `N
-    @param alpha default = 1.0
+    @param alpha default = [{ re = 1.; im = 0. }]
     @param ar default = 1
     @param ac default = 1
     @param br default = 1
     @param uplo default = whole matrix
 *)
 
+val lassq :
+  ?n : int ->
+  ?scale : float ->
+  ?sumsq : float ->
+  ?ofsx : int ->
+  ?incx : int ->
+  vec ->
+  float * float
+(** [lassq ?n ?ofsx ?incx ?scale ?sumsq] @return [(scl, ssq)], where
+    [scl] is a scaling factor and [ssq] the sum of squares of vector
+    [x] starting at [ofs] and using increment [incx] and initial
+    [scale] and [sumsq].  See LAPACK-documentation for details!
+
+    @param n default = greater n s.t. [ofsx+(n-1)(abs incx) <= dim x]
+    @param ofsx default = 1
+    @param incx default = 1
+    @param scale default = 0.
+    @param sumsq default = 1.
+*)
+
 val larnv :
   ?idist : [ `Uniform0 | `Uniform1 | `Normal ] ->
   ?ofsiseed : int ->

lib/impl_SDCZ_c.c

   integer GET_INT(N),
           GET_INT(INCX);
 
-  CREATE_NUMBERP(ALPHA);
+  CREATE_NUMBER(ALPHA);
 
   VEC_PARAMS(X);
 
   caml_enter_blocking_section();  /* Allow other threads */
   FUN(scal)(
     &N,
-    pALPHA,
+    &ALPHA,
     X_data, &INCX);
   caml_leave_blocking_section();  /* Disallow other threads */
 
 }
 
 
+/** NRM2 */
+
+#ifndef LACAML_COMPLEX          /* Real number */
+extern REAL FUN(nrm2)(integer *N, REAL *X, integer *INCX);
+#else
+#ifndef LACAML_DOUBLE
+extern real scnrm2_(integer *N, complex *X, integer *INCX);
+#else
+extern doublereal dznrm2_(integer *N, doublecomplex *X, integer *INCX);
+#endif
+#endif
+
+CAMLprim value LFUN(nrm2_stub)(value vN, value vOFSX, value vINCX, value vX)
+{
+  CAMLparam1(vX);
+
+  integer GET_INT(N),
+          GET_INT(INCX);
+
+  REAL res;
+
+  VEC_PARAMS(X);
+
+  caml_enter_blocking_section();  /* Allow other threads */
+#ifndef LACAML_COMPLEX          /* Real number */
+  res = FUN(nrm2)(&N, X_data, &INCX);
+#else
+#ifndef LACAML_DOUBLE
+  res = scnrm2_(&N, X_data, &INCX);
+#else
+  res = dznrm2_(&N, X_data, &INCX);
+#endif
+#endif
+  caml_leave_blocking_section();  /* Disallow other threads */
+
+  CAMLreturn(caml_copy_double(res));
+}
+
+
 /** AXPY */
 
 extern void FUN(axpy)(
           GET_INT(INCX),
           GET_INT(INCY);
 
-  CREATE_NUMBERP(ALPHA);
+  CREATE_NUMBER(ALPHA);
 
   VEC_PARAMS(X);
   VEC_PARAMS(Y);
   caml_enter_blocking_section();  /* Allow other threads */
   FUN(axpy)(
     &N,
-    pALPHA,
+    &ALPHA,
     X_data, &INCX,
     Y_data, &INCY);
   caml_leave_blocking_section();  /* Disallow other threads */
   index = FUN2(i,amax)(&N, X_data, &INCX);
   caml_leave_blocking_section();  /* Disallow other threads */
 
-  CAMLreturn(Val_int(index));
+  CAMLreturn(Val_long(index));
 }
 
 
           GET_INT(INCX),
           GET_INT(INCY);
 
-  CREATE_NUMBERP(ALPHA);
-  CREATE_NUMBERP(BETA);
+  CREATE_NUMBER(ALPHA);
+  CREATE_NUMBER(BETA);
 
   MAT_PARAMS(A);
   VEC_PARAMS(X);
   FUN(gemv)(
     &TRANS,
     &M, &N,
-    pALPHA,
+    &ALPHA,
     A_data, &rows_A,
     X_data, &INCX,
-    pBETA,
+    &BETA,
     Y_data, &INCY);
   caml_leave_blocking_section();  /* Disallow other threads */
 
           GET_INT(INCX),
           GET_INT(INCY);
 
-  CREATE_NUMBERP(ALPHA);
-  CREATE_NUMBERP(BETA);
+  CREATE_NUMBER(ALPHA);
+  CREATE_NUMBER(BETA);
 
   MAT_PARAMS(A);
   VEC_PARAMS(X);
   FUN(gbmv)(
     &TRANS,
     &M, &N, &KL, &KU,
-    pALPHA,
+    &ALPHA,
     A_data, &rows_A,
     X_data, &INCX,
-    pBETA,
+    &BETA,
     Y_data, &INCY);
   caml_leave_blocking_section();  /* Disallow other threads */
 
           GET_INT(INCX),
           GET_INT(INCY);
 
-  CREATE_NUMBERP(ALPHA);
-  CREATE_NUMBERP(BETA);
+  CREATE_NUMBER(ALPHA);
+  CREATE_NUMBER(BETA);
 
   MAT_PARAMS(A);
   VEC_PARAMS(X);
   FUN(symv)(
     &UPLO,
     &N,
-    pALPHA,
+    &ALPHA,
     A_data, &rows_A,
     X_data, &INCX,
-    pBETA,
+    &BETA,
     Y_data, &INCY);
   caml_leave_blocking_section();  /* Disallow other threads */
 
   char GET_INT(TRANSA), GET_INT(TRANSB);
   integer GET_INT(M), GET_INT(N), GET_INT(K);
 
-  CREATE_NUMBERP(ALPHA);
-  CREATE_NUMBERP(BETA);
+  CREATE_NUMBER(ALPHA);
+  CREATE_NUMBER(BETA);
 
   MAT_PARAMS(A);
   MAT_PARAMS(B);
   FUN(gemm)(
     &TRANSA, &TRANSB,
     &M, &N, &K,
-    pALPHA,
+    &ALPHA,
     A_data, &rows_A,
     B_data, &rows_B,
-    pBETA,
+    &BETA,
     C_data, &rows_C);
   caml_leave_blocking_section();  /* Disallow other threads */
 
   char GET_INT(SIDE), GET_INT(UPLO);
   integer GET_INT(M), GET_INT(N);
 
-  CREATE_NUMBERP(ALPHA);
-  CREATE_NUMBERP(BETA);
+  CREATE_NUMBER(ALPHA);
+  CREATE_NUMBER(BETA);
 
   MAT_PARAMS(A);
   MAT_PARAMS(B);
   FUN(symm)(
     &SIDE, &UPLO,
     &M, &N,
-    pALPHA,
+    &ALPHA,
     A_data, &rows_A,
     B_data, &rows_B,
-    pBETA,
+    &BETA,
     C_data, &rows_C);
   caml_leave_blocking_section();  /* Disallow other threads */
 
   char GET_INT(SIDE), GET_INT(UPLO), GET_INT(TRANS), GET_INT(DIAG);
   integer GET_INT(M), GET_INT(N);
 
-  CREATE_NUMBERP(ALPHA);
+  CREATE_NUMBER(ALPHA);
 
   MAT_PARAMS(A);
   MAT_PARAMS(B);
   FUN(trmm)(
     &SIDE, &UPLO, &TRANS, &DIAG,
     &M, &N,
-    pALPHA,
+    &ALPHA,
     A_data, &rows_A,
     B_data, &rows_B);
   caml_leave_blocking_section();  /* Disallow other threads */
   char GET_INT(UPLO), GET_INT(TRANS);
   integer GET_INT(N), GET_INT(K);
 
-  CREATE_NUMBERP(ALPHA);
-  CREATE_NUMBERP(BETA);
+  CREATE_NUMBER(ALPHA);
+  CREATE_NUMBER(BETA);
 
   MAT_PARAMS(A);
   MAT_PARAMS(C);
   FUN(syrk)(
     &UPLO, &TRANS,
     &N, &K,
-    pALPHA,
+    &ALPHA,
     A_data, &rows_A,
-    pBETA,
+    &BETA,
     C_data, &rows_C);
   caml_leave_blocking_section();  /* Disallow other threads */
 
   char GET_INT(UPLO), GET_INT(TRANS);
   integer GET_INT(N), GET_INT(K);
 
-  CREATE_NUMBERP(ALPHA);
-  CREATE_NUMBERP(BETA);
+  CREATE_NUMBER(ALPHA);
+  CREATE_NUMBER(BETA);
 
   MAT_PARAMS(A);
   MAT_PARAMS(B);
   FUN(syr2k)(
     &UPLO, &TRANS,
     &N, &K,
-    pALPHA,
+    &ALPHA,
     A_data, &rows_A,
     B_data, &rows_B,
-    pBETA,
+    &BETA,
     C_data, &rows_C);
   caml_leave_blocking_section();  /* Disallow other threads */
 
   MAT_PARAMS(A);
   MAT_PARAMS(B);
 
-  caml_enter_blocking_section();
+  caml_enter_blocking_section();  /* Allow other threads */
   FUN(lacpy)(
     &UPLO, &M, &N,
     A_data, &rows_A,
     B_data, &rows_B);
-  caml_leave_blocking_section();
+  caml_leave_blocking_section();  /* Disallow other threads */
 
   CAMLreturn(Val_unit);
 }
 }
 
 
+/** LASSQ */
+
+extern void FUN(lassq)(
+  integer *N,
+  NUMBER *X, integer *INCX,
+  REAL *SCALE, REAL *SUMSQ);
+
+CAMLprim value LFUN(lassq_stub)(
+  value vN,
+  value vOFSX, value vINCX, value vX,
+  value vSCALE, value vSUMSQ)
+{
+  CAMLparam1(vX);
+  CAMLlocal2(v_scl, v_smsq);
+  value v_res;
+
+  integer GET_INT(N), GET_INT(INCX);
+
+  VEC_PARAMS(X);
+
+  REAL GET_DOUBLE(SCALE), GET_DOUBLE(SUMSQ);
+
+  caml_enter_blocking_section();  /* Allow other threads */
+  FUN(lassq)(
+    &N,
+    X_data, &INCX,
+    &SCALE, &SUMSQ);
+  caml_leave_blocking_section();  /* Disallow other threads */
+
+  v_scl = caml_copy_double(SCALE);
+  v_smsq = caml_copy_double(SUMSQ);
+
+  v_res = caml_alloc_small(2, 0);
+  Field(v_res, 0) = v_scl;
+  Field(v_res, 1) = v_smsq;
+
+  CAMLreturn(v_res);
+}
+
+CAMLprim value LFUN(lassq_stub_bc)(value *argv, int argn)
+{
+  return LFUN(lassq_stub)(argv[0], argv[1], argv[2], argv[3], argv[4], argv[5]);
+}
+
+
 /** LANGE */
 
 extern REAL FUN(lange)(
     &INFO);
   caml_leave_blocking_section();  /* Disallow other threads */
 
-  CAMLreturn(Val_int(INFO));
+  CAMLreturn(Val_long(INFO));
 }
 
 CAMLprim value LFUN(getrf_stub_bc)(value *argv, int argn)
     &INFO);
   caml_leave_blocking_section();  /* Disallow other threads */
 
-  CAMLreturn(Val_int(INFO));
+  CAMLreturn(Val_long(INFO));
 }
 
 CAMLprim value LFUN(getrs_stub_bc)(value *argv, int argn)
     &INFO);
   caml_leave_blocking_section();  /* Disallow other threads */
 
-  CAMLreturn(Val_int(INFO));
+  CAMLreturn(Val_long(INFO));
 }
 
 CAMLprim value LFUN(getri_stub_bc)(value *argv, int argn)
     &INFO);
   caml_leave_blocking_section();  /* Disallow other threads */
 
-  CAMLreturn(Val_int(INFO));
+  CAMLreturn(Val_long(INFO));
 }
 
 CAMLprim value LFUN(sytrf_stub_bc)(value *argv, int argn)
     &INFO);
   caml_leave_blocking_section();  /* Disallow other threads */
 
-  CAMLreturn(Val_int(INFO));
+  CAMLreturn(Val_long(INFO));
 }
 
 CAMLprim value LFUN(sytrs_stub_bc)(value *argv, int argn)
     &INFO);
   caml_leave_blocking_section();  /* Disallow other threads */
 
-  CAMLreturn(Val_int(INFO));
+  CAMLreturn(Val_long(INFO));
 }
 
 CAMLprim value LFUN(sytri_stub_bc)(value *argv, int argn)
   FUN(potrf)(&UPLO, &N, A_data, &rows_A, &INFO);
   caml_leave_blocking_section();  /* Disallow other threads */
 
-  CAMLreturn(Val_int(INFO));
+  CAMLreturn(Val_long(INFO));
 }
 
 /** POTRS */
     &INFO);
   caml_leave_blocking_section();  /* Disallow other threads */
 
-  CAMLreturn(Val_int(INFO));
+  CAMLreturn(Val_long(INFO));
 }
 
 CAMLprim value LFUN(potrs_stub_bc)(value *argv, int argn)
   FUN(potri)(&UPLO, &N, A_data, &rows_A, &INFO);
   caml_leave_blocking_section();  /* Disallow other threads */
 
-  CAMLreturn(Val_int(INFO));
+  CAMLreturn(Val_long(INFO));
 }
 
 /** TRTRS */
     &INFO);
   caml_leave_blocking_section();  /* Disallow other threads */
 
-  CAMLreturn(Val_int(INFO));
+  CAMLreturn(Val_long(INFO));
 }
 
 CAMLprim value LFUN(trtrs_stub_bc)(value *argv, int argn)
   FUN(trtri)(&UPLO, &DIAG, &N, A_data, &rows_A, &INFO);
   caml_leave_blocking_section();  /* Disallow other threads */
 
-  CAMLreturn(Val_int(INFO));
+  CAMLreturn(Val_long(INFO));
 }
 
 CAMLprim value LFUN(trtri_stub_bc)(value *argv, int argn)
     &INFO);
   caml_leave_blocking_section();  /* Disallow other threads */
 
-  CAMLreturn(Val_int(INFO));
+  CAMLreturn(Val_long(INFO));
 }
 
 CAMLprim value LFUN(gesv_stub_bc)(value *argv, int argn)
     &INFO);
   caml_leave_blocking_section();  /* Disallow other threads */
 
-  CAMLreturn(Val_int(INFO));
+  CAMLreturn(Val_long(INFO));
 }
 
 CAMLprim value LFUN(gbsv_stub_bc)(value *argv, int argn)
     &INFO);
   caml_leave_blocking_section();  /* Disallow other threads */
 
-  CAMLreturn(Val_int(INFO));
+  CAMLreturn(Val_long(INFO));
 }
 
 CAMLprim value LFUN(gtsv_stub_bc)(value *argv, int argn)
     &INFO);
   caml_leave_blocking_section();  /* Disallow other threads */
 
-  CAMLreturn(Val_int(INFO));
+  CAMLreturn(Val_long(INFO));
 }
 
 CAMLprim value LFUN(posv_stub_bc)(value *argv, int argn)
     &INFO);
   caml_leave_blocking_section();  /* Disallow other threads */
 
-  CAMLreturn(Val_int(INFO));
+  CAMLreturn(Val_long(INFO));
 }
 
 CAMLprim value LFUN(ppsv_stub_bc)(value *argv, int argn)
     &INFO);
   caml_leave_blocking_section();  /* Disallow other threads */
 
-  CAMLreturn(Val_int(INFO));
+  CAMLreturn(Val_long(INFO));
 }
 
 CAMLprim value LFUN(pbsv_stub_bc)(value *argv, int argn)
     &INFO);
   caml_leave_blocking_section();  /* Disallow other threads */
 
-  CAMLreturn(Val_int(INFO));
+  CAMLreturn(Val_long(INFO));
 }
 
 CAMLprim value LFUN(ptsv_stub_bc)(value *argv, int argn)
     &INFO);
   caml_leave_blocking_section();  /* Disallow other threads */
 
-  CAMLreturn(Val_int(INFO));
+  CAMLreturn(Val_long(INFO));
 }
 
 CAMLprim value LFUN(sysv_stub_bc)(value *argv, int argn)
     &INFO);
   caml_leave_blocking_section();  /* Disallow other threads */
 
-  CAMLreturn(Val_int(INFO));
+  CAMLreturn(Val_long(INFO));
 }
 
 CAMLprim value LFUN(spsv_stub_bc)(value *argv, int argn)
     &INFO);
   caml_leave_blocking_section();  /* Disallow other threads */
 
-  CAMLreturn(Val_int(INFO));
+  CAMLreturn(Val_long(INFO));
 }
 
 CAMLprim value LFUN(gels_stub_bc)(value *argv, int argn)
 }
 
 
-/** NRM2 */
-
-extern REAL FUN(nrm2)(integer *N, REAL *X, integer *INCX);
-
-CAMLprim value LFUN(nrm2_stub)(value vN, value vOFSX, value vINCX, value vX)
-{
-  CAMLparam1(vX);
-
-  integer GET_INT(N),
-          GET_INT(INCX);
-
-  REAL res;
-
-  VEC_PARAMS(X);
-
-  caml_enter_blocking_section();  /* Allow other threads */
-  res = FUN(nrm2)(&N, X_data, &INCX);
-  caml_leave_blocking_section();  /* Disallow other threads */
-
-  CAMLreturn(caml_copy_double(res));
-}
-
-
 /** ASUM */
 
 extern REAL FUN(asum)(integer *N, REAL *X, integer *INCX);
       argv[7], argv[8], argv[9], argv[10], argv[11], argv[12], argv[13]);
 }
 
+
+/** GER */
+
+extern void FUN(ger)(
+  integer *M,
+  integer *N,
+  REAL *ALPHA,
+  REAL *X, integer *INCX,
+  REAL *Y, integer *INCY,
+  REAL *A, integer *LDA);
+
+CAMLprim value LFUN(ger_stub)(
+  value vM, value vN,
+  value vALPHA,
+  value vOFSX, value vINCX, value vX,
+  value vOFSY, value vINCY, value vY,
+  value vAR, value vAC, value vA)
+{
+  CAMLparam3(vA, vX, vY);
+
+  integer GET_INT(M), GET_INT(N), GET_INT(INCX), GET_INT(INCY);
+
+  REAL GET_DOUBLE(ALPHA);
+
+  MAT_PARAMS(A);
+  VEC_PARAMS(X);
+  VEC_PARAMS(Y);
+
+  caml_enter_blocking_section();  /* Allow other threads */
+  FUN(ger)(
+    &M,
+    &N,
+    &ALPHA,
+    X_data, &INCX,
+    Y_data, &INCY,
+    A_data, &rows_A);
+  caml_leave_blocking_section();  /* Disallow other threads */
+
+  CAMLreturn(Val_unit);
+}
+
+CAMLprim value LFUN(ger_stub_bc)(value *argv, int argn)
+{
+  return
+    LFUN(ger_stub)(
+      argv[0], argv[1], argv[2], argv[3], argv[4], argv[5], argv[6],
+      argv[7], argv[8], argv[9], argv[10], argv[11]);
+}
+
+
 /** SYR */
 
 extern void FUN(syr)(
 
   v_rcond = caml_copy_double(RCOND);
   v_res = caml_alloc_small(2, 0);
-  Field(v_res, 0) = Val_int(INFO);
+  Field(v_res, 0) = Val_long(INFO);
   Field(v_res, 1) = v_rcond;
 
   CAMLreturn(v_res);
 
   v_rcond = caml_copy_double(RCOND);
   v_res = caml_alloc_small(2, 0);
-  Field(v_res, 0) = Val_int(INFO);
+  Field(v_res, 0) = Val_long(INFO);
   Field(v_res, 1) = v_rcond;
 
   CAMLreturn(v_res);
 
   v_rcond = caml_copy_double(RCOND);
   v_res = caml_alloc_small(2, 0);
-  Field(v_res, 0) = Val_int(INFO);
+  Field(v_res, 0) = Val_long(INFO);
   Field(v_res, 1) = v_rcond;
 
   CAMLreturn(v_res);
   caml_leave_blocking_section();  /* Disallow other threads */
 
   v_res = caml_alloc_small(2, 0);
-  Field(v_res, 0) = Val_int(INFO);
-  Field(v_res, 1) = Val_int(RANK);
+  Field(v_res, 0) = Val_long(INFO);
+  Field(v_res, 1) = Val_long(RANK);
 
   CAMLreturn(v_res);
 }
   caml_leave_blocking_section();  /* Disallow other threads */
 
   v_res = caml_alloc_small(2, 0);
-  Field(v_res, 0) = Val_int(INFO);
-  Field(v_res, 1) = Val_int(RANK);
+  Field(v_res, 0) = Val_long(INFO);
+  Field(v_res, 1) = Val_long(RANK);
 
   CAMLreturn(v_res);
 }
   caml_leave_blocking_section();  /* Disallow other threads */
 
   v_res = caml_alloc_small(2, 0);
-  Field(v_res, 0) = Val_int(INFO);
-  Field(v_res, 1) = Val_int(RANK);
+  Field(v_res, 0) = Val_long(INFO);
+  Field(v_res, 1) = Val_long(RANK);
 
   CAMLreturn(v_res);
 }
     &INFO);
   caml_leave_blocking_section(); /* Disallow other threads */
 
-  CAMLreturn(Val_int(INFO));
+  CAMLreturn(Val_long(INFO));
 }
 
 CAMLprim value LFUN(gesvd_stub_bc)(value *argv, int argn)
     &INFO);
   caml_leave_blocking_section(); /* Disallow other threads */
 
-  CAMLreturn(Val_int(INFO));
+  CAMLreturn(Val_long(INFO));
 }
 
 CAMLprim value LFUN(gesdd_stub_bc)(value *argv, int argn)
     &INFO);
   caml_leave_blocking_section(); /* Disallow other threads */
 
-  CAMLreturn(Val_int(INFO));
+  CAMLreturn(Val_long(INFO));
 }
 
 CAMLprim value LFUN(geev_stub_bc)(value *argv, int argn)
     &INFO);
   caml_leave_blocking_section();  /* Disallow other threads */
 
-  CAMLreturn(Val_int(INFO));
+  CAMLreturn(Val_long(INFO));
 }
 
 CAMLprim value LFUN(syev_stub_bc)(value *argv, int argn)
     &INFO);
   caml_leave_blocking_section();  /* Disallow other threads */
 
-  CAMLreturn(Val_int(INFO));
+  CAMLreturn(Val_long(INFO));
 }
 
 CAMLprim value LFUN(syevd_stub_bc)(value *argv, int argn)
   caml_leave_blocking_section();  /* Disallow other threads */
 
   v_res = caml_alloc_small(2, 0);
-  Field(v_res, 0) = Val_int(INFO);
-  Field(v_res, 1) = Val_int(M);
+  Field(v_res, 0) = Val_long(INFO);
+  Field(v_res, 1) = Val_long(M);
 
   CAMLreturn(v_res);
 }
   ftnlen NAME_LEN = caml_string_length(vNAME),
          OPTS_LEN = caml_string_length(vOPTS);
 
-  return Val_int (ilaenv_(&ISPEC, NAME, OPTS,
+  return Val_long(ilaenv_(&ISPEC, NAME, OPTS,
                           &N1, &N2, &N3, &N4, NAME_LEN, OPTS_LEN));
 }
 

lib/lacaml_macros.h

 #include <caml/bigarray.h>
 #include <caml/signals.h>
 
+#include "utils_c.h"
+
 /* Defines precision-dependent macros */
 #ifndef LACAML_DOUBLE           /* Single precision */
 
 #define FABS fabsf
 
 #ifndef LACAML_COMPLEX          /* Real number */
-#define COPY_NUMBER(x) caml_copy_double(x)
-#define NUMBER_ZERO 0
-#define NUMBER_ONE 1
 #define FUN(name) s##name##_
 #define FUN2(prefix,name) prefix##s##name##_ /* -> IxAMAX */
 #define LFUN(name) lacaml_S##name
-
-#define CREATE_NUMBERP(name) \
-  real name = Double_val(v##name); \
-  real *p##name = &name
-
-#define INIT_NUMBER(name)
-
+#define CREATE_NUMBER(name) real name = Double_val(v##name)
 #else                           /* Complex number */
-#define COPY_NUMBER(x) copy_two_doubles(x.r, x.i)
-#define NUMBER_ZERO { 0, 0 }
-#define NUMBER_ONE { 1, 0 }
-
 #define FUN(name) c##name##_
 #define FUN2(prefix,name) prefix##c##name##_ /* -> IxAMAX */
 #define LFUN(name) lacaml_C##name
-
-#define CREATE_NUMBERP(name) \
-  complex name; \
-  complex *p##name = &name
-
-#define INIT_NUMBER(name) \
-  name.r = Double_field(v##name, 0); \
-  name.i = Double_field(v##name, 1)
-
+#define CREATE_NUMBER(name) complex name
 #endif  /* LACAML_COMPLEX */
 
 #else                           /* Double precision */
 #define FABS fabs
 
 #ifndef LACAML_COMPLEX          /* Real number */
-#define COPY_NUMBER(x) caml_copy_double(x)
-#define NUMBER_ZERO 0
-#define NUMBER_ONE 1
 #define FUN(name) d##name##_
 #define FUN2(prefix,name) prefix##d##name##_ /* -> IxAMAX */
 #define LFUN(name) lacaml_D##name
-
-#define CREATE_NUMBERP(name) \
-  doublereal name; \
-  doublereal *p##name = &name
-
-#define INIT_NUMBER(name) \
-  name = Double_val(v##name)
-
+#define CREATE_NUMBER(name) doublereal name
 #else                           /* Complex number */
-#define COPY_NUMBER(x) copy_two_doubles(x.r, x.i)
-#define NUMBER_ZERO { 0, 0 }
-#define NUMBER_ONE { 1, 0 }
 #define FUN(name) z##name##_
 #define FUN2(prefix,name) prefix##z##name##_ /* -> IxAMAX */
 #define LFUN(name) lacaml_Z##name
-
-#define CREATE_NUMBERP(name) \
-  doublecomplex name; \
-  doublecomplex *p##name = &name
-
-#define INIT_NUMBER(name) \
-  name.r = Double_field(v##name, 0); \
-  name.i = Double_field(v##name, 1)
-
+#define CREATE_NUMBER(name) doublecomplex name
 #endif  /* LACAML_COMPLEX */
 
 #endif  /* LACAML_DOUBLE */
 
-/* Defines kind of number */
+/* Real vs. imaginery */
 #ifndef LACAML_COMPLEX          /* Real number */
 #define NUMBER REAL
+#define COPY_NUMBER(x) caml_copy_double(x)
+#define NUMBER_ZERO 0
+#define NUMBER_ONE 1
+#define NUMBER_MINUS_ONE (-1)
+#define NUMBER_EQUAL(X, Y) (X == Y)
+#define INIT_NUMBER(name) name = Double_val(v##name)
+#define DOTU FUN(dot)
+#define ADD_NUMBER(X, Y) (X + Y)
+#define SUB_NUMBER(X, Y) (X - Y)
+#define MUL_NUMBER(X, Y) (X * Y)
+#define MUL_NUMBERP(X, Y) (*X * *Y)
+#define NEG_NUMBER(X) (-X)
 #else                           /* Complex number */
 #define NUMBER COMPLEX
-#endif  /* LACAML_COMPLEX */
+#define COPY_NUMBER(x) copy_two_doubles(x.r, x.i)
+#define NUMBER_ZERO { 0, 0 }
+#define NUMBER_ONE { 1, 0 }
+#define NUMBER_MINUS_ONE { -1, 0 }
+#define NUMBER_EQUAL(X, Y) (X.r == Y.r && X.i == Y.i)
+#define INIT_NUMBER(name) \
+  name.r = Double_field(v##name, 0); \
+  name.i = Double_field(v##name, 1)
+#define DOTU FUN(dotu)
+#define DOTC FUN(dotc)
+#define ADD_NUMBER(X, Y) ((NUMBER) { X.r + Y.r, X.i + Y.i })
+#define SUB_NUMBER(X, Y) ((NUMBER) { X.r - Y.r, X.i - Y.i })
+#define MUL_NUMBER(X, Y) \
+  ((NUMBER) { X.r * Y.r - X.i * Y.i, X.r * Y.i + X.i * Y.r })
+#define MUL_NUMBERP(X, Y) \
+  ((NUMBER) { X->r * Y->r - X->i * Y->i, X->r * Y->i + X->i * Y->r })
+#define NEG_NUMBER(X) ((NUMBER) { -X.r, -X.i })
+#define COMLEX_CONJ(X) ((NUMBER) { X.r, -X.i })
+#endif
 
 /* Fetch integer parameters */
-#define GET_INT(V) V = Int_val(v##V)
+#define GET_INT(V) V = Long_val(v##V)
 
 /* Fetch double parameters */
 #define GET_DOUBLE(V) V = Double_val(v##V)
 #define MAT_PARAMS(M) \
   struct caml_ba_array *big_##M = Caml_ba_array_val(v##M); \
   long *dims_##M = big_##M->dim; \
-  integer M##R = Int_val(v##M##R); \
-  integer M##C = Int_val(v##M##C); \
+  integer M##R = Long_val(v##M##R); \
+  integer M##C = Long_val(v##M##C); \
   integer rows_##M = *dims_##M++; \
   integer cols_##M = *dims_##M; \
   NUMBER *M##_data = ((NUMBER *) big_##M->data) + M##R + rows_##M*(M##C - 1) - 1
 #define VEC_PARAMS(V) \
   struct caml_ba_array *big_##V = Caml_ba_array_val(v##V); \
   integer dim_##V = *big_##V->dim; \
-  NUMBER *V##_data = ((NUMBER *) big_##V->data) + (Int_val(vOFS##V) - 1)
+  NUMBER *V##_data = ((NUMBER *) big_##V->data) + (Long_val(vOFS##V) - 1)
 
 /* Fetch vector parameters from real bigarray */
 #define RVEC_PARAMS(V) \
   struct caml_ba_array *big_##V = Caml_ba_array_val(v##V); \
   integer dim_##V = *big_##V->dim; \
-  REAL *V##_data = ((REAL *) big_##V->data) + (Int_val(vOFS##V) - 1)
+  REAL *V##_data = ((REAL *) big_##V->data) + (Long_val(vOFS##V) - 1)
 
 /* Fetch vector parameters from bigarray with offset 1 */
 #define VEC_PARAMS1(V) \
 
 /* Split an integer couple (int * int) into two ints */
 #define INT_COUPLE(V) \
-  integer V##1 = Int_val(Field(v##V, 0)); \
-  integer V##2 = Int_val(Field(v##V, 1))
+  integer V##1 = Long_val(Field(v##V, 0)); \
+  integer V##2 = Long_val(Field(v##V, 1))
 
 #endif  /* LACAML_MACROS */
     done;
     ar
 
-let diag mat =
-  let m = dim1 mat in
-  let n = dim2 mat in
-  let n_diag = min m n in
-  let vec = Array1.create prec fortran_layout n_diag in
-  for i = 1 to n_diag do vec.{i} <- mat.{i, i} done;
-  vec
-
 let col (mat : mat) c = Array2.slice_right mat c
 
 let copy_row ?vec mat r =
     done;
   a
 
+let copy_diag mat =
+  let m = dim1 mat in
+  let n = dim2 mat in
+  let n_diag = min m n in
+  let vec = Array1.create prec fortran_layout n_diag in
+  for i = 1 to n_diag do vec.{i} <- mat.{i, i} done;
+  vec
+
+let trace mat =
+  let m = dim1 mat in
+  let n = dim2 mat in
+  let n_diag = min m n in
+  let trace_ref = ref zero in
+  for i = 1 to n_diag do
+    trace_ref := add !trace_ref mat.{i, i}
+  done;
+  !trace_ref
+
 external direct_scal_mat :
   m : int ->
   n : int ->
   y
 
 external direct_syrk_diag :
-  trans : trans3 ->
+  trans : char ->
   n : int ->
   k : int ->
   ar : int ->
   let n = get_rows_mat_tr loc a_str a ar ac trans n_str n in
   let k = get_cols_mat_tr loc a_str a ar ac trans k_str k in
   let y = get_vec loc y_str y ofsy 1 n vec_create in
+  let trans = get_trans_char trans in
   direct_syrk_diag ~trans ~n ~k ~ar ~ac ~a ~ofsy ~y ~alpha ~beta;
   y
 
   let transb = get_trans_char transb in
   direct_gemm_trace ~transa ~transb ~n ~k ~ar ~ac ~a ~br ~bc ~b
 
+external direct_syrk_trace :
+  n : int ->
+  k : int ->
+  ar : int ->
+  ac : int ->
+  a : mat ->
+  num_type = "lacaml_NPRECsyrk_trace_stub"
+
+let syrk_trace ?n ?k ?(ar = 1) ?(ac = 1) a =
+  let loc = "Lacaml.Impl.NPREC.Mat.syrk_trace" in
+  let n = get_dim1_mat loc a_str a ar n_str n in
+  let k = get_dim2_mat loc a_str a ac k_str k in
+  direct_syrk_trace ~n ~k ~ar ~ac ~a
+
+external direct_symm2_trace :
+  n : int ->
+  uploa : char ->
+  ar : int ->
+  ac : int ->
+  a : mat ->
+  uplob : char ->
+  br : int ->
+  bc : int ->
+  b : mat ->
+  num_type = "lacaml_NPRECsymm2_trace_stub_bc" "lacaml_NPRECsymm2_trace_stub"
+
+let symm2_trace
+  ?n
+  ?(upa = true) ?(ar = 1) ?(ac = 1) a
+  ?(upb = true) ?(br = 1) ?(bc = 1) b =
+  let loc = "Lacaml.Impl.NPREC.Mat.symm2_trace" in
+  let n = get_n_of_square a_str loc ar ac a n in
+  let n = get_n_of_square b_str loc br bc b (Some n) in
+  let uploa = get_uplo_char upa in
+  let uplob = get_uplo_char upb in
+  direct_symm2_trace ~n ~uploa ~ar ~ac ~a ~uplob ~br ~bc ~b
+
 
 (* Iterators over matrices *)
 
 (** Matrix operations *)
 
 open Bigarray
+open Common
 open Numberxx
 
 (** {6 Creation of matrices and accessors} *)
 val of_diag : vec -> mat
 (** [of_diag v] @return the diagonal matrix with diagonals elements from [v]. *)
 
-val diag : mat -> vec
-(** [diag m] @return the diagonal of matrix [m] as a vector.  If [m]
-    is not a square matrix, the longest possible sequence of diagonal
-    elements will be returned. *)
-
 val dim1 : mat -> int
 (** [dim1 m] @return the first dimension of matrix [m] (number of rows). *)
 
 
 (** {6 Arithmetic and other matrix operations} *)
 
+val copy_diag : mat -> vec
+(** [copy_diag m] @return the diagonal of matrix [m] as a vector.
+    If [m] is not a square matrix, the longest possible sequence
+    of diagonal elements will be returned. *)
+
+val trace : mat -> num_type
+(** [trace m] @return the trace of matrix [m].  If [m] is not a
+    square matrix, the sum of the longest possible sequence of
+    diagonal elements will be returned. *)
+
 val scal :
   ?m : int -> ?n : int -> num_type -> ?ar : int -> ?ac : int -> mat -> unit
 (** [scal ?m ?n alpha ?ar ?ac a] BLAS [scal] function for (sub-)matrices. *)
   ?beta : num_type ->
   ?ofsy : int ->
   ?y : vec ->
-  ?trans : trans3 ->
+  ?trans : trans2 ->
   ?alpha : num_type ->
   ?ar : int ->
   ?ac : int ->
   mat ->
   vec
-(** [syrk_diag ?n ?k ?beta ?ofsy ?y ?trans ?alpha ?ar ?ac a] computes
-    the diagonal of the symmetric rank-k product of the (sub-)matrix
-    [a], multiplying it with [alpha] and adding [beta] times [y],
-    storing the result in [y] starting at the specified offset.
-    [n] elements of the diagonal will be computed, and [k] elements
-    of the matrix will be part of the dot product associated with
-    each diagonal element.
+(** [syrk_diag ?n ?k ?beta ?ofsy ?y ?trans ?alpha ?ar ?ac a]
+    computes the diagonal of the symmetric rank-k product of the
+    (sub-)matrix [a], multiplying it with [alpha] and adding [beta]
+    times [y], storing the result in [y] starting at the specified
+    offset.  [n] elements of the diagonal will be computed, and [k]
+    elements of the matrix will be part of the dot product associated
+    with each diagonal element.
 
     @param n default = number of rows of [a] (or tr[a])
     @param k default = number of columns of [a] (or tr[a])
   num_type
 (** [gemm_trace ?n ?k ?transa ?ar ?ac a ?transb ?br ?bc b] computes
     the trace of the product of the (sub-)matrices [a] and [b]
-    (taking into account potential transposing) [n] elements of the
-    diagonal will be computed, and [k] elements of the matrices
-    will be part of the dot product associated with each diagonal
-    element.
+    (taking into account potential transposing).  [n] is the number
+    of rows (columns) to consider in [a], and [k] the number of
+    columns (rows) in [b].
 
     @param n default = number of rows of [a] (or tr [a]) and
                        number of columns of [b] (or tr [b])
     @param bc default = [1]
 *)
 
+val syrk_trace :
+  ?n : int ->
+  ?k : int ->
+  ?ar : int ->
+  ?ac : int ->
+  mat ->
+  num_type
+(** [syrk_trace ?n ?k ?ar ?ac a] computes the trace of either [a' * a]
+    or [a * a'], whichever is more efficient (results are identical),
+    of the (sub-)matrix [a] multiplied by its own transpose.  [n]
+    is the number of rows to consider in [a], and [k] the number
+    of columns to consider.
+
+    @param n default = number of rows of [a]
+    @param k default = number of columns of [a]
+    @param ar default = [1]
+    @param ac default = [1]
+*)
+
+val symm2_trace :
+  ?n : int ->
+  ?upa : bool ->
+  ?ar : int ->
+  ?ac : int ->
+  mat ->
+  ?upb : bool ->
+  ?br : int ->
+  ?bc : int ->
+  mat ->
+  num_type
+(** [symm2_trace ?n ?upa ?ar ?ac a ?upb ?br ?bc b] computes the
+    trace of the product of the symmetric (sub-)matrices [a] and
+    [b].  [n] is the number of rows and columns to consider in [a]
+    and [b].
+
+    @param n default = dimensions of [a] and [b]
+    @param upa default = true (upper triangular portion of [a] is accessed)
+    @param ar default = [1]
+    @param ac default = [1]