Commits

Jack Poulson committed 80fde4d

Updating to Clique revision 203:fb66a0187a6e.

Comments (0)

Files changed (139)

external/clique/CMakeLists.txt

 set(Clique_VERSION_MINOR 1)
 
 option(BUILD_TESTS "Build a collection of test executables" OFF)
-option(USE_CUSTOM_ALLTOALLV_FOR_FACT 
-       "Avoid MPI_Alltoallv in factorization for performance reasons" ON)
-option(USE_CUSTOM_ALLTOALLV_FOR_MULT
-       "Avoid MPI_Alltoallv in multiplication for performance reasons" ON)
-option(USE_CUSTOM_ALLTOALLV_FOR_SOLVE
-       "Avoid MPI_Alltoallv in solve for performance reasons" ON)
+option(USE_CUSTOM_ALLTOALLV "Avoid MPI_Alltoallv for performance reasons" ON)
 
 # Add Elemental, but don't worry about the Hermitian eigensolver
 option(BUILD_PMRRR "Build the parallel tridiagonal eigensolver" OFF)
 ################################################################################
 # Uncomment if including Clique as a subproject in another build system        #
 ################################################################################
-set(USE_CUSTOM_ALLTOALLV_FOR_FACT ${USE_CUSTOM_ALLTOALLV_FOR_FACT} PARENT_SCOPE)
-set(USE_CUSTOM_ALLTOALLV_FOR_MULT ${USE_CUSTOM_ALLTOALLV_FOR_MULT} PARENT_SCOPE)
-set(USE_CUSTOM_ALLTOALLV_FOR_SOLVE ${USE_CUSTOM_ALLTOALLV_FOR_SOLVE} PARENT_SCOPE)
+set(USE_CUSTOM_ALLTOALLV ${USE_CUSTOM_ALLTOALLV} PARENT_SCOPE)
 
 set(LIBRARY_TYPE ${LIBRARY_TYPE} PARENT_SCOPE)
 set(MPI_C_COMPILER ${MPI_C_COMPILER} PARENT_SCOPE)

external/clique/external/elemental/CMakeLists.txt

 
   set(CORE_EXAMPLES Constructors)
   set(BLAS_EXAMPLES Gemv)
-  set(LAPACK_EXAMPLES GaussianElimination HouseholderSolve HPDInverse LDL 
-                      LDLInverse Polar Pseudoinverse QDWH SequentialQR 
-                      SequentialSVD SVD)
+  set(LAPACK_EXAMPLES GaussianElimination HermitianQDWH HouseholderSolve 
+                      HPDInverse LDL LDLInverse Polar Pseudoinverse QDWH 
+                      SequentialQR QR SequentialSVD SimpleSVD SVD)
   set(SPECIAL_EXAMPLES Cauchy CauchyLike Circulant Diagonal DiscreteFourier 
                        Hankel HermitianUniformSpectrum Hilbert Identity 
                        NormalUniformSpectrum Ones OneTwoOne Toeplitz Uniform

external/clique/external/elemental/doc/source/core/dist_matrix.rst

 
       Many of the following routines are only valid for complex datatypes.
 
-   .. cpp:function:: typename RealBase<T>::type GetReal( int i, int j ) const
+   .. cpp:function:: typename Base<T>::type GetRealPart( int i, int j ) const
 
       Return the real part of the ``(i,j)`` entry of the global matrix. This
       operation is collective.
 
-   .. cpp:function:: typename RealBase<T>::type GetImag( int i, int j ) const
+   .. cpp:function:: typename Base<T>::type GetImagPart( int i, int j ) const
 
       Return the imaginary part of the ``(i,j)`` entry of the global matrix. 
       This operation is collective.
 
-   .. cpp:function:: void SetReal( int i, int j, typename RealBase<T>::type alpha )
+   .. cpp:function:: void SetRealPart( int i, int j, typename Base<T>::type alpha )
 
       Set the real part of the ``(i,j)`` entry of the global matrix to 
       :math:`\alpha`.
 
-   .. cpp:function:: void SetImag( int i, int j, typename RealBase<T>::type alpha )
+   .. cpp:function:: void SetImagPart( int i, int j, typename Base<T>::type alpha )
 
       Set the imaginary part of the ``(i,j)`` entry of the global matrix to 
       :math:`\alpha`.
 
-   .. cpp:function:: void UpdateReal( int i, int j, typename RealBase<T>::type alpha )
+   .. cpp:function:: void UpdateRealPart( int i, int j, typename Base<T>::type alpha )
 
       Add :math:`\alpha` to the real part of the ``(i,j)`` entry of the global 
       matrix.
 
-   .. cpp:function:: void UpdateImag( int i, int j, typename RealBase<T>::type alpha )
+   .. cpp:function:: void UpdateImagPart( int i, int j, typename Base<T>::type alpha )
 
       Add :math:`\alpha` to the imaginary part of the ``(i,j)`` entry of the 
       global matrix.
 
-   .. cpp:function:: typename RealBase<T>::type GetRealLocal( int iLocal, int jLocal ) const
+   .. cpp:function:: typename Base<T>::type GetRealPartLocal( int iLocal, int jLocal ) const
 
       Return the real part of the ``(iLocal,jLocal)`` entry of our local matrix.
 
-   .. cpp:function:: typename RealBase<T>::type GetImagLocal( int iLocal, int jLocal ) const
+   .. cpp:function:: typename Base<T>::type GetLocalImagPart( int iLocal, int jLocal ) const
 
       Return the imaginary part of the ``(iLocal,jLocal)`` entry of our local 
       matrix.
 
-   .. cpp:function:: void SetRealLocal( int iLocal, int jLocal, typename RealBase<T>::type alpha )
+   .. cpp:function:: void SetLocalRealPart( int iLocal, int jLocal, typename Base<T>::type alpha )
 
       Set the real part of the ``(iLocal,jLocal)`` entry of our local matrix.
 
-   .. cpp:function:: void SetImagLocal( int iLocal, int jLocal, typename RealBase<T>::type alpha )
+   .. cpp:function:: void SetLocalImagPart( int iLocal, int jLocal, typename Base<T>::type alpha )
 
       Set the imaginary part of the ``(iLocal,jLocal)`` entry of our local 
       matrix.
 
-   .. cpp:function:: void UpdateRealLocal( int iLocal, int jLocal, typename RealBase<T>::type alpha )
+   .. cpp:function:: void UpdateRealPartLocal( int iLocal, int jLocal, typename Base<T>::type alpha )
 
       Add :math:`\alpha` to the real part of the ``(iLocal,jLocal)`` entry of 
       our local matrix.
 
-   .. cpp:function:: void UpdateImagLocal( int iLocal, int jLocal, typename RealBase<T>::type alpha )
+   .. cpp:function:: void UpdateLocalImagPart( int iLocal, int jLocal, typename Base<T>::type alpha )
 
       Add :math:`\alpha` to the imaginary part of the ``(iLocal,jLocal)`` entry 
       of our local matrix.
       are analogous to their general counterparts from above in the obvious 
       manner.
 
-   .. cpp:function:: void GetRealDiagonal( DistMatrix<typename RealBase<T>::type,MD,STAR>& d, int offset=0 ) const
+   .. cpp:function:: void GetRealPartOfDiagonal( DistMatrix<typename Base<T>::type,MD,STAR>& d, int offset=0 ) const
 
-   .. cpp:function:: void GetImagDiagonal( DistMatrix<typename RealBase<T>::type,MD,STAR>& d, int offset=0 ) const
+   .. cpp:function:: void GetImagPartOfDiagonal( DistMatrix<typename Base<T>::type,MD,STAR>& d, int offset=0 ) const
 
-   .. cpp:function:: void GetRealDiagonal( DistMatrix<typename RealBase<T>::type,STAR,MD>& d, int offset=0 ) const
+   .. cpp:function:: void GetRealPartOfDiagonal( DistMatrix<typename Base<T>::type,STAR,MD>& d, int offset=0 ) const
 
-   .. cpp:function:: void GetImagDiagonal( DistMatrix<typename RealBase<T>::type,STAR,MD>& d, int offset=0 ) const
+   .. cpp:function:: void GetImagPartOfDiagonal( DistMatrix<typename Base<T>::type,STAR,MD>& d, int offset=0 ) const
 
-   .. cpp:function:: void SetRealDiagonal( const DistMatrix<typename RealBase<T>::type,MD,STAR>& d, int offset=0 )
+   .. cpp:function:: void SetRealPartOfDiagonal( const DistMatrix<typename Base<T>::type,MD,STAR>& d, int offset=0 )
 
-   .. cpp:function:: void SetImagDiagonal( const DistMatrix<typename RealBase<T>::type,MD,STAR>& d, int offset=0 )
+   .. cpp:function:: void SetImagPartOfDiagonal( const DistMatrix<typename Base<T>::type,MD,STAR>& d, int offset=0 )
 
-   .. cpp:function:: void SetRealDiagonal( const DistMatrix<typename RealBase<T>::type,STAR,MD>& d, int offset=0 )
+   .. cpp:function:: void SetRealPartOfDiagonal( const DistMatrix<typename Base<T>::type,STAR,MD>& d, int offset=0 )
 
-   .. cpp:function:: void SetImagDiagonal( const DistMatrix<typename RealBase<T>::type,STAR,MD>& d, int offset=0 )
+   .. cpp:function:: void SetImagPartOfDiagonal( const DistMatrix<typename Base<T>::type,STAR,MD>& d, int offset=0 )
 
    .. rubric:: Alignment
 

external/clique/external/elemental/doc/source/core/environment.rst

    
       |\alpha|_{\mbox{fast}} = |\mathcal{R}(\alpha)| + |\mathcal{I}(\alpha)|
 
-.. cpp:function:: Z Real( const Z& alpha )
+.. cpp:function:: Z RealPart( const Z& alpha )
 
    Return the real part of the real variable :math:`\alpha`, which is 
    :math:`\alpha` itself. 
 
-.. cpp:function:: Z Real( const Complex<Z>& alpha )
+.. cpp:function:: Z RealPart( const Complex<Z>& alpha )
 
    Return the real part of the complex variable :math:`\alpha`.
 
-.. cpp:function:: Z Imag( const Z& alpha )
+.. cpp:function:: Z ImagPart( const Z& alpha )
 
    Return the imaginary part of the real variable :math:`\alpha`, which is 
    trivially zero.
 
-.. cpp:function:: Z Imag( const Complex<Z>& alpha )
+.. cpp:function:: Z ImagPart( const Complex<Z>& alpha )
 
    Return the imaginary part of the complex variable :math:`\alpha`.
 

external/clique/external/elemental/doc/source/core/matrix.rst

 
       Many of the following routines are only valid for complex datatypes.
 
-   .. cpp:function:: typename Base<T>::type GetReal( int i, int j ) const
+   .. cpp:function:: typename Base<T>::type GetRealPart( int i, int j ) const
 
       Return the real part of entry :math:`(i,j)`.
 
-   .. cpp:function:: typename Base<T>::type GetImag( int i, int j ) const
+   .. cpp:function:: typename Base<T>::type GetImagPart( int i, int j ) const
 
       Return the imaginary part of entry :math:`(i,j)`.
 
-   .. cpp:function:: void SetReal( int i, int j, typename Base<T>::type alpha )
+   .. cpp:function:: void SetRealPart( int i, int j, typename Base<T>::type alpha )
 
       Set the real part of entry :math:`(i,j)` to :math:`\alpha`.
 
-   .. cpp:function:: void SetImag( int i, int j, typename Base<T>::type alpha )
+   .. cpp:function:: void SetImagPart( int i, int j, typename Base<T>::type alpha )
 
       Set the imaginary part of entry :math:`(i,j)` to :math:`\alpha`.
 
-   .. cpp:function:: void UpdateReal( int i, int j, typename Base<T>::type alpha )
+   .. cpp:function:: void UpdateRealPart( int i, int j, typename Base<T>::type alpha )
 
       Add :math:`\alpha` to the real part of entry :math:`(i,j)`.
 
-   .. cpp:function:: void UpdateImag( int i, int j, typename Base<T>::type alpha ) 
+   .. cpp:function:: void UpdateImagPart( int i, int j, typename Base<T>::type alpha ) 
 
       Add :math:`\alpha` to the imaginary part of entry :math:`(i,j)`.
 
-   .. cpp:function:: void GetRealDiagonal( Matrix<typename Base<T>::type>& d, int offset=0 ) const
+   .. cpp:function:: void GetRealPartOfDiagonal( Matrix<typename Base<T>::type>& d, int offset=0 ) const
 
       Modify :math:`d` into a column-vector containing the real parts of the
       entries in the ``offset`` diagonal.
 
-   .. cpp:function:: void GetImagDiagonal( Matrix<typename Base<T>::type>& d, int offset=0 ) const
+   .. cpp:function:: void GetImagPartOfDiagonal( Matrix<typename Base<T>::type>& d, int offset=0 ) const
 
       Modify :math:`d` into a column-vector containing the imaginary parts of 
       the entries in the ``offset`` diagonal.
 
-   .. cpp:function:: void SetRealDiagonal( const Matrix<typename Base<T>::type>& d, int offset=0 )
+   .. cpp:function:: void SetRealPartOfDiagonal( const Matrix<typename Base<T>::type>& d, int offset=0 )
 
       Set the real parts of the entries in the ``offset`` diagonal from the 
       contents of the column-vector :math:`d`.
 
-   .. cpp:function:: void SetImagDiagonal( const Matrix<typename Base<T>::type>& d, int offset=0 )
+   .. cpp:function:: void SetImagPartOfDiagonal( const Matrix<typename Base<T>::type>& d, int offset=0 )
 
       Set the imaginary parts of the entries in the ``offset`` diagonal from 
       the column-vector :math:`d`.
 
-   .. cpp:function:: void UpdateRealDiagonal( const Matrix<typename Base<T>::type>& d, int offset=0 )
+   .. cpp:function:: void UpdateRealPartOfDiagonal( const Matrix<typename Base<T>::type>& d, int offset=0 )
 
       Add the contents of the column-vector :math:`d` onto the real parts of the
       entries in the ``offset`` diagonal.
 
-   .. cpp:function:: void UpdateImagDiagonal( const Matrix<typename Base<T>::type>& d, int offset=0 )
+   .. cpp:function:: void UpdateImagPartOfDiagonal( const Matrix<typename Base<T>::type>& d, int offset=0 )
 
       Add the contents of the column-vector :math:`d` onto the imaginary parts 
       of the entries in the ``offset`` diagonal.

external/clique/external/elemental/doc/source/imports/blas.rst

    Similar to ``blas::Dot``, but this routine instead returns 
    :math:`\alpha := x^T y` (`x` is not conjugated).
 
-.. cpp:function:: RealBase<T>::type blas::Nrm2( int n, const T* x, int incx )
+.. cpp:function:: Base<T>::type blas::Nrm2( int n, const T* x, int incx )
 
    Return the Euclidean two-norm of the vector `x`, where
    :math:`||x||_2 = \sqrt{\sum_{i=0}^{n-1} |x_i|^2}`. Note that if `T` 

external/clique/external/elemental/doc/source/lapack-like/eigen_svd.rst

 
    Overwrites `A` with :math:`U`, `s` with the diagonal entries of :math:`\Sigma`, and `V` with :math:`V`. 
 
-   .. note:: 
-
-      This routine has not yet been optimized for cases where the matrix is 
-      highly rectangular. It is also far from the most efficient 
-      possible algorithm, as it currently uses the QR algorithm for the
-      bidiagonal SVD (as opposed to a Divide and Conquer algorithm) and 
-      does not apply Householder reflectors in the most efficient manner 
-      possible.
-
 .. cpp:function:: void SingularValues( Matrix<F>& A, Matrix<typename Base<F>::type>& s )
 
 .. cpp:function:: void SingularValues( DistMatrix<F>& A, DistMatrix<typename Base<F>::type,VR,STAR>& s )
 
    Forms the singular values of :math:`A` in `s`. Note that `A` is overwritten in order to compute the singular values.
 
-   .. note::
-
-      This routine has not yet been optimized for cases where the matrix is 
-      highly rectangular. It is also far from the most efficient
-      possible algorithm, as it currently uses the QR algorithm to redundantly
-      compute the singular values of the bidiagonal matrix (as opposed to 
-      bisection).
-

external/clique/external/elemental/doc/source/lapack-like/invariants.rst

       circle; in order to represent a value that is precisely zero, `rho` 
       is set to zero.
 
-   .. cpp:member:: typename RealBase<F>::type kappa
+   .. cpp:member:: typename Base<F>::type kappa
 
       `kappa` can be an arbitrary real number.
 

external/clique/external/elemental/doc/source/lapack-like/norms.rst

 For computing norms of fully-populated matrices.
 
 .. cpp:function:: typename Base<F>::type Norm( const Matrix<F>& A, NormType type=FROBENIUS_NORM )
-.. cpp:function:: typename Base<F>::type Norm( const DistMatrix<F>& A, NormType type=FROBENIUS_NORM )
-
-   Return the norm of the fully-populated matrix `A`.
+.. cpp:function:: typename Base<F>::type Norm( const DistMatrix<F,U,V>& A, NormType type=FROBENIUS_NORM )
 
 HermitianNorm
 -------------
 
 Same as ``Norm``, but the (distributed) matrix is implicitly Hermitian 
-with the data stored in the triangle specified by ``UpperOrLower``.
+with the data stored in the triangle specified by ``UpperOrLower``. Also, 
+while ``Norm`` supports every type of distribution, ``HermitianNorm`` currently
+only supports the standard matrix distribution.
 
 SymmetricNorm
 -------------

external/clique/external/elemental/doc/source/lapack-like/tuning.rst

    an evolution of the HJS tridiagonalization approach
    (see "*Towards an efficient parallel eigensolver for dense symmetric 
    matrices*" by Bruce Hendrickson, Elizabeth Jessup, and Christopher Smith)
-   which is described in detail in Kenneth Stanley's dissertation and briefly
-   described in "*Application of a high performance parallel eigensolver to 
-   electronic structure calculations*" by Mark Sears, Ken Stanley, and Greg
-   Henry.
+   which is described in detail in Ken Stanley's dissertation, "*Execution time 
+   of symmetric eigensolvers*". 
 
 There is clearly a small penalty associated with the extra redistributions
 necessary for the second approach, but the benefit from using a square process

external/clique/external/elemental/examples/lapack-like/HermitianEig.cpp

 typedef double R;
 typedef Complex<R> C;
 
+const int n = 300;
+
 int
 main( int argc, char* argv[] )
 {
         // allow you to pass in your own local buffer and to specify the 
         // distribution alignments (i.e., which process row and column owns the
         // top-left element)
-        const int n = 6; // choose a small problem size since we will print
         DistMatrix<C> H( n, n, g );
 
         // Fill the matrix since we did not pass in a buffer. 
                 H.SetLocal( iLocal, jLocal, C(i+j,i-j) );
             }
         }
-        // Alternatively, we could have sequentially filled the matrix with 
-        // for( int j=0; j<A.Width(); ++j )
-        //   for( int i=0; i<A.Height(); ++i )
-        //     A.Set( i, j, C(i+j,i-j) );
-        //
-        // More convenient interfaces are being investigated.
-        //
 
-        // Print our matrix.
-        H.Print("H");
-
-        // Print its trace
-        const C trace = Trace( H );
-        if( commRank == 0 )
-            std::cout << "Tr(H) = " << trace << std::endl;
+        // Make a backup of H before we overwrite it within the eigensolver
+        DistMatrix<C> HCopy( H );
 
         // Call the eigensolver. We first create an empty complex eigenvector 
         // matrix, X[MC,MR], and an eigenvalue column vector, w[VR,* ]
+        //
+        // Optional: set blocksizes and algorithmic choices here. See the 
+        //           'Tuning' section of the README for details.
         DistMatrix<R,VR,STAR> w( g );
         DistMatrix<C> X( g );
-        // Optional: set blocksizes and algorithmic choices here. See the 
-        //           'Tuning' section of the README for details.
         HermitianEig( LOWER, H, w, X ); // only use lower half of H
 
-        // Print the eigensolution
-        w.Print("Eigenvalues of H");
-        X.Print("Eigenvectors of H");
+        // Optional: sort the eigenpairs
+        SortEig( w, X );
 
-        // Sort the eigensolution, then reprint
-        SortEig( w, X );
-        w.Print("Sorted eigenvalues of H");
-        X.Print("Sorted eigenvectors of H");
+        // Check the residual, || H X - Omega X ||_F
+        const R frobH = HermitianNorm( LOWER, HCopy, FROBENIUS_NORM );
+        DistMatrix<C> E( X );
+        DiagonalScale( RIGHT, NORMAL, w, E );
+        Hemm( LEFT, LOWER, (C)-1, HCopy, X, (C)1, E );
+        const R frobResid = HermitianNorm( LOWER, E, FROBENIUS_NORM );
+
+        // Check the orthogonality of X
+        Identity( n, n, E );
+        Herk( LOWER, NORMAL, (C)-1, X, (C)1, E );
+        const R frobOrthog = HermitianNorm( LOWER, E, FROBENIUS_NORM );
+
+        if( g.Rank() == 0 )
+        {
+            std::cout << "|| H ||_F = " << frobH << "\n"
+                      << "|| H X - X Omega ||_F / || A ||_F = " 
+                      << frobResid / frobH << "\n"
+                      << "|| X X^H - I ||_F = " << frobOrthog / frobH
+                      << "\n" << std::endl;
+        }
     }
     catch( exception& e )
     {

external/clique/external/elemental/examples/lapack-like/HermitianQDWH.cpp

+/*
+   Copyright (c) 2009-2012, Jack Poulson
+   All rights reserved.
+
+   This file is part of Elemental.
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are met:
+
+    - Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+
+    - Redistributions in binary form must reproduce the above copyright notice,
+      this list of conditions and the following disclaimer in the documentation
+      and/or other materials provided with the distribution.
+
+    - Neither the name of the owner nor the names of its contributors
+      may be used to endorse or promote products derived from this software
+      without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+#include "elemental.hpp"
+using namespace std;
+using namespace elem;
+
+// Typedef our real and complex types to 'R' and 'C' for convenience
+typedef double R;
+typedef Complex<R> C;
+
+void Usage()
+{
+    cout << "HermitianQDWH <n>\n"
+         << "  <n>: size of random matrix to test polar decomp. on\n"
+         << endl;
+}
+
+int
+main( int argc, char* argv[] )
+{
+    Initialize( argc, argv );
+
+    mpi::Comm comm = mpi::COMM_WORLD;
+    const int commRank = mpi::CommRank( comm );
+
+    if( argc < 2 )
+    {
+        if( commRank == 0 )
+            Usage();
+        Finalize();
+        return 0;
+    }
+    const int n = atoi( argv[1] );
+
+    try 
+    {
+        Grid g( comm );
+        DistMatrix<C> A( g ), Q( g ), P( g );
+        HermitianUniformSpectrum( n, A, 1, 2 );
+        const R lowerBound = 1.0;
+        const R frobA = Norm( A, FROBENIUS_NORM );
+        const R upperBound = TwoNormUpperBound( A );
+        if( g.Rank() == 0 )
+        {
+            std::cout << "ASSUMING 1 / ||inv(A)||_2 >= " << lowerBound << "\n"
+                      << "||A||_F =  " << frobA << "\n"
+                      << "||A||_2 <= " << upperBound << "\n" << std::endl;
+        }
+
+        // Compute the polar decomp of A using a QR-based Dynamically Weighted
+        // Halley (QDWH) iteration
+        Q = A;
+        const int numItsQDWH = 
+            HermitianQDWH( LOWER, Q, lowerBound, upperBound );
+        Zeros( n, n, P );
+        Gemm( ADJOINT, NORMAL, (C)1, Q, A, (C)0, P );
+
+        // Check and report overall and orthogonality error
+        DistMatrix<C> B( A );
+        Gemm( NORMAL, NORMAL, (C)-1, Q, P, (C)1, B );
+        const R frobQDWH = Norm( B, FROBENIUS_NORM );
+        Identity( n, n, B );
+        Herk( LOWER, NORMAL, (C)1, Q, (C)-1, B );
+        const R frobQDWHOrthog = HermitianNorm( LOWER, B, FROBENIUS_NORM );
+        if( g.Rank() == 0 )
+        {
+            std::cout << numItsQDWH << " iterations of QDWH\n"
+                      << "||A - QP||_F / ||A||_F = " 
+                      << frobQDWH/frobA << "\n"
+                      << "||I - QQ^H||_F / ||A||_F = " 
+                      << frobQDWHOrthog/frobA << "\n"
+                      << std::endl;
+        }
+
+        // Compute the polar decomp of A using a standard QR-based Halley
+        // iteration
+        Q = A;
+        const int numItsHalley = HermitianHalley( LOWER, Q, upperBound );
+        Zeros( n, n, P );
+        Gemm( ADJOINT, NORMAL, (C)1, Q, A, (C)0, P );
+
+        // Check and report the overall and orthogonality error
+        B = A; 
+        Gemm( NORMAL, NORMAL, (C)-1, Q, P, (C)1, B );
+        const R frobHalley = Norm( B, FROBENIUS_NORM );
+        Identity( n, n, B );
+        Herk( LOWER, NORMAL, (C)1, Q, (C)-1, B );
+        const R frobHalleyOrthog = HermitianNorm( LOWER, B, FROBENIUS_NORM );
+        if( g.Rank() == 0 )
+        {
+            std::cout << numItsHalley << " iterations of Halley\n"
+                      << "||A - QP||_F / ||A||_F = " 
+                      << frobHalley/frobA << "\n"
+                      << "||I - QQ^H||_F / ||A||_F = "
+                      << frobHalleyOrthog/frobA << "\n"
+                      << std::endl;
+        }
+    }
+    catch( exception& e )
+    {
+        cerr << "Process " << commRank << " caught exception with message: "
+             << e.what() << endl;
+#ifndef RELEASE
+        DumpCallStack();
+#endif
+    }
+
+    Finalize();
+    return 0;
+}

external/clique/external/elemental/examples/lapack-like/QDWH.cpp

         Grid g( comm );
         DistMatrix<C> A( g ), Q( g ), P( g );
         Uniform( m, n, A );
+        const R lowerBound = 1e-7;
         const R frobA = Norm( A, FROBENIUS_NORM );
+        const R upperBound = TwoNormUpperBound( A );
         if( g.Rank() == 0 )
-            std::cout << "||A||_F = " << frobA << "\n" << std::endl;
+        {
+            std::cout << "ASSUMING 1 / ||inv(A)||_2 >= " << lowerBound << "\n"
+                      << "||A||_F =  " << frobA << "\n"
+                      << "||A||_2 <= " << upperBound << "\n" << std::endl;
+        }
 
         // Compute the polar decomp of A using a QR-based Dynamically Weighted
         // Halley (QDWH) iteration
-        const R lowerBound = 1e-7;
         Q = A;
-        const int numItsQDWH = QDWH( Q, lowerBound, frobA );
+        const int numItsQDWH = QDWH( Q, lowerBound, upperBound );
         Zeros( n, n, P );
         Gemm( ADJOINT, NORMAL, (C)1, Q, A, (C)0, P );
 
         // Compute the polar decomp of A using a standard QR-based Halley
         // iteration
         Q = A;
-        const int numItsHalley = Halley( Q, frobA );
+        const int numItsHalley = Halley( Q, upperBound );
         Zeros( n, n, P );
         Gemm( ADJOINT, NORMAL, (C)1, Q, A, (C)0, P );
 

external/clique/external/elemental/examples/lapack-like/QR.cpp

+/*
+   Copyright (c) 2009-2012, Jack Poulson
+   All rights reserved.
+
+   This file is part of Elemental.
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are met:
+
+    - Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+
+    - Redistributions in binary form must reproduce the above copyright notice,
+      this list of conditions and the following disclaimer in the documentation
+      and/or other materials provided with the distribution.
+
+    - Neither the name of the owner nor the names of its contributors
+      may be used to endorse or promote products derived from this software
+      without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+*/
+#include "elemental.hpp"
+using namespace std;
+using namespace elem;
+
+typedef double Real;
+typedef Complex<Real> C;
+
+void Usage()
+{
+    cout << "QR <m> <n>\n"
+         << "  <m>: height of random matrix to test QR on\n"
+         << "  <n>: width of random matrix to test QR on\n"
+         << endl;
+}
+
+int
+main( int argc, char* argv[] )
+{
+    Initialize( argc, argv );
+
+    mpi::Comm comm = mpi::COMM_WORLD;
+    const int commRank = mpi::CommRank( comm );
+
+    if( argc < 3 )
+    {
+        if( commRank == 0 )
+            Usage();
+        Finalize();
+        return 0;
+    }
+    const int m = atoi( argv[1] );
+    const int n = atoi( argv[2] );
+
+    try 
+    {
+        const Grid g( comm );
+        DistMatrix<C> A(g);
+        Uniform( m, n, A );
+        const Real frobA = Norm( A, FROBENIUS_NORM );
+
+        // Compute the QR decomposition of A, but do not overwrite A
+        DistMatrix<C> Q( A ), R(g);
+        ExplicitQR( Q, R );
+
+        // Check the error in the QR factorization, || A - Q R ||_F / || A ||_F
+        DistMatrix<C> E( A );
+        Gemm( NORMAL, NORMAL, (C)-1, Q, R, (C)1, E );
+        const Real frobQR = Norm( E, FROBENIUS_NORM );
+
+        // Check the numerical orthogonality of Q, || I - Q^H Q ||_F / || A ||_F
+        const int k = std::min(m,n);
+        Identity( k, k, E );
+        Herk( LOWER, ADJOINT, (C)-1, Q, (C)1, E );
+        const Real frobOrthog = HermitianNorm( LOWER, E, FROBENIUS_NORM ); 
+
+        if( g.Rank() == 0 )
+        {
+            std::cout << "|| A ||_F = " << frobA << "\n"
+                      << "|| A - Q R ||_F / || A ||_F   = " 
+                      << frobQR/frobA << "\n"
+                      << "|| I - Q^H Q ||_F / || A ||_F = "
+                      << frobOrthog/frobA << "\n"
+                      << std::endl;
+        }
+    }
+    catch( exception& e )
+    {
+        cerr << "Process " << commRank << " caught exception with message: "
+             << e.what() << endl;
+#ifndef RELEASE
+        DumpCallStack();
+#endif
+    }
+
+    Finalize();
+    return 0;
+}
+

external/clique/external/elemental/examples/lapack-like/SVD.cpp

         U = A;
         SVD( U, s, V );
 
-        // This is a bit of a hack since norms are not supported for anything
-        // but the standard ([MC,MR]) distribution (as of yet)
-        DistMatrix<R> s_MC_MR( s );
-        const R twoNormOfA = Norm( s_MC_MR, MAX_NORM );
-
+        const R twoNormOfA = Norm( s, MAX_NORM );
         const R maxNormOfA = Norm( A, MAX_NORM );
         const R oneNormOfA = Norm( A, ONE_NORM );
         const R infNormOfA = Norm( A, INFINITY_NORM );
         const R frobNormOfA = Norm( A, FROBENIUS_NORM );
-        const R lowerBound = TwoNormLowerBound( A );
-        const R upperBound = TwoNormUpperBound( A );
 
         DiagonalScale( RIGHT, NORMAL, s, U );
         Gemm( NORMAL, ADJOINT, (C)-1, U, V, (C)1, A );
                  << "||A||_1     = " << oneNormOfA << "\n"
                  << "||A||_oo    = " << infNormOfA << "\n"
                  << "||A||_F     = " << frobNormOfA << "\n"
-                 << "\n"
-                 << "lower bound = " << lowerBound << "\n"
                  << "||A||_2     = " << twoNormOfA << "\n"
-                 << "upper bound = " << upperBound << "\n"
                  << "\n"
                  << "||A - U Sigma V^H||_max = " << maxNormOfE << "\n"
                  << "||A - U Sigma V^H||_1   = " << oneNormOfE << "\n"

external/clique/external/elemental/examples/lapack-like/SequentialQR.cpp

 using namespace std;
 using namespace elem;
 
-typedef double R;
-typedef Complex<R> C;
+typedef double Real;
+typedef Complex<Real> C;
 
 void Usage()
 {
-    cout << "QR <m> <n>\n"
+    cout << "SequentialQR <m> <n>\n"
          << "  <m>: height of random matrix to test QR on\n"
          << "  <n>: width of random matrix to test QR on\n"
          << endl;
 
     try 
     {
+        const Grid g( comm );
+
         Matrix<C> A;
         Uniform( m, n, A );
+        const Real frobA = Norm( A, FROBENIUS_NORM );
 
         // Compute the QR decomposition of A, but do not overwrite A
-        Matrix<C> B( A );
-        Matrix<C> t;
-        SetBlocksize( 3 );
-        QR( B, t );
+        Matrix<C> Q( A ), R;
+        ExplicitQR( Q, R );
 
-        A.Print("A");
-        B.Print("B := qr(A)");
-        t.Print("t");
+        // Check the error in the QR factorization, || A - Q R ||_F / || A ||_F
+        Matrix<C> E( A );
+        Gemm( NORMAL, NORMAL, (C)-1, Q, R, (C)1, E );
+        const Real frobQR = Norm( E, FROBENIUS_NORM );
 
-        ExpandPackedReflectors( LOWER, VERTICAL, UNCONJUGATED, 0, B, t );
+        // Check the numerical orthogonality of Q, || I - Q^H Q ||_F / || A ||_F
+        const int k = std::min(m,n);
+        Identity( k, k, E );
+        Herk( LOWER, ADJOINT, (C)-1, Q, (C)1, E );
+        const Real frobOrthog = HermitianNorm( LOWER, E, FROBENIUS_NORM );
 
-        B.Print("Q");
+        if( g.Rank() == 0 )
+        {
+            std::cout << "|| A ||_F = " << frobA << "\n"
+                      << "|| A - Q R ||_F / || A ||_F   = "
+                      << frobQR/frobA << "\n"
+                      << "|| I - Q^H Q ||_F / || A ||_F = "
+                      << frobOrthog/frobA << "\n"
+                      << std::endl;
+        }
     }
     catch( exception& e )
     {

external/clique/external/elemental/examples/lapack-like/SimpleSVD.cpp

+#include "elemental.hpp"
+using namespace elem;
+
+const int m=300, n=300;  // run SVD on m x n matrix
+typedef double R;        // real datatype is `R'
+typedef Complex<R> C;    // complex datatype `C'
+
+int
+main( int argc, char* argv[] )
+{
+    Initialize( argc, argv );
+    DistMatrix<C> A;
+    Uniform( m, n, A );
+
+    DistMatrix<C> U, V;
+    DistMatrix<R,VR,STAR> s;
+    U = A;
+    SVD( U, s, V );
+    const R twoNormOfA = Norm( s, MAX_NORM );
+
+    DiagonalScale( RIGHT, NORMAL, s, U );
+    Gemm( NORMAL, ADJOINT, (C)-1, U, V, (C)1, A );
+    const R frobNormOfE = Norm( A, FROBENIUS_NORM );
+    const R eps = lapack::MachineEpsilon<R>();
+    const R scaledResidual = frobNormOfE / (std::max(m,n)*eps*twoNormOfA);
+    if( mpi::WorldRank() == 0 )
+    {
+        std::cout << "||A||_2 = " << twoNormOfA << "\n"
+                  << "||A - U Sigma V^H||_F / (max(m,n) eps ||A||_2) = " 
+                  << scaledResidual << "\n" << std::endl;
+    }
+
+    Finalize();
+    return 0;
+}

external/clique/external/elemental/include/elemental/blas-like.hpp

 // while the out-of-place sets B := Conjugate(A).
 //
 
-// In-place serial version for real datatypes. 
-// Note: this is a no-op.
+// In-place versions
 template<typename Z>
 void Conjugate( Matrix<Z>& A );
-
-// In-place serial version for complex datatypes.
 template<typename Z>
 void Conjugate( Matrix<Complex<Z> >& A );
-
-// In-place parallel version
 template<typename T,Distribution U,Distribution V>
 void Conjugate( DistMatrix<T,U,V>& A );
 
-// Out-of-place serial version.
+// Out-of-place versions
 template<typename T>
 void Conjugate( const Matrix<T>& A, Matrix<T>& B );
-
-// Out-of-place parallel version.
 template<typename T, 
          Distribution U,Distribution V,
          Distribution W,Distribution Z>
 ( LeftOrRight side, UpperOrLower uplo, int offset, DistMatrix<T,U,V>& A );
 
 //
+// MakeHermitian:
+//
+// Turn an implicitly Hermitian matrix into an explicitly Hermitian one by 
+// forcing the diagonal to be real and conjugate-transposing the specified 
+// strictly triangular portion of the matrix into the other.
+//
+
+template<typename T>
+void MakeHermitian( UpperOrLower uplo, Matrix<T>& A );
+template<typename T>
+void MakeHermitian( UpperOrLower uplo, DistMatrix<T>& A );
+
+//
+// MakeReal:
+//
+// Modify a matrix to ensure that all of its imaginary components are zero.
+//
+
+template<typename T>
+void MakeReal( Matrix<T>& A );
+template<typename T,Distribution U,Distribution V>
+void MakeReal( DistMatrix<T,U,V>& A );
+
+//
+// MakeSymmetric:
+//
+// Turn an implicitly symmetric matrix into an explicitly symmetric one by 
+// forcing the diagonal to be real and conjugate-transposing the specified 
+// strictly triangular portion of the matrix into the other.
+//
+
+template<typename T>
+void MakeSymmetric( UpperOrLower uplo, Matrix<T>& A );
+template<typename T>
+void MakeSymmetric( UpperOrLower uplo, DistMatrix<T>& A );
+
+//
 // ScaleTrapezoid:
 //
 // Scale only a trapezoidal portion of a matrix, using the parameter convention
 #endif
     const int m = A.Height();
     const int n = A.Width();
-    if( !B.Viewing() )
+    if( B.Viewing() )
+    {
+        if( B.Height() != n || B.Width() != m )
+        {
+            std::ostringstream msg;
+            msg << "If Adjoint'ing into a view, it must be the right size:\n"
+                << "  A ~ " << A.Height() << " x " << A.Width() << "\n"
+                << "  B ~ " << B.Height() << " x " << B.Width();
+            throw std::logic_error( msg.str().c_str() );
+        }
+    }
+    else
         B.ResizeTo( n, m );
-    else if( B.Height() != n || B.Width() != m )
-        throw std::logic_error
-        ("If Adjoint'ing into a view, it must be the right size");
+
     for( int j=0; j<n; ++j )
         for( int i=0; i<m; ++i )
             B.Set(j,i,Conj(A.Get(i,j)));
 
 template<typename T>
 inline void
+MakeReal( Matrix<T>& A )
+{
+#ifndef RELEASE
+    PushCallStack("MakeReal");
+#endif
+    T* ABuffer = A.Buffer();
+    const int height = A.Height();
+    const int width = A.Width();
+    const int ldim = A.LDim();
+    for( int j=0; j<width; ++j )
+        for( int i=0; i<height; ++i )
+            ABuffer[i+j*ldim] = RealPart(ABuffer[i+j*ldim]);
+#ifndef RELEASE
+    PopCallStack();
+#endif
+}
+
+template<typename T,Distribution U,Distribution V>
+inline void
+MakeReal( DistMatrix<T,U,V>& A )
+{
+#ifndef RELEASE
+    PushCallStack("MakeReal");
+#endif
+    MakeReal( A.LocalMatrix() );
+#ifndef RELEASE
+    PopCallStack();
+#endif
+}
+
+template<typename T>
+inline void
 MakeTrapezoidal
 ( LeftOrRight side, UpperOrLower uplo, int offset, Matrix<T>& A )
 {
 
 template<typename T>
 inline void
+MakeHermitian( UpperOrLower uplo, Matrix<T>& A )
+{
+#ifndef RELEASE
+    PushCallStack("MakeHermitian");
+#endif
+    if( A.Height() != A.Width() )
+        throw std::logic_error("Cannot make non-square matrix Hermitian");
+
+    Matrix<T> d;
+    A.GetDiagonal( d );
+    MakeReal( d );
+
+    if( uplo == LOWER )
+        MakeTrapezoidal( LEFT, LOWER, -1, A );
+    else
+        MakeTrapezoidal( LEFT, UPPER, +1, A );
+    Matrix<T> AAdj;
+    Adjoint( A, AAdj );
+    Axpy( (T)1, AAdj, A );
+
+    A.SetDiagonal( d );
+#ifndef RELEASE
+    PopCallStack();
+#endif
+}
+
+template<typename T>
+inline void
+MakeHermitian( UpperOrLower uplo, DistMatrix<T>& A )
+{
+#ifndef RELEASE
+    PushCallStack("MakeHermitian");
+#endif
+    const Grid& g = A.Grid();
+    if( A.Height() != A.Width() )
+        throw std::logic_error("Cannot make non-square matrix Hermitian");
+
+    DistMatrix<T,MD,STAR> d(g);
+    A.GetDiagonal( d );
+    MakeReal( d );
+
+    if( uplo == LOWER )
+        MakeTrapezoidal( LEFT, LOWER, -1, A );
+    else
+        MakeTrapezoidal( LEFT, UPPER, +1, A );
+    DistMatrix<T> AAdj(g);
+    Adjoint( A, AAdj );
+    Axpy( (T)1, AAdj, A );
+
+    A.SetDiagonal( d );
+#ifndef RELEASE
+    PopCallStack();
+#endif
+}
+
+template<typename T>
+inline void
+MakeSymmetric( UpperOrLower uplo, Matrix<T>& A )
+{
+#ifndef RELEASE
+    PushCallStack("MakeSymmetric");
+#endif
+    if( A.Height() != A.Width() )
+        throw std::logic_error("Cannot make non-square matrix symmetric");
+
+    Matrix<T> d;
+    A.GetDiagonal( d );
+
+    if( uplo == LOWER )
+        MakeTrapezoidal( LEFT, LOWER, -1, A );
+    else
+        MakeTrapezoidal( LEFT, UPPER, +1, A );
+    Matrix<T> ATrans;
+    Transpose( A, ATrans );
+    Axpy( (T)1, ATrans, A );
+
+    A.SetDiagonal( d );
+#ifndef RELEASE
+    PopCallStack();
+#endif
+}
+
+template<typename T>
+inline void
+MakeSymmetric( UpperOrLower uplo, DistMatrix<T>& A )
+{
+#ifndef RELEASE
+    PushCallStack("MakeSymmetric");
+#endif
+    if( A.Height() != A.Width() )
+        throw std::logic_error("Cannot make non-square matrix symmetric");
+
+    const Grid& g = A.Grid();
+    DistMatrix<T> d(g);
+    A.GetDiagonal( d );
+
+    if( uplo == LOWER )
+        MakeTrapezoidal( LEFT, LOWER, -1, A );
+    else
+        MakeTrapezoidal( LEFT, UPPER, +1, A );
+    DistMatrix<T> ATrans(g);
+    Transpose( A, ATrans );
+    Axpy( (T)1, ATrans, A );
+
+    A.SetDiagonal( d );
+#ifndef RELEASE
+    PopCallStack();
+#endif
+}
+
+template<typename T>
+inline void
 ScaleTrapezoid
 ( T alpha, LeftOrRight side, UpperOrLower uplo, int offset, Matrix<T>& A )
 {
 #endif
     const int m = A.Height();
     const int n = A.Width();
-    if( !B.Viewing() )
+    if( B.Viewing() )
+    {
+        if( B.Height() != n || B.Width() != m )
+        {
+            std::ostringstream msg;
+            msg << "If Transpose'ing into a view, it must be the right size:\n"
+                << "  A ~ " << A.Height() << " x " << A.Width() << "\n"
+                << "  B ~ " << B.Height() << " x " << B.Width();
+            throw std::logic_error( msg.str().c_str() );
+        }
+    }
+    else
         B.ResizeTo( n, m );
-    else if( B.Height() != n || B.Width() != m )
-        throw std::logic_error
-        ("If Transposing into a view, it must be the right size");
+
     for( int j=0; j<n; ++j )
         for( int i=0; i<m; ++i )
             B.Set(j,i,A.Get(i,j));
 #ifndef RELEASE
     PushCallStack("Adjoint");
 #endif
+    if( B.Viewing() )
+    {
+        if( A.Height() != B.Width() || A.Width() != B.Height() )
+        {
+            std::ostringstream msg;
+            msg << "If Adjoint'ing into a view, it must be the right size:\n"
+                << "  A ~ " << A.Height() << " x " << A.Width() << "\n"
+                << "  B ~ " << B.Height() << " x " << B.Width();
+            throw std::logic_error( msg.str().c_str() );
+        }
+    }
+
     if( U == Z && V == W && 
         A.ColAlignment() == B.RowAlignment() && 
         A.RowAlignment() == B.ColAlignment() )
     else
     {
         DistMatrix<T,Z,W> C( B.Grid() );
-        if( B.ConstrainedColAlignment() )
+        if( B.Viewing() || B.ConstrainedColAlignment() )
             C.AlignRowsWith( B );
-        if( B.ConstrainedRowAlignment() )
+        if( B.Viewing() || B.ConstrainedRowAlignment() )
             C.AlignColsWith( B );
         C = A;
 
                 B.AlignRowsWith( C );
             B.ResizeTo( A.Width(), A.Height() );
         }
-        else if( B.Height() != A.Width() || B.Width() != A.Height() )
-        {
-            throw std::logic_error
-            ("If Adjoint'ing into a view, it must be the right size");
-        }
         Adjoint( C.LockedLocalMatrix(), B.LocalMatrix() );
     }
 #ifndef RELEASE
 #ifndef RELEASE
     PushCallStack("Transpose");
 #endif
+    if( B.Viewing() )
+    {
+        if( A.Height() != B.Width() || A.Width() != B.Height() )
+        {
+            std::ostringstream msg;
+            msg << "If Transpose'ing into a view, it must be the right size:\n"
+                << "  A ~ " << A.Height() << " x " << A.Width() << "\n"
+                << "  B ~ " << B.Height() << " x " << B.Width();
+            throw std::logic_error( msg.str().c_str() );
+        }
+    }
+
     if( U == Z && V == W && 
         A.ColAlignment() == B.RowAlignment() && 
         A.RowAlignment() == B.ColAlignment() )
     else
     {
         DistMatrix<T,Z,W> C( B.Grid() );
-        if( B.ConstrainedColAlignment() )
+        if( B.Viewing() || B.ConstrainedColAlignment() )
             C.AlignRowsWith( B );
-        if( B.ConstrainedRowAlignment() )
+        if( B.Viewing() || B.ConstrainedRowAlignment() )
             C.AlignColsWith( B );
         C = A;
 
                 B.AlignRowsWith( C );
             B.ResizeTo( A.Width(), A.Height() );
         }
-        else if( B.Height() != A.Width() || B.Width() != A.Height() )
-        {
-            throw std::logic_error
-            ("If Transposing into a view, it must be the right size");
-        }
         Transpose( C.LockedLocalMatrix(), B.LocalMatrix() );
     }
 #ifndef RELEASE

external/clique/external/elemental/include/elemental/blas-like/level3/Gemm/NN.hpp

     DistMatrix<T,STAR,MR> B1Trans_STAR_MR(g);
     DistMatrix<T,MC,STAR> D1_MC_STAR(g);
 
+    B1_VR_STAR.AlignWith( A );
+    B1Trans_STAR_MR.AlignWith( A );
+    D1_MC_STAR.AlignWith( A );
+
     // Start the algorithm
     Scale( beta, C );
     LockedPartitionRight( B, BL, BR, 0 );
         ( CL, /**/     CR,
           C0, /**/ C1, C2 );
 
-        B1_VR_STAR.AlignWith( A );
-        B1Trans_STAR_MR.AlignWith( A );
-        D1_MC_STAR.AlignWith( A );
-        D1_MC_STAR.ResizeTo( C1.Height(), C1.Width() );
+        Zeros( C1.Height(), C1.Width(), D1_MC_STAR );
         //--------------------------------------------------------------------//
         B1_VR_STAR = B1;
         B1Trans_STAR_MR.TransposeFrom( B1_VR_STAR );
         // C1[MC,MR] += scattered result of D1[MC,*] summed over grid rows
         C1.SumScatterUpdate( (T)1, D1_MC_STAR );
         //--------------------------------------------------------------------//
-        B1_VR_STAR.FreeAlignments();
-        B1Trans_STAR_MR.FreeAlignments();
-        D1_MC_STAR.FreeAlignments();
 
         SlideLockedPartitionRight
         ( BL,     /**/ BR,
 
     // Temporary distributions
     DistMatrix<T,STAR,MC> A1_STAR_MC(g);
-    DistMatrix<T,STAR,MR> D1_STAR_MR(g);
+    DistMatrix<T,MR,STAR> D1Trans_MR_STAR(g);
+
+    A1_STAR_MC.AlignWith( B );
+    D1Trans_MR_STAR.AlignWith( B );
 
     // Start the algorithm
     Scale( beta, C );
                C1,
           CB,  C2 );
 
-        A1_STAR_MC.AlignWith( B );
-        D1_STAR_MR.AlignWith( B );
-        D1_STAR_MR.ResizeTo( C1.Height(), C1.Width() );
+        Zeros( C1.Width(), C1.Height(), D1Trans_MR_STAR );
         //--------------------------------------------------------------------//
         A1_STAR_MC = A1; // A1[*,MC] <- A1[MC,MR]
 
-        // D1[*,MR] := alpha A1[*,MC] B[MC,MR]
-        LocalGemm( NORMAL, NORMAL, alpha, A1_STAR_MC, B, (T)0, D1_STAR_MR );
+        // D1^T[MR,* ] := alpha B^T[MR,MC] A1^T[MC,* ]
+        LocalGemm
+        ( TRANSPOSE, TRANSPOSE, alpha, B, A1_STAR_MC, (T)0, D1Trans_MR_STAR );
 
-        // C1[MC,MR] += scattered result of D1[*,MR] summed over grid cols
-        C1.SumScatterUpdate( (T)1, D1_STAR_MR );
+        C1.TransposeSumScatterUpdate( (T)1, D1Trans_MR_STAR );
         //--------------------------------------------------------------------//
-        A1_STAR_MC.FreeAlignments();
-        D1_STAR_MR.FreeAlignments();
 
         SlideLockedPartitionDown
         ( AT,  A0,
     // Matrix views
     DistMatrix<T> AL(g), AR(g),
                   A0(g), A1(g), A2(g);         
-
     DistMatrix<T> BT(g),  B0(g),
                   BB(g),  B1(g),
                           B2(g);
     DistMatrix<T,MC,STAR> A1_MC_STAR(g);
     DistMatrix<T,MR,STAR> B1Trans_MR_STAR(g); 
 
+    A1_MC_STAR.AlignWith( C );
+    B1Trans_MR_STAR.AlignWith( C );
+
     // Start the algorithm
     Scale( beta, C );
     LockedPartitionRight( A, AL, AR, 0 ); 
                                     B1, 
                                BB,  B2 );
 
-        A1_MC_STAR.AlignWith( C );
-        B1Trans_MR_STAR.AlignWith( C );
         //--------------------------------------------------------------------//
         A1_MC_STAR = A1; 
         B1Trans_MR_STAR.TransposeFrom( B1 );
         LocalGemm
         ( NORMAL, TRANSPOSE, alpha, A1_MC_STAR, B1Trans_MR_STAR, (T)1, C );
         //--------------------------------------------------------------------//
-        A1_MC_STAR.FreeAlignments();
-        B1Trans_MR_STAR.FreeAlignments();
 
         SlideLockedPartitionRight( AL,     /**/ AR,
                                    A0, A1, /**/ A2 );
         // Matrix views
         DistMatrix<T> AT(g), AB(g),
                       A0(g), A1(g), A2(g);         
-
         DistMatrix<T> BL(g),  B0(g),
                       BR(g),  B1(g),
                               B2(g);
-
         DistMatrix<T> CT(g), C0(g), C1L(g), C1R(g),
                       CB(g), C1(g), C10(g), C11(g), C12(g),
                              C2(g);
                    C1,
               CB,  C2 );
 
-            A1_STAR_VC.AlignWith( B1 );
-            //----------------------------------------------------------------//
             A1_STAR_VC = A1; 
-            //----------------------------------------------------------------//
+            B1_VC_STAR.AlignWith( A1_STAR_VC );
 
             LockedPartitionRight( B, BL, BR, 0 );
             PartitionRight( C1, C1L, C1R, 0 );
                 ( C1L, /**/ C1R,
                   C10, /**/ C11, C12 );
 
-                B1_VC_STAR.AlignWith( A1_STAR_VC );
-                C11_STAR_STAR.ResizeTo( C11.Height(), C11.Width() );
+                Zeros( C11.Height(), C11.Width(), C11_STAR_STAR );
                 //------------------------------------------------------------//
                 B1_VC_STAR = B1;
                 LocalGemm
                   alpha, A1_STAR_VC, B1_VC_STAR, (T)0, C11_STAR_STAR );
                 C11.SumScatterUpdate( (T)1, C11_STAR_STAR );
                 //------------------------------------------------------------//
-                B1_VC_STAR.FreeAlignments();
 
                 SlideLockedPartitionRight
                 ( BL,     /**/ BR,
                 ( C1L,      /**/ C1R,
                   C10, C11, /**/ C12 );
             }
-            A1_STAR_VC.FreeAlignments();
+            B1_VC_STAR.FreeAlignments();
 
             SlideLockedPartitionDown
             ( AT,  A0,
         // Matrix views
         DistMatrix<T> AT(g), AB(g),
                       A0(g), A1(g), A2(g);         
-
         DistMatrix<T> BL(g),  B0(g),
                       BR(g),  B1(g),
                               B2(g);
-
         DistMatrix<T> 
             CL(g), CR(g),         C1T(g),  C01(g),
             C0(g), C1(g), C2(g),  C1B(g),  C11(g),
             ( CL, /**/ CR,
               C0, /**/ C1, C2 );
 
-            B1_VR_STAR.AlignWith( A1 );
-            //----------------------------------------------------------------//
             B1_VR_STAR = B1;
-            //----------------------------------------------------------------//
+            A1_STAR_VR.AlignWith( B1_VR_STAR );
 
             LockedPartitionDown
             ( A, AT,
                         C11,
                   C1B,  C21 );
 
-                A1_STAR_VR.AlignWith( B1_VR_STAR );
-                C11_STAR_STAR.ResizeTo( C11.Height(), C11.Width() );
+                Zeros( C11.Height(), C11.Width(), C11_STAR_STAR );
                 //------------------------------------------------------------//
                 A1_STAR_VR = A1;
                 LocalGemm
                   alpha, A1_STAR_VR, B1_VR_STAR, (T)0, C11_STAR_STAR );
                 C11.SumScatterUpdate( (T)1, C11_STAR_STAR );
                 //------------------------------------------------------------//
-                A1_STAR_VR.FreeAlignments();
 
                 SlideLockedPartitionDown
                 ( AT,  A0,
                  /***/ /***/
                   C1B,  C21 );
             }
-            B1_VR_STAR.FreeAlignments();
+            A1_STAR_VR.FreeAlignments();
 
             SlideLockedPartitionRight
             ( BL,     /**/ BR,
 #endif
 }
 
-
 } // namespace internal
 } // namespace elem

external/clique/external/elemental/include/elemental/blas-like/level3/Gemm/NT.hpp

     DistMatrix<T> BT(g),  B0(g),
                   BB(g),  B1(g),
                           B2(g);
-
     DistMatrix<T> CL(g), CR(g),
                   C0(g), C1(g), C2(g);
 
     // Temporary distributions
-    DistMatrix<T,STAR,MR> B1_STAR_MR(g);
+    DistMatrix<T,MR,STAR> B1AdjOrTrans_MR_STAR(g);
     DistMatrix<T,MC,STAR> D1_MC_STAR(g);
 
+    B1AdjOrTrans_MR_STAR.AlignWith( A );
+    D1_MC_STAR.AlignWith( A );
+
     // Start the algorithm
     Scale( beta, C );
     LockedPartitionDown
         ( CL, /**/     CR,
           C0, /**/ C1, C2 );
 
-        B1_STAR_MR.AlignWith( A );
-        D1_MC_STAR.AlignWith( A );
-        D1_MC_STAR.ResizeTo( C1.Height(), C1.Width() );
+        Zeros( C1.Height(), C1.Width(), D1_MC_STAR );
         //--------------------------------------------------------------------//
-        B1_STAR_MR = B1; // B1[*,MR] <- B1[MC,MR]
+        if( orientationOfB == ADJOINT )
+            B1AdjOrTrans_MR_STAR.AdjointFrom( B1 );
+        else
+            B1AdjOrTrans_MR_STAR.TransposeFrom( B1 );
 
-        // C1[MC,*] := alpha A[MC,MR] (B1[*,MR])^T
-        //           = alpha A[MC,MR] (B1^T)[MR,*]
+        // C1[MC,*] := alpha A[MC,MR] (B1^[T/H])[MR,*]
         LocalGemm
-        ( NORMAL, orientationOfB, alpha, A, B1_STAR_MR, (T)0, D1_MC_STAR );
+        ( NORMAL, NORMAL, alpha, A, B1AdjOrTrans_MR_STAR, (T)0, D1_MC_STAR );
 
         // C1[MC,MR] += scattered result of D1[MC,*] summed over grid rows
         C1.SumScatterUpdate( (T)1, D1_MC_STAR );
         //--------------------------------------------------------------------//
-        B1_STAR_MR.FreeAlignments();
-        D1_MC_STAR.FreeAlignments();
 
         SlideLockedPartitionDown
         ( BT,  B0,
     DistMatrix<T> AT(g),  A0(g),
                   AB(g),  A1(g),
                           A2(g);
-
     DistMatrix<T> CT(g),  C0(g),
                   CB(g),  C1(g),
                           C2(g);
 
     // Temporary distributions
-    DistMatrix<T,STAR,MR> A1_STAR_MR(g);
+    DistMatrix<T,MR,STAR> A1Trans_MR_STAR(g);
     DistMatrix<T,STAR,MC> D1_STAR_MC(g);
-    DistMatrix<T,MR,  MC> D1_MR_MC(g);
+    DistMatrix<T,MR,MC> D1_MR_MC(g);
     DistMatrix<T> D1(g);
 
+    A1Trans_MR_STAR.AlignWith( B );
+    D1_STAR_MC.AlignWith( B );
+
     // Start the algorithm
     Scale( beta, C );
     LockedPartitionDown
                C1,
           CB,  C2 );
 
-        A1_STAR_MR.AlignWith( B );
-        D1_STAR_MC.AlignWith( B );
-        D1_STAR_MC.ResizeTo( C1.Height(), C1.Width() );
         D1.AlignWith( C1 );
+        Zeros( C1.Height(), C1.Width(), D1_STAR_MC );
         //--------------------------------------------------------------------//
-        A1_STAR_MR = A1; // A1[*,MR] <- A1[MC,MR]
+        A1Trans_MR_STAR.TransposeFrom( A1 );
 
         // D1[*,MC] := alpha A1[*,MR] (B[MC,MR])^T
-        //           = alpha A1[*,MR] (B^T)[MR,MC]
+        //           = alpha (A1^T)[MR,*] (B^T)[MR,MC]
         LocalGemm
-        ( NORMAL, orientationOfB, alpha, A1_STAR_MR, B, (T)0, D1_STAR_MC );
+        ( TRANSPOSE, orientationOfB, 
+          alpha, A1Trans_MR_STAR, B, (T)0, D1_STAR_MC );
 
         // C1[MC,MR] += scattered & transposed D1[*,MC] summed over grid rows
         D1_MR_MC.SumScatterFrom( D1_STAR_MC );
         D1 = D1_MR_MC; 
         Axpy( (T)1, D1, C1 );
         //--------------------------------------------------------------------//
-        A1_STAR_MR.FreeAlignments();
-        D1_STAR_MC.FreeAlignments();
         D1.FreeAlignments();
 
         SlideLockedPartitionDown
     // Matrix views
     DistMatrix<T> AL(g), AR(g),
                   A0(g), A1(g), A2(g);
-
     DistMatrix<T> BL(g), BR(g),
                   B0(g), B1(g), B2(g);
 
     // Temporary distributions
     DistMatrix<T,MC,STAR> A1_MC_STAR(g);
-    DistMatrix<T,MR,STAR> B1_MR_STAR(g);
+    DistMatrix<T,VR,STAR> B1_VR_STAR(g);
+    DistMatrix<T,STAR,MR> B1AdjOrTrans_STAR_MR(g);
+
+    A1_MC_STAR.AlignWith( C );
+    B1_VR_STAR.AlignWith( C );
+    B1AdjOrTrans_STAR_MR.AlignWith( C );
 
     // Start the algorithm
     Scale( beta, C );
         ( BL, /**/ BR,
           B0, /**/ B1, B2 );
 
-        A1_MC_STAR.AlignWith( C );
-        B1_MR_STAR.AlignWith( C );
         //--------------------------------------------------------------------//
         A1_MC_STAR = A1; // A1[MC,*] <- A1[MC,MR]
-        B1_MR_STAR = B1; // B1[MR,*] <- B1[MC,MR]
+        B1_VR_STAR = B1;
+        if( orientationOfB == ADJOINT )
+            B1AdjOrTrans_STAR_MR.AdjointFrom( B1_VR_STAR );
+        else
+            B1AdjOrTrans_STAR_MR.TransposeFrom( B1_VR_STAR );