Commits

Alexey Korepanov committed 0446588

Rewrite

Comments (0)

Files changed (2)

Eigen/Eigenvalues

 
 #include "src/Eigenvalues/Tridiagonalization.h"
 #include "src/Eigenvalues/RealSchur.h"
+#include "src/Eigenvalues/RealQZ.h"
 #include "src/Eigenvalues/EigenSolver.h"
 #include "src/Eigenvalues/SelfAdjointEigenSolver.h"
 #include "src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h"
 #include "src/Eigenvalues/ComplexSchur.h"
 #include "src/Eigenvalues/ComplexEigenSolver.h"
 #include "src/Eigenvalues/MatrixBaseEigenvalues.h"
-#include "src/Eigenvalues/RealQZ.h"
 #ifdef EIGEN_USE_LAPACKE
 #include "src/Eigenvalues/RealSchur_MKL.h"
 #include "src/Eigenvalues/ComplexSchur_MKL.h"

Eigen/src/Eigenvalues/RealQZ.h

-/*
-    TODO: 
-    * check for complex numbers, as we don't do complex here
-    * what if we use FullPivHouseholderQr? Will result improve?
-    * or may be there's some sort of balancing for bad matrices?
-*/
 
-#include <algorithm>
-#include <vector>
+/* TODO:
+ * license, documentation
+ *
+ * decoupling of 2x2 blocks where T(i,i) or T(i+1,i+1) iz zero
+ * Scalar(0), Scalar(0.5), etc
+ * block operations in pushig down zeros
+ */
 
-#include <Eigen/Dense>
-using namespace Eigen;
+#ifndef EIGEN_REAL_QZ_H
+#define EIGEN_REAL_QZ_H
 
-template<typename _MatrixType> class RealQZ
-{
-    public:
-        typedef _MatrixType MatrixType;
-        enum {
-            RowsAtCompileTime = MatrixType::RowsAtCompileTime,
-            ColsAtCompileTime = MatrixType::ColsAtCompileTime,
-            Options = MatrixType::Options,
-            MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
-            MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime
-        };
+//#define KHU_DEBUG
 
-        typedef typename MatrixType::Scalar Scalar;
-        typedef typename MatrixType::Index Index;
+namespace Eigen {
 
-        // default constructor
-        RealQZ(Index size = RowsAtCompileTime==Dynamic ? 1 : RowsAtCompileTime):
-            A(size,size),
-            B(size,size),
-            Q(size,size),
-            Z(size,size),
-            workspace(size),
-            m_computed(false) { }
+    template<typename _MatrixType> class RealQZ {
+        public:
+            typedef _MatrixType MatrixType;
+            enum {
+                RowsAtCompileTime = MatrixType::RowsAtCompileTime,
+                ColsAtCompileTime = MatrixType::ColsAtCompileTime,
+                Options = MatrixType::Options,
+                MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime,
+                MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime
+            };
+            typedef typename MatrixType::Scalar Scalar;
+            typedef std::complex<typename NumTraits<Scalar>::Real> ComplexScalar;
+            typedef typename MatrixType::Index Index;
 
-        // constructive constructor
-        RealQZ(const MatrixType& A_, const MatrixType& B_, bool computeQZ = true) :
-            A(A_.rows(),A_.cols()),
-            B(A_.rows(),A_.cols()),
-            Q(A_.rows(),A_.cols()),
-            Z(A_.rows(),A_.cols()),
-            workspace(A_.rows()),
-            m_computed(false),
-            m_computeQZ(computeQZ) { 
-                compute(A_, B_, computeQZ);
+            typedef Matrix<ComplexScalar, ColsAtCompileTime, 1, Options & ~RowMajor, MaxColsAtCompileTime, 1> EigenvalueType;
+            typedef Matrix<Scalar, ColsAtCompileTime, 1, Options & ~RowMajor, MaxColsAtCompileTime, 1> ColumnVectorType;
+
+            /** \brief Default constructor.
+             *
+             * \param [in] size  Positive integer, size of the matrix whose QZ decomposition will be computed.
+             *
+             * The default constructor is useful in cases in which the user intends to
+             * perform decompositions via compute().  The \p size parameter is only
+             * used as a hint. It is not an error to give a wrong \p size, but it may
+             * impair performance.
+             *
+             * \sa compute() for an example.
+             */
+            RealQZ(Index size = RowsAtCompileTime==Dynamic ? 1 : RowsAtCompileTime) : 
+                m_S(size, size),
+                m_T(size, size),
+                m_Q(size, size),
+                m_Z(size, size),
+                m_workspace(size*2),
+                m_isInitialized(false)
+                { }
+
+            /** \brief Constructor; computes real QZ decomposition of given matrices
+             * 
+             * \param[in]  A          Matrix A.
+             * \param[in]  B          Matrix B.
+             * \param[in]  computeQZ  If false, A and Z are not computed.
+             *
+             * This constructor calls compute() to compute the QZ decomposition.
+             */
+            RealQZ(const MatrixType& A, const MatrixType& B, bool computeQZ = true) :
+                m_S(A.rows(),A.cols()),
+                m_T(A.rows(),A.cols()),
+                m_Q(A.rows(),A.cols()),
+                m_Z(A.rows(),A.cols()),
+                m_workspace(A.rows()*2),
+                m_isInitialized(false) {
+                    compute(A, B, computeQZ);
+                }
+
+            /** \brief Returns matrix Q in the QZ decomposition. 
+             *
+             * \returns A const reference to the matrix Q.
+             */
+            const MatrixType& matrixQ() const {
+                eigen_assert(m_isInitialized && "RealQZ is not initialized.");
+                eigen_assert(m_computeQZ && "The matrices Q and Z have not been computed during the QZ decomposition.");
+                return m_Q;
+            }
+            
+            /** \brief Returns matrix Z in the QZ decomposition. 
+             *
+             * \returns A const reference to the matrix Z.
+             */
+            const MatrixType& matrixZ() const {
+                eigen_assert(m_isInitialized && "RealQZ is not initialized.");
+                eigen_assert(m_computeQZ && "The matrices Q and Z have not been computed during the QZ decomposition.");
+                return m_Z;
+            }
+            
+            /** \brief Returns matrix S in the QZ decomposition. 
+             *
+             * \returns A const reference to the matrix S.
+             */
+            const MatrixType& matrixS() const {
+                eigen_assert(m_isInitialized && "RealQZ is not initialized.");
+                return m_S;
+            }
+            
+            /** \brief Returns matrix S in the QZ decomposition. 
+             *
+             * \returns A const reference to the matrix S.
+             */
+            const MatrixType& matrixT() const {
+                eigen_assert(m_isInitialized && "RealQZ is not initialized.");
+                return m_T;
             }
 
-        // actual computation routine
-        void compute(const MatrixType& A_, const MatrixType& B_, bool computeQZ = true);
+            /** \brief Computes QZ decomposition of given matrix. 
+             * 
+             * \param[in]  A          Matrix A.
+             * \param[in]  B          Matrix B.
+             * \param[in]  computeQZ  If false, A and Z are not computed.
+             * \returns    Reference to \c *this
+             */
+            RealQZ& compute(const MatrixType& A, const MatrixType& B, bool computeQZ = true);
 
-        // get matrix Q
-        const MatrixType & matrixQ() const {
-            eigen_assert(m_computed && m_computeQZ && "matrix Q was not computed");
-            return Q;
-        }
-        
-        // get matrix Z
-        const MatrixType & matrixZ() const {
-            eigen_assert(m_computed && m_computeQZ && "matrix Z was not computed");
-            return Z;
-        }
-        
-        // get matrix A
-        const MatrixType & matrixA() const {
-            eigen_assert(m_computed && "matrix A was not computed");
-            return A;
-        }
-        
-        // get matrix B
-        const MatrixType & matrixB() const {
-            eigen_assert(m_computed && "matrix B was not computed");
-            return B;
-        }
-        
-        // get matrix B
-        const unsigned long iterations() const {
-            eigen_assert(m_computed && "no computation done yet");
-            return iter;
+            /** \brief Reports whether previous computation was successful.
+             *
+             * \returns \c Success if computation was succesful, \c NoConvergence otherwise.
+             */
+            ComputationInfo info() const
+            {
+                eigen_assert(m_isInitialized && "RealQZ is not initialized.");
+                return m_info;
+            }
+            
+            /** \brief Returns number of performed QR-like iterations.
+             */
+            Index iterations() const
+            {
+                eigen_assert(m_isInitialized && "RealQZ is not initialized.");
+                return m_global_iter;
+            }
+
+            /** \brief Maximum number of iterations.
+             *
+             * Maximum number of iterations allowed for an eigenvalue to converge. 
+             */
+            static const Index m_max_iter = 400;
+
+        private:
+
+            MatrixType m_S, m_T, m_Q, m_Z;
+//MatrixType m_A_in;
+            ColumnVectorType m_workspace;
+            ComputationInfo m_info;
+            bool m_isInitialized;
+            bool m_computeQZ;
+            Scalar m_normOfT, m_normOfS;
+            Index m_global_iter;
+
+            typedef Matrix<Scalar,3,1> Vector3s;
+            typedef Matrix<Scalar,2,1> Vector2s;
+            typedef Matrix<Scalar,2,2> Matrix2s;
+            typedef JacobiRotation<Scalar> JRs;
+
+            void hessenbergTriangular();
+            void computeNorms();
+            Index findSmallSubdiagEntry(Index iu);
+            Index findSmallDiagEntry(Index f, Index l);
+            void splitOffTwoRows(Index i);
+            void pushDownZero(Index z, Index f, Index l);
+            void step(Index f, Index l, Index iter);
+            
+    }; // RealQZ
+
+    /** \internal Reduces S and T to upper Hessenberg - triangular form */
+    template<typename MatrixType>
+        void RealQZ<MatrixType>::hessenbergTriangular() {
+      
+            const Index dim = m_S.cols();
+
+            // perform QR decomposition of T, overwrite T with R, save Q
+            HouseholderQR<MatrixType> qrT(m_T);
+            m_T = qrT.matrixQR();
+            m_T.template triangularView<StrictlyLower>().setZero();
+            m_Q = qrT.householderQ();
+            // overwrite S with Q* S
+            m_S.applyOnTheLeft(m_Q.adjoint());
+            // init Z as Identity
+            m_Z = MatrixType::Identity(dim,dim);
+            // reduce S to upper Hessenberg with Givens rotations
+            for (Index j=0; j<=dim-3; j++) {
+                for (Index i=dim-1; i>=j+2; i--) {
+                    JRs G;
+                    // kill S(i,j)
+                    G.makeGivens(m_S(i-1, j), m_S(i,j));
+                    m_S.applyOnTheLeft(i-1,i,G.adjoint());
+                    m_T.applyOnTheLeft(i-1,i,G.adjoint());
+                    m_S(i,j) = 0.0;
+                    // update Q
+                    m_Q.applyOnTheRight(i-1,i,G);
+                    // kill T(i,i-1)
+                    G.makeGivens(m_T(i,i), m_T(i,i-1));
+                    m_S.applyOnTheRight(i,i-1,G);
+                    m_T.applyOnTheRight(i,i-1,G);
+                    m_T(i,i-1) = 0.0;
+                    // update Z
+                    m_Z.applyOnTheLeft(i,i-1,G.adjoint());
+                }
+            }
         }
 
+    /** \internal Computes vector L1 norms of S and T when in Hessenberg-Triangular form already */
+    template<typename MatrixType>
+        inline void RealQZ<MatrixType>::computeNorms() {
+            const Index size = m_S.cols();
+            m_normOfS = Scalar(0);
+            m_normOfT = Scalar(0);
+            for (Index j = 0; j < size; ++j) {
+                Index row_start = (std::max)(j-1,Index(0));
+                m_normOfS += m_S.row(j).segment(row_start, size - row_start).cwiseAbs().sum();
+                m_normOfT += m_T.row(j).segment(j, size - j).cwiseAbs().sum();
+            }
+        }
 
-    private:
+    
+    /** \internal Look for single small sub-diagonal element S(res, res-1) and return res (or 0) */
+    template<typename MatrixType>
+        inline typename MatrixType::Index RealQZ<MatrixType>::findSmallSubdiagEntry(Index iu) {
+            Index res = iu;
+            while (res > 0) {
+                Scalar s = internal::abs(m_S(res-1,res-1)) + internal::abs(m_S(res,res));
+                if (s == 0.0)
+                    s = m_normOfS;
+                if (internal::abs(m_S(res,res-1)) < NumTraits<Scalar>::epsilon() * s)
+                    break;
+                res--;
+            }
+            return res;
+        }
+    
+    /** \internal Look for single small diagonal element T(res, res) for res between f and l, and return res (or f-1)  */
+    template<typename MatrixType>
+        inline typename MatrixType::Index RealQZ<MatrixType>::findSmallDiagEntry(Index f, Index l) {
+            Index res = l;
+            while (res >= f) {
+                if (internal::abs(m_T(res,res)) < NumTraits<Scalar>::epsilon() * m_normOfT)
+                    break;
+                res--;
+            }
+            return res;
+        }
 
-        typedef JacobiRotation<Scalar> JRs;
-        typedef Matrix<Scalar,3,3> Matrix3s;
-        typedef Matrix<Scalar,3,1> Vector3s;
-        typedef Matrix<Scalar,2,2> Matrix2s;
-        typedef Matrix<Scalar,2,1> Vector2s;
-        typedef Matrix<Scalar,1,1> Vector1s;
-        
-        Index dim;
-        MatrixType A, B, Q, Z;
-        Matrix<Scalar,Dynamic,1> workspace;
-        bool m_computed;
-        bool m_computeQZ;
-
-        unsigned long iter;
-        bool converged;
-
-        struct {
-            bool operator() (Scalar a, Scalar b) { return internal::abs(a) < internal::abs(b); }
-        } scalar_abs_comp;
-
-        // reduce pair A,B to upper Hessenberg, upper Triangular matrices
-        void hessenbergTriangular();
-
-        // see compute for details
-        void pqr(Index &p, Index &q, Index &r);
-
-        // Apply householder reflection to some rows of a matrix on the left
-        template <Index es>
-            void applyH_l(
-                    MatrixType &A,                          // matrix to apply reflection to
-                    const Index &row0,                      // initial row
-                    const Index &rowE,                      // first essential row
-                    const Matrix<Scalar,es,1> &essential,   // essential vector
-                    const Scalar &tau                       // tau
-                    ) {
-                Map<Matrix<Scalar,1,Dynamic> > tmp(workspace.data(),A.cols());
-                tmp = essential.adjoint()*(A.template middleRows<es>(rowE));
-                tmp += A.row(row0);
-                A.row(row0) -= tau*tmp;
-                A.template middleRows<es>(rowE) -= essential * (tau*tmp);
-            };
-
-        // Apply householder reflection to some columns of a matrix on the right
-        template <Index es>
-            void applyH_r(
-                    MatrixType &A,                          // matrix to apply reflection to
-                    const Index &col0,                      // initial column
-                    const Index &colE,                      // first essential column
-                    const Matrix<Scalar,es,1> &essential,   // essential vector
-                    const Scalar &tau                       // tau
-                    ) {
-                Map<Matrix<Scalar,Dynamic,1> > tmp(workspace.data(),A.rows());
-                tmp = (A.template middleCols<es>(colE)) * essential;
-                tmp += A.col(col0);
-                A.col(col0) -= tau*tmp;
-                A.template middleCols<es>(colE) -= (tau*tmp) * essential.adjoint();
-            };
-
-        // make a Householder rotation
-        template <size_t m>
-            void makeH(const Scalar &head, const Matrix<Scalar,m,1> &tail, Matrix<Scalar,m,1> &essential, Scalar &tau, Scalar &beta) {
-                Scalar tailSqNorm = tail.squaredNorm();
-                if(tailSqNorm == 0.0) {
-                    tau = 0.0;
-                    beta = head;
-                    essential.setZero();
-                } else {
-                    beta = internal::sqrt(internal::abs2(head) + tailSqNorm);
-                    if (head >= 0.0)
-                        beta = -beta;
-                    essential = tail / (head - beta);
-                    tau = (beta - head) / beta;
-                }
-            };
-
-
-};
-
-template<typename MatrixType>
-void RealQZ<MatrixType>::compute(const MatrixType& A_, const MatrixType& B_, bool computeQZ) {
-
-    iter = 0;
-
-    m_computeQZ = computeQZ;
-
-    workspace = A.col(0);
-
-    dim = A_.cols();
-
-    assert(A_.rows()==dim && B_.rows()==dim && B_.cols()==dim);
-    assert(dim>=3);
-
-    A = A_; B = B_; 
-    
-    // entrance point to Francis-like QR iteration - Hessenberg-Triangular form
-    hessenbergTriangular();
-
-    converged = true;
-
-    // Big Loop
-    for (;;) {
-        Index p,q,r; 
-        // compute smallest p, largest q and r such that if
-        //     |A11 A12 A13|
-        // A = | 0  A22 A23|, A22=A.block(p,p,r,r), A33=A.block(p+r,p+r,q,q)
-        //     | 0   0  A33|
-        // then A33 is upper quazi-triangular, and A22 is unreduced upper Hessenberg;
-        // B is partitioned accondingly
-        pqr(p,q,r);
-
-        if (q==dim) {
-            // if q==dim, we are done
-            break;
-        } else {
-            // otherwise we work on A22, B22
-
-            // index of last element in B22
-            Index l22 = p+r-1;
-            // compute max (in abs value) element on the diaginal of B22
-            Scalar B22_max_diag = B.block(p,p,r,r).diagonal().cwiseAbs().maxCoeff();
-            // look for 0 on the diagonal of B22
-            Index B22_zero;
-            for (B22_zero=p; B22_zero<=l22; B22_zero++) {
-                if (internal::isMuchSmallerThan(B(B22_zero,B22_zero),B22_max_diag))
-                    break;
-            }
-            if (B22_zero<=l22) {
-                // if we found 0, we are going to zero A(l22, l22-1)
-                for (;B22_zero<l22; B22_zero++) {
-                    // push 0 down
-                    JRs G;
-                    G.makeGivens(B(B22_zero, B22_zero+1), B(B22_zero+1, B22_zero+1));
-                    A.applyOnTheLeft(B22_zero,B22_zero+1,G.adjoint());
-                    B.applyOnTheLeft(B22_zero,B22_zero+1,G.adjoint());
-                    B(B22_zero,B22_zero+1) = 0.0;
-                    // update Q
-                    Q.applyOnTheRight(B22_zero,B22_zero+1,G);
-                    // kill A(B22_zero+1, B22_zero-1)
-                    if (B22_zero>p) {
-                        G.makeGivens(A(B22_zero+1, B22_zero), A(B22_zero+1,B22_zero-1));
-                        A.applyOnTheRight(B22_zero, B22_zero-1,G);
-                        B.applyOnTheRight(B22_zero, B22_zero-1,G);
-                        A(B22_zero+1,B22_zero-1) = 0.0;
-                        // update Z
-                        Z.applyOnTheLeft(B22_zero,B22_zero-1,G.adjoint());
-                    }
-                }
-                {
-                    // finally kill A(l22,l22-1)
-                    JRs G;
-                    G.makeGivens(A(l22, l22), A(l22,l22-1));
-                    A.applyOnTheRight(l22,l22-1,G);
-                    B.applyOnTheRight(l22,l22-1,G);
-                    A(l22,l22-1)=0.0;
-                    // update Z
-                    Z.applyOnTheLeft(l22,l22-1,G.adjoint());
-                }
-                
-            } else {
-                // B_22 is not singular
-                // compute (x,y,z,0..0)* - the first column of A22 B22^{-1}
-                Scalar x=0.0, y=0.0, z;
-                if (r==3) {
-                    std::vector<Scalar> xx(9), yy(3);
-                    const Scalar
-                        a11=A(p+0,p+0),
-                        a12=A(p+0,p+1),
-                        a21=A(p+1,p+0),
-                        a22=A(p+1,p+1),
-                        a23=A(p+1,p+2),
-                        a32=A(p+2,p+1),
-                        a33=A(p+2,p+2),
-                        b12=B(p+0,p+1),
-                        b13=B(p+0,p+2),
-                        b23=B(p+1,p+2),
-                        b11i=1.0/B(p+0,p+0),
-                        b22i=1.0/B(p+1,p+1),
-                        b33i=1.0/B(p+2,p+2);
-                    xx[0] =   a11*a11*b11i*b11i;
-                    xx[1] =   a12*a21*b11i*b22i;
-                    xx[2] = - a11*a22*b11i*b22i;
-                    xx[3] = - a11*a33*b11i*b33i;
-                    xx[4] = - a23*a32*b22i*b33i;
-                    xx[5] =   a22*a33*b22i*b33i;
-                    xx[6] = - a21*a33*b12*b11i*b22i*b33i;
-                    xx[7] =   a21*a32*b13*b11i*b22i*b33i;
-                    xx[8] =   a11*a32*b23*b11i*b22i*b33i;
-                    yy[0] =   a11*a21*b11i*b11i;
-                    yy[1] = - a21*a33*b11i*b33i;
-                    yy[2] =   a21*a32*b23*b11i*b22i*b33i;
-                    std::sort(xx.begin(), xx.end(), scalar_abs_comp);
-                    for (size_t it=0; it<xx.size(); it++) 
-                        x+= xx[it];
-                    std::sort(yy.begin(), yy.end(), scalar_abs_comp);
-                    for (size_t it=0; it<yy.size(); it++) 
-                        y+= yy[it];
-                    z = a21*a32*b11i*b22i;
-                } else {
-                    // r>=4
-                    std::vector<Scalar> xx(11), yy(7);
-                    const Scalar
-                        a11=A(p+0,p+0),
-                        a12=A(p+0,p+1),
-                        a21=A(p+1,p+0),
-                        a22=A(p+1,p+1),
-                        a32=A(p+2,p+1),
-                        b12=B(p+0,p+1),
-                        b11i=1.0/B(p+0,p+0),
-                        b22i=1.0/B(p+1,p+1),
-                        b77i=1.0/B(l22-2,l22-2),
-                        b88i=1.0/B(l22-1,l22-1),
-                        b99i=1.0/B(l22-0,l22-0),
-                        a99=A(l22-0,l22-0),
-                        a88=A(l22-1,l22-1),
-                        a89=A(l22-1,l22-0),
-                        a98=A(l22-0,l22-1),
-                        a87=A(l22-1,l22-2),
-                        b89=B(l22-1,l22-0),
-                        b78=B(l22-2,l22-1),
-                        b79=B(l22-2,l22-0);
-                    xx[0] = a11*a11*b11i*b11i;
-                    xx[1] = a12*a21*b11i*b22i;
-                    xx[2] = -a11*a21*b12*b11i*b11i*b22i;
-                    xx[3] = -a11*a88*b11i*b88i;
-                    xx[4] = a11*a87*b78*b11i*b77i*b88i;
-                    xx[5] = -a11*a99*b11i*b99i;
-                    xx[6] = -a89*a98*b88i*b99i;
-                    xx[7] = a88*a99*b88i*b99i;
-                    xx[8] = -a87*a99*b78*b77i*b88i*b99i;
-                    xx[9] = a87*a98*b79*b77i*b88i*b99i;
-                    xx[10]= a11*a98*b89*b11i*b88i*b99i;
-                    yy[0] = a11*a21*b11i*b11i;
-                    yy[1] = a21*a22*b11i*b22i;
-                    yy[2] = -a21*a21*b12*b11i*b11i*b22i;
-                    yy[3] = -a21*a88*b11i*b88i;
-                    yy[4] = a21*a87*b78*b11i*b77i*b88i;
-                    yy[5] = -a21*a99*b11i*b99i;
-                    yy[6] = a21*a98*b89*b11i*b88i*b99i;
-                    std::sort(xx.begin(), xx.end(), scalar_abs_comp);
-                    for (size_t it=0; it<xx.size(); it++) 
-                        x+= xx[it];
-                    std::sort(yy.begin(), yy.end(), scalar_abs_comp);
-                    for (size_t it=0; it<yy.size(); it++) 
-                        y+= yy[it];
-                    z = a21*a32*b11i*b22i;
-                }
-
-                // variables for Householder rotations
-                Vector2s essential2;
-                Vector1s essential1;
-                Scalar tau, beta;
-
-                for (Index k=p; k<=l22-2; k++) {  
-                    Vector2s yz(y,z);
-
-                    // apply Householder reflection Qk such that
-                    //    |x|   |*|
-                    // Qk |y| = |0|
-                    //    |z|   |0|
-                    // on the left to A.block(k,k, k+3, dim)
-                    makeH<2>(x, yz, essential2, tau, beta);
-                    // apply Qk
-                    applyH_l<2>(A,k,k+1,essential2,tau);
-                    applyH_l<2>(B,k,k+1,essential2,tau);
-                    if (k>p) {
-                        A(k+1,k-1) = 0.0;
-                        A(k+2,k-1) = 0.0;
-                    }
-                    // update Q
-                    applyH_r<2>(Q,k,k+1,essential2,tau);
-                
-                    // make Zk1
-                    makeH<2>(B(k+2,k+2), B.block(k+2,k,1,2).adjoint(), essential2, tau, beta);
-                    // apply Zk1
-                    applyH_r<2>(A,k+2,k,essential2,tau);
-                    applyH_r<2>(B,k+2,k,essential2,tau);
-                    B(k+2,k) = 0.0;
-                    B(k+2,k+1) = 0.0;
-                    B(k+2,k+2) = beta;
-                    // update Z
-                    applyH_l<2>(Z,k+2,k,essential2,tau);
-                    
-                    // make Zk2
-                    makeH<1>(B(k+1,k+1), B.block(k+1,k,1,1).adjoint(), essential1, tau, beta);
-                    // apply Zk2
-                    applyH_r<1>(A,k+1,k,essential1,tau);
-                    applyH_r<1>(B,k+1,k,essential1,tau);
-                    B(k+1,k) = 0.0;
-                    B(k+1,k+1) = beta;
-                    // update Z
-                    applyH_l<1>(Z,k+1,k,essential1,tau);
-                    
-                    // update x,y,z
-                    x = A(k+1,k);
-                    y = A(k+2,k);
-                    if (k < l22-2)
-                        z = A(k+3,k);
-                } // loop over k
-                
-                // apply Householder Qlast so Qlast (x y)* = (* 0)* on lower 2x2 block of B22, A22
-                makeH<1>(A(l22-1,l22-2),A.block(l22,l22-2,1,1),essential1,tau,beta);
-                applyH_l<1>(A,l22-1,l22,essential1,tau);
-                applyH_l<1>(B,l22-1,l22,essential1,tau);
-                A(l22-1,l22-2) = beta;
-                A(l22,l22-2) = 0.0;
-                // update Q
-                applyH_r<1>(Q,l22-1,l22,essential1,tau);
-
-                // apply Householder Zlast so (B(l22,l22-1),B(l22,l22)) Zlast = (0 *) on lower block of B22, A22
-                makeH<1>(B(l22,l22),B.block(l22,l22-1,1,1), essential1, tau, beta);
-                applyH_r<1>(A,l22,l22-1,essential1,tau);
-                applyH_r<1>(B,l22,l22-1,essential1,tau);
-                B(l22,l22-1) = 0.0;
-                B(l22,l22) = beta;
-                // update Z
-                applyH_l<1>(Z,l22,l22-1,essential1,tau);
-
-                std::cout << "\n\n************************************\n"
-                    << "loop over k completed (iter=" << iter << ", p=" << p << ", l22=" << l22 << ")\n"
-                    << "*************************************\n";
-                std::cout << "A:\n" << A << "\nB:\n" << B << "\nA B^{-1}:\n" << A*B.inverse() << "\n\n";
-                //sleep(2);
-                
-                if (iter++>20) {
-                    std::cout << "**********************************\nLeaving on iteration " 
-                        << iter << "\n***********************************\n\n";
-                    converged = false;
-                    break;
-                }
-            } // B22 is not singular
-        } // q!=dim
-    } // Big Loop
-
-    if (converged) {
-        // work on 2x2 blocks with real eigenvalues
-        for (Index i=dim-2; i>=0; i--) {
-            if (A(i+1,i) != 0.0 && B(i,i)!=0.0 && B(i+1,i+1)!=0.0) {
-                // block of (A B^{-1})
-                Matrix2s ABi = A.block(i,i,2,2)*B.block(i,i,2,2).inverse();
-                Scalar p = 0.5*(ABi(0,0)-ABi(1,1));
-                Scalar q = p*p + ABi(1,0)*ABi(0,1);
+    /** \internal decouple 2x2 diagonal block in rows iu, iu+1 if eigenvalues are real */
+    template<typename MatrixType>
+        inline void RealQZ<MatrixType>::splitOffTwoRows(Index i) {
+            if (m_S(i+1,i) != Scalar(0) && m_T(i,i)!=Scalar(0) && m_T(i+1,i+1)!=Scalar(0)) {
+                // block of (S T^{-1})
+                Matrix2s STi = m_T.template block<2,2>(i,i).template triangularView<Upper>().template solve<OnTheRight>(m_S.template block<2,2>(i,i));
+                Scalar p = 0.5*(STi(0,0)-STi(1,1));
+                Scalar q = p*p + STi(1,0)*STi(0,1);
                 if (q>=0) {
                     Scalar z = internal::sqrt(q);
                     // QR for ABi - lambda I
                     JRs G;
                     if (p>=0)
-                        G.makeGivens(p + z, ABi(1,0));
+                        G.makeGivens(p + z, STi(1,0));
                     else
-                        G.makeGivens(p - z, ABi(1,0));
-                    A.applyOnTheLeft(i,i+1,G.adjoint());
-                    B.applyOnTheLeft(i,i+1,G.adjoint());
+                        G.makeGivens(p - z, STi(1,0));
+                    m_S.applyOnTheLeft(i,i+1,G.adjoint());
+                    m_T.applyOnTheLeft(i,i+1,G.adjoint());
                     // update Q
-                    Q.applyOnTheRight(i,i+1,G);
+                    m_Q.applyOnTheRight(i,i+1,G);
 
-                    G.makeGivens(B(i+1,i+1), B(i+1,i));
-                    A.applyOnTheRight(i+1,i,G);
-                    B.applyOnTheRight(i+1,i,G);
+                    G.makeGivens(m_T(i+1,i+1), m_T(i+1,i));
+                    m_S.applyOnTheRight(i+1,i,G);
+                    m_T.applyOnTheRight(i+1,i,G);
                     // update Z
-                    Z.applyOnTheLeft(i+1,i,G.adjoint());
-                    
-                    A(i+1,i) = 0.0;
-                    B(i+1,i) = 0.0;
+                    m_Z.applyOnTheLeft(i+1,i,G.adjoint());
 
-                }
-
-            }
-        }
-
-    }
-
-    m_computed = true;
-}
-
-template<typename MatrixType>
-void RealQZ<MatrixType>::hessenbergTriangular() {
-    
-    std::cout << "Before Hessenberg-Triangular:\nA:\n" << A << "\nB:\n" << B << "\n\n";
-    
-    // perform QR decomposition of B, overwrite B with R, save Q
-    HouseholderQR<MatrixType> qrB(B);
-    B = qrB.matrixQR();
-    B.template triangularView<StrictlyLower>().setZero();
-    Q = qrB.householderQ();
-    // overwrite A with Q* A
-    A.applyOnTheLeft(Q.adjoint());
-    // init Z as Identity
-    Z = MatrixType::Identity(dim,dim);
-    // reduce A to upper Hessenberg with givens rotations
-    for (Index j=0; j<=dim-3; j++) {
-        for (Index i=dim-1; i>=j+2; i--) {
-            JRs G;
-            // kill A(i,j)
-            G.makeGivens(A(i-1, j), A(i,j));
-            A.applyOnTheLeft(i-1,i,G.adjoint());
-            B.applyOnTheLeft(i-1,i,G.adjoint());
-            A(i,j) = 0.0;
-            // update Q
-            Q.applyOnTheRight(i-1,i,G);
-            // kill B(i,i-1)
-            G.makeGivens(B(i,i), B(i,i-1));
-            A.applyOnTheRight(i,i-1,G);
-            B.applyOnTheRight(i,i-1,G);
-            B(i,i-1) = 0.0;
-            // update Z
-            Z.applyOnTheLeft(i,i-1,G.adjoint());
-        }
-    }
-    
-    std::cout << "After Hessenberg-Triangular:\nA:\n" << A << "\nB:\n" << B << "\nA B^{-1}:\n" << A*B.inverse() << "\n\n";
-}
-
-
-template<typename MatrixType>
-void RealQZ<MatrixType>::pqr(Index &p, Index &q, Index &r) {
-        p=q=0;
-        char nz=0;
-        // compute p,q
-        for (Index i=dim-1; i>0; i--) {
-            if ( internal::isMuchSmallerThan(A(i,i-1), internal::abs(A(i-1,i-1))+internal::abs(A(i,i)) ) ) {
-                A(i,i-1) = 0.0;
-                if (nz==1) {
-                    nz = 0;
-                } else if (nz==2) {
-                    p = i;
-                    nz = 3;
-                }
-            } else {
-                if (nz==0) {
-                    nz = 1;
-                } else if (nz==1) {
-                    nz = 2;
-                    q = dim-i-2;
+                    m_S(i+1,i) = 0.0;
+                    m_T(i+1,i) = 0.0;
                 }
             }
         }
-        if (q==0 && nz<2)
-            q = dim;
-        r = dim-p-q;
-}
+    
+    /** \internal use zero in T(z,z) to zero S(l,l-1), working in block f..l */
+    template<typename MatrixType>
+        inline void RealQZ<MatrixType>::pushDownZero(Index z, Index f, Index l) {
+            JRs G;
+            for (Index zz=z; zz<l; zz++) {
+                // push 0 down
+                G.makeGivens(m_T(zz, zz+1), m_T(zz+1, zz+1));
+                m_S.applyOnTheLeft(zz,zz+1,G.adjoint());
+                m_T.applyOnTheLeft(zz,zz+1,G.adjoint());
+                m_T(zz+1,zz+1) = 0.0;
+                // update Q
+                m_Q.applyOnTheRight(zz,zz+1,G);
+                // kill S(zz+1, zz-1)
+                if (zz>f) {
+                    G.makeGivens(m_S(zz+1, zz), m_S(zz+1,zz-1));
+                    m_S.applyOnTheRight(zz, zz-1,G);
+                    m_T.applyOnTheRight(zz, zz-1,G);
+                    m_S(zz+1,zz-1) = 0.0;
+                    // update Z
+                    m_Z.applyOnTheLeft(zz,zz-1,G.adjoint());
+                }
+            }
+            // finally kill S(l,l-1)
+            G.makeGivens(m_S(l,l), m_S(l,l-1));
+            m_S.applyOnTheRight(l,l-1,G);
+            m_T.applyOnTheRight(l,l-1,G);
+            m_S(l,l-1)=0.0;
+            // update Z
+            m_Z.applyOnTheLeft(l,l-1,G.adjoint());
+        }
+    
+    /** \internal QR-like iterative step */
+    template<typename MatrixType>
+        inline void RealQZ<MatrixType>::step(Index f, Index l, Index iter) {
+            const Index dim = m_S.cols();
+
+            // x, y, z
+            Scalar x, y, z;
+            if (iter==10) {
+                // Wilkinson ad hoc shift
+                const Scalar
+                    a11=m_S(f+0,f+0), a12=m_S(f+0,f+1),
+                    a21=m_S(f+1,f+0), a22=m_S(f+1,f+1), a32=m_S(f+2,f+1),
+                    b12=m_T(f+0,f+1),
+                    b11i=1.0/m_T(f+0,f+0),
+                    b22i=1.0/m_T(f+1,f+1),
+                    a87=m_S(l-1,l-2),
+                    a98=m_S(l-0,l-1),
+                    b77i=1.0/m_T(l-2,l-2),
+                    b88i=1.0/m_T(l-1,l-1);
+                Scalar ss = internal::abs(a87*b77i) + internal::abs(a98*b88i),
+                       lpl = 1.5*ss,
+                       ll = ss*ss;
+                x = ll + a11*a11*b11i*b11i - lpl*a11*b11i + a12*a21*b11i*b22i
+                    - a11*a21*b12*b11i*b11i*b22i;
+                y = a11*a21*b11i*b11i - lpl*a21*b11i + a21*a22*b11i*b22i 
+                    - a21*a21*b12*b11i*b11i*b22i;
+                z = a21*a32*b11i*b22i;
+            } else if (iter==20 || iter==23 || iter == 27) {
+                // another exceptional shift
+                x = m_S(f,f)/m_T(f,f)-m_S(l,l)/m_T(l,l) + m_S(l,l-1)*m_T(l-1,l)/(m_T(l-1,l-1)*m_T(l,l));
+                y = m_S(f+1,f)/m_T(f,f);
+                z = 0;
+            } else if (iter>31 && !(iter%8)) {
+                // extremely exceptional shift
+                x = internal::random<Scalar>(-1.0,1.0);
+                y = internal::random<Scalar>(-1.0,1.0);
+                z = internal::random<Scalar>(-1.0,1.0);
+            } else {
+                const Scalar 
+                    a11=m_S(f,f), a12=m_S(f,f+1), 
+                    a21=m_S(f+1,f), a22=m_S(f+1,f+1),
+                    a32=m_S(f+2,f+1),
+                    a88=m_S(l-1,l-1), a89=m_S(l-1,l),
+                    a98=m_S(l,l-1), a99=m_S(l,l),
+                    b11=m_T(f,f), b11i=1.0/b11, b12=m_T(f,f+1),
+                    b22i=1.0/m_T(f+1,f+1),
+                    b88i=1.0/m_T(l-1,l-1), b89=m_T(l-1,l),
+                    b99i=1.0/m_T(l,l);
+                x = ( (a88*b88i - a11*b11i)*(a99*b99i - a11*b11i) - (a89*b99i)*(a98*b88i) + (a98*b88i)*(b89*b99i)*(a11*b11i) ) * (b11/a21)
+                    + a12*b22i - (a11*b11i)*(b12*b22i);
+                y = (a22*b22i-a11*b11i) - (a21*b11i)*(b12*b22i) - (a88*b88i-a11*b11i) - (a99*b99i-a11*b11i) + (a98*b88i)*(b89*b99i);
+                z = a32*b22i;
+            }
+
+            JRs G;
+
+            for (Index k=f; k<=l-2; k++) {  
+                // variables for Householder reflections
+                Vector2s essential2;
+                Scalar tau, beta;
+            
+                Vector3s hr(x,y,z);
+
+//std::cout << "  ** CP 0: " << (m_Q*m_S*m_Z-m_A_in).norm()/m_A_in.norm() << "\nS:\n"<< m_S << "\nT:\n" << m_T << "\n";
+                // Q_k
+                hr.makeHouseholderInPlace(tau, beta);
+                essential2 = hr.template bottomRows<2>();
+                Index fc=std::max(k-1,Index(0));  // first col to update
+                m_S.block(k,fc,3,dim-fc).applyHouseholderOnTheLeft(essential2, tau, m_workspace.data());
+                m_T.block(k,fc,3,dim-fc).applyHouseholderOnTheLeft(essential2, tau, m_workspace.data());
+                m_Q.template middleCols<3>(k).applyHouseholderOnTheRight(essential2, tau, m_workspace.data());
+                if (k>f) {
+                    m_S(k+1,k-1) = 0.0;
+                    m_S(k+2,k-1) = 0.0;
+                }
+//std::cout << "  ** CP 1: " << (m_Q*m_S*m_Z-m_A_in).norm()/m_A_in.norm() << "\nS:\n"<< m_S << "\nT:\n" << m_T << "\n";
+
+                // Z_{k1}
+                hr << m_T(k+2,k+2),m_T(k+2,k),m_T(k+2,k+1);
+                hr.makeHouseholderInPlace(tau, beta);
+                essential2 = hr.template bottomRows<2>();
+                {
+                    Index lr = std::min(k+4,dim); // last row to update
+                    Map<Matrix<Scalar,Dynamic,1> > tmp(m_workspace.data(),lr);
+                    // S
+                    tmp = m_S.block(0,k,lr,2) * essential2;
+                    tmp += m_S.block(0,k+2,lr,1);
+                    m_S.block(0,k+2,lr,1) -= tau*tmp;
+                    m_S.block(0,k,lr,2) -= (tau*tmp) * essential2.adjoint();
+                    // T
+                    tmp = m_T.block(0,k,lr,2) * essential2;
+                    tmp += m_T.block(0,k+2,lr,1);
+                    m_T.block(0,k+2,lr,1) -= tau*tmp;
+                    m_T.block(0,k,lr,2) -= (tau*tmp) * essential2.adjoint();
+                }
+                {
+                    // Z
+                    Map<Matrix<Scalar,1,Dynamic> > tmp(m_workspace.data(),dim);
+                    tmp = essential2.adjoint()*(m_Z.template middleRows<2>(k));
+                    tmp += m_Z.row(k+2);
+                    m_Z.row(k+2) -= tau*tmp;
+                    m_Z.template middleRows<2>(k) -= essential2 * (tau*tmp);
+                }
+                m_T(k+2,k) = 0.0;
+                m_T(k+2,k+1) = 0.0;
+//std::cout << "  ** CP 2: " << (m_Q*m_S*m_Z-m_A_in).norm()/m_A_in.norm() << "\nS:\n"<< m_S << "\nT:\n" << m_T << "\n";
+
+                // Z_{k2}
+                G.makeGivens(m_T(k+1,k+1), m_T(k+1,k));
+                m_S.applyOnTheRight(k+1,k,G);
+                m_T.applyOnTheRight(k+1,k,G);
+                // update Z
+                m_Z.applyOnTheLeft(k+1,k,G.adjoint());
+                m_T(k+1,k) = 0.0;
+
+//std::cout << "  ** CP 3: " << (m_Q*m_S*m_Z-m_A_in).norm()/m_A_in.norm() << "\nS:\n"<< m_S << "\nT:\n" << m_T << "\n";
+
+                // update x,y,z
+                x = m_S(k+1,k);
+                y = m_S(k+2,k);
+                if (k < l-2)
+                    z = m_S(k+3,k);
+            } // loop over k
+
+            // Q_{n-1}
+            G.makeGivens(x,y);
+            m_S.applyOnTheLeft(l-1,l,G.adjoint());
+            m_T.applyOnTheLeft(l-1,l,G.adjoint());
+            m_Q.applyOnTheRight(l-1,l,G);
+            m_S(l,l-2) = 0.0;
+
+//std::cout << "  ** CP 4: " << (m_Q*m_S*m_Z-m_A_in).norm()/m_A_in.norm() << "\nS:\n"<< m_S << "\nT:\n" << m_T << "\n";
+
+            // Z_{n-1}
+            G.makeGivens(m_T(l,l),m_T(l,l-1));
+            m_S.applyOnTheRight(l,l-1,G);
+            m_T.applyOnTheRight(l,l-1,G);
+            m_Z.applyOnTheLeft(l,l-1,G.adjoint());
+            m_T(l,l-1) = 0.0;
+
+//std::cout << "  ** CP 5: " << (m_Q*m_S*m_Z-m_A_in).norm()/m_A_in.norm() << "\nS:\n"<< m_S << "\nT:\n" << m_T << "\n";
+
+        }
+
+
+    template<typename MatrixType>
+        RealQZ<MatrixType>& RealQZ<MatrixType>::compute(const MatrixType& A_in, const MatrixType& B_in, bool computeQZ) {
+
+            const Index dim = A_in.cols();
+            
+            assert (A_in.rows()==dim && A_in.cols()==dim 
+                    && B_in.rows()==dim && B_in.cols()==dim 
+                    && "Need square matrices of the same dimension");
+                        
+            m_isInitialized = true;
+            m_computeQZ = computeQZ;
+            m_S = A_in; m_T = B_in;
+            m_workspace.resize(dim*2);
+            m_global_iter = 0;
+//m_A_in = A_in;
+
+            // entrance point: hessenberg triangular decomposition
+            hessenbergTriangular();
+
+            // compute L1 vector norms of T, S into m_normOfS, m_normOfT
+            computeNorms();
+
+#ifdef KHU_DEBUG
+std::cout << "Very beginning\n";
+std::cout << "S:\n" << m_S << "\nT:\n" << m_T << "\n" << "ST^{-1}:\n" << m_S*m_T.inverse() << "\n";
+//std::cout << "Q:\n" << m_Q << "\nZ:\n" << m_Z << "\n\n";
+std::cout << "    ********* error in S: " << (m_Q*m_S*m_Z-A_in).norm()/A_in.norm() << "\n";
+//std::cout << "    ********* error in T: " << (m_Q*m_T*m_Z-B_in).norm()/B_in.norm() << "\n";
+#endif
+
+            Index l = dim-1, 
+                  f, 
+                  local_iter = 0;
+
+            while (l>0 && local_iter<m_max_iter) {
+                f = findSmallSubdiagEntry(l);
+                if (f>0) m_S(f,f-1) = 0.0;
+#ifdef KHU_DEBUG
+std::cout << "new loop pass, f: " << f << ", l: " << l << ", local_iter: " << local_iter << ", m_global_iter: " << m_global_iter << "\n";
+#endif
+                if (f == l) {
+                    l --;
+                    local_iter = 0;
+                } else if (f == l-1) {
+#ifdef KHU_DEBUG
+std::cout << "splitting rows " << f << " and " << f+1 << "\n";
+#endif
+                    splitOffTwoRows(f);
+                    l -= 2;
+                    local_iter = 0;
+                } else {
+                    Index z = findSmallDiagEntry(f,l);
+                    if (z>=f) {
+                        // zero found
+#ifdef KHU_DEBUG
+std::cout << "pushing 0 down (" << z << ", " << f << ", " << l << ")\n";
+#endif
+                        pushDownZero(z,f,l);
+                    } else {
+                        // QR-like iteration
+#ifdef KHU_DEBUG
+std::cout << "doing step(" << f << ", " << l << ", " << local_iter << ")\n";
+#endif
+                        step(f,l, local_iter);
+                        local_iter++;
+                        m_global_iter++;
+                    }
+                }
+#ifdef KHU_DEBUG
+std::cout << "End of step\n";
+std::cout << "S:\n" << m_S << "\nT:\n" << m_T << "\n" << "ST^{-1}:\n" << m_S*m_T.inverse() << "\n";
+//std::cout << "Q:\n" << m_Q << "\nZ:\n" << m_Z << "\n\n";
+std::cout << "    ********* error in S: " << (m_Q*m_S*m_Z-A_in).norm()/A_in.norm() << "\n";
+//std::cout << "    ********* error in T: " << (m_Q*m_T*m_Z-B_in).norm()/B_in.norm() << "\n";
+#endif
+            }
+            // check if we converged before reaching iterations limit
+            if (local_iter<m_max_iter) {
+                m_info = Success;
+            } else {
+                m_info = NoConvergence;
+            }
+            return *this;
+        }
+
+
+} // end namespace Eigen
+
+
+#endif //EIGEN_REAL_QZ
+