// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2020 Chris Schoutrop <c.e.m.schoutrop@tue.nl>
// Copyright (C) 2020 Mischa Senders <m.j.senders@student.tue.nl>
// Copyright (C) 2020 Lex Kuijpers <l.kuijpers@student.tue.nl>
// Copyright (C) 2020 Jens Wehner <j.wehner@esciencecenter.nl>
// Copyright (C) 2020 Jan van Dijk <j.v.dijk@tue.nl>
// Copyright (C) 2020 Adithya Vijaykumar
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
/*
The IDR(s)Stab(L) method is a combination of IDR(s) and BiCGStab(L).
This implementation of IDRSTABL is based on
1. Aihara, K., Abe, K., & Ishiwata, E. (2014). A variant of IDRstab with
reliable update strategies for solving sparse linear systems. Journal of
Computational and Applied Mathematics, 259, 244-258.
doi:10.1016/j.cam.2013.08.028
2. Aihara, K., Abe, K., & Ishiwata, E. (2015). Preconditioned
IDRSTABL Algorithms for Solving Nonsymmetric Linear Systems. International
Journal of Applied Mathematics, 45(3).
3. Saad, Y. (2003). Iterative Methods for Sparse Linear Systems:
Second Edition. Philadelphia, PA: SIAM.
4. Sonneveld, P., & Van Gijzen, M. B. (2009). IDR(s): A Family
of Simple and Fast Algorithms for Solving Large Nonsymmetric Systems of Linear
Equations. SIAM Journal on Scientific Computing, 31(2), 1035-1062.
doi:10.1137/070685804
5. Sonneveld, P. (2012). On the convergence behavior of IDR(s)
and related methods. SIAM Journal on Scientific Computing, 34(5), A2576-A2598.
Right-preconditioning based on Ref. 3 is implemented here.
*/
#ifndef EIGEN_IDRSTABL_H
#define EIGEN_IDRSTABL_H
namespace Eigen {
namespace internal {
template <typename MatrixType, typename Rhs, typename Dest, typename Preconditioner>
bool idrstabl(const MatrixType &mat, const Rhs &rhs, Dest &x, const Preconditioner &precond, Index &iters,
typename Dest::RealScalar &tol_error, Index L, Index S) {
/*
Setup and type definitions.
*/
using numext::abs;
using numext::sqrt;
typedef typename Dest::Scalar Scalar;
typedef typename Dest::RealScalar RealScalar;
typedef Matrix<Scalar, Dynamic, 1> VectorType;
typedef Matrix<Scalar, Dynamic, Dynamic, ColMajor> DenseMatrixType;
const Index N = x.rows();
Index k = 0; // Iteration counter
const Index maxIters = iters;
const RealScalar rhs_norm = rhs.stableNorm();
const RealScalar tol = tol_error * rhs_norm;
if (rhs_norm == 0) {
/*
If b==0, then the exact solution is x=0.
rhs_norm is needed for other calculations anyway, so this exit is a freebie.
*/
x.setZero();
tol_error = 0.0;
return true;
}
// Construct decomposition objects beforehand.
FullPivLU<DenseMatrixType> lu_solver;
if (S >= N || L >= N) {
/*
The matrix is very small, or the choice of L and S is very poor;
in that case solving directly will be best.
*/
lu_solver.compute(DenseMatrixType(mat));
x = lu_solver.solve(rhs);
tol_error = (rhs - mat * x).stableNorm() / rhs_norm;
return true;
}
// Define maximum sizes to prevent any reallocation later on.
DenseMatrixType u(N, L + 1);
DenseMatrixType r(N, L + 1);
DenseMatrixType V(N * (L + 1), S);
VectorType alpha(S);
VectorType gamma(L);
VectorType update(N);
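// Storage layout: r has L+1 columns holding r, A*r, ..., A^L*r (with the
// preconditioner applied before each product). U and V have S columns of
// length N*(L+1); column i stacks the blocks u_i, A*u_i, ..., so
// U.block(N*j, i, N, 1) is the j-th power block of the i-th direction vector.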
/*
Main IDRSTABL algorithm
*/
// Set up the initial residual
VectorType x0 = x;
r.col(0) = rhs - mat * x;
x.setZero(); // The final solution will be x0+x
tol_error = r.col(0).stableNorm();
// FOM = Full orthogonalisation method
DenseMatrixType h_FOM = DenseMatrixType::Zero(S, S - 1);
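// h_FOM holds the S x (S-1) upper Hessenberg matrix produced by the
// Arnoldi-like process below; it is reused for the early FOM exit check.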
// Construct an initial U matrix of size N*(L+1) x S; only the top N rows are
// filled by the Arnoldi-like process below.
DenseMatrixType U(N * (L + 1), S);
for (Index col_index = 0; col_index < S; ++col_index) {
// Arnoldi-like process to generate a set of orthogonal vectors spanning
// {u,A*u,A*A*u,...,A^(S-1)*u}. This construction can be combined with the
// Full Orthogonalization Method (FOM) from Ref.3 to provide a possible
// early exit with no additional MV.
if (col_index != 0) {
/*
Modified Gram-Schmidt strategy:
*/
VectorType w = mat * precond.solve(u.col(0));
for (Index i = 0; i < col_index; ++i) {
auto v = U.col(i).head(N);
h_FOM(i, col_index - 1) = v.dot(w);
w -= h_FOM(i, col_index - 1) * v;
}
u.col(0) = w;
h_FOM(col_index, col_index - 1) = u.col(0).stableNorm();
if (abs(h_FOM(col_index, col_index - 1)) != RealScalar(0)) {
/*
This only happens if u is NOT exactly zero. If u were exactly zero, it
would have no component in the direction of the current residual; by
leaving it at zero it will not contribute any further (as it should),
whereas attempting to normalize it would result in a division by zero.
Such cases occur if:
1. A basis of dimension <S is sufficient to exactly solve the linear
system, i.e. the current residual is in span{r,Ar,...,A^{m-1}r}, where
(m-1)<=S.
2. Two of the vectors generated from r, Ar,... are (numerically)
parallel.
In case 1, the exact solution to the system can be obtained from the
"Full Orthogonalization Method" (Algorithm 6.4 in the book of Saad),
without any additional MV.
Contrary to what one might suspect, the comparison with ==0.0 for
floating-point types is intended here. Any arbitrary non-zero u is fine
to continue with; however, if u contains either NaN or Inf the algorithm
will break down.
*/
u.col(0) /= h_FOM(col_index, col_index - 1);
}
} else {
u.col(0) = r.col(0);
u.col(0).normalize();
}
U.col(col_index).head(N) = u.col(0);
}
if (S > 1) {
// Check for early FOM exit.
Scalar beta = r.col(0).stableNorm();
VectorType e1 = VectorType::Zero(S - 1);
e1(0) = beta;
lu_solver.compute(h_FOM.topLeftCorner(S - 1, S - 1));
VectorType y = lu_solver.solve(e1);
VectorType x2 = x + U.topLeftCorner(N, S - 1) * y;
// Using proposition 6.7 in Saad, one MV can be saved to calculate the
// residual
RealScalar FOM_residual = (h_FOM(S - 1, S - 2) * y(S - 2) * U.col(S - 1).head(N)).stableNorm();
if (FOM_residual < tol) {
// Exit, the FOM algorithm was already accurate enough
iters = k;
// Convert back to the unpreconditioned solution
x = precond.solve(x2);
// x contains the updates to x0, add those back to obtain the solution
x += x0;
tol_error = FOM_residual / rhs_norm;
return true;
}
}
/*
Select an initial (N x S) matrix R0.
1. Generate a random R0 and orthonormalize the result.
2. To save memory and compute we only need the adjoint of R0, stored here as
the matrix R_T. Additionally, (mat.adjoint()*R0).adjoint()=R_T*mat by the
anti-distributivity property of the adjoint. This gives AR_T, which can be
precomputed and stays constant as long as R_T does not have to be
regenerated; based on reference 4, having to regenerate R_T has zero
probability in exact arithmetic.
*/
// The original IDRStab algorithm and Aihara et al. (Ref. 1) choose S random vectors:
const HouseholderQR<DenseMatrixType> qr(DenseMatrixType::Random(N, S));
DenseMatrixType R_T = (qr.householderQ() * DenseMatrixType::Identity(N, S)).adjoint();
DenseMatrixType AR_T = DenseMatrixType(R_T * mat);
// Pre-allocate sigma.
DenseMatrixType sigma(S, S);
bool reset_while = false; // Should the while loop be reset for some reason?
while (k < maxIters) {
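// Each pass of this outer loop performs L IDR steps (the j-loop below)
// followed by one polynomial (minimal residual) step, and counts as a
// single iteration.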
for (Index j = 1; j <= L; ++j) {
/*
The IDR Step
*/
// Construction of the sigma-matrix, and the decomposition of sigma.
for (Index i = 0; i < S; ++i) {
sigma.col(i).noalias() = AR_T * precond.solve(U.block(N * (j - 1), i, N, 1));
}
lu_solver.compute(sigma);
// Obtain the update coefficients alpha
if (j == 1) {
// alpha=inverse(sigma)*(R_T*r_0);
alpha.noalias() = lu_solver.solve(R_T * r.col(0));
} else {
// alpha=inverse(sigma)*(AR_T*r_{j-2})
alpha.noalias() = lu_solver.solve(AR_T * precond.solve(r.col(j - 2)));
}
// Obtain new solution and residual from this update
update.noalias() = U.topRows(N) * alpha;
r.col(0) -= mat * precond.solve(update);
x += update;
for (Index i = 1; i <= j - 2; ++i) {
// This only affects the case L>2
r.col(i) -= U.block(N * (i + 1), 0, N, S) * alpha;
}
if (j > 1) {
// r=[r;A*r_{j-2}]
r.col(j - 1).noalias() = mat * precond.solve(r.col(j - 2));
}
tol_error = r.col(0).stableNorm();
if (tol_error < tol) {
// If at this point the algorithm has converged, exit.
reset_while = true;
break;
}
bool break_normalization = false;
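// Update the matrix U: generate S new direction vectors. Each candidate is
// combined with the columns of U via the sigma solve (so that it lies in the
// next nested subspace), extended by one preconditioned MV, orthonormalized
// against the previously generated columns of V, and stored in V. Once all S
// columns are available, V replaces U.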
for (Index q = 1; q <= S; ++q) {
if (q == 1) {
// u = r;
u.leftCols(j + 1) = r.leftCols(j + 1);
} else {
// u=[u_1;u_2;...;u_j]
u.leftCols(j) = u.middleCols(1, j);
}
// Obtain the update coefficients beta implicitly
// beta = lu_solver.solve(AR_T * precond.solve(u.col(j - 1)))
u.reshaped().head(u.rows() * j) -= U.topRows(N * j) * lu_solver.solve(AR_T * precond.solve(u.col(j - 1)));
// u=[u;Au_{j-1}]
u.col(j).noalias() = mat * precond.solve(u.col(j - 1));
// Orthonormalize u_j to the columns of V_j(:,1:q-1)
if (q > 1) {
/*
Modified Gram-Schmidt-like procedure to make u orthogonal to the
columns of V from Ref. 1.
The vector mu from Ref. 1 is obtained implicitly:
mu=V.block(N * j, 0, N, q - 1).adjoint() * u.block(N * j, 0, N, 1).
*/
for (Index i = 0; i <= q - 2; ++i) {
auto v = V.col(i).segment(N * j, N);
Scalar h = v.squaredNorm();
h = v.dot(u.col(j)) / h;
u.reshaped().head(u.rows() * (j + 1)) -= h * V.block(0, i, N * (j + 1), 1);
}
}
// Normalize u and assign to a column of V
Scalar normalization_constant = u.col(j).stableNorm();
// If u is exactly zero, this will lead to a NaN. Small, non-zero u is
// fine.
if (normalization_constant == RealScalar(0.0)) {
break_normalization = true;
break;
} else {
u.leftCols(j + 1) /= normalization_constant;
}
V.block(0, q - 1, N * (j + 1), 1).noalias() = u.reshaped().head(u.rows() * (j + 1));
}
if (!break_normalization) {
U = V;
}
}
if (reset_while) {
break;
}
// r=[r;mat*r_{L-1}]
r.col(L).noalias() = mat * precond.solve(r.col(L - 1));
/*
The polynomial step
*/
ColPivHouseholderQR<DenseMatrixType> qr_solver(r.rightCols(L));
gamma.noalias() = qr_solver.solve(r.col(0));
// Update solution and residual using the "minimized residual coefficients"
update.noalias() = r.leftCols(L) * gamma;
x += update;
r.col(0) -= mat * precond.solve(update);
// Update iteration info
++k;
tol_error = r.col(0).stableNorm();
if (tol_error < tol) {
// Exit slightly early by checking the criterion before the update of U;
// after the main while loop the result of that calculation would no
// longer be needed.
break;
}
/*
U=U0-sum(gamma_j*U_j)
Consider the first iteration. Then U only contains U0, so at the start of
the while-loop U should be U0. Therefore only the first N rows of U have to
be updated.
*/
for (Index i = 1; i <= L; ++i) {
U.topRows(N) -= U.block(N * i, 0, N, S) * gamma(i - 1);
}
}
/*
Exit after the while loop terminated.
*/
iters = k;
// Convert back to the unpreconditioned solution
x = precond.solve(x);
// x contains the updates to x0, add those back to obtain the solution
x += x0;
tol_error = tol_error / rhs_norm;
return true;
}
} // namespace internal
template <typename MatrixType_, typename Preconditioner_ = DiagonalPreconditioner<typename MatrixType_::Scalar>>
class IDRSTABL;
namespace internal {
template <typename MatrixType_, typename Preconditioner_>
struct traits<IDRSTABL<MatrixType_, Preconditioner_>> {
typedef MatrixType_ MatrixType;
typedef Preconditioner_ Preconditioner;
};
} // namespace internal
/** \ingroup IterativeLinearSolvers_Module
* \brief IDR(s)STAB(l) is a combination of IDR(s) and BiCGSTAB(l). It is a
* short-recurrence Krylov method for sparse square problems that can
* outperform both IDR(s) and BiCGSTAB(l). IDR(s)STAB(l) generally follows the
* optimal GMRES convergence closely in terms of the number of matrix-vector
* products, but without the increasing cost per iteration of GMRES.
* IDR(s)STAB(l) is suitable for both indefinite systems and systems with
* complex eigenvalues.
*
* This class allows solving for A.x = b sparse linear problems. The vectors x
* and b can be either dense or sparse.
*
* \tparam MatrixType_ the type of the matrix A, which can be a dense or a
* sparse matrix.
* \tparam Preconditioner_ the type of the preconditioner. Default is
* DiagonalPreconditioner.
*
* \implsparsesolverconcept
*
* The maximum number of iterations and tolerance value can be controlled via
* the setMaxIterations() and setTolerance() methods. The defaults are the size
* of the problem for the maximum number of iterations and
* NumTraits<Scalar>::epsilon() for the tolerance.
*
* The tolerance is the maximum relative residual error: |Ax-b|/|b| for which
* the linear system is considered solved.
*
* \b Performance: When using sparse matrices, best performance is achieved for
* a row-major sparse matrix format. Moreover, in this case multi-threading can
* be exploited if the user code is compiled with OpenMP enabled. See \ref
* TopicMultiThreading for details.
*
* By default the iterations start with x=0 as an initial guess of the solution.
* One can control the start using the solveWithGuess() method.
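*
* A minimal usage sketch (the matrix, right-hand side, and the values of S and
* L below are illustrative):
* \code
* int n = 10000;
* VectorXd x(n), b(n);
* SparseMatrix<double> A(n, n);
* // ... fill A and b ...
* IDRSTABL<SparseMatrix<double> > solver;
* solver.setS(8); // dimension of the shadow residual space
* solver.setL(4); // number of minimize-residual steps
* solver.compute(A);
* x = solver.solve(b);
* // solver.info() and solver.error() report convergence status and residual
* \endcode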
*
* IDR(s)STAB(l) can also be used in a matrix-free context, see the following
* \link MatrixfreeSolverExample example \endlink.
*
* \sa class SimplicialCholesky, DiagonalPreconditioner, IdentityPreconditioner
*/
template <typename MatrixType_, typename Preconditioner_>
class IDRSTABL : public IterativeSolverBase<IDRSTABL<MatrixType_, Preconditioner_>> {
typedef IterativeSolverBase<IDRSTABL> Base;
using Base::m_error;
using Base::m_info;
using Base::m_isInitialized;
using Base::m_iterations;
using Base::matrix;
Index m_L;
Index m_S;
public:
typedef MatrixType_ MatrixType;
typedef typename MatrixType::Scalar Scalar;
typedef typename MatrixType::RealScalar RealScalar;
typedef Preconditioner_ Preconditioner;
public:
/** Default constructor. */
IDRSTABL() : m_L(2), m_S(4) {}
/** Initialize the solver with matrix \a A for further \c Ax=b solving.
This constructor is a shortcut for the default constructor followed
by a call to compute().
\warning this class stores a reference to the matrix A as well as some
precomputed values that depend on it. Therefore, if \a A is changed
this class becomes invalid. Call compute() to update it with the new
matrix A, or modify a copy of A.
*/
template <typename MatrixDerived>
explicit IDRSTABL(const EigenBase<MatrixDerived> &A) : Base(A.derived()), m_L(2), m_S(4) {}
/** \internal */
/** Called once per column of b. It:
1. sets the tolerance and maxIterations
2. calls the core solver routine
*/
template <typename Rhs, typename Dest>
void _solve_vector_with_guess_impl(const Rhs &b, Dest &x) const {
m_iterations = Base::maxIterations();
m_error = Base::m_tolerance;
bool ret = internal::idrstabl(matrix(), b, x, Base::m_preconditioner, m_iterations, m_error, m_L, m_S);
m_info = (!ret) ? NumericalIssue : m_error <= 10 * Base::m_tolerance ? Success : NoConvergence;
}
/** Sets the parameter L, the number of minimize-residual steps that are
* used. */
void setL(Index L) {
eigen_assert(L >= 1 && "L needs to be positive");
m_L = L;
}
/** Sets the parameter S, indicating the dimension of the shadow residual
* space. */
void setS(Index S) {
eigen_assert(S >= 1 && "S needs to be positive");
m_S = S;
}
};
} // namespace Eigen
#endif /* EIGEN_IDRSTABL_H */