// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2020 Chris Schoutrop <c.e.m.schoutrop@tue.nl>
// Copyright (C) 2020 Mischa Senders <m.j.senders@student.tue.nl>
// Copyright (C) 2020 Lex Kuijpers <l.kuijpers@student.tue.nl>
// Copyright (C) 2020 Jens Wehner <j.wehner@esciencecenter.nl>
// Copyright (C) 2020 Jan van Dijk <j.v.dijk@tue.nl>
// Copyright (C) 2020 Adithya Vijaykumar
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
/*
The IDR(s)Stab(L) method is a combination of IDR(s) and BiCGStab(L).
This implementation of IDRSTABL is based on
1. Aihara, K., Abe, K., & Ishiwata, E. (2014). A variant of IDRstab with
reliable update strategies for solving sparse linear systems. Journal of
Computational and Applied Mathematics, 259, 244-258.
doi:10.1016/j.cam.2013.08.028
2. Aihara, K., Abe, K., & Ishiwata, E. (2015). Preconditioned
IDRSTABL Algorithms for Solving Nonsymmetric Linear Systems. International
Journal of Applied Mathematics, 45(3).
3. Saad, Y. (2003). Iterative Methods for Sparse Linear Systems:
Second Edition. Philadelphia, PA: SIAM.
4. Sonneveld, P., & Van Gijzen, M. B. (2009). IDR(s): A Family
of Simple and Fast Algorithms for Solving Large Nonsymmetric Systems of Linear
Equations. SIAM Journal on Scientific Computing, 31(2), 1035-1062.
doi:10.1137/070685804
5. Sonneveld, P. (2012). On the convergence behavior of IDR(s)
and related methods. SIAM Journal on Scientific Computing, 34(5), A2576-A2598.
Right-preconditioning based on Ref. 3 is implemented here.
*/
#ifndef EIGEN_IDRSTABL_H
#define EIGEN_IDRSTABL_H
namespace Eigen {
namespace internal {
template <typename MatrixType, typename Rhs, typename Dest, typename Preconditioner>
bool idrstabl(const MatrixType &mat, const Rhs &rhs, Dest &x, const Preconditioner &precond, Index &iters,
typename Dest::RealScalar &tol_error, Index L, Index S) {
/*
Setup and type definitions.
*/
using numext::abs;
using numext::sqrt;
typedef typename Dest::Scalar Scalar;
typedef typename Dest::RealScalar RealScalar;
typedef Matrix<Scalar, Dynamic, 1> VectorType;
typedef Matrix<Scalar, Dynamic, Dynamic, ColMajor> DenseMatrixType;
const Index N = x.rows();
Index k = 0; // Iteration counter
const Index maxIters = iters;
const RealScalar rhs_norm = rhs.stableNorm();
const RealScalar tol = tol_error * rhs_norm;
if (rhs_norm == 0) {
/*
If b==0, then the exact solution is x=0.
rhs_norm is needed for other calculations anyway, so this exit is a freebie.
*/
x.setZero();
tol_error = 0.0;
return true;
}
// Construct decomposition objects beforehand.
FullPivLU<DenseMatrixType> lu_solver;
if (S >= N || L >= N) {
/*
The matrix is very small, or the choice of L and S is very poor;
in that case solving directly will be best.
*/
lu_solver.compute(DenseMatrixType(mat));
x = lu_solver.solve(rhs);
tol_error = (rhs - mat * x).stableNorm() / rhs_norm;
return true;
}
// Define maximum sizes to prevent any reallocation later on.
DenseMatrixType u(N, L + 1);
DenseMatrixType r(N, L + 1);
DenseMatrixType V(N * (L + 1), S);
VectorType alpha(S);
VectorType gamma(L);
VectorType update(N);
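// Storage layout: r has L+1 columns holding r, A*r, ..., A^L*r (with the
// preconditioner applied before each product). U and V have S columns of
// length N*(L+1); column i stacks the blocks u_i, A*u_i, ..., so
// U.block(N*j, i, N, 1) is the j-th power block of the i-th direction vector.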
/*
Main IDRSTABL algorithm
*/
// Set up the initial residual
VectorType x0 = x;
r.col(0) = rhs - mat * x;
x.setZero(); // The final solution will be x0+x
tol_error = r.col(0).stableNorm();
// FOM = Full orthogonalisation method
DenseMatrixType h_FOM = DenseMatrixType::Zero(S, S - 1);
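// h_FOM holds the S x (S-1) upper Hessenberg matrix produced by the
// Arnoldi-like process below; it is reused for the early FOM exit check.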
// Construct an initial U matrix of size N*(L+1) x S; only the top N rows are
// filled by the Arnoldi-like process below.
DenseMatrixType U(N * (L + 1), S);
for (Index col_index = 0; col_index < S; ++col_index) {
// Arnoldi-like process to generate a set of orthogonal vectors spanning
// {u,A*u,A*A*u,...,A^(S-1)*u}. This construction can be combined with the
// Full Orthogonalization Method (FOM) from Ref.3 to provide a possible
// early exit with no additional MV.
if (col_index != 0) {
/*
Modified Gram-Schmidt strategy:
*/
VectorType w = mat * precond.solve(u.col(0));
for (Index i = 0; i < col_index; ++i) {
auto v = U.col(i).head(N);
h_FOM(i, col_index - 1) = v.dot(w);
w -= h_FOM(i, col_index - 1) * v;
}
u.col(0) = w;
h_FOM(col_index, col_index - 1) = u.col(0).stableNorm();
if (abs(h_FOM(col_index, col_index - 1)) != RealScalar(0)) {
/*
This only happens if u is NOT exactly zero. If u were exactly zero, it
would have no component in the direction of the current residual; by
leaving it at zero it will not contribute any further (as it should),
whereas attempting to normalize it would result in a division by zero.
Such cases occur if:
1. A basis of dimension <S is sufficient to exactly solve the linear
system, i.e. the current residual is in span{r,Ar,...,A^{m-1}r}, where
(m-1)<=S.
2. Two of the vectors generated from r, Ar,... are (numerically)
parallel.
In case 1, the exact solution to the system can be obtained from the
"Full Orthogonalization Method" (Algorithm 6.4 in the book of Saad),
without any additional MV.
Contrary to what one might suspect, the comparison with ==0.0 for
floating-point types is intended here. Any arbitrary non-zero u is fine
to continue with; however, if u contains either NaN or Inf the algorithm
will break down.
*/
u.col(0) /= h_FOM(col_index, col_index - 1);
}
} else {
u.col(0) = r.col(0);
u.col(0).normalize();
}
U.col(col_index).head(N) = u.col(0);
}
if (S > 1) {
// Check for early FOM exit.
Scalar beta = r.col(0).stableNorm();
VectorType e1 = VectorType::Zero(S - 1);
e1(0) = beta;
lu_solver.compute(h_FOM.topLeftCorner(S - 1, S - 1));
VectorType y = lu_solver.solve(e1);
VectorType x2 = x + U.topLeftCorner(N, S - 1) * y;
// Using proposition 6.7 in Saad, one MV can be saved to calculate the
// residual
RealScalar FOM_residual = (h_FOM(S - 1, S - 2) * y(S - 2) * U.col(S - 1).head(N)).stableNorm();
if (FOM_residual < tol) {
// Exit, the FOM algorithm was already accurate enough
iters = k;
// Convert back to the unpreconditioned solution
x = precond.solve(x2);
// x contains the updates to x0, add those back to obtain the solution
x += x0;
tol_error = FOM_residual / rhs_norm;
return true;
}
}
/*
Select an initial (N x S) matrix R0.
1. Generate a random R0 and orthonormalize the result.
2. To save memory and compute we only need the adjoint of R0, stored here as
the matrix R_T. Additionally, (mat.adjoint()*R0).adjoint()=R_T*mat by the
anti-distributivity property of the adjoint. This gives AR_T, which can be
precomputed and stays constant as long as R_T does not have to be
regenerated; based on reference 4, having to regenerate R_T has zero
probability in exact arithmetic.
*/
// The original IDRStab algorithm and Aihara et al. (Ref. 1) choose S random vectors:
const HouseholderQR<DenseMatrixType> qr(DenseMatrixType::Random(N, S));
DenseMatrixType R_T = (qr.householderQ() * DenseMatrixType::Identity(N, S)).adjoint();
DenseMatrixType AR_T = DenseMatrixType(R_T * mat);
// Pre-allocate sigma.
DenseMatrixType sigma(S, S);
bool reset_while = false; // Should the while loop be reset for some reason?
while (k < maxIters) {
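// Each pass of this outer loop performs L IDR steps (the j-loop below)
// followed by one polynomial (minimal residual) step, and counts as a
// single iteration.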
for (Index j = 1; j <= L; ++j) {
/*
The IDR Step
*/
// Construction of the sigma-matrix, and the decomposition of sigma.
for (Index i = 0; i < S; ++i) {
sigma.col(i).noalias() = AR_T * precond.solve(U.block(N * (j - 1), i, N, 1));
}
lu_solver.compute(sigma);
// Obtain the update coefficients alpha
if (j == 1) {
// alpha=inverse(sigma)*(R_T*r_0);
alpha.noalias() = lu_solver.solve(R_T * r.col(0));
} else {
// alpha=inverse(sigma)*(AR_T*r_{j-2})
alpha.noalias() = lu_solver.solve(AR_T * precond.solve(r.col(j - 2)));
}
// Obtain new solution and residual from this update
update.noalias() = U.topRows(N) * alpha;
r.col(0) -= mat * precond.solve(update);
x += update;
for (Index i = 1; i <= j - 2; ++i) {
// This only affects the case L>2
r.col(i) -= U.block(N * (i + 1), 0, N, S) * alpha;
}
if (j > 1) {
// r=[r;A*r_{j-2}]
r.col(j - 1).noalias() = mat * precond.solve(r.col(j - 2));
}
tol_error = r.col(0).stableNorm();
if (tol_error < tol) {
// If at this point the algorithm has converged, exit.
reset_while = true;
break;
}
bool break_normalization = false;
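// Update the matrix U: generate S new direction vectors. Each candidate is
// combined with the columns of U via the sigma solve (so that it lies in the
// next nested subspace), extended by one preconditioned MV, orthonormalized
// against the previously generated columns of V, and stored in V. Once all S
// columns are available, V replaces U.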
for (Index q = 1; q <= S; ++q) {
if (q == 1) {
// u = r;
u.leftCols(j + 1) = r.leftCols(j + 1);
} else {
// u=[u_1;u_2;...;u_j]
u.leftCols(j) = u.middleCols(1, j);
}
// Obtain the update coefficients beta implicitly
// beta = lu_solver.solve(AR_T * precond.solve(u.col(j - 1)))
u.reshaped().head(u.rows() * j) -= U.topRows(N * j) * lu_solver.solve(AR_T * precond.solve(u.col(j - 1)));
// u=[u;Au_{j-1}]
u.col(j).noalias() = mat * precond.solve(u.col(j - 1));
// Orthonormalize u_j to the columns of V_j(:,1:q-1)
if (q > 1) {
/*
Modified Gram-Schmidt-like procedure to make u orthogonal to the
columns of V from Ref. 1.
The vector mu from Ref. 1 is obtained implicitly:
mu=V.block(N * j, 0, N, q - 1).adjoint() * u.block(N * j, 0, N, 1).
*/
for (Index i = 0; i <= q - 2; ++i) {
auto v = V.col(i).segment(N * j, N);
Scalar h = v.squaredNorm();
h = v.dot(u.col(j)) / h;
u.reshaped().head(u.rows() * (j + 1)) -= h * V.block(0, i, N * (j + 1), 1);
}
}
// Normalize u and assign to a column of V
Scalar normalization_constant = u.col(j).stableNorm();
// If u is exactly zero, this will lead to a NaN. Small, non-zero u is
// fine.
if (normalization_constant == RealScalar(0.0)) {
break_normalization = true;
break;
} else {
u.leftCols(j + 1) /= normalization_constant;
}
V.block(0, q - 1, N * (j + 1), 1).noalias() = u.reshaped().head(u.rows() * (j + 1));
}
if (!break_normalization) {
U = V;
}
}
if (reset_while) {
break;
}
// r=[r;mat*r_{L-1}]
r.col(L).noalias() = mat * precond.solve(r.col(L - 1));
/*
The polynomial step
*/
ColPivHouseholderQR<DenseMatrixType> qr_solver(r.rightCols(L));
gamma.noalias() = qr_solver.solve(r.col(0));
// Update solution and residual using the "minimized residual coefficients"
update.noalias() = r.leftCols(L) * gamma;
x += update;
r.col(0) -= mat * precond.solve(update);
// Update iteration info
++k;
tol_error = r.col(0).stableNorm();
if (tol_error < tol) {
// Exit slightly early by checking the criterion before the update of U;
// after the main while loop the result of that calculation would no
// longer be needed.
break;
}
/*
U=U0-sum(gamma_j*U_j)
Consider the first iteration. Then U only contains U0, so at the start of
the while-loop U should be U0. Therefore only the first N rows of U have to
be updated.
*/
for (Index i = 1; i <= L; ++i) {
U.topRows(N) -= U.block(N * i, 0, N, S) * gamma(i - 1);
}
}
/*
Exit after the while loop terminated.
*/
iters = k;
// Convert back to the unpreconditioned solution
x = precond.solve(x);
// x contains the updates to x0, add those back to obtain the solution
x += x0;
tol_error = tol_error / rhs_norm;
return true;
}
} // namespace internal
template <typename MatrixType_, typename Preconditioner_ = DiagonalPreconditioner<typename MatrixType_::Scalar>>
class IDRSTABL;
namespace internal {
template <typename MatrixType_, typename Preconditioner_>
struct traits<IDRSTABL<MatrixType_, Preconditioner_>> {
typedef MatrixType_ MatrixType;
typedef Preconditioner_ Preconditioner;
};
} // namespace internal
/** \ingroup IterativeLinearSolvers_Module
* \brief IDR(s)STAB(l) is a combination of IDR(s) and BiCGSTAB(l). It is a
* short-recurrence Krylov method for sparse square problems that can
* outperform both IDR(s) and BiCGSTAB(l). IDR(s)STAB(l) generally follows the
* optimal GMRES convergence closely in terms of the number of matrix-vector
* products, but without the increasing cost per iteration of GMRES.
* IDR(s)STAB(l) is suitable for both indefinite systems and systems with
* complex eigenvalues.
*
* This class allows solving for A.x = b sparse linear problems. The vectors x
* and b can be either dense or sparse.
*
* \tparam MatrixType_ the type of the matrix A, which can be a dense or a
* sparse matrix.
* \tparam Preconditioner_ the type of the preconditioner. Default is
* DiagonalPreconditioner.
*
* \implsparsesolverconcept
*
* The maximum number of iterations and tolerance value can be controlled via
* the setMaxIterations() and setTolerance() methods. The defaults are the size
* of the problem for the maximum number of iterations and
* NumTraits<Scalar>::epsilon() for the tolerance.
*
* The tolerance is the maximum relative residual error: |Ax-b|/|b| for which
* the linear system is considered solved.
*
* \b Performance: When using sparse matrices, best performance is achieved for
* a row-major sparse matrix format. Moreover, in this case multi-threading can
* be exploited if the user code is compiled with OpenMP enabled. See \ref
* TopicMultiThreading for details.
*
* By default the iterations start with x=0 as an initial guess of the solution.
* One can control the start using the solveWithGuess() method.
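*
* A minimal usage sketch (the matrix, right-hand side, and the values of S and
* L below are illustrative):
* \code
* int n = 10000;
* VectorXd x(n), b(n);
* SparseMatrix<double> A(n, n);
* // ... fill A and b ...
* IDRSTABL<SparseMatrix<double> > solver;
* solver.setS(8); // dimension of the shadow residual space
* solver.setL(4); // number of minimize-residual steps
* solver.compute(A);
* x = solver.solve(b);
* // solver.info() and solver.error() report convergence status and residual
* \endcode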
*
* IDR(s)STAB(l) can also be used in a matrix-free context, see the following
* \link MatrixfreeSolverExample example \endlink.
*
* \sa class SimplicialCholesky, DiagonalPreconditioner, IdentityPreconditioner
*/
template <typename MatrixType_, typename Preconditioner_>
class IDRSTABL : public IterativeSolverBase<IDRSTABL<MatrixType_, Preconditioner_>> {
typedef IterativeSolverBase<IDRSTABL> Base;
using Base::m_error;
using Base::m_info;
using Base::m_isInitialized;
using Base::m_iterations;
using Base::matrix;
Index m_L;
Index m_S;
public:
typedef MatrixType_ MatrixType;
typedef typename MatrixType::Scalar Scalar;
typedef typename MatrixType::RealScalar RealScalar;
typedef Preconditioner_ Preconditioner;
public:
/** Default constructor. */
IDRSTABL() : m_L(2), m_S(4) {}
/** Initialize the solver with matrix \a A for further \c Ax=b solving.
This constructor is a shortcut for the default constructor followed
by a call to compute().
\warning this class stores a reference to the matrix A as well as some
precomputed values that depend on it. Therefore, if \a A is changed
this class becomes invalid. Call compute() to update it with the new
matrix A, or modify a copy of A.
*/
template <typename MatrixDerived>
explicit IDRSTABL(const EigenBase<MatrixDerived> &A) : Base(A.derived()), m_L(2), m_S(4) {}
/** \internal */
/** Called once per column of b. It:
1. sets the tolerance and maxIterations
2. calls the core solver routine
*/
template <typename Rhs, typename Dest>
void _solve_vector_with_guess_impl(const Rhs &b, Dest &x) const {
m_iterations = Base::maxIterations();
m_error = Base::m_tolerance;
bool ret = internal::idrstabl(matrix(), b, x, Base::m_preconditioner, m_iterations, m_error, m_L, m_S);
m_info = (!ret) ? NumericalIssue : m_error <= 10 * Base::m_tolerance ? Success : NoConvergence;
}
/** Sets the parameter L, the number of minimize-residual steps that are
* used. */
void setL(Index L) {
eigen_assert(L >= 1 && "L needs to be positive");
m_L = L;
}
/** Sets the parameter S, indicating the dimension of the shadow residual
* space. */
void setS(Index S) {
eigen_assert(S >= 1 && "S needs to be positive");
m_S = S;
}
};
} // namespace Eigen
#endif /* EIGEN_IDRSTABL_H */