Implementing BiCGStab Solver Directly in NGSolve

Hi everyone,

I’ve been using the BiCGStab solver from PETSc in NGSolve for some time now. While it works well, I’ve encountered significant computational overhead when converting large NGSolve matrices to PETSc matrices. I attempted to implement the BiCGStab solver directly within NGSolve to address this issue based on C code. However, my implementation has not been converging as expected. Below, I include my implementation and a simple test case, and I’d appreciate any feedback or suggestions for improvement.

from ngsolve import (
    BaseVector,
    Norm,
    ngsglobals,
    H1,
    y,
    x,
    Mesh,
    LinearForm,
    BilinearForm,
    grad,
    dx,
    GridFunction,
    sqrt,
    Integrate,
    Draw
)
from netgen.geom2d import unit_square
from ngsolve.krylovspace import LinearSolver


class BiCGStabSolver(LinearSolver):
    def __init__(
        self,
        *args,
        conjugate: bool = False,
        abstol: float = None,
        maxsteps: int = None,
        printing: bool = False,
        **kwargs
    ):
        if printing:
            print("WARNING: printing is deprecated, use printrates instead!")
            kwargs["printrates"] = printing
        if abstol is not None:
            print("WARNING: abstol is deprecated, use atol instead!")
            kwargs["abstol"] = abstol
        if maxsteps is not None:
            print("WARNING: maxsteps is deprecated, use maxiter instead!")
            kwargs["maxiter"] = maxsteps
        super().__init__(*args, **kwargs)
        self.conjugate = conjugate  # for backward compatibility

    @property
    def errors(self):
        return self.residuals

    def _SolveImpl(self, rhs: BaseVector, sol: BaseVector):
        r, r_tilde, p, p_tilde, s, s_tilde, t, v = [sol.CreateVector() for i in range(8)]
        conjugate = self.conjugate
        r.data = rhs - self.mat * sol 
        r_tilde.data = r 
        rho_new = r_tilde.InnerProduct(r, conjugate=conjugate) 
        p.data = r 
        p_tilde.data = self.pre * p 
        v.data = self.mat * p_tilde 
        alpha = rho_new / r_tilde.InnerProduct(v, conjugate=conjugate) 
        s.data = r 
        s.data -= alpha * v
        s_tilde.data = self.pre * s 
        t.data = self.mat * s_tilde 
        omega = t.InnerProduct(s, conjugate=conjugate) / t.InnerProduct(t, conjugate=conjugate) 
        sol.data += alpha * p_tilde + omega * s_tilde 
        r.data = s
        r.data -= omega * t
        err_i = Norm(r) 
        if self.CheckResidual(err_i):
            return

        while True:
            rho_old = rho_new
            rho_new = r_tilde.InnerProduct(r, conjugate=conjugate)
            beta = (rho_new / rho_old) * (alpha / omega) 
            p.data = r + beta * (p - omega * v)
            p_tilde.data = self.pre * p 
            v.data = self.mat * p_tilde 
            alpha = rho_new / r_tilde.InnerProduct(v, conjugate=conjugate)
            s.data = r
            s.data -= alpha * v
            sol.data += alpha * p_tilde

            err_i = Norm(s)
            if err_i <= err_i*1e-8:
                break

            s_tilde.data = self.pre * s 
            t.data = self.mat * s_tilde 
            omega = t.InnerProduct(s, conjugate=conjugate) / t.InnerProduct(t, conjugate=conjugate)
            sol.data += omega * s_tilde
            r.data = s
            r.data -= omega * t
            err_i = Norm(r)
            if self.CheckResidual(err_i):
                return 

def BiCGStab(
    mat,
    rhs,
    pre=None,
    sol=None,
    tol=1e-12,
    maxsteps=100,
    printrates=True,
    initialize=True,
    conjugate=False,
    callback=None,
    **kwargs
):
    """preconditioned biconjugate gradient stabalized method    Returns
    -------
    (vector)
      Solution vector of the BiCGStab method."""
    solver = BiCGStabSolver(
        mat=mat,
        pre=pre,
        conjugate=conjugate,
        tol=tol,
        maxiter=maxsteps,
        callback=callback,
        printrates=printrates,
        **kwargs
    )
    solver.Solve(rhs=rhs, sol=sol, initialize=initialize)
    return solver.sol


ngsglobals.msg_level = 1  # generate a triangular mesh of mesh-size 0.2
mesh = Mesh(unit_square.GenerateMesh(maxh=0.2))  # H1-conforming finite element space
fes = H1(mesh, order=3, dirichlet=[1, 2, 3, 4])  # define trial- and test-functions
u = fes.TrialFunction()
v = fes.TestFunction()  # the right hand side
f = LinearForm(fes)
f += 32 * (y * (1 - y) + x * (1 - x)) * v * dx
# the bilinear-form
a = BilinearForm(fes, symmetric=True)
a += grad(u) * grad(v) * dx
a.Assemble()
f.Assemble()
# the solution field
gfu_1 = GridFunction(fes)
gfu_1.vec.data = a.mat.Inverse(fes.FreeDofs(), inverse="sparsecholesky") * f.vec
preJpoint = a.mat.CreateSmoother(fes.FreeDofs())
inv = BiCGStabSolver(
    a.mat, pre=preJpoint, maxiter=50, tol=1e-8, printrates=True, conjugate=False
)
gfu_2 = GridFunction(fes)
gfu_2.vec.data = inv.Solve(f.vec, initialize=False)
print("L2-error:", sqrt(Integrate((gfu_1 - gfu_2) * (gfu_1 - gfu_2), mesh)))
Draw(gfu_2)

I have also tried to implement the pseudo-code in the book. Both gave the same results.

    def _SolveImpl(self, rhs : BaseVector, sol : BaseVector):
        r, r_tilde, p, p_tilde, s, s_tilde, t, v = [sol.CreateVector() for i in range(8)]
        conjugate = self.conjugate
        r.data = rhs - self.mat * sol 
        r_tilde.data = r 
        rho_new = r_tilde.InnerProduct(r, conjugate=conjugate) 
        p.data = r 

        for i in range(1,self.maxiter+1):
            print(i)
            p_tilde.data = self.pre * p 
            v.data = self.mat * p_tilde 
            alpha = rho_new / r_tilde.InnerProduct(v, conjugate=conjugate)
            sol.data += alpha * p_tilde
            s.data = r
            s.data -= alpha * v

            err_i = Norm(s)
            if err_i <= err_i*1e-8:
                break

            
            s_tilde.data = self.pre * s 
            t.data = self.mat * s_tilde 
            omega = t.InnerProduct(s, conjugate=conjugate) / t.InnerProduct(t, conjugate=conjugate)
            sol.data += omega * s_tilde
            r.data = s 
            r.data -= omega * t

            err_i = Norm(r)
            if self.CheckResidual(err_i):
                return
            rho_old = rho_new
            rho_new = r_tilde.InnerProduct(r, conjugate=conjugate)
            beta = (rho_new / rho_old) * (alpha / omega) 
            p.data = r + beta * (p - omega * v)

Are you using ngsPETSc or ngs2petsc ? What computational overhead did you found? I think the best way here is to fix this issue (if you are using ngsPETSc we can work on it together !) rather than reimplementing biCGstab because this will solve the issue also if you are using any other non-standard Krylov solver!

Dear Umberto,

I have compared the computational time when using ngs2petsc, ngsPETSc and NGSolve. NGSolve solver is the fastest. I reckon that an increase in the computational time is because of the conversion of NGSolve matrices to PETSc matrices and vice versa.

My colleague also noticed that there’s an issue with the PETSc preconditioner. For example, one uses a PETSc PC as a preconditioner inside the NGSolve preconditioning infrastructure. A memory leak occurs if the preconditioner is called many times, particularly in a BilinearForm where the parameters are regularly updated.

Best regards,
VC

Dear VC,
I’ve been rewriting ngsPETSc PETSc KSP wrap.
One new feature is that you can run ngsPETSc in matrix-free way, this means that no conversion to PETSc Matrix will occur.
This should speed up your code quite a lot and prevent the memory leak you mentioned.
Take a look here: Solving the Poisson problem with different preconditioning strategies — ngsPETSc 0.0.5 documentation.
In particular, the solver parameters you want are:

"ksp_type": "bcgs",
"ksp_monitor": "",
 "pc_type": "mat",
 "ngs_mat_type": "python",
  "ksp_rtol": 1e-10}

You can now also run flexible BiStabCG and Enhanced BiCGStab(L).