HDG method for Stokes equation not giving correct convergence rates

I am solving Stokes equation using a Hybridized Discontinuous Galerkin Method by following the paper “An embedded–hybridized discontinuous Galerkin finite element method for the Stokes equations” by Sander Rhebergena and Garth N. Wells. But i am not getting right order of convergence. This is the code that i am facing problems with. Please give any suggestions where i am doing wrong.

Minimal running code

import netgen.gui
from ngsolve import *
from scipy.sparse.linalg import spsolve
from netgen.geom2d import unit_square
import matplotlib.pyplot as plt
from ngsolve import Mesh
import numpy as np
from math import pi
from ngsolve.webgui import Draw
from shapely.geometry import Polygon

nu = 1 # Viscosity parameter
H = # Stores mesh size at each iteration
DOF = # Stores degree of freedom
L2U = # Stores L2 errors for velocity
L2P = # Stores L2 errors for pressure

u_exact = CoefficientFunction((sin(2pix)sin(2piy), sin(2pix)sin(2piy)))
p_exact = CoefficientFunction(cos(2pix)cos(2piy))
f_expr = CoefficientFunction((8
nupipisin(2pix)sin(2piy) - 2pisin(2pix)cos(2piy), 8nupipisin(2pix)sin(2piy) - 2picos(2pix)sin(2pi*y)))

maxh_values = [1 / (2 ** n) for n in range(0,7)] # Mesh-size
for M in maxh_values:
mesh = Mesh(unit_square.GenerateMesh(maxh=M))
order = 1
V = VectorL2(mesh, order = order, dirichlet=“bottom|left|right|top”)
V_T = VectorFacetFESpace(mesh, order = order, dirichlet=“bottom|left|right|top”)
Q = L2(mesh, order = order - 1)
Q_T = FacetFESpace(mesh, order = order)
N = NumberSpace(mesh); # for lagrange multipliers
W = FESpace([V,V_T,Q,Q_T,N])
DOF.append(V.ndof + V_T.ndof + Q.ndof + Q_T.ndof + N.ndof)

# Test and trial functions
u,uhat,p,phat,lam = W.TrialFunction()
v,vhat,q,qhat,mu  = W.TestFunction()

div_u  = grad(u)[0,0] + grad(u)[1,1]
div_v  = grad(v)[0,0] + grad(v)[1,1]
jump_u = u-uhat
jump_v = v-vhat
alpha  = 4

condense = True
h = specialcf.mesh_size
n = specialcf.normal(mesh.dim)

a = BilinearForm(W, condense=condense)
dS = dx(element_boundary=True)
a += nu*InnerProduct(grad(u),grad(v))*dx + alpha*nu*order**2/h*InnerProduct(jump_u,jump_v)*dS - \
        nu*InnerProduct(grad(u)*n,jump_v)*dS - nu*InnerProduct(grad(v)*n,jump_u)*dS - p*div_v*dx + InnerProduct(v,n)*phat*dS - q*div_u*dx  + InnerProduct(u,n)*qhat*dS + lam*q*dx - mu*p*dx

a.Assemble()
f = LinearForm((f_expr[0] * v[0] + f_expr[1] * v[1]) * dx).Assemble()

# Solve the system
up_h = GridFunction(W)

if not condense:
    inv = a.mat.Inverse(W.FreeDofs(), inverse="sparsecholesky") #, inverse="sparsecholesky
    up_h.vec.data = inv * f.vec
else:
    fmod = (f.vec + a.harmonic_extension_trans * f.vec).Evaluate()
    inv = a.mat.Inverse(W.FreeDofs(True), inverse="sparsecholesky") #, inverse="umfpack"
    up_h.vec.data = inv * fmod
    up_h.vec.data += a.harmonic_extension * up_h.vec
    up_h.vec.data += a.inner_solve * f.vec
    
u_h, uhat_h, p_h, phat_h, lam_h = up_h.components


# Compute L2 error
L2_u = sqrt(Integrate((u_h[0] - u_exact[0])**2 + (u_h[1] - u_exact[1])**2, mesh))
L2_p = sqrt(Integrate((p_h - p_exact)**2, mesh))
L2U.append(L2_u)
L2P.append(L2_p)
H.append(M)
print("L2-u: {:.2e}".format(L2_u))
print("L2-p: {:.2e}".format(L2_p))
print("h=",M)
print("DOF=",DOF)

for i in range(0, len(maxh_values)):
r1 = round(log(L2U[i-1] / L2U[i]) / log(2),2)
r2 = round(log(L2P[i-1] / L2P[i]) / log(2),2)
print (“h= {}, L2u= {},L2p= {},”.format(H[i], r1,r2))