Source code for firedrake.preconditioners.fdm

from textwrap import dedent
from functools import partial
from itertools import chain, product
from firedrake.petsc import PETSc
from firedrake.preconditioners.base import PCBase
from firedrake.preconditioners.patch import bcdofs
from firedrake.preconditioners.pmg import (prolongation_matrix_matfree,
                                           evaluate_dual,
                                           get_permutation_to_nodal_elements,
                                           cache_generate_code)
from firedrake.preconditioners.facet_split import split_dofs, restricted_dofs
from firedrake.formmanipulation import ExtractSubBlock
from firedrake.functionspace import FunctionSpace, MixedFunctionSpace
from firedrake.function import Function
from firedrake.cofunction import Cofunction
from firedrake.parloops import par_loop
from firedrake.ufl_expr import TestFunction, TestFunctions, TrialFunctions
from firedrake.utils import cached_property
from firedrake_citations import Citations
from ufl.algorithms.ad import expand_derivatives
from ufl.algorithms.expand_indices import expand_indices
from finat.element_factory import create_element
from pyop2.compilation import load
from pyop2.mpi import COMM_SELF
from pyop2.sparsity import get_preallocation
from pyop2.utils import get_petsc_dir, as_tuple
from pyop2 import op2
from tsfc.ufl_utils import extract_firedrake_constants
from firedrake.tsfc_interface import compile_form

import firedrake.dmhooks as dmhooks
import ufl
import finat.ufl
import FIAT
import finat
import numpy
import ctypes

Citations().add("Brubeck2022", """
@article{Brubeck2022,
  title={A scalable and robust vertex-star relaxation for high-order {FEM}},
  author={Brubeck, Pablo D. and Farrell, Patrick E.},
  journal = {SIAM J. Sci. Comput.},
  volume = {44},
  number = {5},
  pages = {A2991-A3017},
  year = {2022},
  doi = {10.1137/21M1444187}
""")

Citations().add("Brubeck2024", """
@article{Brubeck2024,
  title={{Multigrid solvers for the de Rham complex with optimal complexity in polynomial degree}},
  author={Brubeck, Pablo D. and Farrell, Patrick E.},
  journal = {SIAM J. Sci. Comput.},
  volume = {46},
  number = {3},
  pages = {A1549-A1573},
  year = {2024},
  doi = {10.1137/22M1537370}
""")


__all__ = ("FDMPC", "PoissonFDMPC")


def broken_function(V, val):
    W = FunctionSpace(V.mesh(), finat.ufl.BrokenElement(V.ufl_element()))
    w = Function(W, dtype=val.dtype)
    v = Function(V, val=val)
    domain = "{[i]: 0 <= i < v.dofs}"
    instructions = """
    for i
        w[i] = v[i]
    end
    """
    par_loop((domain, instructions), ufl.dx, {'w': (w, op2.WRITE), 'v': (v, op2.READ)})
    return w


def mask_local_indices(V, lgmap, repeated=False):
    mask = lgmap.indices
    if repeated:
        w = broken_function(V, mask)
        V = w.function_space()
        mask = w.dat.data_ro_with_halos
    indices = numpy.arange(len(mask), dtype=PETSc.IntType)
    indices[mask == -1] = -1
    indices_dat = V.make_dat(val=indices)
    indices_acc = indices_dat(op2.READ, V.cell_node_map())
    return indices_acc


[docs] class FDMPC(PCBase): """ A preconditioner for tensor-product elements that changes the shape functions so that the H(d) (d in {grad, curl, div}) Riesz map is sparse on Cartesian cells, and assembles a global sparse matrix on which other preconditioners, such as `ASMStarPC`, can be applied. Here we assume that the volume integrals in the Jacobian can be expressed as: inner(d(v), alpha * d(u))*dx + inner(v, beta * u)*dx where alpha and beta are possibly tensor-valued. The sparse matrix is obtained by approximating (v, alpha * u) and (v, beta * u) as diagonal mass matrices. The PETSc options inspected by this class are: - 'fdm_mat_type': can be either 'aij' or 'sbaij' - 'fdm_static_condensation': are we assembling the Schur complement on facets? """ _prefix = "fdm_" _variant = "fdm" _citation = "Brubeck2024" _cache = {}
[docs] @PETSc.Log.EventDecorator("FDMInit") def initialize(self, pc): Citations().register(self._citation) self.comm = pc.comm Amat, Pmat = pc.getOperators() prefix = pc.getOptionsPrefix() options_prefix = prefix + self._prefix options = PETSc.Options(options_prefix) use_amat = options.getBool("pc_use_amat", True) use_static_condensation = options.getBool("static_condensation", False) pmat_type = options.getString("mat_type", PETSc.Mat.Type.AIJ) self.mat_type = pmat_type allow_repeated = False if pmat_type == "is": allow_repeated = options.getBool("mat_is_allow_repeated", True) self.allow_repeated = allow_repeated appctx = self.get_appctx(pc) fcp = appctx.get("form_compiler_parameters") or {} self.appctx = appctx # Get original Jacobian form and bcs J, bcs = self.form(pc) if Pmat.getType() == "python": mat_type = "matfree" else: ctx = dmhooks.get_appctx(pc.getDM()) mat_type = ctx.mat_type # TODO assemble Schur complements specified by a SLATE Tensor # we might extract the form on the interface-interface block like this: # # if isinstance(J, firedrake.slate.TensorBase) and use_static_condensation: # J = J.children[0].form if not isinstance(J, ufl.Form): raise ValueError("Expecting a ufl.Form, not a %r" % type(J)) # Transform the problem into the space with FDM shape functions V = J.arguments()[-1].function_space() V_fdm = V.reconstruct(variant=self._variant) if V == V_fdm: J_fdm, bcs_fdm = (J, bcs) else: # Reconstruct Jacobian and bcs with variant element J_fdm = J(*(t.reconstruct(function_space=V_fdm) for t in J.arguments())) bcs_fdm = [] for bc in bcs: W = V_fdm for index in bc._indices: W = W.sub(index) bcs_fdm.append(bc.reconstruct(V=W, g=0)) # Create a new _SNESContext in the variant space self._ctx_ref = self.new_snes_ctx(pc, J_fdm, bcs_fdm, mat_type, fcp=fcp, options_prefix=options_prefix) # Construct interpolation from variant to original spaces self.fdm_interp = prolongation_matrix_matfree(V_fdm, V, bcs_fdm, []) self.work_vec_x = Amat.createVecLeft() self.work_vec_y = Amat.createVecRight() if use_amat: from firedrake.assemble import get_assembler form_assembler = get_assembler(J_fdm, bcs=bcs_fdm, form_compiler_parameters=fcp, mat_type=mat_type, options_prefix=options_prefix) self.A = form_assembler.allocate() self._assemble_A = form_assembler.assemble self._assemble_A(tensor=self.A) Amat = self.A.petscmat if len(bcs) > 0: self.bc_nodes = numpy.unique(numpy.concatenate([bcdofs(bc, ghost=False) for bc in bcs])) else: self.bc_nodes = numpy.empty(0, dtype=PETSc.IntType) # Internally, we just set up a PC object that the user can configure # however from the PETSc command line. Since PC allows the user to specify # a KSP, we can do iterative by -fdm_pc_type ksp. fdmpc = PETSc.PC().create(comm=pc.comm) fdmpc.incrementTabLevel(1, parent=pc) # We set a DM and an appropriate SNESContext on the constructed PC so one # can do e.g. multigrid or patch solves. self._dm = V_fdm.dm fdmpc.setDM(self._dm) fdmpc.setOptionsPrefix(options_prefix) self.pc = fdmpc # Assemble the FDM preconditioner with sparse local matrices self.V = V_fdm Amat, Pmat, self.assembly_callables = self.allocate_matrix(Amat, V_fdm, J_fdm, bcs_fdm, fcp, pmat_type, use_static_condensation, use_amat) self.assembly_callables.append(partial(Pmat.viewFromOptions, "-pmat_view", fdmpc)) self._assemble_P() fdmpc.setOperators(A=Amat, P=Pmat) fdmpc.setUseAmat(use_amat) if hasattr(self, "_ctx_ref"): with dmhooks.add_hooks(self._dm, self, appctx=self._ctx_ref, save=False): fdmpc.setFromOptions() else: fdmpc.setFromOptions()
[docs] @PETSc.Log.EventDecorator("FDMPrealloc") def allocate_matrix(self, Amat, V, J, bcs, fcp, pmat_type, use_static_condensation, use_amat): """ Allocate the FDM sparse preconditioner. :arg Amat: the original Jacobian :class:`PETSc.Mat` :arg V: the :class:`.FunctionSpace` of the form arguments :arg J: the Jacobian bilinear form :arg bcs: an iterable of boundary conditions on V :arg fcp: form compiler parameters to assemble coefficients :arg pmat_type: the `PETSc.Mat.Type` for the blocks in the diagonal :arg use_static_condensation: are we assembling the statically-condensed Schur complement on facets? :arg use_amat: are we computing the Schur complement exactly? :returns: 3-tuple with the Jacobian :class:`PETSc.Mat`, the preconditioner :class:`PETSc.Mat`, and a list of assembly callables """ symmetric = pmat_type.endswith("sbaij") ifacet = [i for i, Vsub in enumerate(V) if is_restricted(Vsub.finat_element)[1]] if len(ifacet) == 0: Vfacet = None Vbig = V ebig = V.ufl_element() _, fdofs = split_dofs(V.finat_element) elif len(ifacet) == 1: Vfacet = V[ifacet[0]] ebig, = set(unrestrict_element(Vsub.ufl_element()) for Vsub in V) Vbig = FunctionSpace(V.mesh(), ebig) if len(V) > 1: dims = [Vsub.finat_element.space_dimension() for Vsub in V] assert sum(dims) == Vbig.finat_element.space_dimension() fdofs = restricted_dofs(Vfacet.finat_element, Vbig.finat_element) else: raise ValueError("Expecting at most one FunctionSpace restricted onto facets.") self.embedding_element = ebig if Vbig.block_size == 1: self.fises = PETSc.IS().createGeneral(fdofs, comm=COMM_SELF) else: self.fises = PETSc.IS().createBlock(Vbig.block_size, fdofs, comm=COMM_SELF) # Create data structures needed for assembly self.lgmaps = {Vsub: Vsub.local_to_global_map([bc for bc in bcs if bc.function_space() == Vsub]) for Vsub in V} self.indices_acc = {Vsub: mask_local_indices(Vsub, self.lgmaps[Vsub], self.allow_repeated) for Vsub in V} self.coefficients, assembly_callables = self.assemble_coefficients(J, fcp) self.assemblers = {} Pmats = {} # Dictionary with kernel to compute the Schur complement self.schur_kernel = {} if V == Vbig and Vbig.finat_element.formdegree == 0: # If we are in H(grad), we just pad with zeros on the statically-condensed pattern self.schur_kernel[V] = SchurComplementPattern elif Vfacet and use_static_condensation: # If we are in a facet space, we build the Schur complement on its diagonal block if Vfacet.finat_element.formdegree == 0 and Vfacet.block_size == 1: self.schur_kernel[Vfacet] = SchurComplementDiagonal interior_pc_type = PETSc.PC.Type.JACOBI elif symmetric: self.schur_kernel[Vfacet] = SchurComplementBlockCholesky interior_pc_type = PETSc.PC.Type.ICC else: self.schur_kernel[Vfacet] = SchurComplementBlockLU interior_pc_type = PETSc.PC.Type.ILU if use_amat: # Replace the facet block of the stiffness matrix with the exact Schur complement # Set up the preconditioner with exact off-diagonal blocks and exact inverse of the interior block Amat, Pmats = self.condense(Amat, J, bcs, fcp, pc_type=interior_pc_type) diagonal_terms = [] # Loop over all pairs of subspaces for Vrow, Vcol in product(V, V): if (Vrow, Vcol) in Pmats: continue if symmetric and (Vcol, Vrow) in Pmats: Pmats[Vrow, Vcol] = PETSc.Mat().createTranspose(Pmats[Vcol, Vrow]) continue # Preallocate and assemble the FDM auxiliary sparse operator on_diag = Vrow == Vcol P = self.setup_block(Vrow, Vcol) addv = self.insert_mode[Vrow, Vcol] assemble_sparsity = P.getType() == "is" if assemble_sparsity: self.set_values(P, Vrow, Vcol, mat_type="preallocator") if on_diag: # populate diagonal entries i = numpy.arange(P.getLGMap()[0].getSize(), dtype=PETSc.IntType)[:, None] v = numpy.ones(i.shape, dtype=PETSc.ScalarType) P.setValuesLocalRCV(i, i, v, addv=addv) P.assemble() # append callables to zero entries, insert element matrices, and apply BCs assembly_callables.append(P.zeroEntries) assembly_callables.append(partial(self.set_values, P, Vrow, Vcol)) if on_diag: own = Vrow.dof_dset.layout_vec.getLocalSize() bdofs = numpy.flatnonzero(self.lgmaps[Vrow].indices[:own] < 0).astype(PETSc.IntType)[:, None] if assemble_sparsity: Vrow.dof_dset.lgmap.apply(bdofs, result=bdofs) assembly_callables.append(P.assemble) assembly_callables.append(partial(P.zeroRows, bdofs, 1.0)) else: v = numpy.ones(bdofs.shape, PETSc.ScalarType) assembly_callables.append(partial(P.setValuesLocalRCV, bdofs, bdofs, v, addv=addv)) assembly_callables.append(P.assemble) gamma = self.coefficients.get("facet") if gamma is not None and gamma.function_space() == Vrow.dual(): with gamma.dat.vec_ro as diag: diagonal_terms.append(partial(P.setDiagonal, diag, addv=addv)) Pmats[Vrow, Vcol] = P def sub_nullspace(nsp, iset): if not nsp.handle or iset is None: return nsp vectors = [vec.getSubVector(iset).copy() for vec in nsp.getVecs()] for v in vectors: v.normalize() return PETSc.NullSpace().create(constant=nsp.hasConstant(), vectors=vectors, comm=nsp.getComm()) def set_nullspaces(P, A, iset=None): P.setNullSpace(sub_nullspace(A.getNullSpace(), iset)) P.setTransposeNullSpace(sub_nullspace(A.getTransposeNullSpace(), iset)) P.setNearNullSpace(sub_nullspace(A.getNearNullSpace(), iset)) if len(V) == 1: Pmat = Pmats[V, V] set_nullspaces(Pmat, Amat) else: Pmat = PETSc.Mat().createNest([[Pmats[Vrow, Vcol] for Vcol in V] for Vrow in V], comm=self.comm) for Vrow, iset in zip(V, Pmat.getNestISs()[0]): set_nullspaces(Pmats[Vrow, Vrow], Amat, iset=iset) assembly_callables.append(Pmat.assemble) assembly_callables.extend(diagonal_terms) return Amat, Pmat, assembly_callables
@PETSc.Log.EventDecorator("FDMAssemble") def _assemble_P(self): for _assemble in self.assembly_callables: _assemble()
[docs] @PETSc.Log.EventDecorator("FDMUpdate") def update(self, pc): if hasattr(self, "A"): self._assemble_A(tensor=self.A) self._assemble_P()
[docs] def apply(self, pc, x, y): if hasattr(self, "fdm_interp"): self.fdm_interp.multTranspose(x, self.work_vec_x) with dmhooks.add_hooks(self._dm, self, appctx=self._ctx_ref): self.pc.apply(self.work_vec_x, self.work_vec_y) self.fdm_interp.mult(self.work_vec_y, y) y.array_w[self.bc_nodes] = x.array_r[self.bc_nodes] else: self.pc.apply(x, y)
[docs] def applyTranspose(self, pc, x, y): if hasattr(self, "fdm_interp"): self.fdm_interp.multTranspose(x, self.work_vec_y) with dmhooks.add_hooks(self._dm, self, appctx=self._ctx_ref): self.pc.applyTranspose(self.work_vec_y, self.work_vec_x) self.fdm_interp.mult(self.work_vec_x, y) y.array_w[self.bc_nodes] = x.array_r[self.bc_nodes] else: self.pc.applyTranspose(x, y)
[docs] def view(self, pc, viewer=None): super(FDMPC, self).view(pc, viewer) if hasattr(self, "pc"): viewer.printfASCII("PC to apply inverse\n") self.pc.view(viewer)
[docs] def destroy(self, pc): if hasattr(self, "A"): self.A.petscmat.destroy() if hasattr(self, "pc"): self.pc.getOperators()[-1].destroy() self.pc.destroy()
[docs] def condense(self, A, J, bcs, fcp, pc_type="icc"): """Construct block matrices used for matrix-free static condensation. The inversion of the interior-interior block is replaced with a local KSP object that is reused on each cell within an MPI rank. Parameters ---------- A : PETSc.Mat The matrix to statically condense. J : ufl.Form The bilinear form to statically condense. bcs : .BCBase[] An iterable of boundary conditions to apply on ``A``. fcp : dict The form compiler parameters. pc_type : PETSc.PC.Type The preconditioner type for the interior solver. Returns ------- Smat : PETSc.Mat A matrix with the original blocks of ``A``, except that the matrix-free Schur complement replaces the interface-interface block. Pmat : dict A dict mapping pairs of function spaces to the preconditioner blocks ``[[inv(A00), A01], [A10, inv(S)]]``. """ Smats = {} V = J.arguments()[0].function_space() V0 = next((Vi for Vi in V if is_restricted(Vi.finat_element)[0]), None) V1 = next((Vi for Vi in V if is_restricted(Vi.finat_element)[1]), None) if V0 is None: V0 = FunctionSpace(V.mesh(), restrict_element(self.embedding_element, "interior")) if V1 is None: V1 = FunctionSpace(V.mesh(), restrict_element(self.embedding_element, "facet")) if len(V) == 1: J00 = J(*(t.reconstruct(function_space=V0) for t in J.arguments())) elif len(V) == 2: J00 = ExtractSubBlock().split(J, argument_indices=(V0.index, V0.index)) ises = V.dof_dset.field_ises Smats[V[0], V[1]] = A.createSubMatrix(ises[0], ises[1]) Smats[V[1], V[0]] = A.createSubMatrix(ises[1], ises[0]) unindexed = {Vsub: FunctionSpace(Vsub.mesh(), Vsub.ufl_element()) for Vsub in V} bcs = tuple(bc.reconstruct(V=unindexed[bc.function_space()], g=0) for bc in bcs) else: raise ValueError("Expecting at most 2 components") Pmats = dict(Smats) C0 = self.assemble_reference_tensor(V0) R0 = self.assemble_reference_tensor(V0, transpose=True) A0 = TripleProductKernel(R0, self._element_mass_matrix, C0) K0 = InteriorSolveKernel(A0, J00, fcp=fcp, pc_type=pc_type) K1 = ImplicitSchurComplementKernel(K0) kernels = {V0: K0, V1: K1} comm = self.comm args = [self.coefficients["cell"], V0.mesh().coordinates, *J00.coefficients(), *extract_firedrake_constants(J00)] args_acc = [arg.dat(op2.READ, arg.cell_node_map()) for arg in args] for Vsub in V: K = kernels[Vsub] x = Function(Vsub) y = Function(Vsub) sizes = (Vsub.dof_dset.layout_vec.getSizes(),) * 2 parloop = op2.ParLoop(K.kernel(), Vsub.mesh().cell_set, op2.PassthroughArg(op2.OpaqueType(K.result.klass), K.result.handle), *args_acc, x.dat(op2.READ, x.cell_node_map()), y.dat(op2.INC, y.cell_node_map())) ctx = PythonMatrixContext(parloop, x, y, bcs=bcs) Smats[Vsub, Vsub] = PETSc.Mat().createPython(sizes, context=ctx, comm=comm) if Vsub == V0: Pmats[Vsub, Vsub] = Smats[Vsub, Vsub] Smats[Vsub, Vsub] = A.createSubMatrix(ises[Vsub.index], ises[Vsub.index]) Smat = Smats[V, V] if len(V) == 1 else PETSc.Mat().createNest([[Smats[Vrow, Vcol] for Vcol in V] for Vrow in V], comm=comm) return Smat, Pmats
[docs] @PETSc.Log.EventDecorator("FDMCoefficients") def assemble_coefficients(self, J, fcp, block_diagonal=False): """ Obtain coefficients for the auxiliary operator as the diagonal of a weighted mass matrix in broken(V^k) * broken(V^{k+1}). See Section 3.2 of Brubeck2024. :arg J: the Jacobian bilinear :class:`ufl.Form`, :arg fcp: form compiler parameters to assemble the diagonal of the mass matrices. :arg block_diagonal: are we assembling the block diagonal of the mass matrices? :returns: a 2-tuple of a dict of coefficients and a list of assembly callables. """ assembly_callables = [] # Basic idea: take the original bilinear form and # replace the exterior derivatives with arguments in broken(V^{k+1}). # Then, replace the original arguments with arguments in broken(V^k). # Where the broken spaces have L2-orthogonal FDM basis functions. index = len(J.arguments()[0].function_space())-1 if index: splitter = ExtractSubBlock() J = splitter.split(J, argument_indices=(index, index)) args_J = J.arguments() e = args_J[0].ufl_element() mesh = args_J[0].function_space().mesh() tdim = mesh.topological_dimension() if isinstance(e, (finat.ufl.VectorElement, finat.ufl.TensorElement)): e = e._sub_element e = unrestrict_element(e) sobolev = e.sobolev_space # Replacement rule for the exterior derivative = grad(arg) * eps map_grad = None if sobolev == ufl.H1: map_grad = lambda p: p elif sobolev in [ufl.HCurl, ufl.HDiv]: u = ufl.Coefficient(ufl.FunctionSpace(mesh, e)) du = ufl.variable(ufl.grad(u)) dku = ufl.div(u) if sobolev == ufl.HDiv else ufl.curl(u) eps = expand_derivatives(ufl.diff(ufl.replace(expand_derivatives(dku), {ufl.grad(u): du}), du)) if sobolev == ufl.HDiv: map_grad = lambda p: ufl.outer(p, eps/tdim) elif len(eps.ufl_shape) == 3: map_grad = lambda p: ufl.dot(p, eps/2) else: map_grad = lambda p: p*(eps/2) # Construct Z = broken(V^k) * broken(V^{k+1}) V = args_J[0].function_space() fe = V.finat_element formdegree = fe.formdegree degree, = set(as_tuple(fe.degree)) qdeg = degree if formdegree == tdim: qfam = "DG" if tdim == 1 else "DQ" qdeg = 0 elif formdegree == 0: qfam = "DG" if tdim == 1 else "RTCE" if tdim == 2 else "NCE" elif formdegree == 1 and tdim == 3: qfam = "NCF" else: qfam = "DQ L2" qdeg = degree - 1 qvariant = "fdm_quadrature" elements = [e.reconstruct(variant=qvariant), finat.ufl.FiniteElement(qfam, cell=mesh.ufl_cell(), degree=qdeg, variant=qvariant)] elements = list(map(finat.ufl.BrokenElement, elements)) if V.shape: elements = [finat.ufl.TensorElement(ele, shape=V.shape) for ele in elements] Z = FunctionSpace(mesh, finat.ufl.MixedElement(elements)) # Transform the exterior derivative and the original arguments of J to arguments in Z args = (TestFunctions(Z), TrialFunctions(Z)) repargs = {t: v[0] for t, v in zip(args_J, args)} repgrad = {ufl.grad(t): map_grad(v[1]) for t, v in zip(args_J, args)} if map_grad else {} Jcell = expand_indices(expand_derivatives(ufl.Form(J.integrals_by_type("cell")))) mixed_form = ufl.replace(ufl.replace(Jcell, repgrad), repargs) # Return coefficients and assembly callables if block_diagonal and V.shape: from firedrake.assemble import assemble bdiags = [] M = assemble(mixed_form, mat_type="matfree", form_compiler_parameters=fcp) for iset in Z.dof_dset.field_ises: sub = M.petscmat.createSubMatrix(iset, iset) ctx = sub.getPythonContext() bdiags.append(ctx._block_diagonal) assembly_callables.append(ctx._assemble_block_diagonal) W = MixedFunctionSpace([c.function_space() for c in bdiags]) tensor = Function(W, val=op2.MixedDat([c.dat for c in bdiags])) else: from firedrake.assemble import get_assembler tensor = Function(Z.dual()) assembly_callables.append(partial(get_assembler(mixed_form, form_compiler_parameters=fcp, diagonal=True).assemble, tensor=tensor)) coefficients = {"cell": tensor} facet_integrals = [i for i in J.integrals() if "facet" in i.integral_type()] J_facet = expand_indices(expand_derivatives(ufl.Form(facet_integrals))) if len(J_facet.integrals()) > 0: from firedrake.assemble import get_assembler gamma = coefficients.setdefault("facet", Function(V.dual())) assembly_callables.append(partial(get_assembler(J_facet, form_compiler_parameters=fcp, tensor=gamma, diagonal=True).assemble, tensor=gamma)) return coefficients, assembly_callables
[docs] @PETSc.Log.EventDecorator("FDMRefTensor") def assemble_reference_tensor(self, V, transpose=False, sort_interior=False): """ Return the reference tensor used in the diagonal factorisation of the sparse cell matrices. See Section 3.2 of Brubeck2024. :arg V: a :class:`.FunctionSpace` :returns: a :class:`PETSc.Mat` interpolating V^k * d(V^k) onto broken(V^k) * broken(V^{k+1}) on the reference element. """ bsize = V.block_size fe = V.finat_element tdim = fe.cell.get_spatial_dimension() formdegree = fe.formdegree degree, = set(as_tuple(fe.degree)) if formdegree == tdim: degree = degree + 1 is_interior, is_facet = is_restricted(fe) key = (bsize, tdim, degree, formdegree, is_interior, is_facet, transpose, sort_interior) cache = self._cache.setdefault("reference_tensor", {}) try: return cache[key] except KeyError: pass if transpose: result = self.assemble_reference_tensor(V, transpose=False, sort_interior=sort_interior) result = PETSc.Mat().createTranspose(result).convert(result.getType()) return cache.setdefault(key, result) if sort_interior and is_interior: assert is_interior and not is_facet and not transpose # Sort DOFs to make A00 block diagonal with blocks of increasing dimension along the diagonal result = self.assemble_reference_tensor(V, transpose=transpose, sort_interior=False) if formdegree != 0: # Compute the stiffness matrix on the interior of a cell A00 = self._element_mass_matrix.PtAP(result) indptr, indices, _ = A00.getValuesCSR() degree = numpy.diff(indptr) # Sort by blocks uniq, u_index = numpy.unique(indices, return_index=True) perm = uniq[u_index.argsort(kind='stable')] # Sort by degree degree = degree[perm] perm = perm[degree.argsort(kind='stable')] A00.destroy() isperm = PETSc.IS().createGeneral(perm, comm=result.getComm()) result = get_submat(result, iscol=isperm, permute=True) isperm.destroy() return cache.setdefault(key, result) short_key = key[:-3] + (False,) * 3 try: result = cache[short_key] except KeyError: # Get CG(k) and DG(k-1) 1D elements from V elements = sorted(get_base_elements(fe), key=lambda e: e.formdegree) e0 = elements[0] if elements[0].formdegree == 0 else None e1 = elements[-1] if elements[-1].formdegree == 1 else None if e0 and is_interior: e0 = FIAT.RestrictedElement(e0, restriction_domain="interior") # Get broken(CG(k)) and DG(k-1) 1D elements from the coefficient spaces Z = self.coefficients["cell"].function_space() Q0 = Z[0].finat_element.element elements = sorted(get_base_elements(Q0), key=lambda e: e.formdegree) q0 = elements[0] if elements[0].formdegree == 0 else None q1 = elements[-1] if q1.formdegree != 1: Q1 = Z[1].finat_element.element q1 = sorted(get_base_elements(Q1), key=lambda e: e.formdegree)[-1] # Interpolate V * d(V) -> space(beta) * space(alpha) comm = COMM_SELF zero = PETSc.Mat() A00 = petsc_sparse(evaluate_dual(e0, q0), comm=comm) if e0 and q0 else zero A11 = petsc_sparse(evaluate_dual(e1, q1), comm=comm) if e1 else zero A10 = petsc_sparse(evaluate_dual(e0, q1, derivative="grad"), comm=comm) if e0 else zero B_blocks = mass_blocks(tdim, formdegree, A00, A11) A_blocks = diff_blocks(tdim, formdegree, A00, A11, A10) result = block_mat(B_blocks + A_blocks, destroy_blocks=True) A00.destroy() A11.destroy() A10.destroy() if bsize != 1: eye = petsc_sparse(numpy.eye(bsize), comm=result.getComm()) temp = result result = temp.kron(eye) temp.destroy() eye.destroy() if is_facet: cache[short_key] = result result = get_submat(result, iscol=self.fises) return cache.setdefault(key, result)
@cached_property def _element_mass_matrix(self): Z = self.coefficients["cell"].function_space() shape = (sum(V.finat_element.space_dimension() for V in Z),) + Z[0].shape data = numpy.ones(shape, dtype=PETSc.RealType) shape += (1,) * (3-len(shape)) nrows = shape[0] * shape[1] ai = numpy.arange(nrows+1, dtype=PETSc.IntType) aj = numpy.tile(ai[:-1].reshape((-1, shape[1])), (1, shape[2])) if shape[2] > 1: ai *= shape[2] data = numpy.tile(numpy.eye(shape[2], dtype=data.dtype), shape[:1] + (1,)*(len(shape)-1)) return PETSc.Mat().createAIJ((nrows, nrows), csr=(ai, aj, data), comm=COMM_SELF) @cached_property def _element_kernels(self): M = self._element_mass_matrix element_kernels = {} for Vrow, Vcol in product(self.V, self.V): # Interpolation of basis and exterior derivative onto broken spaces C1 = self.assemble_reference_tensor(Vcol) R1 = self.assemble_reference_tensor(Vrow, transpose=True) # Element stiffness matrix = R1 * M * C1, see Equation (3.9) of Brubeck2022b element_kernel = TripleProductKernel(R1, M, C1) schur_kernel = self.schur_kernel.get(Vrow) if Vrow == Vcol else None if schur_kernel is not None: V0 = FunctionSpace(Vrow.mesh(), restrict_element(self.embedding_element, "interior")) C0 = self.assemble_reference_tensor(V0, sort_interior=True) R0 = self.assemble_reference_tensor(V0, sort_interior=True, transpose=True) element_kernel = schur_kernel(element_kernel, TripleProductKernel(R1, M, C0), TripleProductKernel(R0, M, C1), TripleProductKernel(R0, M, C0)) element_kernels[Vrow, Vcol] = element_kernel return element_kernels @cached_property def insert_mode(self): is_dg = {} for Vsub in self.V: element = Vsub.finat_element is_dg[Vsub] = element.entity_dofs() == element.entity_closure_dofs() insert_mode = {} for Vrow, Vcol in product(self.V, self.V): addv = PETSc.InsertMode.ADD_VALUES if is_dg[Vrow] or is_dg[Vcol]: addv = PETSc.InsertMode.INSERT insert_mode[Vrow, Vcol] = addv return insert_mode @cached_property def assembly_lgmaps(self): if self.mat_type != "is": return {Vsub: Vsub.dof_dset.lgmap for Vsub in self.V} lgmaps = {} for Vsub in self.V: lgmap = Vsub.dof_dset.lgmap if self.allow_repeated: indices = broken_function(Vsub, lgmap.indices).dat.data_ro else: indices = lgmap.indices.copy() local_indices = numpy.arange(len(indices), dtype=PETSc.IntType) cell_node_map = broken_function(Vsub, local_indices).dat.data_ro ghost = numpy.setdiff1d(local_indices, numpy.unique(cell_node_map), assume_unique=True) indices[ghost] = -1 lgmaps[Vsub] = PETSc.LGMap().create(indices, bsize=lgmap.getBlockSize(), comm=lgmap.getComm()) return lgmaps
[docs] def setup_block(self, Vrow, Vcol): # Preallocate the auxiliary sparse operator sizes = tuple(Vsub.dof_dset.layout_vec.getSizes() for Vsub in (Vrow, Vcol)) rmap = self.assembly_lgmaps[Vrow] cmap = self.assembly_lgmaps[Vcol] on_diag = Vrow == Vcol ptype = self.mat_type if on_diag else PETSc.Mat.Type.AIJ preallocator = PETSc.Mat().create(comm=self.comm) preallocator.setType(PETSc.Mat.Type.PREALLOCATOR) preallocator.setSizes(sizes) preallocator.setISAllowRepeated(self.allow_repeated) preallocator.setLGMap(rmap, cmap) preallocator.setOption(PETSc.Mat.Option.IGNORE_ZERO_ENTRIES, False) if ptype.endswith("sbaij"): preallocator.setOption(PETSc.Mat.Option.IGNORE_LOWER_TRIANGULAR, True) preallocator.setUp() self.set_values(preallocator, Vrow, Vcol) preallocator.assemble() dnz, onz = get_preallocation(preallocator, sizes[0][0]) if on_diag: numpy.maximum(dnz, 1, out=dnz) preallocator.destroy() P = PETSc.Mat().create(comm=self.comm) P.setType(ptype) P.setSizes(sizes) P.setISAllowRepeated(self.allow_repeated) P.setLGMap(rmap, cmap) if on_diag and ptype == "is" and self.allow_repeated: bsize = Vrow.finat_element.space_dimension() * Vrow.value_size local_mat = P.getISLocalMat() nblocks = local_mat.getSize()[0] // bsize local_mat.setVariableBlockSizes([bsize] * nblocks) P.setPreallocationNNZ((dnz, onz)) if not (ptype.endswith("sbaij") or ptype == "is"): P.setOption(PETSc.Mat.Option.UNUSED_NONZERO_LOCATION_ERR, True) P.setOption(PETSc.Mat.Option.NEW_NONZERO_ALLOCATION_ERR, True) P.setOption(PETSc.Mat.Option.STRUCTURALLY_SYMMETRIC, on_diag) P.setOption(PETSc.Mat.Option.FORCE_DIAGONAL_ENTRIES, True) P.setOption(PETSc.Mat.Option.KEEP_NONZERO_PATTERN, True) if ptype.endswith("sbaij"): P.setOption(PETSc.Mat.Option.IGNORE_LOWER_TRIANGULAR, True) P.setUp() return P
[docs] @PETSc.Log.EventDecorator("FDMSetValues") def set_values(self, A, Vrow, Vcol, mat_type=None): """Assemble the auxiliary operator in the FDM basis using sparse reference tensors and diagonal mass matrices. Parameters ---------- A : PETSc.Mat The (initialized) matrix to assemble. Vrow : FunctionSpace The test space. Vcol : FunctionSpace The trial space. """ key = (Vrow.ufl_element(), Vcol.ufl_element()) on_diag = Vrow == Vcol if mat_type is None: mat_type = A.getType() try: assembler = self.assemblers[key] except KeyError: addv = self.insert_mode[Vrow, Vcol] spaces = (Vrow,) if on_diag else (Vrow, Vcol) indices_acc = tuple(self.indices_acc[V] for V in spaces) M = self._element_mass_matrix # Interpolation of basis and exterior derivative onto broken spaces C1 = self.assemble_reference_tensor(Vcol) R1 = self.assemble_reference_tensor(Vrow, transpose=True) # Element stiffness matrix = R1 * M * C1, see Equation (3.9) of Brubeck2024 element_kernel = TripleProductKernel(R1, M, C1) schur_kernel = self.schur_kernel.get(Vrow) if on_diag else None if schur_kernel is not None: V0 = FunctionSpace(Vrow.mesh(), restrict_element(self.embedding_element, "interior")) C0 = self.assemble_reference_tensor(V0, sort_interior=True) R0 = self.assemble_reference_tensor(V0, sort_interior=True, transpose=True) element_kernel = schur_kernel(element_kernel, TripleProductKernel(R1, M, C0), TripleProductKernel(R0, M, C1), TripleProductKernel(R0, M, C0)) coefficients = self.coefficients["cell"] coefficients_acc = coefficients.dat(op2.READ, coefficients.cell_node_map()) element_kernel = self._element_kernels[Vrow, Vcol] kernel = element_kernel.kernel(on_diag=on_diag, addv=addv) assembler = op2.ParLoop(kernel, Vrow.mesh().cell_set, *element_kernel.make_args(A), coefficients_acc, *indices_acc) self.assemblers.setdefault(key, assembler) if mat_type == "preallocator": key = key + ("preallocator",) try: assembler = self.assemblers[key] except KeyError: # Determine the global sparsity pattern by inserting a constant sparse element matrix args = assembler.arguments[:2] kernel = ElementKernel(PETSc.Mat(), name="preallocate").kernel(mat_type=mat_type, on_diag=on_diag, addv=addv) assembler = op2.ParLoop(kernel, Vrow.mesh().cell_set, *(op2.PassthroughArg(op2.OpaqueType("Mat"), arg.data) for arg in args), *indices_acc) self.assemblers.setdefault(key, assembler) assembler.arguments[0].data = A.handle assembler()
class ElementKernel: """Base class for sparse element kernel builders. By default, it inserts the same matrix on each cell.""" code = dedent(""" PetscErrorCode %(name)s(const Mat A, const Mat B, %(indices)s) { PetscCall(MatSetValuesLocalSparse(A, B, %(rows)s, %(cols)s, %(addv)d)); return PETSC_SUCCESS; }""") def __init__(self, A, name=None): self.result = A self.mats = [self.result] self.name = name or type(self).__name__ self.rules = {} def make_args(self, *mats): return [op2.PassthroughArg(op2.OpaqueType(mat.klass), mat.handle) for mat in list(mats) + self.mats] def kernel(self, mat_type="aij", on_diag=False, addv=None): if addv is None: addv = PETSc.InsertMode.INSERT indices = ("rindices",) if on_diag else ("rindices", "cindices") code = "" if "MatSetValuesArray" in self.code: code = dedent(""" static inline PetscErrorCode MatSetValuesArray(Mat A, const PetscScalar *restrict values) { PetscBool done; PetscInt m; const PetscInt *ai; PetscScalar *vals; PetscCall(MatGetRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &m, &ai, NULL, &done)); PetscCall(MatSeqAIJGetArrayWrite(A, &vals)); PetscCall(PetscMemcpy(vals, values, ai[m] * sizeof(*vals))); PetscCall(MatSeqAIJRestoreArrayWrite(A, &vals)); PetscCall(MatRestoreRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &m, &ai, NULL, &done)); return PETSC_SUCCESS; }""") if mat_type != "matfree": code += dedent(""" static inline PetscErrorCode MatSetValuesLocalSparse(const Mat A, const Mat B, const PetscInt *restrict rindices, const PetscInt *restrict cindices, InsertMode addv) { PetscBool done; PetscInt m, ncols, istart, *indices; const PetscInt *ai, *aj; const PetscScalar *vals; PetscCall(MatGetRowIJ(B, 0, PETSC_FALSE, PETSC_FALSE, &m, &ai, &aj, &done)); PetscCall(PetscMalloc1(ai[m], &indices)); for (PetscInt j = 0; j < ai[m]; j++) indices[j] = cindices[aj[j]]; PetscCall(MatSeqAIJGetArrayRead(B, &vals)); for (PetscInt i = 0; i < m; i++) { istart = ai[i]; ncols = ai[i + 1] - istart; PetscCall(MatSetValuesLocal(A, 1, &rindices[i], ncols, &indices[istart], &vals[istart], addv)); } PetscCall(MatSeqAIJRestoreArrayRead(B, &vals)); PetscCall(MatRestoreRowIJ(B, 0, PETSC_FALSE, PETSC_FALSE, &m, &ai, &aj, &done)); PetscCall(PetscFree(indices)); return PETSC_SUCCESS; }""") code += self.code % dict(self.rules, name=self.name, indices=", ".join("const PetscInt *restrict %s" % s for s in indices), rows=indices[0], cols=indices[-1], addv=addv) return op2.Kernel(code, self.name) class TripleProductKernel(ElementKernel): """Kernel builder to assemble a triple product of the form L * C * R for each cell, where L, C, R are sparse matrices and the entries of C are updated on each cell.""" code = dedent(""" PetscErrorCode %(name)s(const Mat A, const Mat B, const PetscScalar *restrict coefficients, %(indices)s) { Mat C; PetscCall(MatProductGetMats(B, NULL, &C, NULL)); PetscCall(MatSetValuesArray(C, coefficients)); PetscCall(MatProductNumeric(B)); PetscCall(MatSetValuesLocalSparse(A, B, %(rows)s, %(cols)s, %(addv)d)); return PETSC_SUCCESS; }""") def __init__(self, L, C, R, name=None): self.product = partial(L.matMatMult, C, R) super().__init__(self.product(), name=name) class SchurComplementKernel(ElementKernel): """Base class for Schur complement kernel builders.""" condense_code = "" code = dedent(""" #include <petscblaslapack.h> PetscErrorCode %(name)s(const Mat A, const Mat B, const Mat A11, const Mat A10, const Mat A01, const Mat A00, const PetscScalar *restrict coefficients, %(indices)s) { Mat C; PetscCall(MatProductGetMats(A11, NULL, &C, NULL)); PetscCall(MatSetValuesArray(C, coefficients)); %(condense)s PetscCall(MatSetValuesLocalSparse(A, A11, %(rows)s, %(cols)s, %(addv)d)); PetscCall(MatSetValuesLocalSparse(A, B, %(rows)s, %(cols)s, %(addv)d)); return PETSC_SUCCESS; }""") def __init__(self, *kernels, name=None): self.children = kernels self.submats = [k.result for k in kernels] self.work = [None for _ in range(2)] # Dict of slices with the extents of the diagonal blocks A00 = self.submats[-1] degree = numpy.diff(A00.getValuesCSR()[0]) istart = 0 self.slices = {1: slice(0, 0)} unique_degree, counts = numpy.unique(degree, return_counts=True) for k, kdofs in sorted(zip(unique_degree, counts)): self.slices[k] = slice(istart, istart + k * kdofs) istart += k * kdofs self.blocks = sorted(degree for degree in self.slices if degree > 1) result = self.condense() result.axpy(1.0, self.submats[0]) super().__init__(result, name=name) self.mats.extend(self.submats) self.rules["condense"] = self.condense_code def condense(self, result=None): return result class SchurComplementPattern(SchurComplementKernel): """Kernel builder to pad with zeros the Schur complement sparsity pattern.""" condense_code = dedent(""" PetscCall(MatProductNumeric(A11)); PetscCall(MatZeroEntries(B)); """) def condense(self, result=None): """Pad with zeros the statically condensed pattern""" if result is None: _, A10, A01, A00 = self.submats result = A10.matMatMult(A00, A01, result=result) result.zeroEntries() return result class SchurComplementDiagonal(SchurComplementKernel): """Schur complement kernel builder that assumes a diagonal interior block.""" condense_code = dedent(""" Vec vec; PetscInt n; PetscScalar *vals; PetscCall(MatProductNumeric(A11)); PetscCall(MatProductNumeric(A10)); PetscCall(MatProductNumeric(A01)); PetscCall(MatProductNumeric(A00)); PetscCall(MatGetSize(A00, &n, NULL)); PetscCall(MatSeqAIJGetArray(A00, &vals)); PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, n, vals, &vec)); PetscCall(VecReciprocal(vec)); PetscCall(VecScale(vec, -1.0)); PetscCall(MatDiagonalScale(A01, vec, NULL)); PetscCall(VecDestroy(&vec)); PetscCall(MatSeqAIJRestoreArray(A00, &vals)); PetscCall(MatProductNumeric(B)); """) def condense(self, result=None): A11, A10, A01, A00 = self.submats self.work[0] = A00.getDiagonal(result=self.work[0]) self.work[0].reciprocal() self.work[0].scale(-1) A01.diagonalScale(L=self.work[0]) result = A10.matMult(A01, result=result) return result class SchurComplementBlockCholesky(SchurComplementKernel): """Schur complement kernel builder that assumes a block-diagonal interior block, and uses its Cholesky factorization to compute S = A11 - (L^-1 A01)^T (L^-1 A01).""" condense_code = dedent(""" PetscBLASInt bn, lierr; PetscBool done; PetscInt m, bsize, irow; const PetscInt *ai; PetscScalar *vals, *U; Mat X; PetscCall(MatProductNumeric(A11)); PetscCall(MatProductNumeric(A01)); PetscCall(MatProductNumeric(A00)); PetscCall(MatGetRowIJ(A00, 0, PETSC_FALSE, PETSC_FALSE, &m, &ai, NULL, &done)); PetscCall(MatSeqAIJGetArray(A00, &vals)); irow = 0; while (irow < m && ai[irow + 1] - ai[irow] == 1) { vals[irow] = PetscSqrtReal(1.0 / vals[irow]); irow++; } U = &vals[irow]; while (irow < m) { bsize = ai[irow + 1] - ai[irow]; PetscCall(PetscBLASIntCast(bsize, &bn)); PetscCallBLAS("LAPACKpotrf", LAPACKpotrf_("U", &bn, U, &bn, &lierr)); PetscCallBLAS("LAPACKtrtri", LAPACKtrtri_("U", "N", &bn, U, &bn, &lierr)); for (PetscInt j = 0; j < bsize - 1; j++) for (PetscInt i = j + 1; i < bsize; i++) U[i + bsize * j] = 0.0; U += bsize * bsize; irow += bsize; } PetscCall(MatSeqAIJRestoreArray(A00, &vals)); PetscCall(MatRestoreRowIJ(A00, 0, PETSC_FALSE, PETSC_FALSE, &m, &ai, NULL, &done)); PetscCall(MatProductGetMats(B, &X, NULL, NULL)); PetscCall(MatProductNumeric(X)); PetscCall(MatProductNumeric(B)); PetscCall(MatScale(B, -1.0)); """) def condense(self, result=None): # asssume that A10 = A01^T A11, _, A01, A00 = self.submats indptr, indices, R = A00.getValuesCSR() zlice = self.slices[1] numpy.sqrt(R[zlice], out=R[zlice]) numpy.reciprocal(R[zlice], out=R[zlice]) flops = 2 * (zlice.stop - zlice.start) for k in self.blocks: Rk = R[self.slices[k]] A = Rk.reshape((-1, k, k)) rinv = numpy.linalg.inv(numpy.linalg.cholesky(A)) numpy.copyto(Rk, rinv.flat) flops += A.shape[0] * ((k**3)//3 + k**3) PETSc.Log.logFlops(flops) A00.setValuesCSR(indptr, indices, R) A00.assemble() self.work[0] = A00.matMult(A01, result=self.work[0]) result = self.work[0].transposeMatMult(self.work[0], result=result) result.scale(-1.0) return result class SchurComplementBlockLU(SchurComplementKernel): """Schur complement kernel builder that assumes a block-diagonal interior block, and uses its LU factorization to compute S = A11 - (A10 U^-1) (L^-1 A01).""" condense_code = dedent(""" PetscBLASInt bn, lierr, lwork; PetscBool done; PetscInt m, bsize, irow, icol, nnz, iswap, *ipiv, *perm; const PetscInt *ai; PetscScalar *vals, *work, *L, *U; Mat X; PetscCall(MatProductNumeric(A11)); PetscCall(MatProductNumeric(A10)); PetscCall(MatProductNumeric(A01)); PetscCall(MatProductNumeric(A00)); PetscCall(MatGetRowIJ(A00, 0, PETSC_FALSE, PETSC_FALSE, &m, &ai, NULL, &done)); PetscCall(MatSeqAIJGetArray(A00, &vals)); // A00 = (U^T) * (L^T) nnz = ai[m]; bsize = ai[m] - ai[m - 1]; PetscCall(PetscMalloc2(bsize, &ipiv, bsize, &perm)); PetscCall(PetscCalloc1(nnz, &work)); irow = 0; while (irow < m && ai[irow + 1] - ai[irow] == 1) { work[irow] = 1.0; vals[irow] = 1.0 / vals[irow]; irow++; } L = &work[irow]; U = &vals[irow]; while (irow < m) { bsize = ai[irow + 1] - ai[irow]; PetscCall(PetscBLASIntCast(bsize, &bn)); PetscCallBLAS("LAPACKgetrf", LAPACKgetrf_(&bn, &bn, U, &bn, ipiv, &lierr)); PetscCallBLAS("LAPACKtrtri", LAPACKtrtri_("U", "N", &bn, U, &bn, &lierr)); PetscCallBLAS("LAPACKtrtri", LAPACKtrtri_("L", "U", &bn, U, &bn, &lierr)); for (PetscInt j = 0; j < bsize; j++) perm[j] = j; for (PetscInt j = 0; j < bsize; j++) { icol = ipiv[j] - 1; iswap = perm[icol]; perm[icol] = perm[j]; perm[j] = iswap; } for (PetscInt j = 0; j < bsize; j++) { L[j + bsize * perm[j]] = 1.0; for (PetscInt i = j + 1; i < bsize; i++) { L[i + bsize * perm[j]] = U[i + bsize * j]; U[i + bsize * j] = 0.0; } } L += bsize * bsize; U += bsize * bsize; irow += bsize; } PetscCall(MatRestoreRowIJ(A00, 0, PETSC_FALSE, PETSC_FALSE, &m, &ai, NULL, &done)); // A00 = inv(U^T) PetscCall(MatSeqAIJRestoreArray(A00, &vals)); // X = inv(U^T) * A01 PetscCall(MatProductGetMats(B, NULL, NULL, &X)); PetscCall(MatProductNumeric(X)); // A00 = -inv(L^T) PetscCall(MatSeqAIJGetArray(A00, &vals)); for (PetscInt i = 0; i < nnz; i++) vals[i] = -work[i]; PetscCall(MatSeqAIJRestoreArray(A00, &vals)); PetscCall(PetscFree3(ipiv, perm, work)); // B = - A10 * inv(L^T) * X PetscCall(MatProductNumeric(B)); """) def condense(self, result=None): A11, A10, A01, A00 = self.submats indptr, indices, R = A00.getValuesCSR() Q = numpy.ones(R.shape, dtype=R.dtype) zlice = self.slices[1] numpy.reciprocal(R[zlice], out=R[zlice]) flops = zlice.stop - zlice.start for k in self.blocks: zlice = self.slices[k] A = R[zlice].reshape((-1, k, k)) q, r = numpy.linalg.qr(A, mode="complete") numpy.copyto(Q[zlice], numpy.transpose(q, axes=(0, 2, 1)).flat) rinv = numpy.linalg.inv(r) numpy.copyto(R[zlice], rinv.flat) flops += A.shape[0] * ((4*k**3)//3 + k**3) PETSc.Log.logFlops(flops) A00.setValuesCSR(indptr, indices, Q) A00.assemble() self.work[0] = A00.matMult(A01, result=self.work[0]) A00.setValuesCSR(indptr, indices, R) A00.assemble() A00.scale(-1.0) result = A10.matMatMult(A00, self.work[0], result=result) return result class SchurComplementBlockInverse(SchurComplementKernel): """Schur complement kernel builder that assumes a block-diagonal interior block, and uses its inverse to compute S = A11 - A10 A00^-1 A01.""" condense_code = dedent(""" PetscBLASInt bn, lierr, lwork; PetscBool done; PetscInt m, irow, bsize, *ipiv; const PetscInt *ai; PetscScalar *vals, *work, *ainv, swork; PetscCall(MatProductNumeric(A11)); PetscCall(MatProductNumeric(A10)); PetscCall(MatProductNumeric(A01)); PetscCall(MatProductNumeric(A00)); PetscCall(MatGetRowIJ(A00, 0, PETSC_FALSE, PETSC_FALSE, &m, &ai, NULL, &done)); lwork = -1; bsize = ai[m] - ai[m - 1]; PetscCall(PetscMalloc1(bsize, &ipiv)); PetscCall(PetscBLASIntCast(bsize, &bn)); PetscCallBLAS("LAPACKgetri", LAPACKgetri_(&bn, ainv, &bn, ipiv, &swork, &lwork, &lierr)); bsize = (PetscInt)swork; PetscCall(PetscBLASIntCast(bsize, &lwork)); PetscCall(PetscMalloc1(bsize, &work)); PetscCall(MatSeqAIJGetArray(A00, &vals)); irow = 0; while (irow < m && ai[irow + 1] - ai[irow] == 1) { vals[irow] = 1.0 / vals[irow]; irow++; } ainv = &vals[irow]; while (irow < m) { bsize = ai[irow + 1] - ai[irow]; PetscCall(PetscBLASIntCast(bsize, &bn)); PetscCallBLAS("LAPACKgetrf", LAPACKgetrf_(&bn, &bn, ainv, &bn, ipiv, &lierr)); PetscCallBLAS("LAPACKgetri", LAPACKgetri_(&bn, ainv, &bn, ipiv, work, &lwork, &lierr)); ainv += bsize * bsize; irow += bsize; } PetscCall(PetscFree2(ipiv, work)); PetscCall(MatSeqAIJRestoreArray(A00, &vals)); PetscCall(MatRestoreRowIJ(A00, 0, PETSC_FALSE, PETSC_FALSE, &m, &ai, NULL, &done)); PetscCall(MatScale(A00, -1.0)); PetscCall(MatProductNumeric(B)); """) def condense(self, result=None): A11, A10, A01, A00 = self.submats indptr, indices, R = A00.getValuesCSR() zlice = self.slices[1] numpy.reciprocal(R[zlice], out=R[zlice]) flops = zlice.stop - zlice.start for k in self.blocks: Rk = R[self.slices[k]] A = Rk.reshape((-1, k, k)) rinv = numpy.linalg.inv(A) numpy.copyto(Rk, rinv.flat) flops += A.shape[0] * (k**3) PETSc.Log.logFlops(flops) A00.setValuesCSR(indptr, indices, R) A00.assemble() A00.scale(-1.0) result = A10.matMatMult(A00, A01, result=result) return result def matmult_kernel_code(a, prefix="form", fcp=None, matshell=False): """Generate code for the matrix-vector multiplication local kernel. Parameters ---------- a : ufl.Form The bilinear form. prefix : str The kernel prefix. fcp : dict The form compiler parameters. matshell : bool A flag to wrap the kernel with a :class:`PETSc.Mat` of type shell. This is used for the local matrix-free KSP for the interior solve. Returns ------- matmult_struct : str The C code to compute the matrix-vector product. matmult_call : callable - ``x``: the pointer name of the input vector (`str`). - ``y``: the pointer name of the output vector (`str`). A lambda to generate the C code calling the matrix-vector product. ctx_struct : str The signature of the kernel. ctx_pack : str Code to update the coefficient array pointers to be called before applying the matshell. """ cache = a._cache.setdefault("fdm_kernels", {}) key = (prefix,) try: matmult_struct, matmult_call, ctx_struct, ctx_pack = cache[key] except KeyError: v, u = a.arguments() V = u.function_space() F = a(v, ufl.Coefficient(V)) kernels = compile_form(F, prefix, parameters=fcp) kernel = kernels[-1].kinfo.kernel nargs = len(kernel.arguments) - len(a.arguments()) ncoef = nargs - len(extract_firedrake_constants(F)) matmult_struct = cache_generate_code(kernel, V._comm) matmult_struct = matmult_struct.replace("void "+kernel.name, "static void "+kernel.name) ctx_coeff = "".join(f"appctx[{i}], " for i in range(ncoef)) ctx_const = "".join(f", appctx[{i}]" for i in range(ncoef, nargs)) matmult_call = lambda x, y: f"{kernel.name}({y}, {ctx_coeff}{x}{ctx_const});" ctx_struct = "".join(f"const PetscScalar *restrict c{i}, " for i in range(nargs)) ctx_pointers = ", ".join(f"c{i}" for i in range(nargs)) ctx_pack = f"const PetscScalar *appctx[{nargs}] = {{ {ctx_pointers} }};" cache[key] = (matmult_struct, matmult_call, ctx_struct, ctx_pack) if matshell: matmult_struct += dedent(""" static PetscErrorCode %(prefix)s(Mat A, Vec X, Vec Y) { PetscScalar **appctx, *y; const PetscScalar *x; PetscCall(MatShellGetContext(A, &appctx)); PetscCall(VecZeroEntries(Y)); PetscCall(VecGetArray(Y, &y)); PetscCall(VecGetArrayRead(X, &x)); %(matmult_call)s PetscCall(VecRestoreArrayRead(X, &x)); PetscCall(VecRestoreArray(Y, &y)); return PETSC_SUCCESS; }""" % {"prefix": prefix, "matmult_call": matmult_call("x", "y")}) return matmult_struct, matmult_call, ctx_struct, ctx_pack class InteriorSolveKernel(ElementKernel): """Kernel builder that solves the interior block using a local KSP across cells owned by an MPI rank.""" code = dedent(""" %(A_struct)s PetscErrorCode %(name)s(const KSP ksp, const PetscScalar *restrict coefficients, %(ctx_struct)s const PetscScalar *restrict y, PetscScalar *restrict x){ %(ctx_pack)s PetscInt m; Mat A, B, C; Vec X, Y; PetscCall(KSPGetOperators(ksp, &A, &B)); PetscCall(MatShellSetContext(A, &appctx)); PetscCall(MatShellSetOperation(A, MATOP_MULT, (void(*)(void))A_interior)); PetscCall(MatProductGetMats(B, NULL, &C, NULL)); PetscCall(MatSetValuesArray(C, coefficients)); PetscCall(MatProductNumeric(B)); PetscCall(MatGetSize(B, &m, NULL)); PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, y, &Y)); PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, x, &X)); PetscCall(KSPSolve(ksp, Y, X)); PetscCall(VecDestroy(&X)); PetscCall(VecDestroy(&Y)); return PETSC_SUCCESS; }""") def __init__(self, kernel, form, name=None, prefix="interior_", fcp=None, pc_type="icc"): self.child = kernel self.form = form self.fcp = fcp B = kernel.result comm = B.getComm() A = PETSc.Mat().create(comm=comm) A.setType(PETSc.Mat.Type.SHELL) A.setSizes(B.getSizes()) A.setUp() # Set up the local KSP for the cell interiors ksp = PETSc.KSP().create(comm=comm) ksp.setOptionsPrefix(prefix) ksp.setOperators(A, B) # Default solver options, these can be overriden via -interior_ksp_type, etc. rtol = 1E-8 atol = 1E-14 ksp_type = PETSc.KSP.Type.MINRES norm_type = PETSc.KSP.NormType.PRECONDITIONED ksp.pc.setType(pc_type) ksp.setType(ksp_type) ksp.setNormType(norm_type) ksp.setTolerances(rtol=rtol, atol=atol) ksp.setFromOptions() ksp.setUp() super().__init__(ksp, name=name) A_struct, _, ctx_struct, ctx_pack = matmult_kernel_code(self.form, prefix="A_interior", fcp=self.fcp, matshell=True) rules = dict(A_struct=A_struct, ctx_struct=ctx_struct, ctx_pack=ctx_pack) self.rules.update(rules) class ImplicitSchurComplementKernel(ElementKernel): """Kernel builder that applies the matrix-free Schur complement matvec reusing a local KSP to invert the interior blocks.""" code = dedent(""" %(A_struct)s %(A00_struct)s PetscErrorCode %(name)s(const KSP ksp, const PetscScalar *restrict coefficients, %(ctx_struct)s const PetscScalar *restrict xf, PetscScalar *restrict yf) { %(ctx_pack)s static const PetscInt idofs[%(isize)d] = {%(idofs)s}; static const PetscInt fdofs[%(fsize)d] = {%(fdofs)s}; static PetscScalar xi[%(isize)d], yi[%(isize)d], x[%(size)d], y[%(size)d]; PetscInt i; Mat A, B, C; Vec X, Y; PetscCall(KSPGetOperators(ksp, &A, &B)); PetscCall(MatShellSetContext(A, &appctx)); PetscCall(MatShellSetOperation(A, MATOP_MULT, (void(*)(void))A_interior)); PetscCall(MatProductGetMats(B, NULL, &C, NULL)); PetscCall(MatSetValuesArray(C, coefficients)); PetscCall(MatProductNumeric(B)); // x[fdofs] = x1; y = A * x; for (i = 0; i < %(size)d; i++) y[i] = 0.0; for (i = 0; i < %(size)d; i++) x[i] = 0.0; for (i = 0; i < %(fsize)d; i++) x[fdofs[i]] = xf[i]; %(A_call)s // x[idofs] = -inv(Aii) * y[idofs]; for (i = 0; i < %(isize)d; i++) yi[i] = y[idofs[i]]; PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, %(isize)d, yi, &Y)); PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, %(isize)d, xi, &X)); PetscCall(KSPSolve(ksp, Y, X)); PetscCall(VecDestroy(&X)); PetscCall(VecDestroy(&Y)); for (i = 0; i < %(isize)d; i++) x[idofs[i]] = -xi[i]; // y = A * x; y1 += y[fdofs]; for (i = 0; i < %(size)d; i++) y[i] = 0.0; %(A_call)s for (i = 0; i < %(fsize)d; i++) yf[i] += y[fdofs[i]]; return PETSC_SUCCESS; }""") def __init__(self, kernel, name=None): self.child = kernel super().__init__(kernel.result, name=name) comm = self.result.getComm() form = self.child.form fcp = self.child.fcp args = form.arguments() Q = args[0].function_space() V = FunctionSpace(Q.mesh(), unrestrict_element(Q.ufl_element())) V0 = FunctionSpace(Q.mesh(), restrict_element(V.ufl_element(), "interior")) V1 = FunctionSpace(Q.mesh(), restrict_element(V.ufl_element(), "facet")) idofs = PETSc.IS().createBlock(V.block_size, restricted_dofs(V0.finat_element, V.finat_element), comm=comm) fdofs = PETSc.IS().createBlock(V.block_size, restricted_dofs(V1.finat_element, V.finat_element), comm=comm) size = idofs.size + fdofs.size assert size == V.finat_element.space_dimension() * V.block_size # Bilinear form on the space with interior and interface a = form if Q == V else form(*(t.reconstruct(function_space=V) for t in args)) # Generate code to apply the action of A within the Schur complement action A_struct, A_call, ctx_struct, ctx_pack = matmult_kernel_code(a, prefix="A", fcp=fcp) # Bilinear form on the interior a00 = form if Q == V0 else form(*(t.reconstruct(function_space=V0) for t in args)) # Generate code to apply A00 as a PETSc.Mat of type shell within the interior KSP A00_struct, *_ = matmult_kernel_code(a00, prefix="A_interior", fcp=fcp, matshell=True) A00_struct = A00_struct.replace("#include <stdint.h>", "") # Replacement rules to use idofs, fdofs, A, and A00 on self.code rules = dict(A_struct=A_struct, A_call=A_call("x", "y"), ctx_struct=ctx_struct, ctx_pack=ctx_pack, A00_struct=A00_struct, size=size, isize=idofs.size, fsize=fdofs.size, idofs=", ".join(map(str, idofs.indices)), fdofs=", ".join(map(str, fdofs.indices))) self.rules.update(rules) idofs.destroy() fdofs.destroy() class PythonMatrixContext: """Python matrix context that handles boundary conditions.""" def __init__(self, mult_callable, x, y, bcs=None): """ Parameters ---------- mult_callable : callable The callable performing the matrix-vector product. x : Function The tensor holding the input to the matrix-vector product. y : Function The tensor holding the output to the matrix-vector product. bcs : .BCBase[] or None An iterable of boundary conditions to apply on ``x`` and ``y``. """ self._mult_callable = mult_callable self._x = x self._y = y Vrow = y.function_space() Vcol = x.function_space() self.on_diag = Vrow == Vcol self.row_bcs = tuple(bc for bc in bcs if bc.function_space() == Vrow) if self.on_diag: self.col_bcs = self.row_bcs else: self.col_bcs = tuple(bc for bc in bcs if bc.function_space() == Vcol) def _op(self, action, X, Y, W=None): with self._y.dat.vec_wo as v: if W is None: v.zeroEntries() else: Y.copy(v) with self._x.dat.vec_wo as v: X.copy(v) for bc in self.col_bcs: bc.zero(self._x) action() if self.on_diag: if len(self.row_bcs) > 0: # TODO, can we avoid the copy? with self._x.dat.vec_wo as v: X.copy(v) for bc in self.row_bcs: bc.set(self._y, self._x) else: for bc in self.row_bcs: bc.zero(self._y) with self._y.dat.vec_ro as v: v.copy(Y if W is None else W) @PETSc.Log.EventDecorator() def mult(self, mat, X, Y): self._op(self._mult_callable, X, Y) @PETSc.Log.EventDecorator() def multAdd(self, mat, X, Y, W): self._op(self._mult_callable, X, Y, W) def is_restricted(finat_element): """Determine if an element is a restriction onto interior or facets""" tdim = finat_element.cell.get_dimension() idofs = len(finat_element.entity_dofs()[tdim][0]) is_interior = idofs == finat_element.space_dimension() is_facet = idofs == 0 return is_interior, is_facet def petsc_sparse(A_numpy, rtol=1E-10, comm=None): """Convert dense numpy matrix into a sparse PETSc matrix""" atol = rtol * abs(max(A_numpy.min(), A_numpy.max(), key=abs)) sparsity = abs(A_numpy) > atol nnz = numpy.count_nonzero(sparsity, axis=1).astype(PETSc.IntType) A = PETSc.Mat().createAIJ(A_numpy.shape, nnz=(nnz, 0), comm=comm) rows, cols = numpy.nonzero(sparsity) rows = rows.astype(PETSc.IntType) cols = cols.astype(PETSc.IntType) vals = A_numpy[sparsity] A.setValuesRCV(rows[:, None], cols[:, None], vals[:, None], PETSc.InsertMode.INSERT) A.assemble() return A def kron3(A, B, C, scale=None): """Returns scale * kron(A, kron(B, C))""" temp = B.kron(C) if scale is not None: temp.scale(scale) result = A.kron(temp) temp.destroy() return result def get_submat(A, isrow=None, iscol=None, permute=False): """Return the sub matrix A[isrow, iscol]""" needs_rows = isrow is None needs_cols = iscol is None if needs_rows and needs_cols: return A size = A.getSize() if needs_rows: isrow = PETSc.IS().createStride(size[0], step=1, comm=A.getComm()) if needs_cols: iscol = PETSc.IS().createStride(size[1], step=1, comm=A.getComm()) if permute: submat = A.permute(isrow, iscol) else: submat = A.createSubMatrix(isrow, iscol) if needs_rows: isrow.destroy() if needs_cols: iscol.destroy() return submat def block_mat(A_blocks, destroy_blocks=False): """Return a concrete Mat corresponding to a block matrix given as a list of lists. Optionally, destroys the input Mats if a new Mat is created.""" if len(A_blocks) == 1: if len(A_blocks[0]) == 1: return A_blocks[0][0] result = PETSc.Mat().createNest(A_blocks, comm=A_blocks[0][0].getComm()) # A nest Mat would not allow us to take matrix-matrix products result = result.convert(mat_type=A_blocks[0][0].getType()) if destroy_blocks: for row in A_blocks: for mat in row: mat.destroy() return result def mass_blocks(tdim, formdegree, B00, B11): """Construct mass block matrix on reference cell from 1D mass matrices B00 and B11. The 1D matrices may come with different test and trial spaces.""" if tdim == 1: B_diag = [B11 if formdegree else B00] elif tdim == 2: if formdegree == 0: B_diag = [B00.kron(B00)] elif formdegree == 1: B_diag = [B00.kron(B11), B11.kron(B00)] else: B_diag = [B11.kron(B11)] elif tdim == 3: if formdegree == 0: B_diag = [kron3(B00, B00, B00)] elif formdegree == 1: B_diag = [kron3(B00, B00, B11), kron3(B00, B11, B00), kron3(B11, B00, B00)] elif formdegree == 2: B_diag = [kron3(B00, B11, B11), kron3(B11, B00, B11), kron3(B11, B11, B00)] else: B_diag = [kron3(B11, B11, B11)] n = len(B_diag) if n == 1: return [B_diag] else: zero = PETSc.Mat() return [[B_diag[i] if i == j else zero for j in range(n)] for i in range(n)] def diff_blocks(tdim, formdegree, A00, A11, A10): """Construct exterior derivative block matrix on reference cell from 1D mass matrices A00 and A11, and exterior derivative moments A10. The 1D matrices may come with different test and trial spaces.""" if formdegree == tdim: ncols = A10.shape[0]**tdim zero = PETSc.Mat().createAIJ((1, ncols), nnz=(0, 0), comm=A10.getComm()) zero.assemble() A_blocks = [[zero]] elif tdim == 1: A_blocks = [[A10]] elif tdim == 2: if formdegree == 0: A_blocks = [[A00.kron(A10)], [A10.kron(A00)]] elif formdegree == 1: A_blocks = [[A10.kron(A11), A11.kron(A10)]] A_blocks[-1][-1].scale(-1) elif tdim == 3: if formdegree == 0: A_blocks = [[kron3(A00, A00, A10)], [kron3(A00, A10, A00)], [kron3(A10, A00, A00)]] elif formdegree == 1: zero = PETSc.Mat() A_blocks = [[kron3(A00, A10, A11, scale=-1), kron3(A00, A11, A10), zero], [kron3(A10, A00, A11, scale=-1), zero, kron3(A11, A00, A10)], [zero, kron3(A10, A11, A00), kron3(A11, A10, A00, scale=-1)]] elif formdegree == 2: A_blocks = [[kron3(A10, A11, A11, scale=-1), kron3(A11, A10, A11), kron3(A11, A11, A10)]] return A_blocks def tabulate_exterior_derivative(Vc, Vf, cbcs=[], fbcs=[], comm=None): """ Tabulate exterior derivative: Vc -> Vf as an explicit sparse matrix. Works for any tensor-product basis. These are the same matrices one needs for HypreAMS and friends. """ if comm is None: comm = Vf.comm ec = Vc.finat_element ef = Vf.finat_element if ef.formdegree - ec.formdegree != 1: raise ValueError("Expecting Vf = d(Vc)") if Vf.mesh().ufl_cell().is_simplex(): c0 = ec.fiat_equivalent f1 = ef.fiat_equivalent derivative = {ufl.H1: "grad", ufl.HCurl: "curl", ufl.HDiv: "div"}[Vc.ufl_element().sobolev_space] Dhat = petsc_sparse(evaluate_dual(c0, f1, derivative), comm=COMM_SELF) else: elements = sorted(get_base_elements(ec), key=lambda e: e.formdegree) c0, c1 = elements[::len(elements)-1] elements = sorted(get_base_elements(ef), key=lambda e: e.formdegree) f0, f1 = elements[::len(elements)-1] if f0.formdegree != 0: f0 = None if c1.formdegree != 1: c1 = None tdim = Vc.mesh().topological_dimension() zero = PETSc.Mat() A00 = petsc_sparse(evaluate_dual(c0, f0), comm=COMM_SELF) if f0 else zero A11 = petsc_sparse(evaluate_dual(c1, f1), comm=COMM_SELF) if c1 else zero A10 = petsc_sparse(evaluate_dual(c0, f1, "grad"), comm=COMM_SELF) Dhat = block_mat(diff_blocks(tdim, ec.formdegree, A00, A11, A10), destroy_blocks=True) A00.destroy() A11.destroy() if Dhat != A10: A10.destroy() if any(is_restricted(ec)) or any(is_restricted(ef)): scalar_element = lambda e: e._sub_element if isinstance(e, (finat.ufl.TensorElement, finat.ufl.VectorElement)) else e fdofs = restricted_dofs(ef, create_element(unrestrict_element(scalar_element(Vf.ufl_element())))) cdofs = restricted_dofs(ec, create_element(unrestrict_element(scalar_element(Vc.ufl_element())))) temp = Dhat fises = PETSc.IS().createGeneral(fdofs, comm=temp.getComm()) cises = PETSc.IS().createGeneral(cdofs, comm=temp.getComm()) Dhat = temp.createSubMatrix(fises, cises) temp.destroy() fises.destroy() cises.destroy() if Vf.block_size > 1: temp = Dhat eye = petsc_sparse(numpy.eye(Vf.block_size, dtype=PETSc.RealType), comm=temp.getComm()) Dhat = temp.kron(eye) temp.destroy() eye.destroy() sizes = tuple(V.dof_dset.layout_vec.getSizes() for V in (Vf, Vc)) preallocator = PETSc.Mat().create(comm=comm) preallocator.setType(PETSc.Mat.Type.PREALLOCATOR) preallocator.setSizes(sizes) preallocator.setUp() kernel = ElementKernel(Dhat, name="exterior_derivative").kernel() indices = tuple(op2.Dat(V.dof_dset, V.local_to_global_map(bcs).indices)(op2.READ, V.cell_node_map()) for V, bcs in zip((Vf, Vc), (fbcs, cbcs))) assembler = op2.ParLoop(kernel, Vc.mesh().cell_set, *(op2.PassthroughArg(op2.OpaqueType("Mat"), m.handle) for m in (preallocator, Dhat)), *indices) assembler() preallocator.assemble() nnz = get_preallocation(preallocator, sizes[0][0]) preallocator.destroy() Dmat = PETSc.Mat().createAIJ(sizes, Vf.block_size, nnz=nnz, comm=comm) Dmat.setOption(PETSc.Mat.Option.NEW_NONZERO_ALLOCATION_ERR, True) assembler.arguments[0].data = Dmat.handle assembler() Dmat.assemble() Dhat.destroy() return Dmat def restrict_element(ele, restriction_domain): """Get an element that is not restricted and return the restricted element.""" if isinstance(ele, finat.ufl.VectorElement): return type(ele)(restrict_element(ele._sub_element, restriction_domain), dim=ele.num_sub_elements) elif isinstance(ele, finat.ufl.TensorElement): return type(ele)(restrict_element(ele._sub_element, restriction_domain), shape=ele._shape, symmetry=ele.symmetry()) elif isinstance(ele, finat.ufl.MixedElement): return type(ele)(*(restrict_element(e, restriction_domain) for e in ele.sub_elements)) else: return ele[restriction_domain] def unrestrict_element(ele): """Get an element that might or might not be restricted and return the parent unrestricted element.""" if isinstance(ele, finat.ufl.VectorElement): return type(ele)(unrestrict_element(ele._sub_element), dim=ele.num_sub_elements) elif isinstance(ele, finat.ufl.TensorElement): return type(ele)(unrestrict_element(ele._sub_element), shape=ele._shape, symmetry=ele.symmetry()) elif isinstance(ele, finat.ufl.MixedElement): return type(ele)(*(unrestrict_element(e) for e in ele.sub_elements)) elif isinstance(ele, finat.ufl.RestrictedElement): return unrestrict_element(ele._element) else: return ele def get_base_elements(e): if isinstance(e, finat.EnrichedElement): return list(chain.from_iterable(map(get_base_elements, e.elements))) elif isinstance(e, finat.TensorProductElement): return list(chain.from_iterable(map(get_base_elements, e.factors))) elif isinstance(e, finat.FlattenedDimensions): return get_base_elements(e.product) elif isinstance(e, (finat.HCurlElement, finat.HDivElement)): return get_base_elements(e.wrappee) elif isinstance(e, finat.finiteelementbase.FiniteElementBase): return get_base_elements(e.fiat_equivalent) elif isinstance(e, FIAT.RestrictedElement): return get_base_elements(e._element) return [e] class SparseAssembler: """Class to generate and cache python wrappers to insert sparse element matrices directly with PETSc C code.""" _cache = {} @staticmethod def setSubMatCSR(comm, triu=False): """ Compile C code to insert sparse submatrices and store in class cache :arg triu: are we inserting onto the upper triangular part of the matrix? :returns: a python wrapper for the matrix insertion function """ cache = SparseAssembler._cache.setdefault("setSubMatCSR", {}) key = (id(comm), triu) try: return cache[key] except KeyError: return cache.setdefault(key, SparseAssembler.load_setSubMatCSR(comm, triu)) @staticmethod def load_c_code(code, name, comm, argtypes, restype): petsc_dir = get_petsc_dir() cppargs = [f"-I{d}/include" for d in petsc_dir] ldargs = ([f"-L{d}/lib" for d in petsc_dir] + [f"-Wl,-rpath,{d}/lib" for d in petsc_dir] + ["-lpetsc", "-lm"]) dll = load(code, "c", cppargs=cppargs, ldargs=ldargs, comm=comm) fn = getattr(dll, name) fn.argtypes = argtypes fn.restype = restype return fn @staticmethod def load_setSubMatCSR(comm, triu=False): """Insert one sparse matrix into another sparse matrix. Done in C for efficiency, since it loops over rows.""" if triu: name = "setSubMatCSR_SBAIJ" select_cols = "icol -= (icol < irow) * (1 + icol);" else: name = "setSubMatCSR_AIJ" select_cols = "" code = dedent(f""" #include <petsc.h> PetscErrorCode {name}(Mat A, Mat B, PetscInt *rindices, PetscInt *cindices, InsertMode addv) {{ PetscInt m, ncols, irow, icol; PetscInt *indices, *cols; PetscScalar *vals; PetscCall(MatGetSize(B, &m, NULL)); PetscCall(MatSeqAIJGetMaxRowNonzeros(B, &ncols)); PetscCall(PetscMalloc1(ncols, &indices)); for (PetscInt i = 0; i < m; i++) {{ PetscCall(MatGetRow(B, i, &ncols, &cols, &vals)); irow = rindices[i]; for (PetscInt j = 0; j < ncols; j++) {{ icol = cindices[cols[j]]; {select_cols} indices[j] = icol; }} PetscCall(MatSetValues(A, 1, &irow, ncols, indices, vals, addv)); PetscCall(MatRestoreRow(B, i, &ncols, &cols, &vals)); }} PetscCall(PetscFree(indices)); return PETSC_SUCCESS; }} """) argtypes = [ctypes.c_voidp, ctypes.c_voidp, ctypes.c_voidp, ctypes.c_voidp, ctypes.c_int] funptr = SparseAssembler.load_c_code(code, name, comm=comm, argtypes=argtypes, restype=ctypes.c_int) @PETSc.Log.EventDecorator(name) def wrapper(A, B, rows, cols, addv): return funptr(A.handle, B.handle, rows.ctypes.data, cols.ctypes.data, addv) return wrapper
[docs] class PoissonFDMPC(FDMPC): """ A preconditioner for tensor-product elements that changes the shape functions so that the H^1 Riesz map is sparse in the interior of a Cartesian cell, and assembles a global sparse matrix on which other preconditioners, such as `ASMStarPC`, can be applied. Here we assume that the volume integrals in the Jacobian can be expressed as: inner(grad(v), alpha(grad(u)))*dx + inner(v, beta(u))*dx where alpha and beta are possibly tensor-valued. The sparse matrix is obtained by approximating alpha and beta by cell-wise constants and discarding the coefficients in alpha that couple together mixed derivatives and mixed components. For spaces that are not H^1-conforming, this preconditioner will use the symmetric interior-penalty DG method. The penalty coefficient can be provided in the application context, keyed on ``"eta"``. """ _variant = "fdm_ipdg" _citation = "Brubeck2022"
[docs] def assemble_reference_tensor(self, V): try: _, line_elements, shifts = get_permutation_to_nodal_elements(V) except ValueError: raise ValueError("FDMPC does not support the element %s" % V.ufl_element()) line_elements, = line_elements axes_shifts, = shifts degree = max(e.degree() for e in line_elements) eta = float(self.appctx.get("eta", degree*(degree+1))) element = V.finat_element is_dg = element.entity_dofs() == element.entity_closure_dofs() Afdm = [] # sparse interval mass and stiffness matrices for each direction Dfdm = [] # tabulation of normal derivatives at the boundary for each direction bdof = [] # indices of point evaluation dofs for each direction cache = self._cache.setdefault("ipdg_reference_tensor", {}) for e in line_elements: key = (e.degree(), eta) try: rtensor = cache[key] except KeyError: rtensor = cache.setdefault(key, fdm_setup_ipdg(e, eta, comm=COMM_SELF)) Afdm[:0], Dfdm[:0], bdof[:0] = tuple(zip(rtensor)) if not is_dg and e.degree() == degree: # do not apply SIPG along continuous directions Dfdm[0] = None return Afdm, Dfdm, bdof, axes_shifts
[docs] @PETSc.Log.EventDecorator("FDMSetValues") def set_values(self, A, Vrow, Vcol, mat_type=None): """Assemble the stiffness matrix in the FDM basis using Kronecker products of interval matrices. Parameters ---------- A : PETSc.Mat The (initialized) matrix to assemble. Vrow : FunctionSpace The test space. Vcol : FunctionSpace The trial space. mat_type : PETSc.Mat.Type The matrix type of auxiliary operator. This only used when ``A`` is a preallocator to determine the nonzeros on the upper triangual part of an ``'sbaij'`` matrix. """ if mat_type is None: mat_type = A.getType() triu = A.getType() == "preallocator" and mat_type.endswith("sbaij") set_submat = SparseAssembler.setSubMatCSR(COMM_SELF, triu=triu) update_A = lambda A, Ae, rindices: set_submat(A, Ae, rindices, rindices, addv) condense_element_mat = lambda x: x addv = PETSc.InsertMode.ADD_VALUES def cell_to_global(lgmap, cell_to_local, cell_index, result=None): # Be careful not to create new arrays result = cell_to_local(cell_index, result=result) return lgmap.apply(result, result=result) cell_to_local, nel = extrude_node_map(Vrow.cell_node_map(), bsize=Vrow.block_size) get_rindices = partial(cell_to_global, self.lgmaps[Vrow], cell_to_local) Afdm, Dfdm, bdof, axes_shifts = self.assemble_reference_tensor(Vrow) Gq = self.coefficients.get("alpha") Bq = self.coefficients.get("beta") bcflags = self.coefficients.get("bcflags") Gq_facet = self.coefficients.get("Gq_facet") PT_facet = self.coefficients.get("PT_facet") V = Vrow bsize = V.block_size ncomp = V.ufl_element().reference_value_size sdim = (V.finat_element.space_dimension() * bsize) // ncomp # dimension of a single component tdim = V.mesh().topological_dimension() shift = axes_shifts * bsize index_coef, _ = extrude_node_map((Gq or Bq).cell_node_map()) index_bc, _ = extrude_node_map(bcflags.cell_node_map()) flag2id = numpy.kron(numpy.eye(tdim, tdim, dtype=PETSc.IntType), [[1], [2]]) # pshape is the shape of the DOFs in the tensor product pshape = tuple(Ak[0].size[0] for Ak in Afdm) static_condensation = False if sdim != numpy.prod(pshape): static_condensation = True if set(shift) != {0}: assert ncomp == tdim pshape = [tuple(numpy.roll(pshape, -shift[k])) for k in range(ncomp)] # assemble zero-th order term separately, including off-diagonals (mixed components) # I cannot do this for hdiv elements as off-diagonals are not sparse, this is because # the FDM eigenbases for CG(k) and CG(k-1) are not orthogonal to each other use_diag_Bq = Bq is None or len(Bq.ufl_shape) != 2 or static_condensation rindices = None if not use_diag_Bq: bshape = Bq.ufl_shape # Be = Bhat kron ... kron Bhat Be = Afdm[0][0].copy() for k in range(1, tdim): Be = Be.kron(Afdm[k][0]) aptr = numpy.arange(0, (bshape[0]+1)*bshape[1], bshape[1], dtype=PETSc.IntType) aidx = numpy.tile(numpy.arange(bshape[1], dtype=PETSc.IntType), bshape[0]) for e in range(nel): # Ae = Be kron Bq[e] adata = numpy.sum(Bq.dat.data_ro[index_coef(e)], axis=0) Ae = PETSc.Mat().createAIJWithArrays(bshape, (aptr, aidx, adata), comm=COMM_SELF) Ae = Be.kron(Ae) rindices = get_rindices(e, result=rindices) update_A(A, Ae, rindices) Ae.destroy() Be.destroy() Bq = None # assemble the second order term and the zero-th order term if any, # discarding mixed derivatives and mixed components ae = numpy.zeros((ncomp, tdim), dtype=PETSc.RealType) be = numpy.zeros((ncomp,), dtype=PETSc.RealType) je = None for e in range(nel): je = index_coef(e, result=je) bce = bcflags.dat.data_ro_with_halos[index_bc(e)] > 1E-8 # get coefficients on this cell if Gq is not None: ae[:] = numpy.sum(Gq.dat.data_ro[je], axis=0) if Bq is not None: be[:] = numpy.sum(Bq.dat.data_ro[je], axis=0) rindices = get_rindices(e, result=rindices) rows = numpy.reshape(rindices, (-1, bsize)) rows = numpy.transpose(rows) rows = numpy.reshape(rows, (ncomp, -1)) # for each component: compute the stiffness matrix Ae for k in range(ncomp): # permutation of axes with respect to the first vector component axes = numpy.roll(numpy.arange(tdim), -shift[k]) bck = bce[:, k] if len(bce.shape) == 2 else bce fbc = numpy.dot(bck, flag2id) if Gq is not None: # Ae = ae[k][0] Ahat + be[k] Bhat Be = Afdm[axes[0]][0].copy() Ae = Afdm[axes[0]][1+fbc[0]].copy() Ae.scale(ae[k][0]) if Bq is not None: Ae.axpy(be[k], Be) if tdim > 1: # Ae = Ae kron Bhat + ae[k][1] Bhat kron Ahat Ae = Ae.kron(Afdm[axes[1]][0]) if Gq is not None: Ae.axpy(ae[k][1], Be.kron(Afdm[axes[1]][1+fbc[1]])) if tdim > 2: # Ae = Ae kron Bhat + ae[k][2] Bhat kron Bhat kron Ahat Be = Be.kron(Afdm[axes[1]][0]) Ae = Ae.kron(Afdm[axes[2]][0]) if Gq is not None: Ae.axpy(ae[k][2], Be.kron(Afdm[axes[2]][1+fbc[2]])) Be.destroy() elif Bq is not None: Ae = Afdm[axes[0]][0] for m in range(1, tdim): Ae = Ae.kron(Afdm[axes[m]][0]) Ae.scale(be[k]) Ae = condense_element_mat(Ae) update_A(A, Ae, rows[k].astype(PETSc.IntType)) Ae.destroy() # assemble SIPG interior facet terms if the normal derivatives have been set up if any(Dk is not None for Dk in Dfdm): if static_condensation: raise NotImplementedError("Static condensation for SIPG not implemented") if tdim < V.mesh().geometric_dimension(): raise NotImplementedError("SIPG on immersed meshes is not implemented") eta = float(self.appctx.get("eta")) lgmap = self.lgmaps[V] index_facet, local_facet_data, nfacets = extrude_interior_facet_maps(V) index_coef, _, _ = extrude_interior_facet_maps(Gq_facet or Gq) rows = numpy.zeros((2, sdim), dtype=PETSc.IntType) for e in range(nfacets): # for each interior facet: compute the SIPG stiffness matrix Ae ie = index_facet(e) je = numpy.reshape(index_coef(e), (2, -1)) lfd = local_facet_data(e) idir = lfd // 2 if PT_facet: icell = numpy.reshape(lgmap.apply(ie), (2, ncomp, -1)) iord0 = numpy.insert(numpy.delete(numpy.arange(tdim), idir[0]), 0, idir[0]) iord1 = numpy.insert(numpy.delete(numpy.arange(tdim), idir[1]), 0, idir[1]) je = je[[0, 1], lfd] Pfacet = PT_facet.dat.data_ro_with_halos[je] Gfacet = Gq_facet.dat.data_ro_with_halos[je] else: Gfacet = numpy.sum(Gq.dat.data_ro_with_halos[je], axis=1) for k in range(ncomp): axes = numpy.roll(numpy.arange(tdim), -shift[k]) Dfacet = Dfdm[axes[0]] if Dfacet is None: continue if PT_facet: k0 = iord0[k] if shift[1] != 1 else tdim-1-iord0[-k-1] k1 = iord1[k] if shift[1] != 1 else tdim-1-iord1[-k-1] Piola = Pfacet[[0, 1], [k0, k1]] mu = Gfacet[[0, 1], idir] else: if len(Gfacet.shape) == 3: mu = Gfacet[[0, 1], [k, k], idir] elif len(Gfacet.shape) == 2: mu = Gfacet[[0, 1], idir] else: mu = Gfacet offset = Dfacet.shape[0] Adense = numpy.zeros((2*offset, 2*offset), dtype=PETSc.RealType) dense_indices = [] for j, jface in enumerate(lfd): j0 = j * offset j1 = j0 + offset jj = j0 + bdof[axes[0]][jface % 2] dense_indices.append(jj) for i, iface in enumerate(lfd): i0 = i * offset i1 = i0 + offset ii = i0 + bdof[axes[0]][iface % 2] sij = 0.5E0 if i == j else -0.5E0 if PT_facet: smu = [sij*numpy.dot(numpy.dot(mu[0], Piola[i]), Piola[j]), sij*numpy.dot(numpy.dot(mu[1], Piola[i]), Piola[j])] else: smu = sij*mu Adense[ii, jj] += eta * sum(smu) Adense[i0:i1, jj] -= smu[i] * Dfacet[:, iface % 2] Adense[ii, j0:j1] -= smu[j] * Dfacet[:, jface % 2] Ae = numpy_to_petsc(Adense, dense_indices, diag=False) if tdim > 1: # assume that the mesh is oriented Ae = Ae.kron(Afdm[axes[1]][0]) if tdim > 2: Ae = Ae.kron(Afdm[axes[2]][0]) if bsize == ncomp: icell = numpy.reshape(lgmap.apply(k+bsize*ie), (2, -1)) rows[0] = pull_axis(icell[0], pshape, idir[0]) rows[1] = pull_axis(icell[1], pshape, idir[1]) else: assert pshape[k0][idir[0]] == pshape[k1][idir[1]] rows[0] = pull_axis(icell[0][k0], pshape[k0], idir[0]) rows[1] = pull_axis(icell[1][k1], pshape[k1], idir[1]) update_A(A, Ae, rows) Ae.destroy()
[docs] def condense(self, A, J, bcs, fcp): return A, {}
[docs] @PETSc.Log.EventDecorator("FDMCoefficients") def assemble_coefficients(self, J, fcp): from firedrake.assemble import get_assembler coefficients = {} assembly_callables = [] args_J = J.arguments() V = args_J[-1].function_space() mesh = V.mesh() tdim = mesh.topological_dimension() Finv = ufl.JacobianInverse(mesh) degree, = set(as_tuple(V.ufl_element().degree())) quad_deg = fcp.get("degree", 2*degree+1) dx = ufl.dx(degree=quad_deg, domain=mesh) family = "Discontinuous Lagrange" if tdim == 1 else "DQ" DG = finat.ufl.FiniteElement(family, mesh.ufl_cell(), degree=0) # extract coefficients directly from the bilinear form integrals_J = J.integrals_by_type("cell") mapping = args_J[0].ufl_element().mapping().lower() Piola = get_piola_tensor(mapping, mesh) # get second order coefficient ref_grad = [ufl.variable(ufl.grad(t)) for t in args_J] if Piola: replace_grad = {ufl.grad(t): ufl.dot(Piola, ufl.dot(dt, Finv)) for t, dt in zip(args_J, ref_grad)} else: replace_grad = {ufl.grad(t): ufl.dot(dt, Finv) for t, dt in zip(args_J, ref_grad)} alpha = expand_derivatives(sum([ufl.diff(ufl.diff(ufl.replace(i.integrand(), replace_grad), ref_grad[0]), ref_grad[1]) for i in integrals_J])) # discard mixed derivatives and mixed components if len(alpha.ufl_shape) == 2: alpha = ufl.diag_vector(alpha) else: ashape = alpha.ufl_shape ashape = ashape[:len(ashape)//2] alpha = ufl.as_tensor(numpy.reshape([alpha[i+i] for i in numpy.ndindex(ashape)], (ashape[0], -1))) # assemble second order coefficient if not isinstance(alpha, ufl.constantvalue.Zero): Q = FunctionSpace(mesh, finat.ufl.TensorElement(DG, shape=alpha.ufl_shape)) tensor = coefficients.setdefault("alpha", Function(Q.dual())) assembly_callables.append(partial(get_assembler(ufl.inner(TestFunction(Q), alpha)*dx, form_compiler_parameters=fcp).assemble, tensor=tensor)) # get zero-th order coefficent ref_val = [ufl.variable(t) for t in args_J] if Piola: dummy_element = finat.ufl.TensorElement(family, cell=mesh.ufl_cell(), degree=1, shape=Piola.ufl_shape) dummy_Piola = ufl.Coefficient(ufl.FunctionSpace(mesh, dummy_element)) replace_val = {t: ufl.dot(dummy_Piola, s) for t, s in zip(args_J, ref_val)} else: replace_val = {t: s for t, s in zip(args_J, ref_val)} beta = expand_derivatives(sum(ufl.diff(ufl.diff(ufl.replace(i.integrand(), replace_val), ref_val[0]), ref_val[1]) for i in integrals_J)) if Piola: beta = ufl.replace(beta, {dummy_Piola: Piola}) # assemble zero-th order coefficient if not isinstance(beta, ufl.constantvalue.Zero): if Piola: # keep diagonal beta = ufl.diag_vector(beta) Q = FunctionSpace(mesh, finat.ufl.TensorElement(DG, shape=beta.ufl_shape) if beta.ufl_shape else DG) tensor = coefficients.setdefault("beta", Function(Q.dual())) assembly_callables.append(partial(get_assembler(ufl.inner(TestFunction(Q), beta)*dx, form_compiler_parameters=fcp).assemble, tensor=tensor)) family = "CG" if tdim == 1 else "DGT" degree = 1 if tdim == 1 else 0 DGT = finat.ufl.BrokenElement(finat.ufl.FiniteElement(family, cell=mesh.ufl_cell(), degree=degree)) if Piola: # make DGT functions with the second order coefficient # and the Piola tensor for each side of each facet extruded = mesh.cell_set._extruded dS_int = ufl.dS_h(degree=quad_deg) + ufl.dS_v(degree=quad_deg) if extruded else ufl.dS(degree=quad_deg) area = ufl.FacetArea(mesh) ifacet_inner = lambda v, u: ((ufl.inner(v('+'), u('+')) + ufl.inner(v('-'), u('-')))/area)*dS_int replace_grad = {ufl.grad(t): ufl.dot(dt, Finv) for t, dt in zip(args_J, ref_grad)} alpha = expand_derivatives(sum(ufl.diff(ufl.diff(ufl.replace(i.integrand(), replace_grad), ref_grad[0]), ref_grad[1]) for i in integrals_J)) G = alpha G = ufl.as_tensor([[[G[i, k, j, k] for i in range(G.ufl_shape[0])] for j in range(G.ufl_shape[2])] for k in range(G.ufl_shape[3])]) G = G * abs(ufl.JacobianDeterminant(mesh)) Q = FunctionSpace(mesh, finat.ufl.TensorElement(DGT, shape=G.ufl_shape)) tensor = coefficients.setdefault("Gq_facet", Function(Q.dual())) assembly_callables.append(partial(get_assembler(ifacet_inner(TestFunction(Q), G), form_compiler_parameters=fcp).assemble, tensor=tensor)) PT = Piola.T Q = FunctionSpace(mesh, finat.ufl.TensorElement(DGT, shape=PT.ufl_shape)) tensor = coefficients.setdefault("PT_facet", Function(Q.dual())) assembly_callables.append(partial(get_assembler(ifacet_inner(TestFunction(Q), PT), form_compiler_parameters=fcp).assemble, tensor=tensor)) # make DGT functions with BC flags shape = V.ufl_element().reference_value_shape Q = FunctionSpace(mesh, finat.ufl.TensorElement(DGT, shape=shape) if shape else DGT) test = TestFunction(Q) ref_args = [ufl.variable(t) for t in args_J] replace_args = {t: s for t, s in zip(args_J, ref_args)} forms = [] md = {"quadrature_degree": 0} for it in J.integrals(): itype = it.integral_type() if itype.startswith("exterior_facet"): beta = ufl.diff(ufl.diff(ufl.replace(it.integrand(), replace_args), ref_args[0]), ref_args[1]) beta = expand_derivatives(beta) if beta.ufl_shape: beta = ufl.diag_vector(beta) ds_ext = ufl.Measure(itype, domain=mesh, subdomain_id=it.subdomain_id(), metadata=md) forms.append(ufl.inner(test, beta)*ds_ext) tensor = coefficients.setdefault("bcflags", Function(Q.dual())) if len(forms): form = sum(forms) if len(form.arguments()) == 1: assembly_callables.append(partial(get_assembler(form, form_compiler_parameters=fcp).assemble, tensor=tensor)) # set arbitrary non-zero coefficients for preallocation for coef in coefficients.values(): with coef.dat.vec as cvec: cvec.set(1.0E0) return coefficients, assembly_callables
def get_piola_tensor(mapping, domain): tdim = domain.topological_dimension() if mapping == 'identity': return None elif mapping == 'covariant piola': return ufl.JacobianInverse(domain).T * ufl.as_tensor(numpy.flipud(numpy.identity(tdim))) elif mapping == 'contravariant piola': sign = ufl.diag(ufl.as_tensor([-1]+[1]*(tdim-1))) return ufl.Jacobian(domain)*sign/ufl.JacobianDeterminant(domain) else: raise NotImplementedError("Unsupported element mapping %s" % mapping) def pull_axis(x, pshape, idir): """permute x by reshaping into pshape and moving axis idir to the front""" return numpy.reshape(numpy.moveaxis(numpy.reshape(x.copy(), pshape), idir, 0), x.shape) def numpy_to_petsc(A_numpy, dense_indices, diag=True, block=False, comm=None): """ Create a SeqAIJ Mat from a dense matrix using the diagonal and a subset of rows and columns. If dense_indices is empty, then also include the off-diagonal corners of the matrix. """ n = A_numpy.shape[0] nbase = int(diag) if block else min(n, int(diag) + len(dense_indices)) nnz = numpy.full((n,), nbase, dtype=PETSc.IntType) nnz[dense_indices] = len(dense_indices) if block else n imode = PETSc.InsertMode.INSERT A_petsc = PETSc.Mat().createAIJ(A_numpy.shape, nnz=(nnz, 0), comm=comm) idx = numpy.arange(n, dtype=PETSc.IntType) if block: values = A_numpy[dense_indices, :][:, dense_indices] A_petsc.setValues(dense_indices, dense_indices, values, imode) else: for j in dense_indices: A_petsc.setValues(j, idx, A_numpy[j, :], imode) A_petsc.setValues(idx, j, A_numpy[:, j], imode) if diag: idx = idx[:, None] values = A_numpy.diagonal()[:, None] A_petsc.setValuesRCV(idx, idx, values, imode) A_petsc.assemble() return A_petsc def fdm_setup_ipdg(fdm_element, eta, comm=None): """ Setup for the fast diagonalisation method for the IP-DG formulation. Compute sparsified interval stiffness and mass matrices and tabulate the normal derivative of the shape functions. :arg fdm_element: a :class:`FIAT.FDMElement` :arg eta: penalty coefficient as a `float` :arg comm: an mpi4py communicator :returns: 3-tuple of: Afdm: a list of :class:`PETSc.Mats` with the sparse interval matrices Bhat, and bcs(Ahat) for every combination of either natural or weak Dirichlet BCs on each endpoint. Dfdm: the tabulation of the normal derivatives of the Dirichlet eigenfunctions. bdof: the indices of the vertex degrees of freedom. """ ref_el = fdm_element.get_reference_element() degree = fdm_element.degree() rule = FIAT.quadrature.make_quadrature(ref_el, degree+1) edof = fdm_element.entity_dofs() bdof = edof[0][0] + edof[0][1] phi = fdm_element.tabulate(1, rule.get_points()) Jhat = phi[(0, )] Dhat = phi[(1, )] Ahat = numpy.dot(numpy.multiply(Dhat, rule.get_weights()), Dhat.T) Bhat = numpy.dot(numpy.multiply(Jhat, rule.get_weights()), Jhat.T) # Facet normal derivatives basis = fdm_element.tabulate(1, ref_el.get_vertices()) Dfacet = basis[(1,)] Dfacet[:, 0] = -Dfacet[:, 0] Afdm = [numpy_to_petsc(Bhat, bdof, block=True, comm=comm)] for bc in range(4): bcs = (bc % 2, bc//2) Abc = Ahat.copy() for k in range(2): if bcs[k] == 1: j = bdof[k] Abc[:, j] -= Dfacet[:, k] Abc[j, :] -= Dfacet[:, k] Abc[j, j] += eta Afdm.append(numpy_to_petsc(Abc, bdof, comm=comm)) return Afdm, Dfacet, bdof def extrude_interior_facet_maps(V): """ Extrude V.interior_facet_node_map and V.mesh().interior_facets.local_facet_dat :arg V: a :class:`.FunctionSpace` :returns: the 3-tuple of facet_to_nodes_fun: maps interior facets to the nodes of the two cells sharing it, local_facet_data_fun: maps interior facets to the local facet numbering in the two cells sharing it, nfacets: the total number of interior facets owned by this process """ if isinstance(V, (Function, Cofunction)): V = V.function_space() mesh = V.mesh() intfacets = mesh.interior_facets facet_to_cells = intfacets.facet_cell_map.values local_facet_data = intfacets.local_facet_dat.data_ro facet_node_map = V.interior_facet_node_map() facet_to_nodes = facet_node_map.values nbase = facet_to_nodes.shape[0] if mesh.cell_set._extruded: facet_offset = facet_node_map.offset local_facet_data_h = numpy.array([5, 4], local_facet_data.dtype) cell_node_map = V.cell_node_map() cell_to_nodes = cell_node_map.values_with_halo cell_offset = cell_node_map.offset nelv = cell_node_map.values.shape[0] layers = facet_node_map.iterset.layers_array itype = cell_offset.dtype shift_h = numpy.array([[0], [1]], itype) if mesh.variable_layers: nv = 0 to_base = [] to_layer = [] for f, cells in enumerate(facet_to_cells): istart = max(layers[cells, 0]) iend = min(layers[cells, 1]) nz = iend-istart-1 nv += nz to_base.append(numpy.full((nz,), f, itype)) to_layer.append(numpy.arange(nz, dtype=itype)) nh = layers[:, 1]-layers[:, 0]-2 to_base.append(numpy.repeat(numpy.arange(len(nh), dtype=itype), nh)) to_layer += [numpy.arange(nf, dtype=itype) for nf in nh] to_base = numpy.concatenate(to_base) to_layer = numpy.concatenate(to_layer) nfacets = nv + sum(nh[:nelv]) local_facet_data_fun = lambda e: local_facet_data[to_base[e]] if e < nv else local_facet_data_h facet_to_nodes_fun = lambda e: facet_to_nodes[to_base[e]] + to_layer[e]*facet_offset if e < nv else numpy.reshape(cell_to_nodes[to_base[e]] + numpy.kron(to_layer[e]+shift_h, cell_offset), (-1,)) else: nelz = layers[0, 1]-layers[0, 0]-1 nv = nbase * nelz nh = nelv * (nelz-1) nfacets = nv + nh local_facet_data_fun = lambda e: local_facet_data[e//nelz] if e < nv else local_facet_data_h facet_to_nodes_fun = lambda e: facet_to_nodes[e//nelz] + (e % nelz)*facet_offset if e < nv else numpy.reshape(cell_to_nodes[(e-nv)//(nelz-1)] + numpy.kron(((e-nv) % (nelz-1))+shift_h, cell_offset), (-1,)) else: facet_to_nodes_fun = lambda e: facet_to_nodes[e] local_facet_data_fun = lambda e: local_facet_data[e] nfacets = nbase return facet_to_nodes_fun, local_facet_data_fun, nfacets def extrude_node_map(node_map, bsize=1): """ Construct a (possibly vector-valued) cell to node map from an un-extruded scalar map. :arg node_map: a :class:`pyop2.Map` mapping entities to their local dofs, including ghost entities. :arg bsize: the block size :returns: a 2-tuple with the cell to node map and the number of cells owned by this process """ nel = node_map.values.shape[0] if node_map.offset is None: def _scalar_map(map_values, e, result=None): if result is None: result = numpy.empty_like(map_values[e]) numpy.copyto(result, map_values[e]) return result scalar_map = partial(_scalar_map, node_map.values_with_halo) else: layers = node_map.iterset.layers_array if layers.shape[0] == 1: def _scalar_map(map_values, offset, nelz, e, result=None): if result is None: result = numpy.empty_like(offset) numpy.copyto(result, offset) result *= (e % nelz) result += map_values[e // nelz] return result nelz = layers[0, 1]-layers[0, 0]-1 nel *= nelz scalar_map = partial(_scalar_map, node_map.values_with_halo, node_map.offset, nelz) else: def _scalar_map(map_values, offset, to_base, to_layer, e, result=None): if result is None: result = numpy.empty_like(offset) numpy.copyto(result, offset) result *= to_layer[e] result += map_values[to_base[e]] return result nelz = layers[:, 1]-layers[:, 0]-1 nel = sum(nelz[:nel]) to_base = numpy.repeat(numpy.arange(node_map.values_with_halo.shape[0], dtype=node_map.offset.dtype), nelz) to_layer = numpy.concatenate([numpy.arange(nz, dtype=node_map.offset.dtype) for nz in nelz]) scalar_map = partial(_scalar_map, node_map.values_with_halo, node_map.offset, to_base, to_layer) if bsize == 1: return scalar_map, nel def vector_map(bsize, ibase, e, result=None): index = None if result is not None: index = result[:, 0] index = scalar_map(e, result=index) index *= bsize return numpy.add.outer(index, ibase, out=result) ibase = numpy.arange(bsize, dtype=node_map.values.dtype) return partial(vector_map, bsize, ibase), nel