Source code for firedrake.slate.slac.compiler

"""This is Slate's Linear Algebra Compiler. This module is
responsible for generating C kernel functions representing
symbolic linear algebra expressions written in Slate.

This linear algebra compiler uses both Firedrake's form compiler,
the Two-Stage Form Compiler (TSFC) and loopy. TSFC provides this
compiler with appropriate kernel functions (in C) for evaluating integral
expressions (finite element variational forms written in UFL).
"""
import time
from hashlib import md5

from firedrake_citations import Citations
from firedrake.tsfc_interface import SplitKernel, KernelInfo, TSFCKernel

from firedrake.slate.slac.kernel_builder import LocalLoopyKernelBuilder
from firedrake.slate.slac.utils import slate_to_gem, merge_loopy
from firedrake.slate.slac.optimise import optimise

from firedrake import tsfc_interface
from firedrake.logging import logger
from firedrake.parameters import parameters
from firedrake.petsc import get_petsc_variables
from firedrake.utils import complex_mode
from gem import impero_utils
from itertools import chain

from pyop2.utils import get_petsc_dir
from pyop2.mpi import COMM_WORLD
from pyop2.codegen.rep2loopy import SolveCallable, INVCallable

import firedrake.slate.slate as slate
import numpy as np
import loopy
import gem
from gem import indices as make_indices
from tsfc.kernel_args import OutputKernelArg, CoefficientKernelArg
from tsfc.loopy import generate as generate_loopy
import copy

from petsc4py import PETSc

__all__ = ['compile_expression']

GREEN = "\033[1;37;32m%s\033[0m"


try:
    PETSC_DIR, PETSC_ARCH = get_petsc_dir()
except ValueError:
    PETSC_DIR, = get_petsc_dir()
    PETSC_ARCH = None

BLASLAPACK_LIB = None
BLASLAPACK_INCLUDE = None
if COMM_WORLD.rank == 0:
    petsc_variables = get_petsc_variables()
    BLASLAPACK_LIB = petsc_variables.get("BLASLAPACK_LIB", "")
    BLASLAPACK_LIB = COMM_WORLD.bcast(BLASLAPACK_LIB, root=0)
    BLASLAPACK_INCLUDE = petsc_variables.get("BLASLAPACK_INCLUDE", "")
    BLASLAPACK_INCLUDE = COMM_WORLD.bcast(BLASLAPACK_INCLUDE, root=0)
else:
    BLASLAPACK_LIB = COMM_WORLD.bcast(None, root=0)
    BLASLAPACK_INCLUDE = COMM_WORLD.bcast(None, root=0)

cell_to_facets_dtype = np.dtype(np.int8)


class SlateKernel(TSFCKernel):
    @classmethod
    def _cache_key(cls, expr, compiler_parameters):
        return md5(
            (expr.expression_hash + str(sorted(compiler_parameters.items()))).encode()).hexdigest(), expr.ufl_domains()[0].comm

    def __init__(self, expr, compiler_parameters):
        if self._initialized:
            return
        self.split_kernel = generate_loopy_kernel(expr, compiler_parameters)
        self._initialized = True


[docs] def compile_expression(slate_expr, compiler_parameters=None): """Takes a Slate expression `slate_expr` and returns the appropriate ``pyop2.op2.Kernel`` object representing the Slate expression. :arg slate_expr: a :class:`~.Tensor` expression. :arg tsfc_parameters: an optional `dict` of form compiler parameters to be passed to TSFC during the compilation of ufl forms. Returns: A ``tuple`` containing a ``SplitKernel(idx, kinfo)`` """ if complex_mode: raise NotImplementedError("SLATE doesn't work in complex mode yet") if not isinstance(slate_expr, slate.TensorBase): raise ValueError("Expecting a `TensorBase` object, not %s" % type(slate_expr)) # Update default parameters with passed parameters # The deepcopy is needed because parameters is a nested dict params = copy.deepcopy(parameters) if compiler_parameters and "slate_compiler" in compiler_parameters.keys(): params["slate_compiler"].update(compiler_parameters.pop("slate_compiler")) if compiler_parameters: params["form_compiler"].update(compiler_parameters) # If the expression has already been symbolically compiled, then # simply reuse the produced kernel. cache = slate_expr._metakernel_cache key = str(sorted(params.items())) try: return cache[key] except KeyError: kernel = SlateKernel(slate_expr, params).split_kernel return cache.setdefault(key, kernel)
def get_temp_info(loopy_kernel): """Get information about temporaries in loopy kernel. Returns memory in bytes and number of temporaries. """ mems = [temp.nbytes for temp in loopy_kernel.temporary_variables.values()] mem_total = sum(mems) num_temps = len(loopy_kernel.temporary_variables) # Get number of temporaries of different shapes shapes = {} for temp in loopy_kernel.temporary_variables.values(): shape = temp.shape if temp.storage_shape is not None: shape = temp.storage_shape shapes[len(shape)] = shapes.get(len(shape), 0) + 1 return mem_total, num_temps, mems, shapes def generate_loopy_kernel(slate_expr, compiler_parameters=None): cpu_time = time.time() if len(slate_expr.ufl_domains()) > 1: raise NotImplementedError("Multiple domains not implemented.") Citations().register("Gibson2018") orig_expr = slate_expr # Optimise slate expr, e.g. push blocks as far inward as possible if compiler_parameters["slate_compiler"]["optimise"]: slate_expr = optimise(slate_expr, compiler_parameters["slate_compiler"]) # Create a loopy builder for the Slate expression, # e.g. contains the loopy kernels coming from TSFC gem_expr, var2terminal = slate_to_gem(slate_expr, compiler_parameters["slate_compiler"]) scalar_type = compiler_parameters["form_compiler"]["scalar_type"] (slate_loopy, slate_loopy_event), output_arg = gem_to_loopy(gem_expr, var2terminal, scalar_type) builder = LocalLoopyKernelBuilder(expression=slate_expr, tsfc_parameters=compiler_parameters["form_compiler"]) name = "slate_wrapper" loopy_merged, arguments, events = merge_loopy(slate_loopy, output_arg, builder, var2terminal, name) loopy_merged = loopy.register_callable(loopy_merged, INVCallable.name, INVCallable()) loopy_merged = loopy.register_callable(loopy_merged, SolveCallable.name, SolveCallable()) loopykernel = tsfc_interface.as_pyop2_local_kernel(loopy_merged, name, len(arguments), include_dirs=BLASLAPACK_INCLUDE.split(), ldargs=BLASLAPACK_LIB.split(), events=events+(slate_loopy_event,)) # map the coefficients in the order that PyOP2 needs orig_coeffs = orig_expr.coefficients() new_coeffs = slate_expr.coefficients() map_new_to_orig = [orig_coeffs.index(c) for c in new_coeffs] coefficient_numbers = tuple((map_new_to_orig[n], split_map) for (n, split_map) in slate_expr.coeff_map) coefficients = list(filter(lambda elm: isinstance(elm, CoefficientKernelArg), arguments)) # do the same for constants orig_constants = orig_expr.constants() new_constants = slate_expr.constants() constant_numbers = tuple(orig_constants.index(c) for c in new_constants) assert len(list(chain(*(map[1] for map in coefficient_numbers)))) == len(coefficients), \ "KernelInfo must be generated with a coefficient map that maps EXACTLY all coefficients that are in its arguments attribute." assert len(loopy_merged.callables_table[name].subkernel.args) - int(builder.bag.needs_mesh_layers) == len(arguments), \ "Outer loopy kernel must have the same amount of args as there are in arguments" kinfo = KernelInfo(kernel=loopykernel, integral_type="cell", # slate can only do things as contributions to the cell integrals oriented=builder.bag.needs_cell_orientations, subdomain_id=("otherwise",), domain_number=0, coefficient_numbers=coefficient_numbers, constant_numbers=constant_numbers, needs_cell_facets=builder.bag.needs_cell_facets, pass_layer_arg=builder.bag.needs_mesh_layers, needs_cell_sizes=builder.bag.needs_cell_sizes, arguments=arguments, events=events) # Cache the resulting kernel # Slate kernels are never split, so indicate that with None in the index slot. idx = tuple([None]*slate_expr.rank) logger.info(GREEN % "compile_slate_expression finished in %g seconds.", time.time() - cpu_time) return (SplitKernel(idx, kinfo),) def gem_to_loopy(gem_expr, var2terminal, scalar_type): """ Method encapsulating stage 2. Converts the gem expression dag into imperoc first, and then further into loopy. :return slate_loopy: 2-tuple of loopy kernel for slate operations and loopy GlobalArg for the output variable. """ # Creation of return variables for outer loopy shape = gem_expr.shape if len(gem_expr.shape) != 0 else (1,) idx = make_indices(len(shape)) indexed_gem_expr = gem.Indexed(gem_expr, idx) output_loopy_arg = loopy.GlobalArg("output", shape=shape, dtype=scalar_type, is_input=True, is_output=True) args = [output_loopy_arg] + [loopy.GlobalArg(var.name, shape=var.shape, dtype=scalar_type) for var in var2terminal.keys()] ret_vars = [gem.Indexed(gem.Variable("output", shape), idx)] preprocessed_gem_expr = impero_utils.preprocess_gem([indexed_gem_expr]) # glue assignments to return variable assignments = list(zip(ret_vars, preprocessed_gem_expr)) # Part A: slate to impero_c impero_c = impero_utils.compile_gem(assignments, (), remove_zeros=False) # Part B: impero_c to loopy output_arg = OutputKernelArg(output_loopy_arg) return generate_loopy(impero_c, args, scalar_type, "slate_loopy", [], log=PETSc.Log.isActive()), output_arg