Source code for firedrake.slate.slac.optimise

from gem.node import MemoizerArg
from functools import singledispatch
from itertools import repeat
from firedrake.slate.slate import *
from collections import namedtuple
from firedrake.ufl_expr import adjoint

""" ActionBag class
:arg coeff:     what we contract with.
:arg pick_op:   decides which argument in Tensor is exchanged against the coefficient
                and also in which operand the action has to be pushed,
                basically determins if we pre or postmultiply
"""
ActionBag = namedtuple("ActionBag", ["coeff", "pick_op"])


[docs] def flip(pick_op): """Flip an index. Using this function essentially reverses the order of multiplication.""" return pick_op ^ 1
[docs] def optimise(expression, parameters): """Optimises a Slate expression, by pushing blocks and multiplications inside the expression and by removing double transposes. :arg expression: A (potentially unoptimised) Slate expression. :arg parameters: A dict of compiler parameters. Returns: An optimised Slate expression """ # 0) Block optimisation expression = push_block(expression) # 1) DiagonalTensor optimisation expression = push_diag(expression) # 2) Multiplication optimisation if expression.rank < 2: expression = push_mul(expression, parameters) # 3) Transpose optimisation expression = drop_double_transpose(expression) return expression
[docs] def push_block(expression): """Executes a Slate compiler optimisation pass. The optimisation is achieved by pushing blocks from the outside to the inside of an expression. Without the optimisation the local TSFC kernels are assembled first and then the result of the assembly kernel gets indexed in the Slate kernel (and further linear algebra operations maybe done on it). The optimisation pass essentially changes the order of assembly and indexing. :arg expression: A (potentially unoptimised) Slate expression. Returns: An optimised Slate expression, where Blocks are terminal whereever possible. """ mapper = MemoizerArg(_push_block) ret = mapper(expression, ()) return ret
@singledispatch def _push_block(expr, self, indices): raise AssertionError("Cannot handle terminal type: %s" % type(expr)) @_push_block.register(Transpose) def _push_block_transpose(expr, self, indices): """Indices of the Blocks are transposed if Block is pushed into a Transpose.""" return Transpose(*map(self, expr.children, repeat(indices[::-1]))) if indices else expr @_push_block.register(Add) @_push_block.register(Negative) @_push_block.register(DiagonalTensor) @_push_block.register(Reciprocal) def _push_block_distributive(expr, self, indices): """Distributes Blocks for these nodes""" return type(expr)(*map(self, expr.children, repeat(indices))) @_push_block.register(Factorization) @_push_block.register(Inverse) @_push_block.register(Solve) @_push_block.register(Mul) def _push_block_stop(expr, self, indices): """Blocks cannot be pushed further into this set of nodes.""" expr = type(expr)(*map(self, expr.children, repeat(tuple()))) return Block(expr, indices) if indices else expr @_push_block.register(Tensor) def _push_block_tensor(expr, self, indices): """Turns a Block on a Tensor into a Tensor of an indexed form.""" return Tensor(Block(expr, indices).form) if indices else expr @_push_block.register(AssembledVector) def _push_block_assembled_vector(expr, self, indices): """Turns a Block on an AssembledVector into the specialized node BlockAssembledVector.""" return BlockAssembledVector(expr._function, expr, indices) if indices else expr @_push_block.register(Block) def _push_block_block(expr, self, indices): """Inlines Blocks into each other. Note that the indices are inlined from the ouside. Example: If we have got the Slate expression A.blocks[:3,:3].blocks[0,0], we encounter the (0,0)-blocks first. The first time round the indices are empty and the (0,0) are in expr._indices. The second time round, (0,0) is stored in indices and the slices are in expr._indices. So in the following line we basically say indices = ((0,1,2)[0], (0,1,2)[0]) """ reindexed = tuple(big[slice(small[0], small[-1]+1)] for big, small in zip(expr._indices, indices)) indices = expr._indices if not indices else reindexed block, = map(self, expr.children, repeat(indices)) return block
[docs] def push_diag(expression): """Executes a Slate compiler optimisation pass. The optimisation is achieved by pushing DiagonalTensor from the outside to the inside of an expression. :arg expression: A (potentially unoptimised) Slate expression. Returns: An optimised Slate expression, where DiagonalTensors are sitting on terminal tensors whereever possible. """ mapper = MemoizerArg(_push_diag) return mapper(expression, False)
@singledispatch def _push_diag(expr, self, diag): raise AssertionError("Cannot handle terminal type: %s" % type(expr)) @_push_diag.register(Transpose) @_push_diag.register(Add) @_push_diag.register(Negative) def _push_diag_distributive(expr, self, diag): """Distributes the DiagonalTensors into these nodes""" return type(expr)(*map(self, expr.children, repeat(diag))) @_push_diag.register(Factorization) @_push_diag.register(Inverse) @_push_diag.register(Solve) @_push_diag.register(Mul) @_push_diag.register(Tensor) def _push_diag_stop(expr, self, diag): """Diagonal Tensors cannot be pushed further into this set of nodes.""" expr = type(expr)(*map(self, expr.children, repeat(False))) if not expr.terminal else expr return DiagonalTensor(expr) if diag else expr @_push_diag.register(Block) def _push_diag_block(expr, self, diag): """Diagonal Tensors cannot be pushed further into this set of nodes.""" expr = type(expr)(*map(self, expr.children, repeat(False)), expr._indices) if not expr.terminal else expr return DiagonalTensor(expr) if diag else expr @_push_diag.register(AssembledVector) @_push_diag.register(Reciprocal) def _push_diag_vectors(expr, self, diag): """DiagonalTensors should not be pushed onto rank-1 tensors.""" if diag: raise AssertionError("It is not legal to define DiagonalTensors on rank-1 tensors.") else: return expr @_push_diag.register(DiagonalTensor) def _push_diag_diag(expr, self, diag): """DiagonalTensors are either pushed down or ignored when wrapped into another DiagonalTensor.""" return self(*expr.children, not diag)
[docs] def push_mul(tensor, options): """Executes a Slate compiler optimisation pass. The optimisation is achieved by pushing coefficients from the outside to the inside of an expression. The optimisation pass essentially changes the order of operations in the expressions so that only matrix-vector products are executed. :arg tensor: A (potentially unoptimised) Slate expression. :arg options: Optimisation pass options, e.g. if the multiplication should be replaced by an action. Returns: An optimised Slate expression, where only matrix-vector products are executed whereever possible. """ from gem.node import MemoizerArg mapper = MemoizerArg(_push_mul) mapper.action = options["replace_mul"] a = mapper(tensor, ActionBag(None, 1)) return a
[docs] def drop_double_transpose(expr): """Remove double transposes from optimised Slate expression.""" from gem.node import Memoizer mapper = Memoizer(_drop_double_transpose) a = mapper(expr) return a
@singledispatch def _drop_double_transpose(expr, self): raise AssertionError("Cannot handle terminal type: %s" % type(expr)) @_drop_double_transpose.register(Tensor) @_drop_double_transpose.register(AssembledVector) @_drop_double_transpose.register(Block) def _drop_double_transpose_terminals(expr, self): """Terminal expression is encountered.""" return expr @_drop_double_transpose.register(Factorization) def _drop_double_transpose_factorization(expr, self): """Drop any factorisations. This is needed because whenever a Solve node is created so is a Factorization node inside it. The Factorization nodes are only needed for the Eigen Backend however.""" return self(*expr.children) @_drop_double_transpose.register(Transpose) def _drop_double_transpose_transpose(expr, self): """When the expression and its child are transposes the grandchild is returned, because A=A.T.T.""" child, = expr.children if isinstance(child, Transpose): grandchild, = child.children return self(grandchild) elif child.terminal and child.rank > 1: return Tensor(adjoint(child.form)) else: return type(expr)(*map(self, expr.children)) @_drop_double_transpose.register(Negative) @_drop_double_transpose.register(Add) @_drop_double_transpose.register(Mul) @_drop_double_transpose.register(Solve) @_drop_double_transpose.register(Inverse) @_drop_double_transpose.register(DiagonalTensor) @_drop_double_transpose.register(Reciprocal) def _drop_double_transpose_distributive(expr, self): """Distribute into the children of the expression. """ return type(expr)(*map(self, expr.children)) @singledispatch def _push_mul(expr, self, state): raise AssertionError("Cannot handle terminal type: %s" % type(expr)) @_push_mul.register(Tensor) @_push_mul.register(Block) def _push_mul_tensor(expr, self, state): if not self.action: if state.coeff: return Mul(expr, state.coeff) if state.pick_op == 1 else Mul(state.coeff, expr) else: return expr else: raise NotImplementedError("Actions in Slate are not yet supported.") @_push_mul.register(AssembledVector) @_push_mul.register(DiagonalTensor) @_push_mul.register(Reciprocal) def _push_mul_vector(expr, self, state): """Do not push into AssembledVectors.""" return expr @_push_mul.register(Negative) @_push_mul.register(Add) def _push_mul_distributive(expr, self, state): """Distribute the multiplication into the children of the expression. """ return type(expr)(*map(self, expr.children, (state,)*len(expr.children))) @_push_mul.register(Inverse) def _push_mul_inverse(expr, self, state): """ Rewrites the multiplication of Inverse with a coefficient into a Solve via A.inv*b = A.solve(b) or b*A^{-1}= (A.T.inv*b.T).T = A.T.solve(b.T).T .""" child, = expr.children if expr.diagonal: # Don't optimise further so that the translation to gem at a later can just spill ]1/a_ii[ return expr * state.coeff if state.pick_op else state.coeff * expr else: return (Solve(child, state.coeff) if state.pick_op else Transpose(Solve(Transpose(child), Transpose(state.coeff)))) @_push_mul.register(Transpose) def _push_mul_transpose(expr, self, state): """ Pushes a multiplication through a transpose. This works with help of A.T*x = (x.T*A).T. Another example for expr:=C*A.solve(B.T) : (C*A.solve(B.T)).T * y = (y.T * (C*A.solve(B.T))).T :arg expr: a Transpose :arg self: a MemoizerArg object. :arg state: state carries a coefficient in .coeff, and information if multiply from front (0) or back (1) in .pick_op :returns: an optimised transposed expression """ if expr.rank == 2: # switch the multiplication order with pick_op transposed_state = ActionBag(Transpose(state.coeff), flip(state.pick_op)) # push mul into A with new state pushed_expr = self(*expr.children, transposed_state) # transpose the end result return self(Transpose(pushed_expr), state) else: return expr @_push_mul.register(Solve) def _push_mul_solve(expr, self, state): """ Pushes a multiplication through a solve. case 1) child 1 is matrix, child2 is vector case 2) child 1 is matrix, child2 is matrix a) multiplication from front b) multiplication from back """ if expr.rank == 2 and state.pick_op == 0: """ case 2) child 1 is matrix, child2 is matrix and a coefficient is passed through a) multiplication from front We exploit A.T*x = (x.T*A).T and previously used A.inv*b=A.solve(b). e.g. (1) T3*T3.inv*T3.T transforms to (2) T3.T.solve(C3.T).T*T3.T (3) (T_3*T3.T.solve(C3.T)).T From (2) to (3) we need to swap rhs of the solve with the coefficient. or (1) (C*(A.solve(A.solve(A)))) is (2) (A.solve(A).T*A.T.solve(C.T)).T transforms to (3) (A.T.solve(C.T)).T*A.solve(A)).T.T (4) (A.T*A.T.solve((A.T.solve(C.T))) (5) (A.T*A.T.solve(A.T.solve(C.T))).T (6) A.T.solve(A.T.solve(C.T)).T*A """ mat = Transpose(expr.children[state.pick_op]) swapped_op = Transpose(expr.children[flip(state.pick_op)]) new_rhs = Transpose(state.coeff) pushed_child = self(Solve(mat, new_rhs), ActionBag(None, flip(state.pick_op))) return Transpose(self(swapped_op, ActionBag(pushed_child, flip(state.pick_op)))) else: """ case 1) a) child 1 is matrix, child2 is vector and there is no coefficient passed through b) child 2 is matrix, child2 is matrix and there is a coefficient passed through -> multiplication from back A.solve(B)*x = A.inv*B*x = A.inv*(B*x) = A.solve(Bx) We always push into the right hand side of the solve. """ mat, rhs = expr.children return (expr if (rhs.rank == 1 and state.coeff) else Solve(mat, self(self(rhs, state), state))) @_push_mul.register(Mul) def _push_mul_mul(expr, self, state): """ Pushes an multiplication by a coefficent through a multiplication to the innermost node. e.g. (A1*A2)*b = A1*(A2*b) --------case 1) child 1 is matrix, child2 is vector or other way around In this case the Mul expression is already partially optimised. a) The child with rank 1 must contain the coefficient. Both childs may still leave space for optimisation. Optimise child of rank 1 first and use result as coefficient to push the Mul into the other child. No coefficient would be multiplied on the outside of the Mul. e.g. (A0*A1)*((A2+A3)*y) = (A0*A1)*(A2*y+A3*y) = A0*(A1*(A2*y+A3*y)) b) A coefficient is multiplied from the outside. Both childs may still leave space for optimisation. Optimise child of rank 1 first but do not use result as coefficient. Push coefficient from the outside into the Mul of the other child. e.g. ((y.T*(A0+A1))*B)*y = ((y.T*A0+y.T*A1)*B)*y = (y.T*A0+y.T*A1)*(B*y) --------case 2) child 1 is matrix, child2 is matrix Examples: a) (A*B*C)*y ––––––––––––––––––––––––––––––––––––––––––––––––––––––– outmost_mul(A*B*C,y) ----> A*B*outmost_mul(C,y) ----> A*(B*outmost_mul(C,y)) outmost_mul1 Mul2 Mul3 / | / | / | Mul2 y ----> Mul3 outmost_mul1 ----> A Mul2 / | / | / | / | Mul3 C A B C y B outmost_mul / | / | A B C y b) y.T*TensorOp(op1, op2) ––––––––––––––––––––––––––––––––––––––––––––––––––––––– outmost_mul TensorOp / | / | y.T TensorOp ----> outmost_mul op2 / | / | op1 op2 y.T op1 :arg expr: a Multiplication :arg self: a MemoizerArg object. :arg state: state carries a coefficient in .coeff, and information if multiply from front (0) or back (1) in .pick_op :returns: an optimised Multiplication """ if expr.rank == 1: # Optimise the child first that must contain a coefficient prio_child, = tuple(filter(lambda child: child.rank == 1, expr.children)) pick_op = expr.children.index(prio_child) else: # Optimise the first child first if multiplication happens from front, # otw. we we walk into second child first. prio_child = expr.children[state.pick_op] pick_op = state.pick_op other_child = expr.children[flip(pick_op)] if state.coeff and expr.rank == 1: assert "So far Slate cannot express linear algebra as in case 1b. If that changes, remove the assertion." # Optimise child of rank 1 first but do not use result as coefficient. coeff = self(prio_child, ActionBag(None, pick_op)) pushed_other_child = self(other_child, ActionBag(state.coeff, pick_op)) return Mul(pushed_other_child, prio_child) if pick_op else Mul(prio_child, pushed_other_child) else: # Optimise child of rank 1 first and use result as coefficient coeff = self(prio_child, state) return self(other_child, ActionBag(coeff, pick_op)) @_push_mul.register(Factorization) def _push_mul_factorization(expr, self, state): """Drop any factorisations. This is needed because whenever a Solve node is created so is a Factorization node inside it. The Factorization nodes are only needed for the Eigen Backend however.""" return self(*expr.children, state)