Source code for vittles.sparse_hessian_lib

import autograd
import numpy as np
from .sensitivity_lib import _append_jvp

from copy import deepcopy
import scipy as sp
import scipy.sparse
from scipy.sparse import coo_matrix


[docs]class SparseBlockHessian(): """Efficiently calculate block-sparse Hessians. The objective function is expected to be of the form .. math :: x = (x_1 , ... , x_G) \\textrm{ (or some permutation thereof)} f(x) = \sum_{g=1}^{G} f_g(x_g) Each :math:`x_g` is expected to have the same dimension. Consequently, the Hessian matrix of ``f`` with respect to ``x``, is block diagonal with ``G`` blocks, up to a permutation of the order of ``x``. The purpose of this class is to efficiently calculate this Hessian when the block structure (i.e., the partition of ``x``) is known. """
[docs] def __init__(self, objective_function, sparsity_array): """In terms of the class description, ``objective_function = f``, ``opt_par = x``, and ``sparsity_array`` describes the partition of ``x`` into :math:`(x_1, ..., x_G)`. Parameters ------------ objective_function : `callable` An objective function of which to calculate a Hessian. The argument should be - ``opt_par``: `numpy.ndarray` (N,) The optimization parameter. sparsity_array : `numpy.ndarray` (G, M) An array containing the indices of rows and columns of each block. The Hessian should contain ``G`` dense blocks, each of which is ``M`` by ``M``. Each row of ``sparsity_array`` should contain the indices of the corresponding block. There must be no repeated indices, and each block must be the same size. """ self._fun = objective_function self._sparsity_array = sparsity_array self._num_blocks = self._sparsity_array.shape[0] self._block_size = self._sparsity_array.shape[1] if len(np.unique(sparsity_array)) != len(sparsity_array.flatten()): raise ValueError( 'The indices in ``sparsity array`` must be unique.') self._f_grad = autograd.grad(self._fun, argnum=0) self._f_fwd_hess = _append_jvp(self._f_grad, num_base_args=1)
def _hess_summed_term(self, opt_par, ib): """``ib`` is the index within the block. """ v = np.zeros_like(opt_par) v[self._sparsity_array[:, ib]] = 1 return self._f_fwd_hess(opt_par, v)
[docs] def get_block_hessian(self, opt_par, print_every=0): """Get the block Hessian at ``opt_par`` and ``weights``. Parmeters ---------- opt_par : `numpy.ndarray` The argument to ``objective_function`` at which to evaluate the Hessian matrix. print_every : `int`, optional. How often to display progress. If ``0``, nothing is printed. Returns -------- hessian : `scipy.sparse.coo_matrix` (N, N) The block-sparse Hessian given by and ``sparsity_array``. """ opt_par = np.atleast_1d(opt_par) if opt_par.ndim != 1: raise ValueError('``opt_par`` must be a vector.') mat_vals = [] # These will be the entries of the Hessian mat_rows = [] # These will be the row indices mat_cols = [] # These will be the column indices for ib in range(self._block_size): if print_every > 0: if ib % print_every == 0: print('Block index {} of {}.'.format(ib, self._block_size)) hess_prod = self._hess_summed_term(opt_par, ib) for b in range(self._num_blocks): hess_inds = self._sparsity_array[b, :] mat_vals.extend(hess_prod[hess_inds]) mat_rows.extend(hess_inds) mat_cols.extend(np.full(self._block_size, hess_inds[ib])) if print_every > 0: print('Done differentiating.') d = len(opt_par) h_sparse = coo_matrix((mat_vals, (mat_rows, mat_cols)), (d, d)) return h_sparse
[docs] def get_global_hessian(self, opt_par, global_inds=None, print_every=0): """Get the dense Hessian terms for the global parameters, which are, by default, indexed by any indices not in ``_sparsity_array``. """ local_inds = np.hstack(self._sparsity_array) if global_inds is None: global_inds = np.setdiff1d(np.arange(len(opt_par)), local_inds) global_local_intersection = np.intersect1d(global_inds, local_inds) if len(global_local_intersection) > 0: raise ValueError( 'The global and local indices must be disjoint. {}'.format( global_local_intersection)) mat_vals = [] # These will be the entries of the Hessian mat_rows = [] # These will be the row indices mat_cols = [] # These will be the column indices v = np.zeros_like(opt_par) count = 0 for ig in global_inds: if print_every > 0: if count % print_every == 0: print('Global index {} of {}.'.format( count, len(global_inds))) v[ig] = 1 hess_row = self._f_fwd_hess(opt_par, v) for il in local_inds: mat_vals.append(hess_row[il]) mat_cols.append(ig) mat_rows.append(il) mat_vals.append(hess_row[il]) mat_cols.append(il) mat_rows.append(ig) for ig2 in global_inds: mat_vals.append(0.5 * hess_row[ig2]) mat_cols.append(ig) mat_rows.append(ig2) mat_vals.append(0.5 * hess_row[ig2]) mat_cols.append(ig2) mat_rows.append(ig) v[ig] = 0 count += 1 if print_every > 0: print('Done differentiating.') d = len(opt_par) h_sparse = coo_matrix((mat_vals, (mat_rows, mat_cols)), (d, d)) return h_sparse
def get_hessian(self, opt_par, print_every=0): local_hessian = self.get_block_hessian(opt_par, print_every=print_every) global_hessian = self.get_global_hessian(opt_par, print_every=print_every) return local_hessian + global_hessian