# Copyright 2021-2023 Lawrence Livermore National Security, LLC and other
# MuyGPyS Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: MIT
"""
MuyGPs implementation
"""
from typing import Callable, List, Optional, Tuple
import MuyGPyS._src.math as mm
from MuyGPyS._src.util import auto_str
from MuyGPyS._src.mpi_utils import mpi_chunk
from MuyGPyS.gp.fast_precompute import (
_muygps_fast_posterior_mean_precompute,
FastPrecomputeCoefficients,
)
from MuyGPyS.gp.fast_mean import _muygps_fast_posterior_mean, FastPosteriorMean
from MuyGPyS.gp.hyperparameter import FixedScale, ScaleFn
from MuyGPyS.gp.kernels import KernelFn
from MuyGPyS.gp.mean import _muygps_posterior_mean, PosteriorMean
from MuyGPyS.gp.noise import HomoscedasticNoise, NoiseFn
from MuyGPyS.gp.variance import _muygps_diagonal_variance, PosteriorVariance
[docs]@auto_str
class MuyGPS:
"""
Local Kriging Gaussian Process.
Performs approximate GP inference by locally approximating an observation's
response using its nearest neighbors. Implements the MuyGPs algorithm as
articulated in [muyskens2021muygps]_.
Kernels accept different hyperparameter dictionaries specifying
hyperparameter settings. Keys can include `val` and `bounds`. `bounds` must
be either a len == 2 iterable container whose elements are scalars in
increasing order, or the string `fixed`. If `bounds == fixed` (the default
behavior), the hyperparameter value will remain fixed during optimization.
`val` must be either a scalar (within the range of the upper and lower
bounds if given) or the strings `"sample"` or `log_sample"`, which will
randomly sample a value within the range given by the bounds.
In addition to individual kernel hyperparamters, each MuyGPS object also
possesses a noise model, possibly with parameters, and a vector of
:math:`\\sigma^2` indicating the scale parameter associated with the
posterior variance of each dimension of the response.
Example:
>>> from MuyGPyS.gp import MuyGPS
>>> muygps = MuyGPS(
... kernel=Matern(
... smoothness=Parameter(0.38, (0.1, 2.5)),
... deformation=Isotropy(
... metric=F2,
... length_scale=Parameter(0.2),
... ),
... ),
... noise=HomoscedasticNoise(1e-5),
... scale=AnalyticScale(),
... )
MuyGPyS depends upon linear operations on specially-constructed tensors in
order to efficiently estimate GP realizations. One can use (see their
documentation for details) :func:`MuyGPyS.gp.tensors.pairwise_tensor` to
construct pairwise difference tensors and
:func:`MuyGPyS.gp.tensors.crosswise_tensor` to produce crosswise diff
tensors that `MuyGPS` can then use to construct kernel tensors and
cross-covariance matrices, respectively.
We can easily realize kernel tensors using a `MuyGPS` object's `kernel`
functor once we have computed a `pairwise_diffs` tensor and a
`crosswise_diffs` matrix.
Example:
>>> Kin = muygps.kernel(pairwise_diffs)
>>> Kcross = muygps.kernel(crosswise_diffs)
Args:
kernel:
The kernel to be used. Defines :math:`Kin_\\theta(\\cdot, \\cdot)` as
referenced in `MuyGPS` functions.
noise:
A noise model. Defines :math:`\\varepsilon` as referenced in
`MuyGPS` functions.
scale:
A variance scale parameter. Defines :math:`\\sigma^2` as referenced
in `MuyGPS` functions.
"""
def __init__(
self,
kernel: KernelFn,
noise: NoiseFn = HomoscedasticNoise(0.0, "fixed"),
scale: ScaleFn = FixedScale(response_count=1),
_backend_mean_fn: Callable = _muygps_posterior_mean,
_backend_var_fn: Callable = _muygps_diagonal_variance,
_backend_fast_mean_fn: Callable = _muygps_fast_posterior_mean,
_backend_fast_precompute_fn: Callable = _muygps_fast_posterior_mean_precompute,
):
self.kernel = kernel
self.scale = scale
self.noise = noise
self._backend_mean_fn = _backend_mean_fn
self._backend_var_fn = _backend_var_fn
self._backend_fast_mean_fn = _backend_fast_mean_fn
self._backend_fast_precompute_fn = _backend_fast_precompute_fn
self._make()
def _make(self) -> None:
self.kernel._make()
self._mean_fn = PosteriorMean(
self.noise, _backend_fn=self._backend_mean_fn
)
Kout = self.kernel.Kout()
self._var_fn = PosteriorVariance(
Kout, self.noise, self.scale, _backend_fn=self._backend_var_fn
)
self._fast_posterior_mean_fn = FastPosteriorMean(
_backend_fn=self._backend_fast_mean_fn
)
self._fast_precompute_fn = FastPrecomputeCoefficients(
self.noise, _backend_fn=self._backend_fast_precompute_fn
)
[docs] def fixed(self) -> bool:
"""
Checks whether all kernel and model parameters are fixed.
This is a convenience utility to determine whether optimization is
required.
Returns:
Returns `True` if all parameters are fixed, and `False` otherwise.
"""
for p in self.kernel._hyperparameters:
if not self.kernel._hyperparameters[p].fixed():
return False
if not self.noise.fixed():
return False
return True
[docs] def get_opt_params(
self,
) -> Tuple[List[str], mm.ndarray, mm.ndarray]:
"""
Return lists of unfixed hyperparameter names, values, and bounds.
Returns
-------
names:
A list of unfixed hyperparameter names.
params:
A list of unfixed hyperparameter values.
bounds:
A list of unfixed hyperparameter bound tuples.
"""
names, params, bounds = self.kernel.get_opt_params()
self.noise.append_lists(names, params, bounds)
return names, mm.array(params), mm.array(bounds)
[docs] def posterior_mean(
self, Kin: mm.ndarray, Kcross: mm.ndarray, batch_nn_targets: mm.ndarray
) -> mm.ndarray:
"""
Returns the posterior mean from the provided covariance,
cross-covariance, and target tensors.
Computes parallelized local solves of systems of linear equations using
the last two dimensions of `Kin` along with `Kcross` and
`batch_nn_targets` to predict responses in terms of the posterior mean.
Assumes that kernel tensor `Kin` and cross-covariance
matrix `Kcross` are already computed and given as arguments.
Returns the predicted response in the form of a posterior
mean for each element of the batch of observations, as computed in
Equation (3.4) of [muyskens2021muygps]_. Given observation set
:math:`X` with responses :math:`Y`, noise prior set
:math:`\\varepsilon`, and kernel function
:math:`Kin_\\theta(\\cdot, \\cdot)`, computes the following for each
prediction element :math:`\\mathbf{z}_i` with nearest neighbors index
set :math:`N_i`:
.. math::
\\widehat{Y} (\\mathbf{z}_i \\mid X_{N_i}) =
\\sigma^2 Kin_\\theta (\\mathbf{z}_i, X_{N_i})
(Kin_\\theta (X_{N_i}, X_{N_i}) + \\varepsilon_{N_i})^{-1}
Y(X_{N_i}).
Args:
Kin:
A tensor of shape `(batch_count, nn_count, nn_count)` containing
the `(nn_count, nn_count)`-shaped kernel matrices corresponding
to each of the batch elements.
Kcross:
A matrix of shape `(batch_count, nn_count)` whose rows consist
of `(1, nn_count)`-shaped cross-covariance vector corresponding
to each of the batch elements and its nearest neighbors.
batch_nn_targets:
A tensor of shape `(batch_count, nn_count, response_count)`
whose last dimension lists the vector-valued responses for the
nearest neighbors of each batch element.
Returns:
A matrix of shape `(batch_count, response_count)` whose rows are
the predicted response for each of the given indices.
"""
return self._mean_fn(Kin, Kcross, batch_nn_targets)
[docs] def posterior_variance(
self,
Kin: mm.ndarray,
Kcross: mm.ndarray,
) -> mm.ndarray:
"""
Returns the posterior variance from the provided covariance and
cross-covariance tensors.
Return the local posterior variances of each prediction, corresponding
to the diagonal elements of a covariance matrix. Given observation set
:math:`X` with responses :math:`Y`, noise prior set
:math:`\\varepsilon`, and kernel function
:math:`Kin_\\theta(\\cdot, \\cdot)`, computes the following for each
prediction element :math:`\\mathbf{z}_i` with nearest neighbors index
set :math:`N_i`:
.. math::
Var \\left (
\\widehat{Y} (\\mathbf{z}_i \\mid X_{N_i})
\\right) =
\\sigma^2 \\left (
Kin_\\theta (\\mathbf{z}_i, \\mathbf{z}_i) -
Kin_\\theta (\\mathbf{z}_i, X_{N_i})
\\left (
Kin_\\theta (X_{N_i}, X_{N_i}
\\right ) + \\varepsilon_{N_i})^{-1}
Kin_\\theta (X_{N_i}, \\mathbf{z}_i)
\\right ).
Args:
Kin:
A tensor of shape `(batch_count, nn_count, nn_count)` containing
the `(nn_count, nn_count)`-shaped kernel matrices corresponding
to each of the batch elements.
Kcross:
A matrix of shape `(batch_count, nn_count)` whose rows consist
of `(1, nn_count)`-shaped cross-covariance vector corresponding
to each of the batch elements and its nearest neighbors.
Returns:
A vector of shape `(batch_count, response_count)` consisting of the
diagonal elements of the posterior variance.
"""
return self._var_fn(Kin, Kcross)
[docs] def fast_coefficients(
self,
Kin: mm.ndarray,
train_nn_targets_fast: mm.ndarray,
) -> mm.ndarray:
"""
Produces coefficient matrix for the fast posterior mean given in
Equation (8) of [dunton2022fast]_.
Given observation set :math:`X` with responses :math:`Y`, noise prior
set :math:`\\varepsilon`, and kernel function
:math:`Kin_\\theta(\\cdot, \\cdot)`, computes the following for
each observation element :math:`\\mathbf{x}_i` with nearest neighbors
index set :math:`N^*_i`, containing `i` and the indices of the
`nn_count - 1` nearest neighbors of :math:`\\mathbf{x}_i`:
.. math::
C_i =
\\left ( Kin_\\theta (X_{N_i}, X_{N_i} \\right )
+ \\varepsilon_{N_i})^{-1} Y(X_{N_i}).
Args:
Kin:
A tensor of shape `(batch_count, nn_count, nn_count)` containing
the `(nn_count, nn_count)`-shaped kernel matrices corresponding
to each of the batch elements.
Kcross:
A matrix of shape `(batch_count, nn_count)` whose rows consist
of `(1, nn_count)`-shaped cross-covariance vector corresponding
to each of the batch elements and its nearest neighbors.
Returns:
A matrix :math:`C` of shape `(train_count, nn_count)` whose rows are
the precomputed coefficients for fast posterior mean inference.
"""
return self._fast_precompute_fn(Kin, train_nn_targets_fast)
[docs] def fast_posterior_mean(
self,
Kcross: mm.ndarray,
coeffs_tensor: mm.ndarray,
) -> mm.ndarray:
"""
Performs fast posterior mean inference using provided cross-covariance
and precomputed coefficient matrix.
Assumes that cross-covariance matrix `Kcross` is already computed and
given as an argument.
Returns the predicted response in the form of a posterior
mean for each element of the batch of observations, as computed in
Equation (9) of [dunton2022fast]_. Given the coefficients :math:`C`
created by :func:`~MuyGPyS.gp.muygps.MuyGPS.fast_coefficients` and
Equation (8) of [dunton2022fast]_, observation set
:math:`X`, noise prior set :math:`\\varepsilon`, and kernel function
:math:`Kin_\\theta(\\cdot, \\cdot)`, computes the following for each test
point :math:`\\mathbf{z}` and index set :math:`N^*_i` containing the
union of the index :math:`i` of the nearest neighbor
:math:`\\mathbf{x}_i` of :math:`\\mathbf{z}` and the `nn_count - 1`
nearest neighbors of :math:`\\mathbf{x}_i`:
.. math::
\\widehat{Y} \\left ( \\mathbf{z} \\mid X \\right ) =
\\sigma^2 Kin_\\theta(\\mathbf{z}, X_{N^*_i}) C_i.
Args:
Kcross:
A matrix of shape `(batch_count, nn_count)` whose rows consist
of `(1, nn_count)`-shaped cross-covariance vector corresponding
to each of the batch elements and its nearest neighbors.
coeffs_tensor:
A matrix of shape `(batch_count, nn_count, response_count)`
whose rows are given by precomputed coefficients.
Returns:
A matrix of shape `(batch_count, response_count)` whose rows are
the predicted response for each of the given indices.
"""
return self._fast_posterior_mean_fn(Kcross, coeffs_tensor)
[docs] def get_opt_mean_fn(self) -> Callable:
"""
Return a posterior mean function for use in optimization.
Assumes that optimization parameter literals will be passed as keyword
arguments.
Returns:
A function implementing the posterior mean, where `noise` is either
fixed or takes updating values during optimization. The function
expects keyword arguments corresponding to current hyperparameter
values for unfixed parameters.
"""
return self._mean_fn.get_opt_fn()
[docs] def get_opt_var_fn(self) -> Callable:
"""
Return a posterior variance function for use in optimization.
Assumes that optimization parameter literals will be passed as keyword
arguments.
Returns:
A function implementing posterior variance, where `noise` is either
fixed or takes updating values during optimization. The function
expects keyword arguments corresponding to current hyperparameter
values for unfixed parameters.
"""
return self._var_fn.get_opt_fn()
[docs] def optimize_scale(
self, pairwise_diffs: mm.ndarray, nn_targets: mm.ndarray
):
"""
Optimize the value of the :math:`sigma^2` scale parameter.
Uses the optimization method specified by the type of the `scale`
parameter to optimize its value.
Args:
pairwise_diffs:
A tensor of shape
`(batch_count, nn_count, nn_count, feature_count)` containing
the `(nn_count, nn_count, feature_count)`-shaped pairwise
nearest neighbor difference tensors corresponding to each of the
batch elements.
nn_targets:
Tensor of floats of shape
`(batch_count, nn_count, response_count)` containing the
expected response for each nearest neighbor of each batch
element.
Returns:
A reference to this model with a freshly-optimized `scale`
parameter.
"""
Kin = self.kernel(pairwise_diffs)
opt_fn = self.scale.get_opt_fn(self)
self.scale._set(opt_fn(Kin, nn_targets))
self._make()
return self
[docs] @mpi_chunk(return_count=3)
def make_predict_tensors(
self,
batch_indices: mm.ndarray,
batch_nn_indices: mm.ndarray,
test_features: Optional[mm.ndarray],
train_features: mm.ndarray,
train_targets: mm.ndarray,
**kwargs,
) -> Tuple[mm.ndarray, mm.ndarray, mm.ndarray]:
"""
Create the metric and target tensors for prediction using the model's
deformation.
Creates the `crosswise_tensor`, `pairwise_tensor` and `batch_nn_targets`
tensors required by :func:`~MuyGPyS.gp.MuyGPS.posterior_mean` and
:func:`~MuyGPyS.gp.MuyGPS.posterior_variance`.
Args:
batch_indices:
A vector of integers of shape `(batch_count,)` identifying the
training batch of observations to be approximated.
batch_nn_indices:
A matrix of integers of shape `(batch_count, nn_count)` listing
the nearest neighbor indices for all observations in the batch.
test_features:
The full floating point testing data matrix of shape
`(test_count, feature_count)`.
train_features:
The full floating point training data matrix of shape
`(train_count, ...)`.
train_targets:
A matrix of shape `(train_count, ...)` whose rows are
vector-valued responses for each training element.
Returns
-------
crosswise_tensor:
A tensor of shape `(batch_count, nn_count, ...)` whose second and
subsequent dimensions list the metric comparison between each batch
element element and its nearest neighbors.
pairwise_diffs:
A tensor of shape `(batch_count, nn_count, nn_count, ...)`
containing the `(nn_count, nn_count, ...)`-shaped pairwise nearest
neighbor metrics tensors corresponding to each of the batch
elements.
batch_nn_targets:
Tensor of floats of shape `(batch_count, nn_count, ...)` containing
the expected response for each nearest neighbor of each batch
element.
"""
if test_features is None:
test_features = train_features
crosswise_tensor = self.kernel.deformation.crosswise_tensor(
test_features,
train_features,
batch_indices,
batch_nn_indices,
disable_mpi=True,
)
pairwise_tensor = self.kernel.deformation.pairwise_tensor(
train_features, batch_nn_indices, disable_mpi=True
)
batch_nn_targets = train_targets[batch_nn_indices]
return crosswise_tensor, pairwise_tensor, batch_nn_targets
[docs] @mpi_chunk(return_count=4)
def make_train_tensors(
self,
batch_indices: mm.ndarray,
batch_nn_indices: mm.ndarray,
train_features: mm.ndarray,
train_targets: mm.ndarray,
**kwargs,
) -> Tuple[mm.ndarray, mm.ndarray, mm.ndarray, mm.ndarray]:
"""
Create the metric and target tensors needed for training.
Similar to :func:`~MuyGPyS.gp.muygps.MuyGPS.make_predict_tensors` but
returns the additional `batch_targets` matrix, which is only defined for
a batch of training data.
Args:
batch_indices:
A vector of integers of shape `(batch_count,)` identifying the
training batch of observations to be approximated.
batch_nn_indices:
A matrix of integers of shape `(batch_count, nn_count)` listing the
nearest neighbor indices for all observations in the batch.
train_features:
The full floating point training data matrix of shape
`(train_count, ...)`.
train_targets:
A matrix of shape `(train_count, ...)` whose rows are
vector-valued responses for each training element.
Returns
-------
crosswise_tensor:
A tensor of shape `(batch_count, nn_count, ...)` whose second and
subsequent dimensions list the metric comparison between each batch
element element and its nearest neighbors.
pairwise_diffs:
A tensor of shape `(batch_count, nn_count, nn_count, ...)`
containing the `(nn_count, nn_count, ...)`-shaped pairwise nearest
neighbor metrics tensors corresponding to each of the batch
elements.
batch_targets:
Matrix of floats of shape `(batch_count, ...)` whose rows
give the expected response for each batch element.
batch_nn_targets:
Tensor of floats of shape `(batch_count, nn_count, ...)` containing
the expected response for each nearest neighbor of each batch
element.
"""
crosswise_tensor = self.kernel.deformation.crosswise_tensor(
train_features,
train_features,
batch_indices,
batch_nn_indices,
disable_mpi=True,
)
pairwise_tensor = self.kernel.deformation.pairwise_tensor(
train_features, batch_nn_indices, disable_mpi=True
)
batch_nn_targets = train_targets[batch_nn_indices]
batch_targets = train_targets[batch_indices]
return (
crosswise_tensor,
pairwise_tensor,
batch_targets,
batch_nn_targets,
)
def __eq__(self, rhs) -> bool:
if isinstance(rhs, self.__class__):
return all(
(
all(
self.kernel._hyperparameters[h]()
== rhs.kernel._hyperparameters[h]()
for h in self.kernel._hyperparameters
),
self.noise() == rhs.noise(),
self.scale() == rhs.scale(),
)
)
else:
return False