# Copyright 2021-2024 Lawrence Livermore National Security, LLC and other
# MuyGPyS Project Developers. See the top-level COPYRIGHT file for details.
#
# SPDX-License-Identifier: MIT
"""
Resources and high-level API for a fast posterior mean inference workflow.
The `MuyGPyS.examples` subpackage is deprecated and will be streamlined or
removed in future versions.
:func:`~MuyGPyS.examples.fast_posterior_mean.make_fast_regressor` is a
high-level API for creating the necessary components for fast posterior mean
inference.
:func:`~MuyGPyS.examples.fast_posterior_mean.do_fast_posterior_mean` is a
high-level API for executing a simple, generic fast posterior mean workflow
given data.
It calls the maker APIs above and
:func:`~MuyGPyS.examples.fast_posterior_mean.fast_posterior_mean_any`.
"""
from time import perf_counter
from typing import Dict, Tuple
import MuyGPyS._src.math as mm
from MuyGPyS.gp import MuyGPS
from MuyGPyS.examples.from_indices import fast_posterior_mean_from_indices
from MuyGPyS.examples.regress import make_regressor
from MuyGPyS.gp.tensors import fast_nn_update
from MuyGPyS.neighbors import NN_Wrapper
from MuyGPyS.optimize import Bayes_optimize, OptimizeFn
from MuyGPyS.optimize.loss import LossFn, lool_fn
def make_fast_regressor(
    muygps: MuyGPS,
    nbrs_lookup: NN_Wrapper,
    train_features: mm.ndarray,
    train_targets: mm.ndarray,
) -> Tuple[mm.ndarray, mm.ndarray]:
    """
    Precompute the coefficient matrix and the training neighbor indices that
    the fast posterior mean workflow consumes.

    Args:
        muygps:
            A (possibly trained) MuyGPS object.
        nbrs_lookup:
            A data structure supporting nearest neighbor queries into
            `train_features`.
        train_features:
            A matrix of shape `(train_count, feature_count)` whose rows consist
            of observation vectors of the train data.
        train_targets:
            A matrix of shape `(train_count, response_count)` whose rows consist
            of response vectors of the train data.

    Returns
    -------
    precomputed_coefficients_matrix:
        A matrix of shape `(train_count, nn_count)` whose rows list the
        precomputed coefficients for each nearest neighbors set in the
        training data.
    nn_indices:
        The neighbor index array obtained by querying `nbrs_lookup` with
        every training index and passing the result through
        `fast_nn_update`.
    """
    # Tuple-unpacking the shape requires `train_features` to be 2-dimensional.
    train_count, _ = train_features.shape

    # Look up the neighbors of every training point in a single batch query.
    every_train_index = mm.arange(0, train_count)
    queried_indices, _ = nbrs_lookup.get_batch_nns(every_train_index)
    nn_indices = fast_nn_update(queried_indices)

    # Kernel tensor over each training point's neighbor set.
    pairwise = muygps.kernel.deformation.pairwise_tensor(
        train_features, nn_indices
    )
    Kin = muygps.kernel(pairwise)

    # Responses gathered per neighbor set, aligned with `Kin`.
    neighbor_targets = train_targets[nn_indices]

    coefficients = muygps.fast_coefficients(Kin, neighbor_targets)
    return coefficients, nn_indices
def do_fast_posterior_mean(
    test_features: mm.ndarray,
    train_features: mm.ndarray,
    train_targets: mm.ndarray,
    nn_count: int = 30,
    batch_count: int = 200,
    loss_fn: LossFn = lool_fn,
    opt_fn: OptimizeFn = Bayes_optimize,
    k_kwargs: Dict = dict(),
    nn_kwargs: Dict = dict(),
    opt_kwargs: Dict = dict(),
    verbose: bool = False,
) -> Tuple[MuyGPS, NN_Wrapper, mm.ndarray, mm.ndarray, Dict]:
    """
    Convenience function initializing a model and performing fast posterior mean
    inference.

    Expected parameters include keyword argument dicts specifying kernel
    parameters and nearest neighbor parameters. See the docstrings of the
    appropriate functions for specifics.

    Example:
        >>> from MuyGPyS.testing.test_utils import _make_gaussian_data
        >>> from MuyGPyS.examples.fast_posterior_mean import do_fast_posterior_mean
        >>> from MuyGPyS.gp.deformation import F2, Isotropy
        >>> from MuyGPyS.gp.hyperparameter import Parameter
        >>> from MuyGPyS.gp.hyperparameter import AnalyticScale
        >>> from MuyGPyS.gp.kernels import RBF
        >>> from MuyGPyS.gp.noise import HomoscedasticNoise
        >>> from MuyGPyS.optimize import Bayes_optimize
        >>> from MuyGPyS.optimize.loss import lool_fn
        >>> train_features, train_responses = make_train()  # stand-in function
        >>> test_features, test_responses = make_test()  # stand-in function
        >>> nn_kwargs = {"nn_method": "exact", "algorithm": "ball_tree"}
        >>> k_kwargs = {
        ...     "kernel": RBF(
        ...         deformation=Isotropy(
        ...             metric=F2,
        ...             length_scale=Parameter(1.0, (1e-2, 1e2))
        ...         )
        ...     ),
        ...     "noise": HomoscedasticNoise(1e-5),
        ...     "scale": AnalyticScale(),
        ... }
        >>> (
        ...     muygps,
        ...     nbrs_lookup,
        ...     predictions,
        ...     precomputed_coefficients_matrix,
        ...     timing,
        ... ) = do_fast_posterior_mean(
        ...     test_features,
        ...     train_features,
        ...     train_responses,
        ...     nn_count=30,
        ...     batch_count=200,
        ...     loss_fn=lool_fn,
        ...     opt_fn=Bayes_optimize,
        ...     k_kwargs=k_kwargs,
        ...     nn_kwargs=nn_kwargs,
        ...     verbose=False,
        ... )

    Args:
        test_features:
            A matrix of shape `(test_count, feature_count)` whose rows consist
            of observation vectors of the test data.
        train_features:
            A matrix of shape `(train_count, feature_count)` whose rows consist
            of observation vectors of the train data.
        train_targets:
            A matrix of shape `(train_count, response_count)` whose rows consist
            of response vectors of the train data.
        nn_count:
            The number of nearest neighbors to employ.
        batch_count:
            The number of elements to sample batch for hyperparameter
            optimization.
        loss_fn:
            The loss functor to use in hyperparameter optimization. Ignored if
            all of the parameters specified by argument `k_kwargs` are fixed.
        opt_fn:
            The optimization functor to use in hyperparameter optimization.
            Ignored if all of the parameters specified by argument `k_kwargs`
            are fixed.
        k_kwargs:
            Assume that the elements are keyword arguments to a MuyGPs model
            (see :func:`~MuyGPyS.examples.regress.make_regressor`).
        nn_kwargs:
            Parameters for the nearest neighbors wrapper. See
            :class:`MuyGPyS.neighbors.NN_Wrapper` for the supported methods and
            their parameters.
        opt_kwargs:
            Parameters for the wrapped optimizer. See the docs of the
            corresponding library for supported parameters.
        verbose:
            If `True`, print summary statistics.

    Returns
    -------
    muygps:
        A (possibly trained) MuyGPs object.
    nbrs_lookup:
        A data structure supporting nearest neighbor queries into
        `train_features`.
    predictions:
        The predicted response associated with each test observation.
    precomputed_coefficients_matrix:
        A matrix of shape `(train_count, nn_count)` whose rows list the
        precomputed coefficients for each nearest neighbors set in the
        training data.
    timing:
        The timing dictionary returned by
        :func:`~MuyGPyS.examples.fast_posterior_mean.fast_posterior_mean_any`,
        with keys `"precompute"`, `"agree"`, `"nn"` and `"pred"`. The
        `make_regressor` call made here is not included in these timings.
    """
    # The `dict()` defaults in the signature are created once and shared by
    # every call, so hand shallow copies downstream: any mutation performed
    # by `make_regressor` then cannot leak into later calls or into the
    # caller's own dictionaries.
    regressor, nbrs_lookup = make_regressor(
        train_features,
        train_targets,
        nn_count=nn_count,
        batch_count=batch_count,
        loss_fn=loss_fn,
        opt_fn=opt_fn,
        k_kwargs=dict(k_kwargs),
        nn_kwargs=dict(nn_kwargs),
        opt_kwargs=dict(opt_kwargs),
        verbose=verbose,
    )

    # Precompute coefficients on the train data and predict the test data.
    (
        posterior_mean,
        precomputed_coefficients_matrix,
        timing,
    ) = fast_posterior_mean_any(
        regressor,
        test_features,
        train_features,
        nbrs_lookup,
        train_targets,
    )

    return (
        regressor,
        nbrs_lookup,
        posterior_mean,
        precomputed_coefficients_matrix,
        timing,
    )
def fast_posterior_mean_any(
    muygps: MuyGPS,
    test_features: mm.ndarray,
    train_features: mm.ndarray,
    nbrs_lookup: NN_Wrapper,
    train_targets: mm.ndarray,
) -> Tuple[mm.ndarray, mm.ndarray, Dict]:
    """
    Run fast posterior mean inference on test data with an existing model.

    The coefficient matrix is precomputed on the train data, each test point
    is matched to its first returned train neighbor, and that neighbor's
    neighbor set and coefficients are used for the prediction.

    Args:
        muygps:
            A (possibly trained) MuyGPS object.
        test_features:
            A matrix of shape `(test_count, feature_count)` whose rows consist
            of observation vectors of the test data.
        train_features:
            A matrix of shape `(train_count, feature_count)` whose rows consist
            of observation vectors of the train data.
        nbrs_lookup:
            A data structure supporting nearest neighbor queries into
            `train_features`.
        train_targets:
            A matrix of shape `(train_count, response_count)` whose rows consist
            of response vectors of the train data.

    Returns
    -------
    posterior_mean:
        The predicted response associated with each test observation.
    precomputed_coefficients_matrix:
        A matrix of shape `(train_count, nn_count)` whose rows list the
        precomputed coefficients for each nearest neighbors set in the
        training data.
    timing:
        A dictionary of `perf_counter` differences (seconds) with the keys
        `"precompute"`, `"agree"`, `"nn"` and `"pred"`.
    """
    t_begin = perf_counter()
    coefficients, train_nn_indices = make_fast_regressor(
        muygps,
        nbrs_lookup,
        train_features,
        train_targets,
    )
    t_precompute = perf_counter()

    # NOTE(review): this stamp is taken immediately after the previous one,
    # so the "agree" interval spans no work and the `fast_nn_update` call
    # below is counted under "nn" -- confirm this is intended.
    t_agree = perf_counter()

    # NOTE(review): `make_fast_regressor` already passes its indices through
    # `fast_nn_update`; confirm that applying it a second time is intended.
    train_nn_indices = fast_nn_update(train_nn_indices)
    test_nn_indices, _ = nbrs_lookup.get_nns(test_features)
    t_neighbors = perf_counter()

    # First returned train neighbor of every test point, and that neighbor's
    # own row of train neighbor indices.
    nearest_train = test_nn_indices[:, 0]
    nearest_train_nn_set = train_nn_indices[nearest_train, :].astype(int)

    # Tuple-unpacking the shape requires `test_features` to be 2-dimensional.
    test_count, _ = test_features.shape
    every_test_index = mm.arange(0, test_count)
    posterior_mean = fast_posterior_mean_from_indices(
        muygps,
        every_test_index,
        nearest_train_nn_set,
        test_features,
        train_features,
        nearest_train,
        coefficients,
    )
    t_predict = perf_counter()

    return (
        posterior_mean,
        coefficients,
        {
            "precompute": t_precompute - t_begin,
            "agree": t_agree - t_precompute,
            "nn": t_neighbors - t_agree,
            "pred": t_predict - t_neighbors,
        },
    )