Source code for approxposterior.gpUtils

# -*- coding: utf-8 -*-
"""
:py:mod:`gpUtils.py` - Gaussian Process Utilities
-------------------------------------------------

Gaussian process utility functions for initializing GPs and optimizing their
hyperparameters.

"""

# Tell module what it's allowed to export
__all__ = ["defaultHyperPrior", "defaultGP", "optimizeGP"]

from . import utility as util
import numpy as np
import george
from scipy.optimize import minimize
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error


def defaultHyperPrior(p):
    """
    Default prior function for GP hyperparameters. This prior keeps the
    hyperparameters within a reasonable, but large, range, [-20, 20]. Note
    that george operates on the *log* hyperparameters, except for the mean
    function.

    Parameters
    ----------
    p : array/iterable
        Array of GP hyperparameters

    Returns
    -------
    prior : float
    """

    # Restrict range of hyperparameters (ignoring mean term)
    if np.any(np.fabs(p)[1:] > 20):
        return -np.inf

    return 0.0
# end function
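
# Example (an illustrative sketch, not part of the module's API): the prior
# returns 0.0 whenever every non-mean entry of the hyperparameter vector lies
# in [-20, 20], and -inf otherwise. The vectors below are made-up values; the
# first entry plays the role of the (untransformed) mean term.
#
#     >>> import numpy as np
#     >>> defaultHyperPrior(np.array([5.0, 0.3, -1.2]))   # mean term ignored
#     0.0
#     >>> defaultHyperPrior(np.array([5.0, 0.3, 25.0]))   # log hyperparameter out of range
#     -inf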

def _nll(p, gp, y, priorFn=None):
    """
    Given parameters and data, compute the negative log likelihood of the
    data under the george Gaussian process.

    Parameters
    ----------
    p : array
        GP hyperparameters
    gp : george.GP
    y : array
        Data to condition GP on
    priorFn : callable
        Prior function for the GP hyperparameters, p

    Returns
    -------
    nll : float
        Negative log-likelihood of y under gp
    """

    # Apply priors on GP hyperparameters
    if priorFn is not None:
        if not np.isfinite(priorFn(p)):
            return np.inf

    # Catch singular matrices
    try:
        gp.set_parameter_vector(p)
    except np.linalg.LinAlgError:
        return np.inf

    ll = gp.log_likelihood(y, quiet=True)
    return -ll if np.isfinite(ll) else np.inf
# end function


def _grad_nll(p, gp, y, priorFn=None):
    """
    Given parameters and data, compute the gradient of the negative log
    likelihood of the data under the george Gaussian process.

    Parameters
    ----------
    p : array
        GP hyperparameters
    gp : george.GP
    y : array
        Data to condition GP on
    priorFn : callable
        Prior function for the GP hyperparameters, p

    Returns
    -------
    gnll : array
        Gradient of the negative log-likelihood of y under gp
    """

    # Apply priors on GP hyperparameters
    if priorFn is not None:
        if not np.isfinite(priorFn(p)):
            return np.inf

    # Negative gradient of log likelihood
    return -gp.grad_log_likelihood(y, quiet=True)
# end function
def defaultGP(theta, y, order=None, white_noise=-12, fitAmp=False):
    """
    Basic utility function that initializes a simple GP with an
    ExpSquaredKernel. This kernel works well in many applications as it
    effectively enforces a prior on the smoothness of the function and is
    infinitely differentiable.

    Parameters
    ----------
    theta : array
        Design points
    y : array
        Data to condition GP on, e.g. the lnlike + lnprior at each design
        point, theta.
    order : int, optional
        Order of the LinearKernel to add to the ExpSquaredKernel. Defaults to
        None, that is, no LinearKernel is added and the GP uses only the
        ExpSquaredKernel.
    white_noise : float, optional
        From the george docs: "A description of the logarithm of the white
        noise variance added to the diagonal of the covariance matrix".
        Defaults to white_noise = -12, i.e. a log white noise variance of
        -12. Note: if order is not None, you might need to set white_noise
        to a larger value for the computation to be numerically stable, but
        this, as always, depends on the application.
    fitAmp : bool, optional
        Whether or not to include an amplitude term. Defaults to False.

    Returns
    -------
    gp : george.GP
        Gaussian process with initialized kernel and factorized covariance
        matrix.
    """

    # Tidy up the shapes and determine dimensionality
    theta = np.asarray(theta).squeeze()
    y = np.asarray(y).squeeze()
    if theta.ndim <= 1:
        ndim = 1
    else:
        ndim = theta.shape[-1]

    # Guess initial metric, or scale length of the covariances (must be > 0)
    initialMetric = np.fabs(np.random.randn(ndim))

    # Create kernel: we'll model covariances in loglikelihood space using an
    # ndim-dimensional Squared Exponential Kernel
    kernel = george.kernels.ExpSquaredKernel(metric=initialMetric,
                                             ndim=ndim)

    # Include an amplitude term?
    if fitAmp:
        kernel = np.var(y) * kernel

    # Add a linear regression kernel of order order?
    # Use a rough guess for the amplitude and for the scale length (log(gamma^2))
    if order is not None:
        kernel = kernel + (np.var(y)/10.0) * george.kernels.LinearKernel(log_gamma2=initialMetric[0],
                                                                         order=order,
                                                                         bounds=None,
                                                                         ndim=ndim)

    # Create GP and compute the kernel, aka factor the covariance matrix
    gp = george.GP(kernel=kernel, fit_mean=True, mean=np.median(y),
                   white_noise=white_noise, fit_white_noise=False)
    gp.compute(theta)

    return gp
# end function
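
# Example (a minimal sketch with made-up 1-D design points; the variable
# names below are illustrative, not part of this module): condition the
# default GP on a few samples of a simple quadratic log-likelihood surface.
#
#     >>> import numpy as np
#     >>> thetaTrain = np.linspace(-3, 3, 10).reshape(-1, 1)
#     >>> yTrain = -0.5 * thetaTrain.squeeze()**2
#     >>> gp = defaultGP(thetaTrain, yTrain, white_noise=-12, fitAmp=False)
#     >>> mu, var = gp.predict(yTrain, thetaTrain, return_var=True)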
def optimizeGP(gp, theta, y, seed=None, nGPRestarts=1, method="powell",
               options=None, p0=None, gpHyperPrior=defaultHyperPrior):
    """
    Optimize the hyperparameters of an arbitrary george Gaussian process by
    maximizing the marginal log-likelihood.

    Parameters
    ----------
    gp : george.GP
    theta : array
        Design points
    y : array
        Data to condition GP on
    seed : int, optional
        numpy RNG seed. Defaults to None.
    nGPRestarts : int, optional
        Number of times to restart the optimization. Defaults to 1. Increase
        this number if the GP isn't optimized well.
    method : str, optional
        scipy.optimize.minimize method. Defaults to powell.
    options : dict, optional
        kwargs for the scipy.optimize.minimize function. Defaults to None.
    p0 : array, optional
        Initial guess for kernel hyperparameters. If None, defaults to
        np.random.randn for each parameter.
    gpHyperPrior : str/callable, optional
        Prior function for the GP hyperparameters. Defaults to the
        defaultHyperPrior fn. That function keeps each log hyperparameter
        (ignoring the mean term) within the range [-20, 20].

    Returns
    -------
    optimizedGP : george.GP
    """

    # Run the optimization routine nGPRestarts times
    res = []
    mll = []

    # Optimize GP hyperparameters by maximizing marginal log_likelihood
    for ii in range(nGPRestarts):
        # Initialize inputs for each minimization
        if p0 is None:
            # Pick random guesses for kernel hyperparameters
            x0 = [np.median(y)] + [np.random.randn() for _ in range(len(gp.get_parameter_vector())-1)]
        else:
            # Take user-supplied guess and slightly perturb it
            x0 = np.array(p0) + np.min(p0) * 1.0e-3 * np.random.randn(len(p0))

        # Minimize GP nll, save result, evaluate marginal likelihood
        if method not in ["nelder-mead", "powell", "cg"]:
            jac = _grad_nll
        else:
            jac = None

        resii = minimize(_nll, x0, args=(gp, y, gpHyperPrior), method=method,
                         jac=jac, bounds=None, options=options)["x"]
        res.append(resii)

        # Update the kernel with solution for computing marginal loglike
        gp.set_parameter_vector(resii)
        gp.recompute()

        # Compute marginal log likelihood for this set of kernel
        # hyperparameters
        mll.append(gp.log_likelihood(y, quiet=True))

    # Pick result with largest marginal log likelihood
    ind = np.argmax(mll)

    # Update gp
    gp.set_parameter_vector(res[ind])
    gp.recompute()

    return gp
# end function
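
# Example end-to-end usage (a minimal sketch; the design points, toy
# response, and settings below are illustrative choices, not defaults
# recommended by this module): initialize the default GP on some training
# data, then optimize its hyperparameters with a few random restarts.
if __name__ == "__main__":
    np.random.seed(42)

    # Made-up 2-D design points and a toy log-likelihood-like response
    thetaTrain = np.random.uniform(low=-5, high=5, size=(25, 2))
    yTrain = -0.5 * np.sum(thetaTrain**2, axis=1)

    # Initialize the GP and factor its covariance matrix
    exampleGP = defaultGP(thetaTrain, yTrain, white_noise=-12)

    # Optimize the GP hyperparameters by maximizing the marginal likelihood
    exampleGP = optimizeGP(exampleGP, thetaTrain, yTrain, seed=42,
                           nGPRestarts=3, method="powell",
                           gpHyperPrior=defaultHyperPrior)

    # Predictive mean and variance at the training points
    mu, var = exampleGP.predict(yTrain, thetaTrain, return_var=True)
    print(mu[:3], var[:3])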