Source code for heron.regression

"""
Functions and classes for contructing regression surrogate models.
"""

import math as m
import numpy as np
import emcee
import scipy.linalg
from scipy.optimize import minimize
import george
import scipy
from .training import *
import copy

[docs]def load(filename):
    """
    Load a pickled heron Gaussian Process.
    """
    import pickle
    with open(filename, "rb") as gp_file:
        return pickle.load(gp_file)

[docs]class SingleTaskGP(object):
    """
    This is an implementaion of a Single task Gaussian process 
    regressor. That is, a GPR which is capable of acting as a 
    surrogate to a many-to-one function. The Single Task GPR is
    the fundamental building block of the MultiTask GPR, which 
    consists of multiple Single Tasks which are trained in tandem 
    (but which do NOT share correlation information).
    ---
    Ahem... There /are/ components of this code in here, but 
    things need a little bit more thought before this will work
    efficiently...
    An implementation of a Gaussian Process Regressor with 
    multiple response outputs and multiple inputs.
    """
    
    km = None
    
    def __repr__(self):
        """
        The printable representation of this object.
        """
        return "<Heron Gaussian Process instance with {} training points>".format(len(self.training_data))

    def _html_repr_(self):
        """
        The HTML representation of the object for use with IPython /
        Jupyter notebooks.

        """

        output = "<table>"
        output += "<tr>"
        output += "<th>Heron Gaussian Process</th>"
        output += "<th></th>"
        output += "</tr>"
        output += "<tr>"
        output += "<td>Training Points</th>"
        output += "<td>{}</td>".format(len(self.training_data))
        output += "</tr>"
        output += "<tr>"
        output += "<td>Test Points</th>"
        output += "<td>{}</td>".format(len(self.training_object.test_labels))
        output += "</tr>"
        output += "<tr>"
        output += "<td>Model Correlation</th>"
        output += "<td>{}</td>".format(self.correlation())
        output += "</tr>"
        output += "<tr>"
        output += "<td>Model RMSE</th>"
        output += "<td>{}</td>".format(self.rmse())
        output += "</tr>"
        output += "</table>"
        return output

    def __init__(self, training_data, kernel, tikh=1e-6, solver=george.HODLRSolver, hyperpriors = None, **kwargs):
        """
        Set up the Gaussian process regression.

        Parameters
        ----------
        training data : heron data object
           The training data, consisiting of labels and targets.
        kernel : heron kernel
           The kernel used to calculate the covariance matrix.
        tikh : float
           The Tikhonov regularization factor to be applied to the diagonal
           to avoid the attempt to invert an ill-posed matrix problem. Defaults to 1e-6.
        """

        self.tikh = tikh

        self.training_object = training_data
        self.training_data = self.training_object.targets
        self.training_y = self.training_object.labels
        self.yerror = self.training_object.label_sigma
        self.input_dim = self.training_data.ndim
        self.output_dim = self.training_y.ndim
        self.kernel = kernel #kernel(self.training_data.ndim, *kernel_args)
        #kwargs = {}
        if solver == george.HODLRSolver:
            kwargs['tol'] = self.tikh
        self.gp = george.GP(kernel,
                            solver=solver,
                            mean = np.mean(self.training_y),
                            fit_mean=False,
                            fit_white_noise=False,
                            **kwargs
        )
        self.kernel = self.gp.kernel
        self.hyperpriordistributions = hyperpriors
        self.update()
    
[docs]    def active_learn(self, afunction, x,y, iters=1, afunc_args={}):
        """
        Actively train the Gaussian process from a set of provided
        labels and targets using some acquisition function.

        afunction : function
           The acquisition function.
        x : array-like
           The input labels of the data. This can be a multi-dimensional array.
        y : array-like
           The input targets of the data. This can only be a single-dimensional 
           array at present.
        iters : int
           The number of times to iterate the learning process: equivalently, 
           the number of training points to digest.
        afunc_args : dict
           A dictionary of arguments for the acquisition function. Optional.

        """
        i=0
        while i < iters:
            # Choose the new sample from the area with the greatest uncertainty
            mean, var =  self.prediction(x)
            err = np.sqrt(np.diag(np.abs(var)))
            LB = afunction(mean, err, **afunc_args)
            new_sample = np.argmax(LB)
            self.add_data(np.atleast_1d(x[new_sample]), y[new_sample])
            i += 1

[docs]    def add_data(self, target, label, label_error=None):
        """
        Add data to the Gaussian process.
        """
        self.training_object.add_data(target, label, label_sigma=label_error, target_sigma=None)
        self.training_data = self.training_object.targets
        self.training_y = self.training_object.labels[0]
        self.yerror = self.training_object.label_sigma
        self.update()

[docs]    def set_bmatrix(self, values):
        """
        Set the values of the B matrix from a vector.
        """
        bm = values.reshape(self.output_dim, self.output_dim)
        bm += self.tikh * np.eye(bm.shape[0], bm.shape[1])
        if not np.all(np.linalg.eigvals(bm) > 0): return -1e25
        self.B_matrix = bm
        return self.loglikelihood()
        
[docs]    def set_hyperparameters(self, hypers):
        """
        Set the hyperparameters of the kernel function.
        """
        self.gp.set_parameter_vector(hypers)
        self.update()
        #return self.loglikelihood()

[docs]    def get_hyperparameters(self):
        """
        Return the kernel hyperparameters.
        """
        return self.gp.get_parameter_vector()
        
    
[docs]    def update(self):
        """
        Update the stored matrices.
        """
        self.gp.compute(self.training_data, self.training_object.label_sigma)
        #self.test_predict()


[docs]    def prediction(self, new_datum, normalised=False):
        """
        Produce a prediction at a new point, or set of points.

        Parameters
        ----------
        new_datum : array
           The coordinates of the new point(s) at which the GPR model should be evaluated.
        normalised : bool
           A flag to indicate if the input is already normalised 
           (this might be the case if you're trying to efficiently sample to parameter 
           space). If False the input will be normalised to the same range as the 
           training data.

        Returns
        -------
        prediction mean : array
           The mean values of the function drawn from the Gaussian Process.
        prediction variance : array
           The variance values for the function drawn from the GP.
        """
        
        training_y = self.training_y
        if training_y.ndim > 1:
            training_y = training_y[0,:]
        #new_datum = np.atleast_2d(new_datum)
        if not normalised:
            new_datum = self.training_object.normalise(new_datum, "target")
        mean, variance = self.gp.predict(self.training_y, new_datum, return_var=True)
        #return mean, variance
        return self.training_object.denormalise(mean, "label"), self.training_object.denormalise(variance, "label")


[docs]    def test_predict(self):
        """
        Calculate the value of the GP at the test targets.   
        """
        self.test_predictions = self.prediction(self.training_object.denormalise(self.training_object.test_targets, "target"))[0]

[docs]    def correlation(self):
        """
        Calculate the correlation between the model and the test data.
        
        Returns
        -------
        corr : float
           The correlation squared.
        """
        a = self.training_object.denormalise(self.training_object.test_labels, "label")
        b = self.test_predictions
        return np.linalg.det((np.cov(a,b) / np.sqrt(np.var(a) * np.var(b)))**2)

[docs]    def rmse(self):
        """
        Calculate the root mean squared error of the whole model.
        
        Returns
        -------
        rmse : float
           The root mean squared error.
        """

        a = self.training_object.denormalise(self.training_object.test_labels, "label")
        b = self.test_predictions
        return np.sqrt(np.mean((a - b)**2) )

[docs]    def expected_improvement(self, x):
        '''
        Returns the expected improvement at the design vector X in the model
        
        Parameters
        ==========
        x : array-like
           A real world coordinates design vector
        
        Returns
        =======
        EI: float 
           The expected improvement value at the point x in the model
        '''

        x = np.atleast_2d(x)
        p, S = self.prediction(x)
        p = np.abs(p)
        y_min = np.min(self.training_y)
        EI_one = ((y_min - p) * (0.5 + 0.5*scipy.special.erf((
            1./np.sqrt(2.))*((y_min - p) /
                             S))))
        EI_two = ((S * (1. / np.sqrt(2. * np.pi))) * (np.exp(-(1./2.) *
                                                             ((y_min - p)**2. / S**2.))))
        EI = EI_one + EI_two
        return EI

[docs]    def nei(self, x):
        """
        Calculate the negative of the expected improvement at a point x.
        """
        return -self.expected_improvement(x)

[docs]    def ln_likelihood(self, p):
        """
        Provides a convenient wrapper to the ln likelihood function.

        Notes
        -----
        This is implemented in a separate function because of the mild 
        peculiarities of how the pickle module needs to serialise 
        functions, which means that instancemethods (which this would 
        become) can't be serialised. 
        """
        return ln_likelihood(p, self)

    def _lnlikelihood(self, p):
        """
        Calculates the lnlikelihood for the GP.

        Parameters
        ----------
        p : list
           The vector of hyperparameters at which the lnlikelihood should be evaluated.

        Returns
        -------
        float 
           The lnlikelihood for the system.

        """
        self.set_hyperparameters(p)
        if self.training_y.ndim > 1:
            return  self.gp.lnlikelihood(self.training_y[0])
        else:
            return self.gp.lnlikelihood(self.training_y)
    
[docs]    def neg_ln_likelihood(self, p):
        """

        Returns the negative of the log-likelihood; designed for use with
        minimisation algorithms.

        Parameters
        ----------
        gp : heron `Regressor` object
           The gaussian process to be evaluated.
        p : array-like
           An array of the hyper-parameters at which the model is to be evaluated.

        Returns
        -------
        neg_ln_likelihood : float
           The negative of the log-likelihood for the Gaussian process
        """
        return -self.ln_likelihood(p)

[docs]    def entropy(self):
        """Return the entropy of the Gaussian Process distribution. This can
        be calculated directly from the covariance matrix, making this
        a nice, quick calculation to perform.

        Returns
        -------
        entropy : float
           The differential entropy of the GP.
        """
        
        return 0.5 * ( np.log((2*np.pi*np.e)**self.training_data.shape[1]) + self.gp.solver.log_determinant)

[docs]    def hyperpriortransform(self, p):
        """Return the true value in the desired hyperprior space, given an
        input of a unit-hypercube prior space.

        Parameters
        ----------
        p : array-like
           The point in the unit hypercube space

        Returns
        -------
        x : The position in the desired hyperparameter space of the point.
        """

        hypers = self.hyperpriordistributions
        x = []
        for hyper, pv in zip(hypers, p):
            x.append(hyper.transform(p))
        return np.array(x)
    
[docs]    def loghyperpriors(self, p):

        """
        Calculate the log of the hyperprior distributions at a given 
        point.
        
        Parameters
        ----------
        p : ndarray
            The location to be tested.
        """
        hypers = self.hyperpriordistributions
        probs = 1
        for hyper, pv in zip(hypers, p):
            probs += hyper.logp(pv)
        return probs
        

[docs]    def grad_neg_ln_likelihood(self, p):
        """
        Return the negative of the gradient of the log likelihood for the
        GP when its hyperparameters have some specified value.

        Parameters
        ----------
        gp : heron `Regressor` object
           The gaussian process to be evaluated
        p : array-like
           An array of the hyper-parameters at which the model is to be evaluated.

        Returns
        -------
        grad_ln_likelihood : float
           The gradient of log-likelihood for the Gaussian process
        """

        self.gp.set_parameter_vector(p)
        return self.gp.grad_lnlikelihood(self.training_y)

[docs]    def train(self, method="MCMC", metric="loglikelihood", sampler="ensemble", **kwargs):
        """
        Train the Gaussian process by finding the optimal 
        values for the kernel hyperparameters.
        
        Parameters
        ----------
        method : str {"MCMC", "MAP", "nested"}
           The method to be employed to calculate the hyperparameters.
        metric : str
           The metric which should be used to assess the model.
        hyperpriors : list
           The hyperprior distributions for the hyperparameters. Defaults to None, in which 
           case the prior is uniform over all real numbers.
        """

        if method=="MCMC":
            gp, samples, burn = run_training_mcmc(self, metric = metric, samplertype=sampler, **kwargs)
            self.gp = gp
            return samples, burn
        elif method=="nested":
            # Use nested sampling to train the model
            # NB this is experimental
            results = run_nested(gp, metric=metric, **kwargs)
            return results
        elif method == "MAP":
            MAP = run_training_map(self, metric = metric, **kwargs)
            
            return MAP
        
[docs]    def save(self, filename):
        """
        Save the Gaussian Process to a file which can be reloaded later.

        Parameters
        ----------
        filename : str
           The location at which the Gaussian Process should be written.

        Notes
        -----
        In the current implementation the serialisation of the GP is performed
        by the python `pickle` library, which isn't guaranteed to be binary-compatible 
        with all machines.
        """

        import pickle
        with open(filename, "wb") as filedump:
            pickle.dump(self, filedump)
        
[docs]class MultiTaskGP(SingleTaskGP):
    """
    An implementation of a co-trained set of Gaussian processes which
    share the same hyperparameters, but which model differing
    data. The training of these models is described in RW pp115--116.

    A multi-task GPR is capable of acting as a surrogate to a many-to-many function, 
    and is trained by making the assumption that all of the outputs from the function 
    share a common correlation structure.
    
    The principle difference compared to a single task GP is the
    presence of multiple Gaussian Processes, with one to model each
    dimension of the output data.

    Notes
    -----
    The MultiTask GPR implementation is very much a work in progress at the
    moment, and not all methods implemented in the SingleTask GPR are implemented
    correctly yet.

    """

    def __init__(self, training_data, kernel, tikh=1e-6, solver=george.HODLRSolver, hyperpriors = None):
        """
        Set up the multi-task Gaussian process regression.

        Parameters
        ----------
        training data : heron data object
           The training data, consisiting of labels and targets.
        kernel : heron kernel
           The kernel used to calculate the covariance matrix.
        tikh : float
           The Tikhonov regularization factor to be applied to the diagonal
           to avoid the attempt to invert an ill-posed matrix problem. Defaults to 1e-6.
        """

        self.tikh = tikh

        self.training_object = training_data
        self.training_data = self.training_object.targets
        self.training_y = self.training_object.labels
        self.yerror = self.training_object.label_sigma
        self.input_dim = self.training_data.ndim
        self.output_dim = self.training_y.ndim
        #self.kernel = kernel #kernel(self.training_data.ndim, *kernel_args)
        self.gps = []
        for i in xrange(self.output_dim):
            sub_training_data = training_data.copy()
            sub_training_data.labels = sub_training_data.labels[:,i]
            sub_training_data.label_sigma = sub_training_data.label_sigma[:,i]
            self.gps.append(SingleTaskGP(sub_training_data, kernel, tikh, solver, hyperpriors))
        self.kernel = self.gps[0].kernel
        self.hyperpriordistributions = hyperpriors
        self.update()

[docs]    def update(self):
        """
        Update the stored matrices.
        """
        for gp in self.gps:
            gp.update()

[docs]    def get_hyperparameters(self):
        """
        Return the kernel hyperparameters. Returns the hyperparameters of
        only the first GP in the network; the others /should/ all be
        the same, but there might be something to be said for checking
        this.

        Returns
        -------
        hypers : list
           A list of the kernel hyperparameters
        """
        return self.gps[0].get_hyperparameters()
            
[docs]    def set_hyperparameters(self, hypers):
        """
        Set the hyperparameters of the kernel function on each Gaussian process.
        """
        for gp in self.gps:
            gp.set_hyperparameters(hypers)

[docs]    def train(self, method="MCMC", metric="loglikelihood", sampler="ensemble", **kwargs):
        """
        Train the Gaussian process by finding the optimal 
        values for the kernel hyperparameters.
        
        Parameters
        ----------
        method : str {"MCMC", "MAP"}
           The method to be employed to calculate the hyperparameters.
        metric : str
           The metric which should be used to assess the model.
        hyperpriors : list
           The hyperprior distributions for the hyperparameters. Defaults to None, in which 
           case the prior is uniform over all real numbers.
        """

        if method=="MCMC":
            samples, burn = run_training_mcmc(self, metric = metric, samplertype=sampler, **kwargs)
            return samples, burn
        elif method == "MAP":
            MAP = run_training_map(self, metric = metric, **kwargs)
            return MAP

    def _lnlikelihood(self, p):
        """
        Calculates the lnlikelihood for the entire system of GPs in the multitask setup.

        Parameters
        ----------
        p : list
           The vector of hyperparameters at which the lnlikelihood should be evaluated.

        Returns
        -------
        float 
           The lnlikelihood for the system.

        """
        self.set_hyperparameters(p)
        lnlike = [gp._lnlikelihood(p) for gp in self.gps]
        return np.sum(lnlike)
        
[docs]    def ln_likelihood(self, p):
        """Provides a wrapper to the ln_likelihood functions for each
        component Gaussian process in the multi-task system.

        Notes
        -----
        This is implemented in a separate function because of the mild 
        peculiarities of how the pickle module needs to serialise 
        functions, which means that instancemethods (which this would 
        become) can't be serialised.

        """
        return ln_likelihood(p, self)

[docs]    def prediction(self, new_datum):
        """
        Produce a prediction at a new point, or set of points.

        Parameters
        ----------
        new_datum : array
           The coordinates of the new point(s) at which the GPR model should be evaluated.

        Returns
        -------
        prediction means : array
           The mean values of the function drawn from the Gaussian Process.
        prediction variances : array
           The variance values for the function drawn from the GP.
        """
        means, variances = [], []
        #new_datum = np.atleast_2d(new_datum)#.T
        new_datum = self.training_object.normalise(new_datum, "target")

        print(new_datum)
        
        for ix, gp in enumerate(self.gps):
            training_y = self.training_y[:,ix]
            mean, variance = gp.gp.predict(training_y, new_datum, return_var=True)
            means.append(mean)#gp.training_object.denormalise(mean, "label"))
            variances.append(variance) #gp.training_object.denormalise(variance, "label"))
        return means, variances

# For backwards compatibility...
[docs]class Regressor(SingleTaskGP):
    pass