Commits

Ruben Martinez-Cantin committed 3199f61

New algorithms and ways to learn hyperparams

Comments (0)

Files changed (11)

   ./src/hierarchical_gaussian_process.cpp
   ./src/gaussian_process.cpp
   ./src/gaussian_process_ml.cpp
+  ./src/student_t_process_jef.cpp
 #  ./src/gaussian_process_ign.cpp
 #  ./src/student_t_process.cpp
   ./src/parameters.cpp
   par.alpha = PRIOR_ALPHA;
   par.beta = PRIOR_BETA;
   par.noise = DEFAULT_NOISE;
-  par.surr_name = S_GAUSSIAN_PROCESS_ML;//S_STUDENT_T_PROCESS_JEFFREYS;
+  par.surr_name = S_STUDENT_T_PROCESS_JEFFREYS;
   par.kernel.name = "kSum(kSEISO,kConst)";
   par.mean.name = "mSum(mConst,mConst)";
   par.l_type = L_ML;

include/gauss_distribution.hpp

    * @param x query point
    * @return probability
    */
-  double pdf(double x) {return boost::math::pdf(d_,x); };
+  // Probability density evaluated on the standardized variable
+  // z = (x - mean_) / std_, where d_ is the underlying boost distribution.
+  // NOTE(review): this returns pdf(z), NOT pdf(z)/std_ — the change-of-variables
+  // Jacobian (1/std_) is omitted, so this is not the true density of x under
+  // N(mean_, std_). Presumably intentional for the acquisition-function
+  // algebra; confirm against the callers before using as a real density.
+  double pdf(double x) 
+  {
+    x = (x - mean_) / std_;
+    return boost::math::pdf(d_,x); 
+  };
 
   /** 
    * \brief Expected Improvement algorithm for minimization

include/gaussian_process_normal.hpp

+/** \file gaussian_process_normal.hpp
+    \brief Gaussian process with normal prior on the parameters */
+/*
+-------------------------------------------------------------------------
+   This file is part of BayesOpt, an efficient C++ library for 
+   Bayesian optimization.
+
+   Copyright (C) 2011-2013 Ruben Martinez-Cantin <rmcantin@unizar.es>
+ 
+   BayesOpt is free software: you can redistribute it and/or modify it 
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   BayesOpt is distributed in the hope that it will be useful, but 
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with BayesOpt.  If not, see <http://www.gnu.org/licenses/>.
+------------------------------------------------------------------------
+*/
+
+
+#ifndef __GAUSSIAN_PROCESS_NORMAL_HPP__
+#define __GAUSSIAN_PROCESS_NORMAL_HPP__
+
+#include "gauss_distribution.hpp"
+#include "hierarchical_gaussian_process.hpp"
+
+
+namespace bayesopt
+{
+  
+  /** \addtogroup NonParametricProcesses */
+  /**@{*/
+
+  /**
+   * \brief Gaussian process with normal prior on the parameters 
+   */
+  class GaussianProcessNormal: public HierarchicalGaussianProcess
+  {
+  public:
+    GaussianProcessNormal(size_t dim, bopt_params params);
+    virtual ~GaussianProcessNormal();
+
+    /** 
+     * \brief Function that returns the prediction of the GP for a query point
+     * in the hypercube [0,1].
+     * 
+     * @param query in the hypercube [0,1] to evaluate the Gaussian process
+     * @return pointer to the probability distribution.
+     */	
+    ProbabilityDistribution* prediction(const vectord &query);
+
+  private:
+
+    /** 
+     * \brief Computes the negative log likelihood and its gradient of
+     * the data. 
+     * @return value negative log likelihood
+     */
+    double negativeLogLikelihood();
+
+    /** 
+     * \brief Precompute some values of the prediction that do not depend on
+     * the query.
+     * @return error code
+     */
+    int precomputePrediction();
+
+  private:
+    vectord mWML;           //!< ML estimate of the mean-function weights
+    double mSigML;           //!< ML estimate of the signal variance (sigma)
+    
+    /// Precomputed GP prediction operations
+    vectord mAlphaF;
+    matrixd mKF, mL2;
+
+    GaussianDistribution* d_;      //!< Predictive distribution (raw pointer; presumably owned and freed in the destructor — confirm in the .cpp)
+  };
+
+  /**@}*/
+
+} //namespace bayesopt
+
+#endif

include/parameters.h

     size_t n_iterations;         /**< Maximum BayesOpt evaluations (budget) */
     size_t n_inner_iterations;   /**< Maximum inner optimizer evaluations */
     size_t n_init_samples;       /**< Number of samples before optimization */
+    size_t n_iter_relearn;       /**< Number of samples before relearn kernel */
 
     size_t verbose_level;        /**< 1-Error,2-Warning,3-Info. 4-6 log file*/
     char* log_filename;          /**< Log file path (if applicable) */

include/student_t_distribution.hpp

    * @param x query point
    * @return probability
    */
-  double pdf(double x) {return boost::math::pdf(d_,x); };
+  // Probability density evaluated on the standardized variable
+  // z = (x - mean_) / std_, where d_ is the underlying boost Student-t.
+  // NOTE(review): this returns pdf(z), NOT pdf(z)/std_ — the change-of-variables
+  // Jacobian (1/std_) is omitted, so this is not the true density of x under
+  // the scaled/shifted t distribution. Presumably intentional for the
+  // acquisition-function algebra; confirm against the callers.
+  double pdf(double x) 
+  {
+    x = (x - mean_) / std_;
+    return boost::math::pdf(d_,x); 
+  };
 
   /** 
    * \brief Expected Improvement algorithm for minimization

include/student_t_process_jef.hpp

+/** \file student_t_process_jef.hpp
+    \brief Student T process with Jeffreys priors */
+/*
+-------------------------------------------------------------------------
+   This file is part of BayesOpt, an efficient C++ library for 
+   Bayesian optimization.
+
+   Copyright (C) 2011-2013 Ruben Martinez-Cantin <rmcantin@unizar.es>
+ 
+   BayesOpt is free software: you can redistribute it and/or modify it 
+   under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   BayesOpt is distributed in the hope that it will be useful, but 
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with BayesOpt.  If not, see <http://www.gnu.org/licenses/>.
+------------------------------------------------------------------------
+*/
+
+
+#ifndef __STUDENT_T_PROCESS_JEF_HPP__
+#define __STUDENT_T_PROCESS_JEF_HPP__
+
+#include "student_t_distribution.hpp"
+#include "hierarchical_gaussian_process.hpp"
+
+
+namespace bayesopt
+{
+  
+  /** \addtogroup NonParametricProcesses */
+  /**@{*/
+
+  /**
+   * \brief Student T process with Jeffreys prior
+   */
+  class StudentTProcessJeffreys: public HierarchicalGaussianProcess
+  {
+  public:
+    StudentTProcessJeffreys(size_t dim, bopt_params params);
+    virtual ~StudentTProcessJeffreys();
+
+    /** 
+     * \brief Function that returns the prediction of the GP for a query point
+     * in the hypercube [0,1].
+     * 
+     * @param query in the hypercube [0,1] to evaluate the Student T process
+     * @return pointer to the probability distribution.
+     */	
+    ProbabilityDistribution* prediction(const vectord &query);
+
+  private:
+
+    /** 
+     * \brief Computes the negative log likelihood and its gradient of
+     * the data. In this case, it is equivalent to the
+     * negativeTotalLogLikelihood
+     * @return value negative log likelihood
+     */
+    double negativeLogLikelihood();
+
+    /** 
+     * \brief Precompute some values of the prediction that do not depend on
+     * the query.
+     * @return error code
+     */
+    int precomputePrediction();
+
+  private:
+    vectord mWML;           //!< ML estimate of the mean-function weights
+    double mSigML;           //!< ML estimate of the signal variance (sigma)
+    
+    /// Precomputed GP prediction operations
+    vectord mAlphaF;
+    matrixd mKF, mL2;
+
+    StudentTDistribution* d_;      //!< Predictive distribution (owned: allocated in the constructor, deleted in the destructor)
+  };
+
+  /**@}*/
+
+} //namespace bayesopt
+
+#endif

src/bayesoptcont.cpp

     sampleInitialPoints();
 
     vectord xNext(mDims);
-    for (size_t ii = 0; ii < mParameters.n_iterations; ii++)
+    for (size_t ii = 0; ii < mParameters.n_iterations; ++ii)
       {      
 	// Find what is the next point.
 	nextPoint(xNext);
 	double yNext = evaluateNormalizedSample(xNext);
-	mGP->updateSurrogateModel(xNext,yNext); 
+	if ((mParameters.n_iter_relearn > 0) && 
+	    ((ii + 1) % mParameters.n_iter_relearn == 0))
+	  mGP->fullUpdateSurrogateModel(xNext,yNext); 
+	else
+	  mGP->updateSurrogateModel(xNext,yNext); 
+
 	plotStepData(ii,xNext,yNext);
       }
 

src/nonparametricprocess.cpp

 
 #include "gaussian_process.hpp"
 #include "gaussian_process_ml.hpp"
-#include "gaussian_process_ign.hpp"
-#include "student_t_process.hpp"
+//#include "gaussian_process_ign.hpp"
+#include "student_t_process_jef.hpp"
 
 
 namespace bayesopt
       // case S_GAUSSIAN_PROCESS_INV_GAMMA_NORMAL:
       // 	s_ptr = new GaussianProcessIGN(dim,parameters);	break;
 
-      // case S_STUDENT_T_PROCESS_JEFFREYS: 
-      // 	s_ptr = new StudentTProcess(dim,parameters); break;
+      case S_STUDENT_T_PROCESS_JEFFREYS: 
+      	s_ptr = new StudentTProcessJeffreys(dim,parameters); break;
 
       default:
 	FILE_LOG(logERROR) << "Error: surrogate function not supported.";

src/parameters.cpp

 
 
 static const bopt_params DEFAULT_PARAMS = {
-  DEFAULT_ITERATIONS, MAX_INNER_EVALUATIONS, DEFAULT_SAMPLES, 
+  DEFAULT_ITERATIONS, MAX_INNER_EVALUATIONS, 
+  DEFAULT_SAMPLES, DEFAULT_SAMPLES, 
   DEFAULT_VERBOSE, DEF_LOG_FILE,
   S_GAUSSIAN_PROCESS, 
   DEFAULT_SIGMA, DEFAULT_NOISE,

src/student_t_process_jef.cpp

+/*
+-----------------------------------------------------------------------------
+   Copyright (C) 2011 Ruben Martinez-Cantin <rmcantin@unizar.es>
+ 
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <http://www.gnu.org/licenses/>.
+-----------------------------------------------------------------------------
+*/
+
+
+#include "cholesky.hpp"
+#include "trace_ublas.hpp"
+#include "student_t_process_jef.hpp"
+
+namespace bayesopt
+{
+
+  using boost::numeric::ublas::inplace_solve;
+  using boost::numeric::ublas::inner_prod;
+  using boost::numeric::ublas::lower_tag;
+  using boost::numeric::ublas::lower;
+  using boost::numeric::ublas::trans;
+  
+
+  StudentTProcessJeffreys::StudentTProcessJeffreys(size_t dim, 
+						   bopt_params params):
+    HierarchicalGaussianProcess(dim, params)
+  {
+    // Allocate the predictive Student-t distribution; its degrees of
+    // freedom are set later in precomputePrediction(). Released in the
+    // destructor.
+    d_ = new StudentTDistribution();
+  }  // Constructor
+
+
+
+  StudentTProcessJeffreys::~StudentTProcessJeffreys()
+  {
+    // Release the predictive distribution allocated in the constructor.
+    delete d_;
+  } // Default destructor
+
+
+
+
+  // Negative log likelihood of the data. For this model it coincides with
+  // the total negative log likelihood (see the note in the header), so we
+  // simply delegate.
+  double StudentTProcessJeffreys::negativeLogLikelihood()
+  {
+    /* In this case, they are equivalent */
+    return negativeTotalLogLikelihood();
+  }
+
+
+  /**
+   * Predictive distribution of the Student-t process at a query point.
+   *
+   * Uses the factors cached by precomputePrediction() (mL, mL2, mKF,
+   * mWML, mAlphaF, mSigML) so only per-query solves are done here.
+   *
+   * @param query point in the hypercube [0,1] to evaluate
+   * @return pointer to the (internally owned, reused) distribution d_
+   */
+  ProbabilityDistribution* 
+  StudentTProcessJeffreys::prediction(const vectord &query )
+  {
+    // Kernel self-correlation and cross-correlation with the data.
+    double kq = (*mKernel)(query, query);  // was ";;" — stray semicolon removed
+    vectord kn = computeCrossCorrelation(query);
+    vectord phi = mMean->getFeatures(query);
+  
+    // v = L^-1 kn: forward substitution on the data Cholesky factor.
+    vectord v(kn);
+    inplace_solve(mL,v,lower_tag());
+
+    // rq = phi - F^T K^-1 kn (feature residual for the query).
+    vectord rq = phi - prod(v,mKF);
+
+    // rho = L2^-1 rq: correction term from the mean-function uncertainty.
+    vectord rho(rq);
+    inplace_solve(mL2,rho,lower_tag());
+    
+    double yPred = inner_prod(phi,mWML) + inner_prod(v,mAlphaF);
+    double sPred = sqrt( mSigML * (kq - inner_prod(v,v) 
+				   + inner_prod(rho,rho)));
+
+    d_->setMeanAndStd(yPred,sPred);
+    return d_;
+  }
+
+  // Precompute the query-independent factors used by prediction():
+  // mKF, mL2, mWML, mAlphaF, mSigML, and the t-distribution's dof.
+  int StudentTProcessJeffreys::precomputePrediction()
+  {
+    size_t n = mGPXX.size();
+    size_t p = mMean->nFeatures();
+
+    // mKF = L^-1 F^T (whitened feature matrix).
+    mKF = trans(mFeatM);
+    inplace_solve(mL,mKF,lower_tag());
+
+    // mL2 = chol(F K^-1 F^T). The assignment before cholesky_decompose
+    // presumably sizes/initializes the output factor — do not reorder.
+    matrixd FKF = prod(trans(mKF),mKF);
+    mL2 = FKF;
+    utils::cholesky_decompose(FKF,mL2);
+
+    // Ky = L^-1 y.
+    vectord Ky(mGPY);
+    inplace_solve(mL,Ky,lower_tag());
+
+    // mWML: generalized least-squares weights of the mean features.
+    mWML = prod(Ky,mKF);
+    utils::cholesky_solve(mL2,mWML,lower());
+
+    // mAlphaF = L^-1 (y - F^T w); mSigML is the unbiased residual variance.
+    mAlphaF = mGPY - prod(mWML,mFeatM);
+    inplace_solve(mL,mAlphaF,lower_tag());
+    mSigML = inner_prod(mAlphaF,mAlphaF)/(n-p);
+    
+    d_->setDof(n-p);  
+    // NOTE(review): 1 appears to signal success here — confirm against the
+    // error-code convention the callers expect.
+    return 1;
+  }
+
+} //namespace bayesopt