Commits

Ruben Martinez-Cantin committed 4bcd2ae

Empirical Bayes model

  • Parent commits 88f6dde

Files changed (13)

File CMakeLists.txt

   ./src/bayesoptbase.cpp
   ./src/inneroptimization.cpp
   ./src/nonparametricprocess.cpp
+  ./src/empiricalbayesprocess.cpp
   ./src/hierarchical_gaussian_process.cpp
   ./src/gaussian_process.cpp
   ./src/gaussian_process_ml.cpp
     LIBRARY DESTINATION lib
     ARCHIVE DESTINATION lib
   )
-ENDIF(BAYESOPT_PYTHON_INTERFACE)
+ENDIF(BAYESOPT_PYTHON_INTERFACE)

File include/bayesoptcont.hpp

 
 #include "boundingbox.hpp"
 #include "bayesoptbase.hpp"
+#include "inneroptimization.hpp"
 
 namespace bayesopt  {
 
   /** \addtogroup BayesOpt */
   /**@{*/
 
-  class InnerOptimization;
-
   /**
    * \brief Bayesian optimization using different non-parametric
    * processes as distributions over surrogate functions. The

File include/fullbayesprocess.hpp

   /**
    * \brief Full Bayesian NonParametric process.
    */
-  class FullBayesProcess: public HierarchicalGaussianProcess
+  class FullBayesProcess: public NonParametricProcess
   {
   public:
     FullBayesProcess(size_t dim, bopt_params params);

File include/gaussian_process.hpp

 #define  _GAUSSIAN_PROCESS_HPP_
 
 #include "gauss_distribution.hpp"
-#include "nonparametricprocess.hpp"
+#include "empiricalbayesprocess.hpp"
 
 
 namespace bayesopt
   /**
    * \brief Standard zero mean gaussian process with noisy observations.
    */
-  class GaussianProcess: public NonParametricProcess
+  class GaussianProcess: public EmpiricalBayesProcess
   {
   public:
     GaussianProcess(size_t dim, bopt_params params);

File include/hierarchical_gaussian_process.hpp

 #ifndef __HIERARCHICAL_GAUSSIAN_PROCESS_HPP__
 #define __HIERARCHICAL_GAUSSIAN_PROCESS_HPP__
 
-#include "nonparametricprocess.hpp"
+#include "empiricalbayesprocess.hpp"
 
 
 namespace bayesopt
   /**
    * \brief Virtual class for hierarchical Gaussian processes.
    */
-  class HierarchicalGaussianProcess: public NonParametricProcess
+  class HierarchicalGaussianProcess: public EmpiricalBayesProcess
   {
   public:
     HierarchicalGaussianProcess(size_t dim, bopt_params params);
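
Taken together, the header changes above insert a new EmpiricalBayesProcess layer between NonParametricProcess and the concrete surrogates: GaussianProcess and HierarchicalGaussianProcess now derive from it, while FullBayesProcess is rebased directly onto NonParametricProcess. A minimal sketch of the resulting hierarchy; the responsibility noted for EmpiricalBayesProcess is an assumption based on the code moved out of NonParametricProcess below:

  // Sketch of the class hierarchy after this commit (not verbatim code).
  class NonParametricProcess;            // data, kernel, Cholesky, prediction
  class EmpiricalBayesProcess
    : public NonParametricProcess {};    // assumed: point estimate of the
                                         // kernel hyperparameters (ML/MAP/LOO)
  class GaussianProcess
    : public EmpiricalBayesProcess {};
  class HierarchicalGaussianProcess
    : public EmpiricalBayesProcess {};
  class FullBayesProcess
    : public NonParametricProcess {};    // keeps the full Bayesian treatment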

File include/nonparametricprocess.hpp

 #include "kernel_functors.hpp"
 #include "mean_functors.hpp"
 #include "prob_distribution.hpp"
-#include "inneroptimization.hpp"
 
 namespace bayesopt
 {
 		 		 
     /** 
      * \brief Computes the initial surrogate model. 
-     * It also estimates the kernel parameters by MAP or ML. This
+     * It also updates the estimate of the kernel parameters. This
     * function is highly inefficient.  Use it only at the beginning.
      * @return error code
      */
-    int fitInitialSurrogate(bool learnTheta = true);
-  
+    int fitSurrogateModel();
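+
+    /** 
+     * \brief Estimates the kernel hyperparameters from the data
+     * according to the selected learning criterion. Implemented by
+     * derived classes.
+     * @return error code
+     */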
+    virtual int updateKernelParameters() = 0;
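+
+    /** 
+     * \brief Recomputes the Cholesky decomposition of the correlation
+     * matrix and the terms of the prediction that do not depend on
+     * the query.
+     * @return error code
+     */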
+    int precomputeSurrogate();
+
     /** 
      * \brief Sequential update of the surrogate model by adding a new point.
      *  Add new point efficiently using Matrix Decomposition Lemma 
      *  rows to the Cholesky decomposition.
      * @return error code
      */   
-    int updateSurrogateModel( const vectord &Xnew,
-			      double Ynew);
+    int updateSurrogateModel( const vectord &Xnew, double Ynew);
 
     /** 
      * \brief Full update of the surrogate model by adding a new point.
      *  It recomputes the kernel hyperparameters and full covariance matrix.
      * @return error code
      */   
-    int fullUpdateSurrogateModel( const vectord &Xnew,
-				  double Ynew);
+    int fitSurrogateModel( const vectord &Xnew, double Ynew);
 
 
     // Getters and setters
     /** Wrapper of setMean for the C structure */
     int setMean (mean_parameters mean, size_t dim);
 
-    /** 
-     * \brief Computes the score (eg:likelihood) of the kernel
-     * parameters.
-     * @param query set of parameters.
-     * @return score
-     */
-    double evaluateKernelParams(const vectord& query);
-
 
   protected:
     /** 
-     * \brief Computes the negative log likelihood of the data for all
-     * the parameters.
-     * @return value negative log likelihood
-     */
-    virtual double negativeTotalLogLikelihood() = 0;
-
-
-    /** 
-     * \brief Computes the negative log likelihood of the data for the
-     * kernel hyperparameters.
-     * @return value negative log likelihood
-     */
-    virtual double negativeLogLikelihood() = 0;
-
-    /** 
     * \brief Precomputes some values of the prediction that do not
     * depend on the query.
      * @return error code
      */
     virtual int precomputePrediction() = 0;
 
+    //TODO: Refactor from KernelModel
+    int computeCorrMatrix(matrixd& corrMatrix);
+    matrixd computeCorrMatrix();
+    matrixd computeDerivativeCorrMatrix(int dth_index);
+    vectord computeCrossCorrelation(const vectord &query);
+    double computeSelfCorrelation(const vectord& query);
 
+    vecOfvec mGPXX;                                              ///< Data inputs
+    vectord mGPY;                                                ///< Data values
+    
+    boost::scoped_ptr<ParametricFunction> mMean;    ///< Pointer to mean function
+    matrixd mFeatM;           ///< Value of the mean features at the input points
+    vectord mMu;                 ///< Mean of the parameters of the mean function
+    vectord mS_Mu;    ///< Variance of the params of the mean function W=mS_Mu*I
+
+    matrixd mL;             ///< Cholesky decomposition of the Correlation matrix
+    size_t dim_;
+    learning_type mLearnType;
+    KernelModel mKernel;
+
+    //TODO: might be unnecessary
+    vectord mMeanV;                           ///< Mean value at the input points
+
+  private:
     /** 
      * Computes the Cholesky decomposition of the Correlation (Kernel
      * or Gram) matrix 
 			      double selfcorrelation);
 
 
-    int computeCorrMatrix(matrixd& corrMatrix);
-    matrixd computeCorrMatrix();
-    matrixd computeDerivativeCorrMatrix(int dth_index);
-    vectord computeCrossCorrelation(const vectord &query);
-    double computeSelfCorrelation(const vectord& query);
-
-  private:
-    /**
-     * Computes the negative score of the data using cross validation.
-     * @return negative score
-     */
-    double negativeCrossValidation();
-
-    /** 
-     * \brief Computes the negative log prior of the hyperparameters.
-     * @return value negative log prior
-     */
-    double negativeLogPrior();
-
     inline void checkBoundsY( size_t i )
     {
       if ( mGPY(mMinIndex) > mGPY(i) )       mMinIndex = i;
       else if ( mGPY(mMaxIndex) < mGPY(i) )  mMaxIndex = i;
     };
 
-
-  protected:
-    vecOfvec mGPXX;                                              ///< Data inputs
-    vectord mGPY;                                                ///< Data values
-    
-    matrixd mFeatM;           ///< Value of the mean features at the input points
-    vectord mMu;                 ///< Mean of the parameters of the mean function
-    vectord mS_Mu;    ///< Variance of the params of the mean function W=mS_Mu*I
-
-    boost::scoped_ptr<ParametricFunction> mMean;    ///< Pointer to mean function
-
-    matrixd mL;             ///< Cholesky decomposition of the Correlation matrix
-    size_t dim_;
-    learning_type mLearnType;
-    KernelModel mKernel;
-
-  private:
     const double mRegularizer;   ///< Std of the obs. model (also used as nugget)
     size_t mMinIndex, mMaxIndex;	
     MeanFactory mPFactory;
 
-    //TODO: might be unnecesary
-    vectord mMeanV;                           ///< Mean value at the input points
-
-    InnerOptimization* kOptimizer;
   };
 
   /**@}*/
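
With the optimizer and score functions removed from NonParametricProcess (see src/nonparametricprocess.cpp below), kernel learning becomes the job of updateKernelParameters() in each subclass, presumably the new EmpiricalBayesProcess that OptimizeKernel now wraps. A minimal sketch of such an implementation, mirroring the code deleted from fitInitialSurrogate; the optimizer_ member and its setup are hypothetical:

  // Hypothetical subclass implementation (sketch, not from this commit).
  int EmpiricalBayesProcess::updateKernelParameters()
  {
    vectord theta = mKernel.getHyperParameters();  // seed for the search
    optimizer_->run(theta);                        // optimize the score in place
    return mKernel.setHyperParameters(theta);      // store the new estimate
  }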

File include/optimizekernel.hpp

 #define __OPTIMIZEKERNEL_HPP__
 
 #include "inneroptimization.hpp"
-#include "nonparametricprocess.hpp"
+#include "empiricalbayesprocess.hpp"
 
 namespace bayesopt {
 
   class OptimizeKernel: public InnerOptimization
   {
   public:
-    explicit OptimizeKernel(NonParametricProcess* npp):
+    explicit OptimizeKernel(EmpiricalBayesProcess* npp):
       InnerOptimization(), npp_(npp) {};
     virtual ~OptimizeKernel(){};
 
     }
     
   private:
-    NonParametricProcess* npp_;
+    EmpiricalBayesProcess* npp_;
   };
 
 }

File src/bayesoptbase.cpp

     // Update surrogate model
     if ((mParameters.n_iter_relearn > 0) && 
 	((ii + 1) % mParameters.n_iter_relearn == 0))
-      mGP->fullUpdateSurrogateModel(xNext,yNext); 
+      mGP->fitSurrogateModel(xNext,yNext); 
     else
       mGP->updateSurrogateModel(xNext,yNext); 
     

File src/bayesoptcont.cpp

       }
     
     mGP->setSamples(xPoints,yPoints);
-    mGP->fitInitialSurrogate();
+    mGP->fitSurrogateModel();
     
    // For logging purposes
     if(mParameters.verbose_level > 0)

File src/bayesoptdisc.cpp

 	mGP->addSample(xPoint,yPoint);
       }
 
-    mGP->fitInitialSurrogate();
+    mGP->fitSurrogateModel();
 
    // For logging purposes
     if(mParameters.verbose_level > 0)
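
The call-site changes in bayesoptbase.cpp, bayesoptcont.cpp, and bayesoptdisc.cpp reduce to one pattern: a full fit whenever the model is built or rebuilt, and the cheap incremental update otherwise. A sketch of the resulting usage, where relearnNow stands in for the (ii + 1) % n_iter_relearn == 0 test above:

  mGP->setSamples(xPoints, yPoints);  // initial design
  mGP->fitSurrogateModel();           // learn hyperparameters + precompute

  // Inside the optimization loop:
  if (relearnNow)
    mGP->fitSurrogateModel(xNext, yNext);     // add point, refit everything
  else
    mGP->updateSurrogateModel(xNext, yNext);  // add point, incremental Cholesky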

File src/gaussian_process.cpp

   namespace ublas = boost::numeric::ublas;
 
   GaussianProcess::GaussianProcess(size_t dim, bopt_params params):
-    NonParametricProcess(dim, params), mSigma(params.sigma_s)
+    EmpiricalBayesProcess(dim, params), mSigma(params.sigma_s)
   {
     d_ = new GaussianDistribution();
   }  // Constructor

File src/hierarchical_gaussian_process.cpp

 
   HierarchicalGaussianProcess::HierarchicalGaussianProcess(size_t dim, 
 							   bopt_params params):
-    NonParametricProcess(dim, params) {};
+    EmpiricalBayesProcess(dim, params) {};
 
   double HierarchicalGaussianProcess::negativeTotalLogLikelihood()
   {

File src/nonparametricprocess.cpp

 #include "log.hpp"
 #include "cholesky.hpp"
 #include "ublas_extra.hpp"
-#include "optimizekernel.hpp"	
 
 #include "gaussian_process.hpp"
 #include "gaussian_process_ml.hpp"
     dim_(dim), mRegularizer(parameters.noise),
     mMinIndex(0), mMaxIndex(0), mKernel(dim, parameters)
   { 
-    kOptimizer = new OptimizeKernel(this);
-
-    //TODO: Generalize
-    if (parameters.l_type == L_ML)
-      {
-	kOptimizer->setAlgorithm(BOBYQA);    // local search to avoid underfitting
-      }
-    else
-      {
-	kOptimizer->setAlgorithm(COMBINED);
-      }
-    kOptimizer->setLimits(1e-10,100.);
     setLearnType(parameters.l_type);
     int errorM = setMean(parameters.mean,dim);
     if (errorM)
   }
 
   NonParametricProcess::~NonParametricProcess()
-  {
-    delete kOptimizer;
-  }
+  {}
 
 
   NonParametricProcess* NonParametricProcess::create(size_t dim, 
   };
 
 
-  int NonParametricProcess::fitInitialSurrogate(bool learnTheta)
+  int NonParametricProcess::fitSurrogateModel()
+  {
+    int error = updateKernelParameters();
+    error += precomputeSurrogate();
+    return error;
+  };
+
+
+  int NonParametricProcess::precomputeSurrogate()
   {
     int error = -1;
-    if (learnTheta)
-      {
-	vectord optimalTheta = mKernel.getHyperParameters();
-	
-	FILE_LOG(logDEBUG) << "Computing kernel parameters. Seed: " 
-			   << optimalTheta;
-	kOptimizer->run(optimalTheta);
-	error = mKernel.setHyperParameters(optimalTheta);
-
-	if (error)
-	  {
-	    FILE_LOG(logERROR) << "Error updating kernel parameters.";
-	    exit(EXIT_FAILURE);
-	  }   
-
-	FILE_LOG(logDEBUG) << "Final kernel parameters: " << optimalTheta;	
-      }
-
     error = computeCholeskyCorrelation();
-
     if (error)
       {
 	FILE_LOG(logERROR) << "Error computing the correlation matrix";
       }   
 
     error = precomputePrediction(); 
-
     if (error)
       {
 	FILE_LOG(logERROR) << "Error pre-computing the prediction distribution";
 	exit(EXIT_FAILURE);
       }   
 
-    return 0; 
-  } // fitInitialSurrogate
+    return error; 
+  } // precomputeSurrogate
 
 
   int NonParametricProcess::updateSurrogateModel( const vectord &Xnew,
   } // updateSurrogateModel
 
 
-  int NonParametricProcess::fullUpdateSurrogateModel( const vectord &Xnew,
-						      double Ynew)
+  int NonParametricProcess::fitSurrogateModel( const vectord &Xnew,
+					       double Ynew)
   {
     assert( mGPXX[1].size() == Xnew.size() );
     addSample(Xnew,Ynew);
-    return fitInitialSurrogate();
-  } // fullUpdateSurrogateModel
+    return fitSurrogateModel();
+  } // fitSurrogateModel
 
 
   //////////////////////////////////////////////////////////////////////////////
     return setMean(vmu, smu, mean.name, dim);
   };
 
-
-  double NonParametricProcess::negativeCrossValidation()
-  {
-    // This is highly ineffient implementation for comparison purposes.
-    size_t n = mGPXX.size();
-    size_t last = n-1;
-    int error = 0;
-    double sum = 0.0;
-    vecOfvec tempXX(mGPXX);
-    vectord tempY(mGPY);
-    vectord tempM(mMeanV);
-    matrixd tempF(mFeatM);
-    for(size_t i = 0; i<n; ++i)
-      {
-	vectord x = mGPXX[0];  double y = mGPY(0);
-	double m = mMeanV(0);
-
-	mGPXX.erase(mGPXX.begin()); 
-	utils::erase(mGPY,mGPY.begin());
-	utils::erase(mMeanV,mMeanV.begin());
-	utils::erase_column(mFeatM,0);
-
-	fitInitialSurrogate(false);
-	ProbabilityDistribution* pd = prediction(x);
-	sum += log(pd->pdf(y));
-	mGPXX.push_back(x);     
-	mGPY.resize(mGPY.size()+1);  mGPY(mGPY.size()-1) = y;
-	mMeanV.resize(mGPY.size());  mMeanV(mGPY.size()-1) = m;
-	mFeatM.resize(mFeatM.size1(),mFeatM.size2()+1);  
-	mFeatM = tempF;
-      }
-      std::cout << "End" << mGPY.size();
-    return -sum;
-  }
-
-  double NonParametricProcess::negativeLogPrior()
-  {
-    return -mKernel.kernelLogPrior();
-  }
-
-  double NonParametricProcess::evaluateKernelParams(const vectord& query)
-  { 
-    int error = mKernel.setHyperParameters(query);
-    if (error) 
-      {
-	FILE_LOG(logERROR) << "Problem optimizing kernel parameters."; 
-	exit(EXIT_FAILURE);	
-      }
-
-    double result;
-    switch(mLearnType)
-      {
-      case L_ML:
-	result = negativeTotalLogLikelihood(); break;
-      case L_MAP:
-	result = negativeLogLikelihood()+negativeLogPrior();
-	break;
-      case L_LOO:
-	result = negativeCrossValidation(); break;
-      default:
-	FILE_LOG(logERROR) << "Learning type not supported";
-      }	  
-    return result;
-  }
-
-
   int NonParametricProcess::addNewPointToCholesky(const vectord& correlation,
 						  double selfcorrelation)
   {