1. Zoltán Szabó
  2. ITE


ITE / code / H_I_D_A_C / base_estimators / DEnergyDist_estimation.m

function [D] = DEnergyDist_estimation(Y1,Y2,co)
%Estimates the energy distance (D) using pairwise distances of the sample points.
%We use the naming convention 'D<name>_estimation' to ease embedding new divergence estimation methods.
%
%INPUT:
%  Y1: Y1(:,t) is the t^th sample from the first distribution.
%  Y2: Y2(:,t) is the t^th sample from the second distribution.
%      Y1 and Y2 must have the same number of rows (sample dimension); the number of samples may differ.
%  co: divergence estimator object (not used by this estimator; kept for the common 'D<name>_estimation' interface).
%OUTPUT:
%  D: estimated energy distance,
%     D = 2 * mean_{i,j} ||Y1(:,i)-Y2(:,j)|| - mean_{i,j} ||Y1(:,i)-Y1(:,j)|| - mean_{i,j} ||Y2(:,i)-Y2(:,j)||,
%     where ||.|| is the Euclidean norm and the means run over all index pairs (including i=j).
%
%REFERENCE:
%   Gabor J. Szekely and Maria L. Rizzo. A new test for multivariate normality. Journal of Multivariate Analysis, 93:58-80, 2005. (metric space of negative type)
%   Gabor J. Szekely and Maria L. Rizzo. Testing for equal distributions in high dimension. InterStat, 5, 2004. (R^d)
%
%Copyright (C) 2012 Zoltan Szabo ("http://nipg.inf.elte.hu/szzoli", "szzoli (at) cs (dot) elte (dot) hu")
%
%This file is part of the ITE (Information Theoretical Estimators) Matlab/Octave toolbox.
%
%ITE is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by
%the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
%
%This software is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
%MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
%
%You should have received a copy of the GNU General Public License along with ITE. If not, see <http://www.gnu.org/licenses/>.

%verification:
    [dY1,num_of_samplesY1] = size(Y1);
    [dY2,num_of_samplesY2] = size(Y2);

    %the two samples must live in the same space, otherwise cross distances are undefined:
    if dY1~=dY2
        error('The dimension of the samples in Y1 and Y2 must be equal.');
    end

%pairwise Euclidean distances (sqdistance returns squared distances, hence the sqrt):
    distances_Y1Y1 = sqrt(sqdistance(Y1));
    distances_Y2Y2 = sqrt(sqdistance(Y2));
    distances_Y1Y2 = sqrt(sqdistance(Y1,Y2));

%average distances: cross term (Y1 vs. Y2) and the two within-sample terms:
    mean_Y1Y2 = sum(sum(distances_Y1Y2)) / (num_of_samplesY1*num_of_samplesY2);
    mean_Y1Y1 = sum(sum(distances_Y1Y1)) / (num_of_samplesY1^2);
    mean_Y2Y2 = sum(sum(distances_Y2Y2)) / (num_of_samplesY2^2);

%energy distance:
    D = 2 * mean_Y1Y2 - mean_Y1Y1 - mean_Y2Y2;