function [D] = DKL_kNN_k_estimation(X,Y,co)
%Estimates the Kullback-Leibler divergence (D) of X and Y (X(:,t), Y(:,t) is the t^th sample)
%using the kNN method (S={k}). The number of samples in X [=size(X,2)] and Y [=size(Y,2)] can be different. Cost parameters are provided in the cost object co.
%
%We make use of the naming convention 'D<name>_estimation', to ease embedding new divergence estimation methods.
%
%INPUT:
%   X:  d x num_of_samplesX matrix, one sample per column.
%   Y:  d x num_of_samplesY matrix, one sample per column; must have the same number of rows as X.
%   co: cost object, passed unchanged to kNN_squared_distances.
%OUTPUT:
%   D:  d * mean(log(dist_k_YX./dist_k_XX)) + log(num_of_samplesY/(num_of_samplesX-1)),
%       where dist_k_XX and dist_k_YX are the square roots of the last row returned by
%       kNN_squared_distances(X,X,co,1) and kNN_squared_distances(Y,X,co,0), respectively.
%ERRORS:
%   An error is raised if X and Y have a different number of rows.
%
%REFERENCE:
% Fernando Perez-Cruz. Estimation of Information Theoretic Measures for Continuous Random Variables. Advances in Neural Information Processing Systems (NIPS), pp. 1257-1264, 2008.
% Nikolai Leonenko, Luc Pronzato, and Vippal Savani. A class of Renyi information estimators for multidimensional densities. Annals of Statistics, 36(5):2153-2182, 2008.
% Qing Wang, Sanjeev R. Kulkarni, and Sergio Verdu. Divergence estimation for multidimensional densities via k-nearest-neighbor distances. IEEE Transactions on Information Theory, 55:2392-2405, 2009.
%
%Copyright (C) 2012 Zoltan Szabo ("http://nipg.inf.elte.hu/szzoli", "szzoli (at) cs (dot) elte (dot) hu")
%
%This file is part of the ITE (Information Theoretical Estimators) Matlab/Octave toolbox.
%
%ITE is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by
%the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
%
%This software is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
%MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
%
%You should have received a copy of the GNU General Public License along with ITE. If not, see <http://www.gnu.org/licenses/>.

%co.mult:OK.

[dX,num_of_samplesX] = size(X);
[dY,num_of_samplesY] = size(Y);

%Fail loudly on a dimension mismatch: merely displaying a message would leave D
%unassigned, so the caller would either hit an unrelated "output not assigned"
%error or (when no output is requested) continue silently.
if dX~=dY
    error('DKL_kNN_k_estimation:dimensionMismatch',...
          'The dimension of X and Y must be equal (got %d and %d).',dX,dY);
end
d = dX;

%Squared kNN distances for every sample (column) of X, searched within X and within Y.
%NOTE(review): the last argument (1 vs 0) presumably tells the helper whether the query
%points are themselves members of the searched set (so the trivial self-match is
%skipped) - confirm against kNN_squared_distances.
squared_distancesXX = kNN_squared_distances(X,X,co,1);
squared_distancesYX = kNN_squared_distances(Y,X,co,0);

%Only the last row is used; presumably this is the distance to the farthest (k-th)
%returned neighbor - confirm against kNN_squared_distances.
dist_k_XX = sqrt(squared_distancesXX(end,:));
dist_k_YX = sqrt(squared_distancesYX(end,:));

%kNN-based divergence estimate: dimension-scaled mean log-ratio of the distances plus
%the sample-size correction term.
D = d * mean(log(dist_k_YX./dist_k_XX)) + log(num_of_samplesY/(num_of_samplesX-1));