+# -*- coding: utf-8; mode: python -*-
+A Python 2 script to test kernels, for the Kernel Methods homework.
+Source : http://lear.inrialpes.fr/people/mairal/teaching/2015-2016/MVA/
+PDF: http://lear.inrialpes.fr/people/mairal/teaching/2015-2016/MVA/fichiers/homework_mva_2016.pdf
+- *Date:* Saturday 06 February 2016.
+- *Author:* Lilian Besson, for the MVA Master, (C) 2015-16.
+- *Licence:* MIT Licence (http://lbesson.mit-license.org).
+from __future__ import print_function, division # Python 2 compatibility if needed
+ from sympy import lcm, gcd
+ print("Error: sympy is not available, switching back to fractions implementation of lcm/gcd")
+ from fractions import gcd
+ """ lcm(x, y) : lowest common multiple, = x * y / gcd(x, y). """
+ return (x * y) / gcd(x, y)
+#: Number of random tests to do
+#: Lower bound for random number in IR
+#: Upper bound for random number in IR
+#: Upper bound for random number in IN
+#: Small upper bound for random number in IN
+SMALL_UPPER_BOUND_INT = 30
+#: Tolerance for real values tests to zero
+#: Tolerance for probabilities check to 100%
+#: Standard deviation of the weights
+#: Distribution for weights
+# WEIGHT_DISTRIBUTION = "NORMAL"
+# WEIGHT_DISTRIBUTION = "UNIFORM_0_1" # Useless!
+WEIGHT_DISTRIBUTION = "UNIFORM_-1_1"
+#: Map a domain to a random generator on this domain
+ lambda: np.random.uniform(LOWER_BOUND, UPPER_BOUND),
+ lambda: np.random.uniform(0.0, UPPER_BOUND),
+ lambda: np.random.randint(0, UPPER_BOUND_INT),
+ lambda: np.random.randint(0, SMALL_UPPER_BOUND_INT),
+ lambda: np.random.uniform(-1.0, 1.0)
+def test_kernel(K, N=N, T=T, verb=False):
+ Test if a function is a kernel on lots of random values (500 by default):
+ - test if K(x, y) = K(y, x) (symmetry),
+ - test if K(x, x) = 0 ==> x = 0 (definiteness),
+ - test if sum_ij a_i a_j K(x_i, x_j) >= 0 for 'all' t, and a_i in IR, x_i in DOMAIN (positiveness) (for all i,j in range(t), and t drawn in range(T) randomly).
+ Return numerical values, average of success rate on N = 500 tests.
+ # Get the domain of K and an appropriate random generator.
+ newvalue = random_generators[domain]
+ # Random generator for weights
+ if WEIGHT_DISTRIBUTION == "UNIFORM_0_1":
+ return np.random.rand(t) * WEIGHT_STD
+ if WEIGHT_DISTRIBUTION == "UNIFORM_-1_1":
+ return (2*np.random.rand(t) - 1) * (WEIGHT_STD / 2)
+ if WEIGHT_DISTRIBUTION == "NORMAL":
+ return WEIGHT_STD * np.random.randn(t)
+ # Test for definiteness
+ # Test for positiveness
+ print(" - Random values x = {}, y = {}.".format(x, y))
+ # Test : (A ==> B) <=> (B or not(A))
+ if (not (abs(K(x, x)) < REAL_TOLERANCE)) or (abs(x) > REAL_TOLERANCE):
+ if (not (abs(K(y, y)) < REAL_TOLERANCE)) or (abs(y) > REAL_TOLERANCE):
+ # Generate lots of samples for checking positiveness
+ t = np.random.randint(1, T)
+ X = [newvalue() for i in range(t)]
+ print(" - Random values t = {}, a = {}, X = {}.".format(t, a, X))
+ if sum(a[i] * a[j] * K(X[i], X[j]) for i in range(t) for j in range(t)) >= 0:
+ print(" - After {} random tests, sym = {:>3}, def = {:>3}, pos = {:>3}.".format(N, prop_sym, prop_def, prop_pos))
+ "symmetry": (prop_sym / N),
+ "definiteness": (prop_def / (2*N)),
+ "positiveness": (prop_pos / N),
+# %% Tweak to define a new kernel
+#: List of all kernels to test, dynamically created by @kernel decorator
+def kernel(domain="REAL"):
+ Decorator for a new kernel (just an elegant way to add it to all_kernels).
+ The parameter 'domain' can be:
+ - "REAL" for (-oo,+oo) = IR real number,
+ - "REALPLUS" for [0,+oo) = IR+ positive real number,
+ - "INTEGER" for [|0,+oo) = IN positive integer,
+ - "SMALLINT" for [|0,+oo) = IN (small) positive integer,
+ - "MONEONE" for (-1,+1) = UR the unit open disk of real values.
+ def kernel_decorator(func):
+ Decorator for a new kernel, with a certain domain.
+ return kernel_decorator
+# Classical example of kernel: linear kernel!
+ """ K(x, y) = < x, y > = x * y """
+ """ K(x, y) = 1 / (1 - x * y) """
+# Here we could have an overflow error, so using small integers
+def power_two_dot_k(x, y):
+ """ K(x, y) = 2 ** (x + y) """
+# Here we could have an overflow error, so using small integers
+def power_two_sum_k(x, y):
+ """ K(x, y) = 2 ** (x * y) """
+ """ K(x, y) = log(1 + x * y) """
+ return np.log(1 + x * y)
+ """ K(x, y) = exp(-(x-y)^2) """
+ return np.exp(-(x-y)**2)
+ """ K(x, y) = cos(x + y) """
+ """ K(x, y) = cos(x - y) """
+ """ K(x, y) = min(x, y) """
+ """ K(x, y) = max(x, y) """
+ """ K(x, y) = min(x, y) / max(x, y) """
+ if abs(max(x, y)) < REAL_TOLERANCE:
+ return min(x, y) / max(x, y)
+ """ K(x, y) = gcd(x, y) """
+ """ K(x, y) = lcm(x, y) """
+ """ K(x, y) = gcd(x, y) / lcm(x, y) """
+ if abs(lcm(x, y)) < REAL_TOLERANCE:
+ return gcd(x, y) / lcm(x, y)
+# %% Testing all the kernels
+def main(kernels_to_test=all_kernels, T=T, verb=False):
+ Test a lot of different kernels.
+ print("\nStarting to test {} kernels...".format(nb))
+ for i, K in enumerate(all_kernels):
+ print("\n- Testing the kernel {} {}:".format(K.__name__, K.__doc__))
+ print(" It has domain = {}.".format(K.domain))
+ result = test_kernel(K, N=N, T=T, verb=verb)
+ print(" Results of the N = {} tests:".format(N))
+ for test, rate in result.items():
+ print(" - Tested for {:^12} : success rate {:.2%}.".format(test, rate))
+ if rate < 1.0 - PROBA_TOLERANCE:
+ print(" ===> I guess this kernel {} {} is NOT a positive definite kernel !".format(K.__name__, K.__doc__))
+ not_kernels.append({'K': K, 'test': test, 'rate': rate})
+ # bad_test, min_rate = test, rate
+ print(" Finished for the kernel {}...".format(K.__name__))
+ print("\n\nDone testing some kernels...")
+ return results, not_kernels
+if __name__ == '__main__':
+ print("\nScript to test if functions are kernels, by Lilian Besson (C) 2016")
+ print("MIT Licensed (http://lbesson.mit-license.org)")
+ print("\n\nUsing N = {}, T = {}.".format(N, T))
+ print("For test domains:")
+ print(" - IR ~= [{}, {}],".format(LOWER_BOUND, UPPER_BOUND))
+ print(" - IR+ ~= [ 0, {}],".format(UPPER_BOUND))
+ print(" - IN ~= [ 0, {}],".format(UPPER_BOUND_INT))
+ print(" - IN ~= [ 0, {}] (small to reduce overflow),".format(SMALL_UPPER_BOUND_INT))
+ print("Tolerance for small real values = {}.".format(REAL_TOLERANCE))
+ print("Tolerance for success rates = {}.".format(PROBA_TOLERANCE))
+ if WEIGHT_DISTRIBUTION == "UNIFORM_0_1":
+ print("And sampling the a_i from uniform in [0, {}].".format(WEIGHT_STD))
+ if WEIGHT_DISTRIBUTION == "UNIFORM_-1_1":
+ print("And sampling the a_i from uniform in [-{}, {}].".format(WEIGHT_STD/2, WEIGHT_STD/2))
+ if WEIGHT_DISTRIBUTION == "NORMAL":
+ print("And sampling the a_i from a Gaussian of mean 0 and std {}.".format(WEIGHT_STD))
+ # results, not_kernels = main(kernels_to_test=all_kernels, verb=True)
+ results, not_kernels = main()
+ for item in not_kernels:
+ print("\n- Apparently, the kernel {} {} is NOT a positive definite kernel : because {} was only observed with probability {:.2%}.".format(K.__name__, K.__doc__, test, rate))
+ print("\n- All the other kernel appeared to be positive definite. Now prove it.")
+ print("\nFinished for this script 'test_kernel.py'.")