
#!/usr/bin/env python
'''Playing with genetic algorithms.

The main idea is that a "chromosome" represents the variables of our algorithm,
and a fitness function tells us how good each chromosome is. In every generation
we keep the best chromosomes and then mutate and crossover some of them (a small
concrete example follows this docstring).

Since the best chromosomes move from generation to generation, we cache the
fitness function results.

I'm pretty sure I got the basis for this from somewhere on the net, I just don't
remember where :)
'''
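
# A concrete picture of the idea above (the numbers are made up): a chromosome
# for the linear fit in test() below is a tuple of genes such as (3, 17).
# mutate() re-rolls single genes inside their allowed ranges, e.g.
# (3, 17) -> (3, 52), while crossover() picks every gene from one of two
# parents, e.g. (3, 17) x (80, 5) -> (3, 5).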

from itertools import starmap
from random import random, randint, choice
from sys import stdout


MUTATE_PROBABILITY = 0.3  # per-gene mutation probability (arbitrary value)

def mutate_gene(n, range):
    if random() > MUTATE_PROBABILITY:
        return n

    while 1:
        # Make sure we mutated something
        new = randint(range[0], range[1])
        if new != n:
            return new

def mutate(chromosome, ranges):
    while 1:
        # Make sure at least one gene actually changed
        new = tuple(starmap(mutate_gene, zip(chromosome, ranges)))
        if new != chromosome:
            return new

def crossover(chromosome1, chromosome2):
    # For every gene position, pick the gene from one of the parents at random
    return tuple(map(choice, zip(chromosome1, chromosome2)))

def make_chromosome(ranges):
    return tuple(starmap(randint, ranges))

def breed(population, size, ranges):
    new = population[:]
    while len(new) < size:
        new.append(crossover(choice(population), choice(population)))
        new.append(mutate(choice(population), ranges))

    return new[:size]

def evaluate(fitness, chromosome, data, cache):
    if chromosome not in cache:
        cache[chromosome] = fitness(chromosome, data)

    return cache[chromosome]

def update_score_cache(population, fitness, data, cache):
    for chromosome in population:
        if chromosome not in cache:
            cache[chromosome] = fitness(chromosome, data)

def find_solution(fitness, data, ranges, popsize, nruns, verbose=0):
    score_cache = {}
    population = [make_chromosome(ranges) for i in range(popsize)]
    for generation in xrange(nruns):
        update_score_cache(population, fitness, data, score_cache)
        population.sort(key=score_cache.get, reverse=1)
        if verbose:
            best = population[0]
            score = score_cache[best]
            print "%s: best=%s, score=%s" % (generation, best, score)

        base = population[:popsize/4]
        population = breed(base, popsize, ranges)

    population.sort(key=score_cache.get, reverse=1)
    return population[0], score_cache[population[0]]

def test(show_graph=1):
    '''Try to find a linear equation a*x + b that is closest to 100*log(x+1)'''
    from math import log
    xs = range(100)
    data = map(lambda i: log(i+1) * 100, xs)
    def fitness(chromosome, data):
        '''Calculate average error'''
        a, b = chromosome
        def f(x):
            return a * x + b
        values = map(f, xs)
        diffs = map(lambda i: abs(values[i] - data[i]), xs)

        # We want minimal error so return 1/error
        return 1 / (sum(diffs) / len(diffs))

    (a, b), score = find_solution(fitness, data, ((0, 100), (0, 100)), 10, 100, 1)
    print "best: a=%s, b=%s (score=%s)" % (a, b, score)

    data2 = map(lambda x: a * x + b, range(100))
    if not show_graph:
        return

    # Show a nice plot
    import pylab
    l1, l2 = pylab.plot(xs, data, xs, data2)
    pylab.legend((l1, l2), ("100 * log(x+1)", "%s * x + %s" % (a, b)))
    pylab.show()

if __name__ == "__main__":
    test()
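
To reuse find_solution for a different problem, all you need is a fitness
function and the per-gene ranges; the number of genes is simply the length of
ranges. Here is a minimal sketch (the target data and ranges are made up for
illustration) that fits a quadratic a*x^2 + b*x + c, assuming the script above
was saved as genetic.py (the module name is only for illustration):

from genetic import find_solution

xs = range(50)
target = [3 * x * x + 7 * x + 11 for x in xs]

def quad_fitness(chromosome, data):
    # Average absolute error of the quadratic against the target, turned into
    # a score where bigger is better (the 1.0 + ... avoids division by zero
    # when the fit is exact)
    a, b, c = chromosome
    diffs = [abs((a * x * x + b * x + c) - data[x]) for x in xs]
    return 1 / (1.0 + sum(diffs) / float(len(diffs)))

best, score = find_solution(quad_fitness, target,
                            ((0, 10), (0, 10), (0, 20)), 20, 200)
print "best chromosome: %s (score=%s)" % (best, score)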