# (C) Copyright 2007-2008 Olivier Grisel
# Author: Olivier Grisel <olivier.grisel@ensta.org>

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as published
# by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
# 02111-1307, USA.
#
# $Id$
"""The gumby brain: layered set of gumby nodes"""


from pysgd import SvmModel
#from gumby.model import SvmModel

from gumby.history import History
from gumby.odict import OrderedDict
from itertools import izip
from itertools import cycle
import logging
from numpy import array
from numpy import ndindex
from numpy import zeros
from numpy import float32
import random
from time import time


def sample_max(inputs, outputs, max_sample_size):
    """Randomly sample ``max_sample_size`` aligned (input, output) pairs"""
    sampled = random.sample(zip(inputs, outputs), max_sample_size)
    return [i for i, _ in sampled], [o for _, o in sampled]

def shuffle(inputs, outputs):
    """Jointly shuffle ``inputs`` and ``outputs``, preserving their alignment"""
    shuffled = zip(inputs, outputs)
    random.shuffle(shuffled)
    return [i for i, _ in shuffled], [o for _, o in shuffled]


class Layer(object):
    """An array of gumby cells sharing the same predictive model"""

    logger = logging.getLogger("gumby.brain.Layer")

    # TODO: turn the following attributes into properties that update the
    # model size upon change

    # spatial neighborhood sizes across all dimensions used for prediction;
    # should be a tuple of odd integers (to allow for spatial centering)
    spatial_neighborhood = None

    # temporal neighborhood size used for prediction
    temporal_neighborhood = 3

    model = None

    dtype = float32

    # TODO: replace by a new connector adapter class that can handle all sorts
    # of mapping logics
    _target_layers = ()

    _has_history = False

    def __init__(self, dimensions=(10,), history_size=10,
                 spatial_neighborhood=None, temporal_neighborhood=3):
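        """Create a layer of cells over a grid of the given ``dimensions``

        Doctest-style sketch, not executed on import (assumes ``pysgd``
        is installed; the default spatial neighborhood is 3 cells per
        dimension)::

            >>> layer = Layer(dimensions=(4, 4), history_size=5)
            >>> layer.spatial_neighborhood
            (3, 3)
            >>> layer.model.input_size   # 3 * 3 cells times 3 time steps
            27
        """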
        self.dimensions = dimensions
        if spatial_neighborhood is None:
            self.spatial_neighborhood = (3,) * len(dimensions)
        else:
            assert len(spatial_neighborhood) == len(dimensions)
            self.spatial_neighborhood = spatial_neighborhood

        assert temporal_neighborhood > 0
        self.temporal_neighborhood = temporal_neighborhood

        self._update_model()

        self.history_size = history_size

        self._surprise_history = History(
            dimensions, self.spatial_neighborhood, history_size)
        self._prediction_history = History(
            dimensions, self.spatial_neighborhood, history_size)
        self._input_history = History(
            dimensions, self.spatial_neighborhood, history_size)

    def reset(self):
        """Set all historical data"""
        self._surprise_history.reset()
        self._prediction_history.reset()
        self._input_history.reset()
        self._has_history = False

    def _update_model(self):
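        """(Re)build the shared model sized to the connected neighborhoods

        The model input size is the product of the spatial neighborhood
        sizes, multiplied by the temporal neighborhood for this layer's own
        history, plus one spatial neighborhood per target layer: e.g. a
        (3, 3) spatial neighborhood with temporal neighborhood 3 and a
        single target layer gives 9 * 3 + 9 == 36 inputs.
        """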
        layers = (self,) + self._target_layers
        model_input_size = 0
        for layer in layers:
            l_model_input_size = reduce(lambda x, y: x * y,
                                        layer.spatial_neighborhood)
            if layer is self:
                l_model_input_size *= layer.temporal_neighborhood
            model_input_size += l_model_input_size

        assert model_input_size != 0, "model input_size cannot be 0"

        if self.model is None or self.model.input_size != model_input_size:
            # TODO: short term: reuse old model characteristics
            # TODO: medium term: make models able to increase
            # their input size without losing existing support
            # vectors and parameters by padding the SVs with zeros
            #self.model = SvmModel(model_input_size)
            self.model = SvmModel()
            self.model.input_size = model_input_size
            self.model.w = zeros(model_input_size, dtype=self.dtype)
            self.model.bias = 0.0

    def __repr__(self):
        return "Layer(dimensions=%r, history_size=%r)" % (
            self.dimensions, self.history_size)

    def __len__(self):
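        """Number of stored cell values: cells in the grid times history size"""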
        number_of_cells = reduce(lambda x, y: x * y,
                                  self.dimensions, 1)
        return number_of_cells * self.history_size

    def add_target_layer(self, target_layer):
        self._target_layers += (target_layer,)
        self._update_model()

    def get_surprise(self, time_index=0):
        """Return the stored surprise array of the given time index"""
        return self._surprise_history[time_index]

    def get_prediction(self, time_index=0):
        """Return the stored prediction array of the given time index"""
        return self._prediction_history[time_index]

    def get_input(self, time_index=0):
        """Return the stored input array of the given time index"""
        return self._input_history[time_index]

    def predict(self, inputs):
        """Compute the next prediction array

        Each element is the result of the SVM prediction of the layer model
        fed with the values of the spatial and temporal neighborhood of the
        element.
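
        Usage sketch (hypothetical values, assuming a working ``pysgd``
        model; the returned array always has the layer dimensions)::

            >>> layer = Layer(dimensions=(4,))
            >>> layer.predict(zeros((4,), dtype=float32)).shape
            (4,)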
        """
        t0 = time()
        if self._has_history:
            self._input_history.record(inputs)
        else:
            # build a fake input history with the current input
            self._input_history.set_all(inputs)
            self._has_history = True

        # compute the new value of the surprise and store it in the first
        # position of the history
        new_surprise = self._prediction_history[0] - inputs
        self._surprise_history.record(new_surprise)

        # create a new array to hold the future predicted values
        new_prediction = zeros(self.dimensions, dtype=self.dtype)

        # build the list of neighborhood inputs formatted for the model for
        # every cell of the layer
        model_inputs = []
        for position in ndindex(self.dimensions):
            model_inputs.append(self._build_model_input(position))

        # call the model with the list of inputs and place the results in
        # their respective position in the prediction array of the layer
        single_predictions = self.model.predict(model_inputs)
        if self.dimensions == (1,):
            single_predictions = single_predictions.reshape(self.dimensions)
        for pos, pred in izip(ndindex(self.dimensions), single_predictions):
            new_prediction[pos] = pred

        # record the new prediction in the layer history and return it
        # to the caller
        self._prediction_history.record(new_prediction)
        self.logger.info("prediction on layer %d [%0.3fs]", id(self), time()-t0)
        return new_prediction

    def get_squared_prediction_error(self, t=0):
        """Normalized squared prediction error of the layer"""
        surprise = self._surprise_history[t]
        return (surprise ** 2).mean()

    def get_mse(self):
        """Averaged squarred prediction error on all historical data"""
        return (array(list(self._surprise_history)) ** 2).mean()

    def train(self, find_parameters=False, max_sample_size=None,
              do_shuffle=True, **kw):
        """Train the embedded model against the layer historical data

        ``find_parameters``:
            if True, use cross validation to perform parameter
            estimation

        ``max_sample_size``:
            if not None, randomly sample a subset of the given size to feed
            to the learner, so as to bound the training time at the price of
            some additional optimization error

        ``do_shuffle``:
            if True, shuffle the dataset before feeding it to the model
            (some models, such as stochastic learners, perform poorly on
            datasets with high sequential dependency)

            TODO: verify this claim in a test case

        ``nr_folds``:
            number of folds to use in the cross validations performed during
            parameter estimation
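
        Usage sketch (hypothetical data: ``frames`` stands for any iterable
        of arrays; the layer must record some history first)::

            >>> for frame in frames:
            ...     _ = layer.predict(frame)
            >>> layer.train(max_sample_size=1000)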
        """
        t0 = time()
        # collect data to build the training set
        m_inputs, m_outputs = self.build_training_set(
            max_sample_size, do_shuffle, **kw)

        # perform the actual model training
        if find_parameters:
            self.model.find_parameters(m_inputs, m_outputs, **kw)
        self.model.train(m_inputs, m_outputs, epochs=3,
                         compute_loss=True, stopping_tol=1e-4)
        self.logger.info("training on layer %d on %d data points [%0.3fs]",
                         id(self), len(m_inputs), time()-t0)

    def build_training_set(self, max_sample_size=None, do_shuffle=True, **kw):
        """Train build the training set out of the recorded histories

        ``find_parameters``:
            if True, use cross validation to perform parameter
            estimation

        ``nr_folds``:
            number of folds to use in cross validations performed during
            parameters estimation
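
        Illustrative check (hypothetical layer with recorded history)::

            >>> m_inputs, m_outputs = layer.build_training_set()
            >>> len(m_inputs) == len(m_outputs)
            True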
        """
        m_outputs = []
        m_inputs = []
        for t in xrange(self.history_size - self.temporal_neighborhood - 1):
            for position in ndindex(self.dimensions):
                m_outputs.append(self._input_history[t][position])
                m_inputs.append(self._build_model_input(
                    position, time_origin=t + 1))

        if max_sample_size is not None and len(m_inputs) > max_sample_size:
            self.logger.info("sampling %d items out of %d", max_sample_size,
                              len(m_inputs))
            m_inputs, m_outputs = sample_max(
                m_inputs, m_outputs, max_sample_size)
            # no need to shuffle after sampling
            do_shuffle = False

        # some learning algorithms (e.g. stochastic gradient descent) perform
        # better with a shuffled training set
        if do_shuffle:
            m_inputs, m_outputs = shuffle(m_inputs, m_outputs)

        return m_inputs, m_outputs

    def _build_model_input(self, position, time_origin=0):
        """Helper method to build an input vector for the model"""
        model_input = zeros(self.model.input_size, dtype=self.dtype)
        # integer division: the model input size is an exact multiple of the
        # flattened spatial neighborhood size
        spatial_offset = len(model_input) // (self.temporal_neighborhood +
                                              len(self._target_layers))

        # local spatial neighborhoods for each time index in the local
        # temporal neighborhood
        for t in xrange(self.temporal_neighborhood):
            n = self._input_history.get_neighborhood(position, time_origin+t)
            model_input[t*spatial_offset:(t+1)*spatial_offset] = n.flat

        # last spatial neighborhood for each target layer
        i0 = self.temporal_neighborhood
        for i, layer in enumerate(self._target_layers):
            n = layer._prediction_history.get_neighborhood(
                position, time_origin)
            model_input[(i0+i)*spatial_offset:(i0+i+1)*spatial_offset] = n.flat

        return model_input


class Brain(object):
    """The gumby brain: layered set of gumby cells"""

    train_max_sample_size = None

    logger = logging.getLogger("gumby.brain.Brain")

    def __init__(self):
        self.layers = OrderedDict()
        self._source_layer = OrderedDict()
        self._target_layers = OrderedDict()
        self._invalidate_rank_caches()

    def reset(self):
        """Set all historical data in layers back to zero"""
        for layer in self.layers:
            layer.reset()

    def _invalidate_rank_caches(self):
        self.input_layers = []
        self._cached_ranked_layers = None

    #
    # Methods to introspect the brain structure
    #

    def __len__(self):
        return len(self.layers)

    def __iter__(self):
        return self.layers.iterkeys()

    def __contains__(self, layer):
        return layer in self.layers

    def get_size(self):
        """Total number of cells in the brain"""
        return reduce(lambda x, y: x + y, (len(l) for l in self.layers))

    def get_source_layer(self, layer):
        return self._source_layer.get(layer)

    def get_target_layers(self, layer):
        return self._target_layers.get(layer) or []

    def get_input_layers(self):
        return [l for l in self if self.get_source_layer(l) is None]

    #
    # Methods to build the brain structure
    #

    def add_layer(self, layer=None, dimensions=(10,), history_size=10,
                 spatial_neighborhood=None, temporal_neighborhood=3):
        """Add a new layer to the brain; creating if necessary"""

        assert layer not in self, "%r already in brain" % layer

        # invalidate the ranked nodes caches
        self._invalidate_rank_caches()

        # register the layer in the complete list of layers of the brain
        if layer is None:
            layer = Layer(dimensions, history_size=history_size,
                          spatial_neighborhood=spatial_neighborhood,
                          temporal_neighborhood=temporal_neighborhood)

        self.layers[layer] = 1  # the OrderedDict is used as an ordered set
        self._source_layer[layer] = None
        self._target_layers[layer] = []
        return layer

    def connect(self, source_layer, target_layer):
        """Add a link between two layers"""
        self._source_layer[target_layer] = source_layer
        tl = self._target_layers.setdefault(source_layer, [])
        tl.append(target_layer)
        source_layer.add_target_layer(target_layer)

    #
    # Methods to perform inference on a trained brain
    #

    def get_ranked_layers(self):
        """Collect ranked layers according to their topology

        Source / bottom / input layers come first.
        """
        # TODO: order the layers according to connection topology
        return list(self.layers)

    def predict(self, inputs):
        """Set the input on the source layers and perform inference

        ``inputs`` should be a list of arrays matching the dimensions of the
        input layers.

        Each layer predict tries to predict the next state surprise of the
        layer below (or of the next brain input) while using the
        last predictions of the layers above.

        The prediction of the input layers are returned as an a array.
        """
        for layer in self.get_ranked_layers():
            source_layer = self._source_layer.get(layer)
            if source_layer is None:
                # consume the inputs in the same order as the input layers
                layer.predict(inputs.pop(0))
            else:
                layer.predict(source_layer.get_surprise(0))
        return self.get_prediction()

    def get_prediction(self, time_index=0):
        """Read the prediction on the input layers

        ``time_index == 0`` points to the last prediction
        """
        return [l.get_prediction(time_index) for l in self.get_input_layers()]

    def get_surprise(self, time_index=0):
        """Read the prediction on the input layers

        ``time_index == 0`` points to the last surprise
        """
        return [l.get_surprise(time_index) for l in self.get_input_layers()]

    def get_squared_prediction_error(self):
        """Return the sum of the squared prediction errors of the layers"""
        return array([l.get_squared_prediction_error() for l in self]).sum()

    #
    # Methods to train the models
    #

    def train(self, do_parameters=False):
        """Naive training method that iterates over the ranked layers"""
        # TODO: rewrite this method to be able to use a dataset as tracking
        # inputs to refill historical data after each layer training step
        for layer in self.get_ranked_layers():
            layer.train(
                do_parameters, max_sample_size=self.train_max_sample_size)

    def get_training_generator(self, do_parameters=False):
        """Naive training generator method that cycles over the ranked layers"""
        for layer in cycle(self.get_ranked_layers()):
            yield layer.train(
                do_parameters, max_sample_size=self.train_max_sample_size)


class MyBrainHurts(Exception):
    """Exception raised when the structure of the brain is fucked up"""
    pass
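

if __name__ == "__main__":
    # Minimal end-to-end sketch (hypothetical sizes; assumes the ``pysgd``
    # SvmModel is importable): feed random frames to a two layer brain,
    # train the layer models, then check the prediction error.
    from numpy.random import rand
    logging.basicConfig(level=logging.INFO)

    brain = Brain()
    bottom = brain.add_layer(dimensions=(8,), history_size=20)
    top = brain.add_layer(dimensions=(8,), history_size=20)
    brain.connect(bottom, top)

    # record some history on all layers before training
    for _ in xrange(20):
        brain.predict([rand(8).astype(float32)])

    brain.train()
    print "squared prediction error:", brain.get_squared_prediction_error()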