Commits

Olivier Grisel committed 898a220

Make it possible to store the training set of a one-layer experiment for later use

Comments (0)

Files changed (1)

experiments/xp_movie_prediction.py

 import os
 import sys
+from cPickle import dump
+from cPickle import load
 from gumby.brain import Brain
 from gumby.adapter.movie import Movie
 from gumby.adapter.movie import load_movie
 logging.config.fileConfig("log.ini")
 
 
-TRAINING_CYCLES = 2
+CYCLES = 2
 
 if len(sys.argv) > 1:
     MOVIES_DIRECTORY = sys.argv[1]
 else:
     MOVIES_DIRECTORY = os.path.join(os.path.dirname(__file__), "sample-movies2")
 
-movies = [load_movie(os.path.join(MOVIES_DIRECTORY, fn))
-          for fn in os.listdir(MOVIES_DIRECTORY) if fn.endswith(".pickle")]
-BATCH_SIZE = sum(len(m) for m in movies)
-BATCH_SIZE = 10
+INPUTS_FILENAME = os.path.join(MOVIES_DIRECTORY, 'dataset_input.pickle')
+OUTPUTS_FILENAME = os.path.join(MOVIES_DIRECTORY, 'dataset_output.pickle')
 
-if not movies:
-    print ("unable to find sample movies in directory '%s', please run "
-           "xp_movie2.py first" % MOVIES_DIRECTORY)
-    sys.exit(1)
+if not os.path.exists(INPUTS_FILENAME) or not os.path.exists(OUTPUTS_FILENAME):
 
+    movies = [load_movie(os.path.join(MOVIES_DIRECTORY, fn))
+              for fn in os.listdir(MOVIES_DIRECTORY) if fn.endswith(".pickle")]
+    BATCH_SIZE = sum(len(m) for m in movies)
 
-b = Brain()
-l0 = b.add_layer(dimensions=(40, 30), temporal_neighborhood=5,
-                 spatial_neighborhood=(5, 4), history_size=BATCH_SIZE + 5)
+    if not movies:
+        print ("unable to find sample movies in directory '%s', please run "
+               "xp_movie2.py first" % MOVIES_DIRECTORY)
+        sys.exit(1)
 
-ma = MovieAdapter(movies, brain=b, dimensions=(40, 30))
+    b = Brain()
+    l0 = b.add_layer(dimensions=(40, 30), temporal_neighborhood=5,
+                     spatial_neighborhood=(5, 4), history_size=BATCH_SIZE + 5)
 
-for i in xrange(TRAINING_CYCLES):
-    print "## training cycle #%d" % i
-    ma.train()
-    print l0.get_mse()
-    ma.plot_mse("raw_mse_global_evolution.png", movavg_period=30)
-    ma.plot_mse("raw_mse_last_100_detailed.png", only_last=100)
-    #ma.plot_mse_gain_over_identity("gain_mse.png")
+    ma = MovieAdapter(movies, brain=b, dimensions=(40, 30))
 
+    # load the content of the movie data into a single-layered brain so as to
+    # use its API to build training data suitable for evaluating algorithms
+    for i in xrange(CYCLES):
+        ma.next()
+        #print "## training cycle #%d" % i
+        #ma.train()
+        #print l0.get_mse()
+        #ma.plot_mse("raw_mse_global_evolution.png", movavg_period=30)
+        #ma.plot_mse("raw_mse_last_100_detailed.png", only_last=100)
+        #ma.plot_mse_gain_over_identity("gain_mse.png")
 
+    # dump the data set to disk so that it can be quickly reused later
+    # without going through the brain adaptation step again
+    i, o = l0.build_training_set()
+    print "saving " + INPUTS_FILENAME
+    dump(i, file(INPUTS_FILENAME, 'wb'))
+
+    print "saving " + OUTPUTS_FILENAME
+    dump(o, file(OUTPUTS_FILENAME, 'wb'))
+
+else:
+    # reuse previously saved datasets
+    print "loading " + INPUTS_FILENAME
+    i = load(file(INPUTS_FILENAME))
+
+    print "loading " + OUTPUTS_FILENAME
+    o = load(file(OUTPUTS_FILENAME))
+