Commits

Matteo Bertini committed da04e72

Remove old stuff


Files changed (69)

old/arima.py

-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-# Copyright 2009 by Matteo Bertini
-
-from __future__ import division
-from __future__ import absolute_import
-
-import numpy as np
-
-try:
-    import rpy2.robjects as R
-    r = R.r
-except ImportError:
-    print "Unable to import rpy!!!"
-
-from .core import *
-
-
-class LearnARIMA(Node):
-    def __init__(self, train_timeseries, err, order, seasonal=False):
-        r.library('forecast')
-        self.train_timeseries = train_timeseries
-        self.err = err
-        self.order = r.c(*order)
-        if seasonal:
-            self.seasonal = r.list(order=r.c(*seasonal['order']), period=seasonal['period'])
-        else:
-            self.seasonal = seasonal
-        filename = mkdir(train_timeseries.name.replace('TimeSeries', 'LearnARIMA').replace('.gz', '.'+key(order, seasonal)+'.Rdata'))
-        super(LearnARIMA, self).__init__(filename, [train_timeseries])
-    def _run(self):
-        freq = int(60*24*7/self.train_timeseries.agg)
-        train_data = self.train_timeseries.load().data
-        rdata = r.ts(r.c(*train_data), freq=freq)
-        if self.seasonal:
-            modelt = r.Arima(rdata,
-                             order=self.order,
-                             seasonal=self.seasonal)
-        else:
-            modelt = r.Arima(rdata,
-                             order=self.order)
-        R.globalEnv['modelt'] = modelt
-        r("save(modelt, file={0!r})".format(self.name))
-    def _load(self):
-        r.load(self.name)
-        return r.modelt
-
-
-
-def error_ARIMA(train_timeseries, test_timeseries, err, seasonal=False):
-    freq = int(60*24*7/train_timeseries.agg)
-    train_data = train_timeseries.load().data
-    test_data = test_timeseries.load().data
-    #rdata = r.c(*list(np.hstack([train_data, test_data])))  # dead: immediately overwritten below
-    rdata = r.c(*train_data)
-    rdata = r.ts(rdata, freq=freq)
-    train_seq = r.seq(1,len(train_data))
-    test_seq = r.seq(len(train_data)+1,len(train_data)+len(test_data))
-    if seasonal:
-        arima = LearnARIMA(train_timeseries, err, order=(1,0,1), seasonal=dict(order=(0,1,1), period=96))
-        modelt = arima.load()
-        modelf = r.Arima(rdata,
-                         order=arima.order,
-                         seasonal=arima.seasonal,
-                         model=modelt)
-    else:
-        arima = LearnARIMA(train_timeseries, err, order=(3,1,3))
-        modelt = arima.load()
-        modelf = r.Arima(rdata,
-                         order=arima.order,
-                         model=modelt)
-    print seasonal, r.accuracy(modelf)
-    outofsample = r.ts(r.fitted(modelf).subset(test_seq))
-    return error_test(train_timeseries.agg, err)(np.array(list(rdata.subset(test_seq))), np.array(list(outofsample)))
-
-
-if __name__ == "__main__":
-    import sys
-    args = sys.argv[1:]
-    if "__file__" in globals():
-        if "eval" in args[:1]:
-            eval(" ".join(args[1:]))
-        else:
-            print "Running %r doctests..." % __file__
-            import doctest
-            doctest.testmod()
-            print "DONE!"
-
-
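As an aside, the evaluation idea in error_ARIMA above (fit on the training range, then score one-step-ahead fitted values on the held-out range) can be sketched without rpy2. A minimal numpy-only illustration of the mechanics; the series is plain white noise and phi, theta are hypothetical ARMA(1,1) coefficients standing in for the fitted R model:

    import numpy as np

    def one_step_forecasts(y, phi=0.5, theta=0.3):
        # one-step-ahead ARMA(1,1) predictions: y_hat[t] = phi*y[t-1] + theta*eps[t-1]
        eps = np.zeros(len(y))
        y_hat = np.zeros(len(y))
        for t in range(1, len(y)):
            y_hat[t] = phi * y[t - 1] + theta * eps[t - 1]
            eps[t] = y[t] - y_hat[t]
        return y_hat

    np.random.seed(0)
    y = np.random.normal(size=300)
    split = 200                                        # train/test boundary
    y_hat = one_step_forecasts(y)[split:]
    print(np.sqrt(np.mean((y[split:] - y_hat) ** 2)))  # out-of-sample RMSE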

old/arma.py

-from __future__ import division
-
-import numpy as np
-import pylab as pl
-import scipy.optimize as opt
-from scipy.signal import lfilter
-import rpy2.robjects as R
-import rpy2.robjects.numpy2ri
-r = R.r
-
-# xpl to python translation from http://fedc.wiwi.hu-berlin.de/xplore.php
-
-def genar(innov, startx, phi):
-    """
-    >>> innov = [0.1, 0.2, 0.3, 0.2, 0.1]
-    >>> a = 0.5
-    >>> b = 0.3
-    >>> genar(innov, a, b)
-    array([ 0.5    ,  0.35   ,  0.405  ,  0.3215 ,  0.19645])
-    """
-    innov = np.hstack([innov])
-    startx = np.hstack([startx])
-    phi = np.hstack([phi])
-    assert startx.shape == phi.shape
-    p = len(phi)
-    n = len(innov)
-    y = np.zeros(innov.shape)
-    y[:p] = startx
-    for t in range(p, n):
-        #y[t] = innov[t] + sum(phi[i]*y[t-i-1] for i in range(p))
-        y[t] = innov[t] + np.dot(phi, y[t-p:t][::-1])
-    return y
-
-def genarma(ar, ma, eps):
-    x = r.filter(eps, np.hstack([1, ma]), sides=1)
-    x = np.array(r.filter(x, np.hstack([ar]), method='recursive'))
-    x[~np.isfinite(x)] = 0.0
-    return x
-
-def _genarma(phi, theta, eps):
-    """
-    >>> innov = [0.1, 0.2, 0.3, 0.2, 0.1]
-    >>> a = 0.5
-    >>> b = 0.3
-    >>> _genarma(a, b, innov)
-    array([ 0.1 ,  0.28,  0.5 ,  0.54,  0.43])
-    >>> lfilter([1, b], [1, -a], innov)
-    array([ 0.1 ,  0.28,  0.5 ,  0.54,  0.43])
-    """
-    phi = np.hstack([phi])
-    theta = np.hstack([theta])
-    eps = np.hstack([eps])
-
-    a = np.hstack([1, -phi])
-    b = np.hstack([1, theta])
-    return lfilter(b, a, eps)
-
-def __genarma(phi, theta, eps):
-    p = len(phi)
-    q = len(theta)
-    T = len(eps)
-    y = np.zeros(eps.shape)
-    ma = np.array(eps)
-    y[0] = eps[0]
-    m = max(p,q)
-    # start values
-    for i in range(m):
-        ma[i+1] = eps[i+1] + np.dot(theta[0:min(i+1,q)], eps[max(0,i-q+1):i+1][::-1])
-        y[i+1] = ma[i+1] + np.dot(phi[0:min(i+1,p)], y[max(0,i-p+1):i+1][::-1])
-    # moving average
-    for j in range(q):
-        ma[m+1:T] += theta[j] * eps[m-j:T-j-1]
-    return genar(ma, y[:p], phi)
-
-def armapq(zz, p, q, innov=False):
-    def _armapq(a):
-        assert len(a) == p+q
-        phi = a[:p]
-        theta = a[p:]
-        u = genarma(-theta, -phi, zz)
-        if innov:
-            return u
-        else:
-            ssr = np.dot(u,u)
-            return ssr
-    return _armapq
-
-def armacls(x, p, q, parms=None, maxiter=250, eps=1e-7):
-    n = len(x)
-    assert (p or q)
-    if parms is None:
-        parms = np.random.normal(1.0/(p+q), size=(p+q))
-    #best = opt.fmin(armapq(x, p, q), parms, maxiter=maxiter, ftol=eps)
-    #best = opt.fmin_powell(armapq(x, p, q), parms, maxiter=maxiter, ftol=eps)
-    best = opt.leastsq(armapq(x, p, q, innov=True), parms, maxfev=maxiter, ftol=eps)[0]
-    #best = opt.fmin_bfgs(armapq(x, p, q), parms, maxiter=maxiter, gtol=eps)
-    #best = opt.fmin_cg(armapq(x, p, q), parms, maxiter=maxiter, gtol=eps)
-    #best = opt.fmin_l_bfgs_b(armapq(x, p, q), parms, approx_grad=1, maxfun=maxiter, pgtol=eps)[0]
-    #best = opt.fmin_tnc(armapq(x, p, q), parms, approx_grad=1, maxfun=maxiter, pgtol=eps)[0]
-    ssr = armapq(x, p, q)(best)
-    wnv = ssr/(n-p-q)
-    return best, wnv
-
-def demo(n=500):
-    np.random.seed(0)
-    eps = np.random.normal(1.0, size=n)
-    #eps = np.array([0.1, 0.2, 0.3, 0.2, 0.1])
-    x = genarma([0.5], [0.3], eps)
-
-    def evaluate(x):
-        np.random.seed()
-        parms, wnv = armacls(x, 1, 1)#, np.array([0.5, 0.3]))
-        return parms, wnv
-
-    evs = [evaluate(x) for i in range(10)]
-    for e in evs[:10]:
-        print e
-    pl.plot([e[0] for e in evs])
-    pl.show()
-    return locals()
-
-# untested stuff ##################################
-
-def rfilter():
-    """
-    Note that there is an implied coefficient 1 at lag 0
-    in the recursive filter, which gives
-    y[i] = x[i] + f[1]*y[i-1] + ... + f[p]*y[i-p]
-    """
-
-def arima_sim(innov, start_innov, (phi, d, psi)):
-    phi = np.hstack([phi])
-    psi = np.hstack([psi])
-    innov = np.hstack([innov])
-    start_innov = np.hstack([start_innov])
-    p = len(phi)
-    q = len(psi)
-    if p:
-        minroots = np.min(np.abs(np.roots(np.hstack([1, -phi]))))
-        if minroots <= 1:
-            raise ValueError("'ar' part of model is not stationary!")
-    if len(start_innov) < p + q:
-        raise ValueError("burn-in 'start_innov' must be as long as 'ar + ma'")
-    if (d != int(d) or d < 0):
-        raise ValueError("number of differences must be a non-negative integer")
-    x = lfilter(np.hstack([1, phi]), np.hstack([1, psi]), innov, zi=start_innov[-max(p,q):])[0]
-    if d > 0:
-        x = diffinv(x, d)
-    return x
-
-def arma_loglik(x, p, s=1):
-    n = len(x)
-    ss = n/2*np.log(2*np.pi*s**2) + 1/2*np.log(s**2)
-    ar = lambda t: sum(p[j]*p[t-j] for j in range(t))
-    ma = 1/(2*s**2) * sum((x[t] - ar(t))**2 for t in range(n))
-    return -ss -ma
-
-def diffinv(x, n, x0=None):
-    """
-    >>> diffinv(np.diff([1,2,3,5]), 1, x0=[1])
-    array([ 1.,  2.,  3.,  5.])
-    """
-    x = np.hstack([x])
-    if x0 is None:
-        x0 = np.zeros(n)
-    else:
-        x0 = np.hstack([x0])
-    if len(x0) != n:
-        raise ValueError("len(x0) != n, {0} != {1}".format(len(x0), n))
-    if len(x0) == 1:
-        res = np.hstack([x0, np.zeros(x.shape)])
-        for i in range(len(x)):
-            res[i+1] = res[i] + x[i]
-        return res
-    else:
-        return diffinv(diffinv(x, n-1, x0[1:]), 1, x0[:1])
-
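The trick in armapq above is that calling genarma with the AR and MA parts negated and swapped inverts the ARMA filter, recovering the innovations. A minimal self-contained check of that identity with scipy.signal.lfilter (the coefficients are arbitrary):

    import numpy as np
    from scipy.signal import lfilter

    np.random.seed(0)
    eps = np.random.normal(size=1000)
    phi, theta = 0.5, 0.3

    y = lfilter([1, theta], [1, -phi], eps)        # innovations -> ARMA(1,1) series
    eps_back = lfilter([1, -phi], [1, theta], y)   # inverse filter -> innovations
    print(np.allclose(eps, eps_back))              # True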

old/arma2.py

-import numpy as np
-
-def ar(phi, w, t):
-    p = len(phi)
-    return w[t+1] + sum(phi[j]*w[t-j] for j in range(p))
-
-def ma(psi, x, t):
-    q = len(psi)
-    return x[t+1] + sum(psi[j]*x[t-j] for j in range(q))
-
-def demo(phi, psi, w=None):
-    if w is None:
-        w = np.random.normal(phi, size=100)
-    x = np.zeros(100)
-    for i in range(100-1):
-        x[i+1] = ar(phi, w, i)
-        w[i+1] = ma(psi, x, i)
-    return x, w
-
-
-def wn(x, w, theta):
-    cache = {}
-    n = len(x)
-    q = len(theta)
-    def _wn(t):
-        if t in cache:
-            return cache[t]
-        if t <= 0 or t > n:
-            return 0
-        if t == 1:
-            wt = x[t]
-        else:
-            wnn = [_wn(t-k) for k in range(1,q+1)]
-            wt = x[t] - np.dot(theta[:len(wnn)], wnn)
-        cache[t] = wt
-        return wt
-    return _wn
-
-def xn(x, wnt, phi, theta):
-    cache = {}
-    n = len(x)
-    p = len(phi)
-    q = len(theta)
-    def _xn(t):
-        if t in cache:
-            return cache[t]
-        if t <= 0:
-            return 0
-        elif 1 <= t <= n:
-            xt = x[t]
-        else:
-            xnn = [x[t-k] for k in range(1,p+1)]
-            wnn = [wnt(t-k) for k in range(1,q+1)]
-            xt = np.dot(phi[:len(xnn)], xnn) + np.dot(theta[:len(wnn)], wnn)
-        cache[t] = xt
-        return xt
-    return _xn
-
-size = 10
-x = np.random.normal(size=size)#np.sin(np.linspace(0, 2*np.pi, size))
-w = np.random.normal(size=size)
-theta = np.array([0.5, 0.5])
-phi = np.array([0.5, 0.5])
-wt = wn(x, w, theta)
-xt = xn(x, wt, phi, theta)
-for t in range(10):
-    wtt = wt(t)
-    x[t] = xt(t)
-    print t, x
-

old/armonia-sync.py

-#!/bin/env python
-
-import os
-import sys
-import time
-import signal
-signal.signal(signal.SIGINT, signal.SIG_DFL)
-
-from PyQt4.Qt import *
-
-
-config = {os.path.abspath('.'): 'unison -batch -path timeseries'}
-
-class QDirSync(QObject):
-    def __init__(self, config):
-        super(QDirSync, self).__init__()  # init QObject before connecting signals
-        self.config = config
-        self.fswatcher = QFileSystemWatcher(config.keys())
-        self.connect(self.fswatcher, SIGNAL('directoryChanged(const QString &)'), self.sync)
-        self.connect(self.fswatcher, SIGNAL('fileChanged(const QString &)'), self.sync)
-        print "watched dirs", [d for d in self.fswatcher.directories()]
-        print "watched files", [f for f in self.fswatcher.files()]
-    def sync(self, path):
-        print time.time(), "sync", path
-
-
-if __name__ == "__main__":
-    app = QCoreApplication(sys.argv)
-    watcher = QDirSync(config)
-    sys.exit(app.exec_())

old/best_params.py

-import optfunc
-
-@optfunc.main
-def best_params(filename, score='acc'):
-    data = {}
-    execfile(filename, {}, data)
-
-    folds = set(x['fold'] for x in data['gridsearch'])
-
-    for fold in folds:
-        selected = [s for s in data['gridsearch'] if s['fold'] == fold]
-        best = sorted(selected, key=lambda x: x[score])[-1]
-        print best

old/bestn.py

-import sys
-
-bestn = None
-for line in sys.stdin:
-    if '#' in line:
-        if bestn is not None:
-            sys.stdout.write(' '+bestn)
-        bestn = line
-    else:
-        sys.stdout.write(line.strip().split('\t')[-1])
-sys.stdout.write(' '+bestn)

old/col_stats.sh

-for id in 716084 716091 716092 716875 717087 717093 717095 717110 717119 717123 717125 717322 717355 717357 717954 718127 759576 763468; do
-    f=images/day-$id-speed-20080301-20081001-workdays-a15-s15-t50.txt
-    echo $f
-    python sandbox/column_stats.py < $f
-done

old/conditional_arma.py

-
-import numpy as np
-import scipy.optimize as opt
-from scipy.signal import lfilter
-
-def genarma(phi, theta, eps):
-    """
-    >>> innov = [0.1, 0.2, 0.3, 0.2, 0.1]
-    >>> a = 0.5
-    >>> b = 0.3
-    >>> genarma(a, b, innov)
-    array([ 0.1 ,  0.28,  0.5 ,  0.54,  0.43])
-    """
-    phi = np.hstack([phi])
-    theta = np.hstack([theta])
-    eps = np.hstack([eps])
-
-    a = np.hstack([1, -phi])
-    b = np.hstack([1, theta])
-    return lfilter(b, a, eps)
-
-def demo_arma():
-    print "demo_arma"
-    eps0 = np.random.normal(0.1, size=2000)
-    phi = [0.5]
-    theta = [0.3]
-
-    y = genarma(phi, theta, eps0)
-    eps = np.zeros(y.shape)
-    p = len(phi)
-    q = len(theta)
-    bootstrap = len(eps)/10 + max(p,q)
-
-    def sqerr(args, p=p, q=q, y=y, eps=eps, bootstrap=bootstrap):
-        _phi = args[:p][::-1]
-        _theta = args[p:][::-1]
-        for t in xrange(max(p,q), len(eps)):
-            eps[t] = y[t] - np.dot(y[t-p:t], _phi) - np.dot(eps[t-q:t], _theta)
-        return np.dot(eps[bootstrap:], eps[bootstrap:]) / len(eps[bootstrap:])
-
-    args = np.array(phi+theta)
-    print args
-    print sqerr(args)
-    print "optimize"
-    best = opt.fmin(sqerr, [0.5]*2)
-    print best
-
-def ssqerr(ar, ma, y, eps, bootstrap):
-    assert len(y) == len(eps)
-    def _ssqerr(args, innov=False):
-        assert len(args) == len(ar) + len(ma)
-        _ar = dict(zip(ar, args[:len(ar)]))
-        _ma = dict(zip(ma, args[len(ar):]))
-        for t in xrange(max(max(ar or [0]), max(ma or [0])), len(eps)):
-            eps[t] = y[t] - np.sum(y[t-i]*_ar[i] for i in _ar) - np.sum(eps[t-i]*_ma[i] for i in _ma)
-        if not innov:
-            sigma2 = np.var(eps)
-            T = len(eps[bootstrap:])
-            ssr = np.dot(eps[bootstrap:], eps[bootstrap:])
-            # concentrated Gaussian negative log-likelihood, as in arima_squared_error below
-            return T / 2 * np.log(sigma2) + ssr / (2*sigma2)
-        else:
-            return eps
-    return _ssqerr
-
-def arima_squared_error(y, eps, order, sorder, period, bootstrap=None):
-    assert len(y) == len(eps)
-    if bootstrap is None:
-        bootstrap = len(eps)/10 + max(order) + max(sorder)*period
-    def _ssqerr(args, innov=False):
-        assert len(args) == order[0]+order[-1] + sorder[0]+sorder[-1]
-        ar = dict(zip(range(1,order[0]+1)+list(np.arange(1,sorder[0]+1)*period), args[:order[0]+sorder[0]]))
-        ma = dict(zip(range(1,order[-1]+1)+list(np.arange(1,sorder[-1]+1)*period), args[order[0]+sorder[0]:]))
-        if np.random.normal() > 0.9:
-            print ar
-            print ma
-        if (order[1],sorder[1]) == (0,0):
-            D = lambda i: y[i]
-        elif (order[1],sorder[1]) == (1,0): # (1 - B)
-            D = lambda i: y[i] - y[i-1]
-        elif (order[1],sorder[1]) == (0,1): # (1 - B^s)
-            D = lambda i: y[i] - y[i-period]
-        elif (order[1],sorder[1]) == (1,1): # (1 - B)(1 - B^s) = (1 - B - B^s + B^(s+1))
-            D = lambda i: y[i] - y[i-1] - y[i-period] + y[i-period-1]
-        for t in xrange(max(max(order[0],order[-1]), max(sorder[0], sorder[-1])*period) + order[1] + sorder[1]*period, len(eps)):
-            eps[t] = D(t) - np.sum(D(t-i)*ar[i] for i in ar) - np.sum(eps[t-i]*ma[i] for i in ma)
-            if not np.isfinite(eps[t]):
-                eps[t] = 0
-        reps = np.ma.masked_values(eps, 0)
-        if not innov:
-            T = len(reps[bootstrap:])
-            ssr = np.ma.dot(reps[bootstrap:], reps[bootstrap:])
-            sigma2 = ssr/T
-            return T / 2 * np.log(sigma2) + ssr / (2*sigma2) 
-        else:
-            return reps
-    return _ssqerr
-
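A hedged smoke test of the objective above on a synthetic series (reusing genarma from this file; the pure ARMA(1,1) configuration and the starting guesses are illustrative, not the settings used for the PeMS data below):

    def demo_objective():
        np.random.seed(0)
        y = genarma([0.5], [0.3], np.random.normal(size=2000))  # synthetic ARMA(1,1)
        eps = np.zeros(y.shape)
        objective = arima_squared_error(y, eps, order=(1, 0, 1), sorder=(0, 0, 0), period=1)
        best = opt.fmin(objective, np.array([0.2, 0.2]), maxiter=200)
        print(best)  # should approach [0.5, 0.3]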
-def demo_sarma():
-    print "demo_sarma"
-    eps0 = np.random.normal(0.1, size=2000)
-    phi = [0.5]+[0.]*9+[0.1]
-    theta = [0.3]+[0.]*9+[0.6]
-    y = genarma(phi, theta, eps0)
-    eps = np.zeros(y.shape)
-    ar = (1,11)
-    ma = (1,11)
-    bootstrap = len(eps)/10 + max(max(ar or [0]), max(ma or [0]))
-    args = np.array([phi[i-1] for i in ar]+[theta[i-1] for i in ma])
-    print args
-    print ssqerr(ar, ma, y, eps, bootstrap)(args)
-    print "optimize"
-    best = opt.fmin(ssqerr(ar, ma, y, eps, bootstrap), [0.5]*4)
-    print best
-
-import pems.dataset
-
-flow1 = pems.dataset.TimeSeries(717954, 'flow', '20080301-20080401', 15, 15).load().data[1:-1]
-flow2 = pems.dataset.TimeSeries(717954, 'flow', '20080401-20080501', 15, 15).load().data[1:-1]
-for i in range(1, len(flow1)):
-    if not np.isfinite(flow1[i]):
-        if np.isfinite(flow1[i-1]):
-            flow1[i] = flow1[i-1]
-flow2[-np.isfinite(flow2)] = flow2[np.isfinite(flow2)].mean()
-y = np.diff(flow1)
-y_test = np.diff(flow2)
-"""
-y = flow1
-y_test = flow2
-"""
-eps = np.zeros(y.shape)
-eps_test = np.zeros(y_test.shape)
-ar = (1,7*24*60/15)
-ma = (1,7*24*60/15)
-bootstrap = len(eps)/10 + max(max(ar or [0]), max(ma or [0]))
-
-errf = ssqerr(ar, ma, y, eps, bootstrap)
-errf_test = ssqerr(ar, ma, y_test, eps_test, bootstrap)
-
-flow1 = pems.dataset.TimeSeries(717954, 'flow', '20080301-20080401', 15, 15).load().data
-flow2 = pems.dataset.TimeSeries(717954, 'flow', '20080401-20080501', 15, 15).load().data
-eps1 = np.zeros(flow1.shape)
-eps2 = np.zeros(flow2.shape)
-errf1 = arima_squared_error(flow1, eps1, order=(1,0,1), sorder=(0,1,1), period=7*24*60/15)
-errf2 = arima_squared_error(flow2, eps2, order=(1,0,1), sorder=(0,1,1), period=7*24*60/15)

old/count_column.py

-import sys
-from collections import defaultdict
-import pyximport
-pyximport.install()
-import count_column_c
-
-def count_column(lines, cols):
-    counts = defaultdict(int)
-    for line in lines:
-        if line.startswith('|'):
-            continue
-        row = [c.strip() for c in line.split(",")]
-        cols = [(i if i>=0 else len(row)+i) for i in cols]
-        key = ','.join([c for i,c in enumerate(row) if i in cols])
-        counts[key] += 1
-    for key in sorted(counts):
-        yield "{key}: {count}\n".format(key=key, count=counts[key])
-
-if __name__ == '__main__':
-    if len(sys.argv) != 2:
-        print "Usage:"
-        print "  cat file | count_column cols"
-        sys.exit(1)
-    cols = [int(x) for x in sys.argv[1].split(',')]
-    #sys.stdout.writelines(count_column(sys.stdin, cols))
-    count_column_c.count_column(sys.stdin, cols)

old/count_column_c.pyx

-#cython: infer_types=True
-
-import sys
-from collections import defaultdict
-
-
-def count_column(lines, cols):
-    counts = defaultdict(int)
-    for line in lines:
-        if line.startswith('|'):
-            continue
-        row = [c.strip() for c in line.split(",")]
-        cols = [(i if i>=0 else len(row)+i) for i in cols]
-        key = ','.join([c for i,c in enumerate(row) if i in cols])
-        counts[key] += 1
-    for key in sorted(counts):
-        sys.stdout.write("{key}: {count}\n".format(key=key, count=counts[key]))
-

old/do_benchmarks_delta_ssamm.sh

-PYTHONPATH=. python2.6 sandbox/ssamm_report.py eval "do_benchmarks_flow(None, delta=delta)" 
-PYTHONPATH=. python2.6 sandbox/ssamm_report.py eval "do_benchmarks_flow(None, delta=delta*2)"
-PYTHONPATH=. python2.6 sandbox/ssamm_report.py eval "do_benchmarks_flow(None, delta=delta*3)"
-
-PYTHONPATH=. python2.6 sandbox/ssamm_report.py eval "do_benchmarks_traveltime(None, delta=delta)" 
-PYTHONPATH=. python2.6 sandbox/ssamm_report.py eval "do_benchmarks_traveltime(None, delta=delta*2)"
-PYTHONPATH=. python2.6 sandbox/ssamm_report.py eval "do_benchmarks_traveltime(None, delta=delta*3)"

old/doc/Traffic-Forecast

-\documentclass[10pt,a4paper,english]{article}
-\usepackage{babel}
-\usepackage{ae}
-\usepackage{aeguill}
-\usepackage{shortvrb}
-\usepackage[latin1]{inputenc}
-\usepackage{tabularx}
-\usepackage{longtable}
-\setlength{\extrarowheight}{2pt}
-\usepackage{amsmath}
-\usepackage{graphicx}
-\usepackage{color}
-\usepackage{multirow}
-\usepackage{ifthen}
-\usepackage[DIV12]{typearea}
-% generated by Docutils <http://docutils.sourceforge.net/>
-\newlength{\admonitionwidth}
-\setlength{\admonitionwidth}{0.9\textwidth}
-\newlength{\docinfowidth}
-\setlength{\docinfowidth}{0.9\textwidth}
-\newlength{\locallinewidth}
-\newcommand{\optionlistlabel}[1]{\bf #1 \hfill}
-\newenvironment{optionlist}[1]
-{\begin{list}{}
-  {\setlength{\labelwidth}{#1}
-   \setlength{\rightmargin}{1cm}
-   \setlength{\leftmargin}{\rightmargin}
-   \addtolength{\leftmargin}{\labelwidth}
-   \addtolength{\leftmargin}{\labelsep}
-   \renewcommand{\makelabel}{\optionlistlabel}}
-}{\end{list}}
-\newlength{\lineblockindentation}
-\setlength{\lineblockindentation}{2.5em}
-\newenvironment{lineblock}[1]
-{\begin{list}{}
-  {\setlength{\partopsep}{\parskip}
-   \addtolength{\partopsep}{\baselineskip}
-   \topsep0pt\itemsep0.15\baselineskip\parsep0pt
-   \leftmargin#1}
- \raggedright}
-{\end{list}}
-% begin: floats for footnotes tweaking.
-\setlength{\floatsep}{0.5em}
-\setlength{\textfloatsep}{\fill}
-\addtolength{\textfloatsep}{3em}
-\renewcommand{\textfraction}{0.5}
-\renewcommand{\topfraction}{0.5}
-\renewcommand{\bottomfraction}{0.5}
-\setcounter{totalnumber}{50}
-\setcounter{topnumber}{50}
-\setcounter{bottomnumber}{50}
-% end floats for footnotes
-% some commands, that could be overwritten in the style file.
-\newcommand{\rubric}[1]{\subsection*{~\hfill {\it #1} \hfill ~}}
-\newcommand{\titlereference}[1]{\textsl{#1}}
-% end of "some commands"
-\ifthenelse{\isundefined{\hypersetup}}{
-\usepackage[colorlinks=true,linkcolor=blue,urlcolor=blue]{hyperref}
-}{}
-\title{Traffic Forecasting\\
-\large{Matteo Bertini, Marco Lippi}
-}
-\author{}
-\date{}
-\hypersetup{
-pdftitle={Traffic Forecasting}
-}
-\raggedbottom
-\begin{document}
-\maketitle
-
-\setlength{\locallinewidth}{\linewidth}
-
-\textbf{Abstract}: This article presents a comparative benchmark of various traffic flow forecasting methods. The presented methods are applied to traffic data from both high-volume California highways and medium-volume Italian highways, highlighting common characteristics and differences.
-
-
-%___________________________________________________________________________
-
-\hypertarget{introduction}{}
-\pdfbookmark[0]{Introduction}{introduction}
-\section*{Introduction}
-\label{introduction}
-
-In the last century the focus in transportation has shifted from building infrastructure where it was missing to developing the existing infrastructure with operational efficiency as the guide. Infrastructure developed with this new goal is commonly referred to as an Intelligent Transport System (ITS).
-
-
-%___________________________________________________________________________
-
-\hypertarget{performance-metrics}{}
-\pdfbookmark[1]{Performance metrics}{performance-metrics}
-\subsection*{Performance metrics}
-\label{performance-metrics}
-
-The operational efficiency of a road infrastructure can be observed through three main quantities:
-\begin{itemize}
-\item {} 
-\emph{Vehicle Miles Travelled} (VMT)
-
-\item {} 
-\emph{Vehicle Hours Travelled} (VHT)
-
-\item {} 
-\emph{Travel Time variability}
-
-\end{itemize}
-
-A good transport system exhibits a high VMT to VHT ratio and good predictability of travel time.
-
-
-%___________________________________________________________________________
-
-\hypertarget{short-term-prediction-problem}{}
-\pdfbookmark[0]{Short-Term prediction problem}{short-term-prediction-problem}
-\section*{Short-Term prediction problem}
-\label{short-term-prediction-problem}
-
-A common task in traffic forecasting is short-term prediction: given a time series of samples, make a forecast \emph{k} steps into the future.
-Let $V_t$ be a discrete time series of vehicular traffic flow rates at a specific detection station. The univariate short-term traffic flow prediction problem is:
-\begin{quote}
-
-$\hat{V}_{t+k} = f(V_t, V_{t-1}, V_{t-2}, ...), \hspace{5ex} k = 1,2,3,...$
-\end{quote}
-
-where $\hat{V}_{t+k}$ is the prediction of $V_{t+k}$ computed at time $t$.
-
-
-%___________________________________________________________________________
-
-\hypertarget{data-sources}{}
-\pdfbookmark[1]{Data sources}{data-sources}
-\subsection*{Data sources}
-\label{data-sources}
-
-Fixed location roadway detection systems usually provide three basic traffic stream measurements: flow, speed and occupancy.
-Usually two of these quantities are measured and the third is computed.
-
-A common source of information is the magnetic loop: flow and occupancy are measured directly, while speed can be computed through the g-factor \cite{gfactor01} (an estimate of the unknown vehicle length). The g-factor itself can be estimated using other sources such as double loops or laser speed sensors.
-
-On toll highways the identity of the traveling vehicle is known, so it is possible to use exact counts and travel times and to estimate the occupancy from the vehicle class on the ticket.
-
-
-%___________________________________________________________________________
-
-\hypertarget{forecasting-methods}{}
-\pdfbookmark[0]{Forecasting methods}{forecasting-methods}
-\section*{Forecasting methods}
-\label{forecasting-methods}
-
-
-%___________________________________________________________________________
-
-\hypertarget{heuristics}{}
-\pdfbookmark[1]{Heuristics}{heuristics}
-\subsection*{Heuristics}
-\label{heuristics}
-
-To build a reasonable benchmark we start by presenting some easy-to-understand heuristic techniques that require minimal configuration and provide the baseline performance for our predictors.
-
-
-%___________________________________________________________________________
-
-\hypertarget{random-walk}{}
-\pdfbookmark[2]{Random Walk}{random-walk}
-\subsubsection*{Random Walk}
-\label{random-walk}
-
-If we consider the traffic condition data stream a 2D random walk, then the best forecast for the next observation is simply the most recent observation $\hat{V}_{t+1} = V_t$. This is not the case for real traffic flow, but changes over a short time span are often small, so this approach can give a reasonable prediction.
-
-
-%___________________________________________________________________________
-
-\hypertarget{historical-average}{}
-\pdfbookmark[2]{Historical Average}{historical-average}
-\subsubsection*{Historical Average}
-\label{historical-average}
-
-Considering the daily and weekly periodic patterns exhibited by the traffic condition data stream, another reasonable heuristic is a historical average with exponential smoothing.
-\begin{quote}
-
-$\hat{V}_{t+T} = \alpha V_t + (1-\alpha) \hat{V}_{t}$
-\end{quote}
-
-If the parameter $\alpha$ were estimated from the data, the results would be similar to an ARIMA(0,1,1) model, so we use an expert-estimated parameter $\alpha \approx 0.2$ (as in \cite{williams:664}).
-\begin{center}\begin{sffamily}
-\fbox{\parbox{\admonitionwidth}{
-\textbf{\large Note}
-\vspace{2mm}
-
-This heuristic is somewhat pointless: it is less powerful than an ARIMA model, and Williams arbitrarily decides not to optimize $\alpha$ on the data...
-}}
-\end{sffamily}
-\end{center}
-
-
-%___________________________________________________________________________
-
-\hypertarget{deviation-from-historical-average}{}
-\pdfbookmark[2]{Deviation from Historical Average}{deviation-from-historical-average}
-\subsubsection*{Deviation from Historical Average}
-\label{deviation-from-historical-average}
-
-This last method combines the previous two. The smoothed historical average is computed by
-\begin{quote}
-
-$S_{t} = \alpha V_t + (1-\alpha) S_{t-T}$
-\end{quote}
-
-and the one-step prediction is
-\begin{quote}
-
-$\hat{V}_{t+1} = \frac{V_t}{S_t} \cdot S_{t-T}$
-\end{quote}
-
-
-%___________________________________________________________________________
-
-\hypertarget{seasonal-arima-process}{}
-\pdfbookmark[1]{Seasonal ARIMA Process}{seasonal-arima-process}
-\subsection*{Seasonal ARIMA Process}
-\label{seasonal-arima-process}
-
-Williams in \cite{williams:664} shows that it is reasonable to model a univariate traffic condition data stream as a seasonal autoregressive integrated moving average model.
-\begin{quote}
-
-$ARIMA (p,d,q) (P,D,Q)_s$
-\end{quote}
-
-The relative goodness of fit of the models in the estimation period can be examined using either the Akaike Information Criterion (AIC) or the Bayesian Information Criterion (BIC). The two selection rules may be calculated as:
-\begin{quote}
-
-$AIC = -2 \log(L) + 2m$
-\end{quote}
-
-where $L$ is the likelihood and $m$ is the number of parameters of the model.
-\begin{quote}
-
-$BIC = -2 \log(L) + m \log(n)$
-\end{quote}
-
-where $n$ is the number of observations in the series.
-
-Under the assumption that the model errors or disturbances are normally distributed, the likelihood becomes
-\begin{quote}
-
-$-2 \log(L) \approx n(1+\log(2\pi)) + n \log(s^2)$
-\end{quote}
-
-where $s^2$ is the variance of the residuals after fitting the model.
-
-
-%___________________________________________________________________________
-
-\hypertarget{holidays-effect}{}
-\pdfbookmark[2]{Holidays effect}{holidays-effect}
-\subsubsection*{Holidays effect}
-\label{holidays-effect}
-
-A day-of-week correlation plot shows a high cross-correlation among workdays (Mon-Fri) and a low cross-correlation between workdays and weekends.
-Given this, we decided to split the dataset into workdays and holidays and to consider only the daily period.
-
-
-%___________________________________________________________________________
-
-\hypertarget{in-sample-and-off-sample}{}
-\pdfbookmark[2]{In-sample and off-sample}{in-sample-and-off-sample}
-\subsubsection*{In-sample and off-sample}
-\label{in-sample-and-off-sample}
-
-Using these criteria it is possible to fit the model both in-sample and off-sample.
-The same holds in the forecasting phase: it is possible to make a one-step forecast (using the known past) or a recursive forecast (using previous estimates).
-
-
-%___________________________________________________________________________
-
-\hypertarget{svr-regression}{}
-\pdfbookmark[1]{SVR Regression}{svr-regression}
-\subsection*{SVR Regression}
-\label{svr-regression}
-
-Support Vector Regression (SVR) is a classical regression model in machine learning. The method is commonly used off-sample to avoid overfitting.
-The kernel used in the tests is the RBF kernel.
-The prediction is usually done with a one-step forecast.
-
-
-%___________________________________________________________________________
-
-\hypertarget{time-of-day-kernel}{}
-\pdfbookmark[2]{Time-of-day kernel}{time-of-day-kernel}
-\subsubsection*{Time-of-day kernel}
-\label{time-of-day-kernel}
-
-A first variation on the theme is to add a periodic time-of-day feature. This can be viewed as an exogenous input that guides the traffic flow evolution during the day.
-
-
-%___________________________________________________________________________
-
-\hypertarget{relational-features}{}
-\pdfbookmark[2]{Relational features}{relational-features}
-\subsubsection*{Relational features}
-\label{relational-features}
-
-Other experiments have been done using the relational nature of the traffic flow. There is a relation between what is happening at a particular location A and what is happening at a nearby location B at a nearby time (see the velocity fields in \cite{948660} for an intuitive view of the relation).
-A simple idea is to also use the data stream of node B as features for node A.
-
-
-%___________________________________________________________________________
-
-\hypertarget{learning-curve}{}
-\pdfbookmark[2]{Learning curve}{learning-curve}
-\subsubsection*{Learning curve}
-\label{learning-curve}
-
-The traffic data stream is neither strictly nor weakly stationary, but the drift is not too fast. We can find an optimal dataset size that gives the model enough information without invalidating the stationarity hypothesis too much.
-
-
-%___________________________________________________________________________
-
-\hypertarget{predictive-performances}{}
-\pdfbookmark[0]{Predictive Performances}{predictive-performances}
-\section*{Predictive Performances}
-\label{predictive-performances}
-
-Besides standard error functions like RMSE and MAD, MAPE is an error function that fits traffic flow well.
-
-$\mbox{MAPE} = \frac{1}{n}\sum_{t=1}^n \left| \frac{A_t-F_t}{A_t}\right|$
-
-A commonly used variant of the plain formula excludes from the computation all the samples where $A_t \le K$. This avoids a possible division by zero and, for traffic flow, excludes from the error computation all the samples where the flow is below the given threshold.
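For reference, a minimal implementation of the thresholded variant (a sketch; the threshold $K$ is a free parameter):

    import numpy as np

    def mape(actual, forecast, threshold=0.0):
        # mean absolute percentage error, ignoring samples with actual <= threshold
        actual = np.asarray(actual, dtype=float)
        forecast = np.asarray(forecast, dtype=float)
        mask = actual > threshold
        return np.mean(np.abs((actual[mask] - forecast[mask]) / actual[mask]))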
-
-\bibliographystyle{plain}
-\bibliography{tf}
-
-\end{document}

old/doc/Traffic-Forecast.aux

-\relax 
-\ifx\hyper@anchor\@undefined
-\global \let \oldcontentsline\contentsline
-\gdef \contentsline#1#2#3#4{\oldcontentsline{#1}{#2}{#3}}
-\global \let \oldnewlabel\newlabel
-\gdef \newlabel#1#2{\newlabelxx{#1}#2}
-\gdef \newlabelxx#1#2#3#4#5#6{\oldnewlabel{#1}{{#2}{#3}}}
-\AtEndDocument{\let \contentsline\oldcontentsline
-\let \newlabel\oldnewlabel}
-\else
-\global \let \hyper@last\relax 
-\fi
-
-\citation{gfactor01}
-\select@language{english}
-\@writefile{toc}{\select@language{english}}
-\@writefile{lof}{\select@language{english}}
-\@writefile{lot}{\select@language{english}}
-\newlabel{introduction}{{}{1}{Introduction\relax }{section*.1}{}}
-\newlabel{performance-metrics}{{}{1}{Performance metrics\relax }{section*.2}{}}
-\newlabel{short-term-prediction-problem}{{}{1}{Short-Term prediction problem\relax }{section*.3}{}}
-\newlabel{data-sources}{{}{1}{Data sources\relax }{section*.4}{}}
-\citation{williams:664}
-\citation{williams:664}
-\newlabel{forecasting-methods}{{}{2}{Forecasting methods\relax }{section*.5}{}}
-\newlabel{heuristics}{{}{2}{Heuristics\relax }{section*.6}{}}
-\newlabel{random-walk}{{}{2}{Random Walk\relax }{section*.7}{}}
-\newlabel{historical-average}{{}{2}{Historical Average\relax }{section*.8}{}}
-\newlabel{deviation-from-historical-average}{{}{2}{Deviation from Historical Average\relax }{section*.9}{}}
-\newlabel{seasonal-arima-process}{{}{2}{Seasonal ARIMA Process\relax }{section*.10}{}}
-\citation{948660}
-\bibstyle{plain}
-\bibdata{tf}
-\bibcite{gfactor01}{1}
-\bibcite{948660}{2}
-\bibcite{williams:664}{3}
-\newlabel{holidays-effect}{{}{3}{Holidays effect\relax }{section*.11}{}}
-\newlabel{in-sample-and-off-sample}{{}{3}{In-sample and off-sample\relax }{section*.12}{}}
-\newlabel{svr-regression}{{}{3}{SVR Regression\relax }{section*.13}{}}
-\newlabel{time-of-day-kernel}{{}{3}{Time-of-day kernel\relax }{section*.14}{}}
-\newlabel{relational-features}{{}{3}{Relational features\relax }{section*.15}{}}
-\newlabel{learning-curve}{{}{3}{Learning curve\relax }{section*.16}{}}
-\newlabel{predictive-performances}{{}{3}{Predictive Performances\relax }{section*.17}{}}

old/doc/Traffic-Forecast.bbl

-\begin{thebibliography}{1}
-
-\bibitem{gfactor01}
-Z.~Jia, C.~Chen, B.~Coifman, and P.~Varaiya.
-\newblock The pems algorithms for accurate, real-time estimates of g-factors
-  and speeds from single-loop detectors.
-\newblock {\em IEEE 4th International ITS Conference}, February 2001.
-
-\bibitem{948660}
-J.~Rice and E.~van Zwet.
-\newblock A simple and effective method for predicting travel times on
-  freeways.
-\newblock In {\em Intelligent Transportation Systems, 2001. Proceedings. 2001
-  IEEE}, pages 227--232, 2001.
-
-\bibitem{williams:664}
-Billy~M. Williams and Lester~A. Hoel.
-\newblock Modeling and forecasting vehicular traffic flow as a seasonal arima
-  process: Theoretical basis and empirical results.
-\newblock {\em Journal of Transportation Engineering}, 129(6):664--672, 2003.
-
-\end{thebibliography}

old/doc/Traffic-Forecast.blg

-This is BibTeX, Version 0.99c (Web2C 7.5.6)
-The top-level auxiliary file: Traffic-Forecast.aux
-The style file: plain.bst
-Database file #1: tf.bib
-You've used 3 entries,
-            2118 wiz_defined-function locations,
-            516 strings with 4598 characters,
-and the built_in function-call counts, 1092 in all, are:
-= -- 103
-> -- 49
-< -- 1
-+ -- 21
-- -- 16
-* -- 73
-:= -- 180
-add.period$ -- 9
-call.type$ -- 3
-change.case$ -- 17
-chr.to.int$ -- 0
-cite$ -- 3
-duplicate$ -- 44
-empty$ -- 86
-format.name$ -- 16
-if$ -- 233
-int.to.chr$ -- 0
-int.to.str$ -- 3
-missing$ -- 3
-newline$ -- 18
-num.names$ -- 6
-pop$ -- 18
-preamble$ -- 1
-purify$ -- 14
-quote$ -- 0
-skip$ -- 36
-stack$ -- 0
-substring$ -- 66
-swap$ -- 13
-text.length$ -- 1
-text.prefix$ -- 0
-top$ -- 0
-type$ -- 12
-warning$ -- 0
-while$ -- 9
-width$ -- 4
-write$ -- 34

old/doc/Traffic-Forecast.out

-\BOOKMARK [0][-]{introduction.0}{Introduction}{}
-\BOOKMARK [1][-]{performance-metrics.1}{Performance metrics}{introduction.0}
-\BOOKMARK [0][-]{short-term-prediction-problem.0}{Short-Term prediction problem}{}
-\BOOKMARK [1][-]{data-sources.1}{Data sources}{short-term-prediction-problem.0}
-\BOOKMARK [0][-]{forecasting-methods.0}{Forecasting methods}{}
-\BOOKMARK [1][-]{heuristics.1}{Heuristics}{forecasting-methods.0}
-\BOOKMARK [2][-]{random-walk.2}{Random Walk}{heuristics.1}
-\BOOKMARK [2][-]{historical-average.2}{Historical Average}{heuristics.1}
-\BOOKMARK [2][-]{deviation-from-historical-average.2}{Deviation from Historical Average}{heuristics.1}
-\BOOKMARK [1][-]{seasonal-arima-process.1}{Seasonal ARIMA Process}{forecasting-methods.0}
-\BOOKMARK [2][-]{holidays-effect.2}{Holidays effect}{seasonal-arima-process.1}
-\BOOKMARK [2][-]{in-sample-and-off-sample.2}{In-sample and off-sample}{seasonal-arima-process.1}
-\BOOKMARK [1][-]{svr-regression.1}{SVR Regression}{forecasting-methods.0}
-\BOOKMARK [2][-]{time-of-day-kernel.2}{Time-of-day kernel}{svr-regression.1}
-\BOOKMARK [2][-]{relational-features.2}{Relational features}{svr-regression.1}
-\BOOKMARK [2][-]{learning-curve.2}{Learning curve}{svr-regression.1}
-\BOOKMARK [0][-]{predictive-performances.0}{Predictive Performances}{}

old/doc/Traffic-Forecast.pdf

Binary file removed.

old/doc/Traffic-Forecast.tex

-\documentclass[10pt,a4paper,english]{article}
-\usepackage{babel}
-\usepackage{ae}
-\usepackage{aeguill}
-\usepackage{shortvrb}
-\usepackage[latin1]{inputenc}
-\usepackage{tabularx}
-\usepackage{longtable}
-\setlength{\extrarowheight}{2pt}
-\usepackage{amsmath}
-\usepackage{graphicx}
-\usepackage{color}
-\usepackage{multirow}
-\usepackage{ifthen}
-\usepackage[DIV12]{typearea}
-% generated by Docutils <http://docutils.sourceforge.net/>
-\newlength{\admonitionwidth}
-\setlength{\admonitionwidth}{0.9\textwidth}
-\newlength{\docinfowidth}
-\setlength{\docinfowidth}{0.9\textwidth}
-\newlength{\locallinewidth}
-\newcommand{\optionlistlabel}[1]{\bf #1 \hfill}
-\newenvironment{optionlist}[1]
-{\begin{list}{}
-  {\setlength{\labelwidth}{#1}
-   \setlength{\rightmargin}{1cm}
-   \setlength{\leftmargin}{\rightmargin}
-   \addtolength{\leftmargin}{\labelwidth}
-   \addtolength{\leftmargin}{\labelsep}
-   \renewcommand{\makelabel}{\optionlistlabel}}
-}{\end{list}}
-\newlength{\lineblockindentation}
-\setlength{\lineblockindentation}{2.5em}
-\newenvironment{lineblock}[1]
-{\begin{list}{}
-  {\setlength{\partopsep}{\parskip}
-   \addtolength{\partopsep}{\baselineskip}
-   \topsep0pt\itemsep0.15\baselineskip\parsep0pt
-   \leftmargin#1}
- \raggedright}
-{\end{list}}
-% begin: floats for footnotes tweaking.
-\setlength{\floatsep}{0.5em}
-\setlength{\textfloatsep}{\fill}
-\addtolength{\textfloatsep}{3em}
-\renewcommand{\textfraction}{0.5}
-\renewcommand{\topfraction}{0.5}
-\renewcommand{\bottomfraction}{0.5}
-\setcounter{totalnumber}{50}
-\setcounter{topnumber}{50}
-\setcounter{bottomnumber}{50}
-% end floats for footnotes
-% some commands, that could be overwritten in the style file.
-\newcommand{\rubric}[1]{\subsection*{~\hfill {\it #1} \hfill ~}}
-\newcommand{\titlereference}[1]{\textsl{#1}}
-% end of "some commands"
-\ifthenelse{\isundefined{\hypersetup}}{
-\usepackage[colorlinks=true,linkcolor=blue,urlcolor=blue]{hyperref}
-}{}
-\title{Traffic Forecasting\\
-\large{Matteo Bertini, Marco Lippi}
-}
-\author{}
-\date{}
-\hypersetup{
-pdftitle={Traffic Forecasting}
-}
-\raggedbottom
-\begin{document}
-\maketitle
-
-\setlength{\locallinewidth}{\linewidth}
-
-\textbf{Abstract}: This article presents a comparative benchmark of various traffic flow forecasting methods. The presented methods are applied to traffic data from both high-volume California highways and medium-volume Italian highways, highlighting common characteristics and differences.
-
-
-%___________________________________________________________________________
-
-\hypertarget{introduction}{}
-\pdfbookmark[0]{Introduction}{introduction}
-\section*{Introduction}
-\label{introduction}
-
-In the last century the focus in transportation has shifted from building infrastructure where it was missing to developing the existing infrastructure with operational efficiency as the guide. Infrastructure developed with this new goal is commonly referred to as an Intelligent Transport System (ITS).
-
-
-%___________________________________________________________________________
-
-\hypertarget{performance-metrics}{}
-\pdfbookmark[1]{Performance metrics}{performance-metrics}
-\subsection*{Performance metrics}
-\label{performance-metrics}
-
-The operational efficiency of a road infrastructure can be observed through three main quantities:
-\begin{itemize}
-\item {} 
-\emph{Vehicle Miles Travelled} (VMT)
-
-\item {} 
-\emph{Vehicle Hours Travelled} (VHT)
-
-\item {} 
-\emph{Travel Time variability}
-
-\end{itemize}
-
-A good transport system exhibits a high VMT to VHT ratio and good predictability of travel time.
-
-
-%___________________________________________________________________________
-
-\hypertarget{short-term-prediction-problem}{}
-\pdfbookmark[0]{Short-Term prediction problem}{short-term-prediction-problem}
-\section*{Short-Term prediction problem}
-\label{short-term-prediction-problem}
-
-A common task in traffic forecasting is short-term prediction: given a time series of samples, make a forecast \emph{k} steps into the future.
-Let $V_t$ be a discrete time series of vehicular traffic flow rates at a specific detection station. The univariate short-term traffic flow prediction problem is:
-\begin{quote}
-
-$\hat{V}_{t+k} = f(V_t, V_{t-1}, V_{t-2}, ...), \hspace{5ex} k = 1,2,3,...$
-\end{quote}
-
-where $\hat{V}_{t+k}$ is the prediction of $V_{t+k}$ computed at time $t$.
-
-
-%___________________________________________________________________________
-
-\hypertarget{data-sources}{}
-\pdfbookmark[1]{Data sources}{data-sources}
-\subsection*{Data sources}
-\label{data-sources}
-
-Fixed location roadway detection systems usually provide three basic traffic stream measurements: flow, speed and occupancy.
-Usually two of these quantities are measured and the third is computed.
-
-A common source of information is the magnetic loop: flow and occupancy are measured directly, while speed can be computed through the g-factor \cite{gfactor01} (an estimate of the unknown vehicle length). The g-factor itself can be estimated using other sources such as double loops or laser speed sensors.
-
-On toll highways the identity of the traveling vehicle is known, so it is possible to use exact counts and travel times and to estimate the occupancy from the vehicle class on the ticket.
-
-
-%___________________________________________________________________________
-
-\hypertarget{forecasting-methods}{}
-\pdfbookmark[0]{Forecasting methods}{forecasting-methods}
-\section*{Forecasting methods}
-\label{forecasting-methods}
-
-
-%___________________________________________________________________________
-
-\hypertarget{heuristics}{}
-\pdfbookmark[1]{Heuristics}{heuristics}
-\subsection*{Heuristics}
-\label{heuristics}
-
-To build a reasonable benchmark we start by presenting some easy-to-understand heuristic techniques that require minimal configuration and provide the baseline performance for our predictors.
-
-
-%___________________________________________________________________________
-
-\hypertarget{random-walk}{}
-\pdfbookmark[2]{Random Walk}{random-walk}
-\subsubsection*{Random Walk}
-\label{random-walk}
-
-If we consider the traffic condition data stream a 2D random walk, then the best forecast for the next observation is simply the most recent observation $\hat{V}_{t+1} = V_t$. This is not the case for real traffic flow, but changes over a short time span are often small, so this approach can give a reasonable prediction.
-
-
-%___________________________________________________________________________
-
-\hypertarget{historical-average}{}
-\pdfbookmark[2]{Historical Average}{historical-average}
-\subsubsection*{Historical Average}
-\label{historical-average}
-
-Considering the daily and weekly periodic patterns exhibited by the traffic condition data stream, another reasonable heuristic is a historical average with exponential smoothing.
-\begin{quote}
-
-$\hat{V}_{t+T} = \alpha V_t + (1-\alpha) \hat{V}_{t}$
-\end{quote}
-
-If the parameter $\alpha$ were estimated from the data, the results would be similar to an ARIMA(0,1,1) model, so we use an expert-estimated parameter $\alpha \approx 0.2$ (as in \cite{williams:664}).
-\begin{center}\begin{sffamily}
-\fbox{\parbox{\admonitionwidth}{
-\textbf{\large Note}
-\vspace{2mm}
-
-This heuristic is somewhat pointless: it is less powerful than an ARIMA model, and Williams arbitrarily decides not to optimize $\alpha$ on the data...
-}}
-\end{sffamily}
-\end{center}
-
-
-%___________________________________________________________________________
-
-\hypertarget{deviation-from-historical-average}{}
-\pdfbookmark[2]{Deviation from Historical Average}{deviation-from-historical-average}
-\subsubsection*{Deviation from Historical Average}
-\label{deviation-from-historical-average}
-
-This last method combines the previous two. The smoothed historical average is computed by
-\begin{quote}
-
-$S_{t} = \alpha V_t + (1-\alpha) S_{t-T}$
-\end{quote}
-
-and the one-step prediction is
-\begin{quote}
-
-$\hat{V}_{t+1} = \frac{V_t}{S_t} \cdot S_{t-T}$
-\end{quote}
-
-
-%___________________________________________________________________________
-
-\hypertarget{seasonal-arima-process}{}
-\pdfbookmark[1]{Seasonal ARIMA Process}{seasonal-arima-process}
-\subsection*{Seasonal ARIMA Process}
-\label{seasonal-arima-process}
-
-Williams in \cite{williams:664} shows that it is reasonable to model a univariate traffic condition data stream as a seasonal autoregressive integrated moving average model.
-\begin{quote}
-
-$ARIMA (p,d,q) (P,D,Q)_s$
-\end{quote}
-
-The relative goodness of fit of the models in the estimation period can be examined using either the Akaike Information Criterion (AIC) or the Bayesian Information Criterion (BIC). The two selection rules may be calculated as:
-\begin{quote}
-
-$AIC = -2 \log(L) + 2m$
-\end{quote}
-
-where $L$ is the likelihood and $m$ is the number of parameters of the model.
-\begin{quote}
-
-$BIC = -2 \log(L) + m \log(n)$
-\end{quote}
-
-where $n$ is the number of observations in the series.
-
-Under the assumption that the model errors or disturbances are normally distributed, the likelihood becomes
-\begin{quote}
-
-$-2 \log(L) \approx n(1+\log(2\pi)) + n \log(s^2)$
-\end{quote}
-
-where $s^2$ is the variance of the residuals after fitting the model.
-
-
-%___________________________________________________________________________
-
-\hypertarget{holidays-effect}{}
-\pdfbookmark[2]{Holidays effect}{holidays-effect}
-\subsubsection*{Holidays effect}
-\label{holidays-effect}
-
-A day-of-week correlation plot shows a high cross-correlation among workdays (Mon-Fri) and a low cross-correlation between workdays and weekends.
-Given this, we decided to split the dataset into workdays and holidays and to consider only the daily period.
-
-
-%___________________________________________________________________________
-
-\hypertarget{in-sample-and-off-sample}{}
-\pdfbookmark[2]{In-sample and off-sample}{in-sample-and-off-sample}
-\subsubsection*{In-sample and off-sample}
-\label{in-sample-and-off-sample}
-
-Using these criteria it is possible to fit the model both in-sample and off-sample.
-The same holds in the forecasting phase: it is possible to make a one-step forecast (using the known past) or a recursive forecast (using previous estimates).
-
-
-%___________________________________________________________________________
-
-\hypertarget{svr-regression}{}
-\pdfbookmark[1]{SVR Regression}{svr-regression}
-\subsection*{SVR Regression}
-\label{svr-regression}
-
-Support Vector Regression (SVR) is a classical regression model in machine learning. The method is commonly used off-sample to avoid overfitting.
-The kernel used in the tests is the RBF kernel.
-The prediction is usually done with a one-step forecast.
-
-
-%___________________________________________________________________________
-
-\hypertarget{time-of-day-kernel}{}
-\pdfbookmark[2]{Time-of-day kernel}{time-of-day-kernel}
-\subsubsection*{Time-of-day kernel}
-\label{time-of-day-kernel}
-
-A first variation on the theme is to add a periodic time-of-day feature. This can be viewed as an exogenous input that guides the traffic flow evolution during the day.
-
-
-%___________________________________________________________________________
-
-\hypertarget{relational-features}{}
-\pdfbookmark[2]{Relational features}{relational-features}
-\subsubsection*{Relational features}
-\label{relational-features}
-
-Other experiments have been done using the relational nature of the traffic flow. There is a relation between what is happening at a particular location A and what is happening at a nearby location B at a nearby time (see the velocity fields in \cite{948660} for an intuitive view of the relation).
-A simple idea is to also use the data stream of node B as features for node A.
-
-
-%___________________________________________________________________________
-
-\hypertarget{learning-curve}{}
-\pdfbookmark[2]{Learning curve}{learning-curve}
-\subsubsection*{Learning curve}
-\label{learning-curve}
-
-The traffic data stream is neither strictly nor weakly stationary, but the drift is not too fast. We can find an optimal dataset size that gives the model enough information without invalidating the stationarity hypothesis too much.
-
-
-%___________________________________________________________________________
-
-\hypertarget{predictive-performances}{}
-\pdfbookmark[0]{Predictive Performances}{predictive-performances}
-\section*{Predictive Performances}
-\label{predictive-performances}
-
-Besides standard error functions like RMSE and MAD, MAPE is an error function that fits traffic flow well.
-
-$\mbox{MAPE} = \frac{1}{n}\sum_{t=1}^n \left| \frac{A_t-F_t}{A_t}\right|$
-
-A commonly used variant of the plain formula excludes from the computation all the samples where $A_t \le K$. This avoids a possible division by zero and, for traffic flow, excludes from the error computation all the samples where the flow is below the given threshold.
-
-\bibliographystyle{plain}
-\bibliography{tf}
-
-\end{document}

old/doc/Traffic-Forecast.txt

-
-.. role:: raw-math(raw)
-  :format: latex html
-
-.. default-role:: raw-math
-
-===================
-Traffic Forecasting
-===================
-
----------------------------
-Matteo Bertini, Marco Lippi
----------------------------
-
-**Abstract**: This article presents a comparative benchmark of various traffic flow forecasting methods. The presented methods are applied to traffic data from both high-volume California highways and medium-volume Italian highways, highlighting common characteristics and differences.
-
-
-Introduction
-============
-
-In the last century the focus in transportation has shifted from building infrastructure where it was missing to developing the existing infrastructure with operational efficiency as the guide. Infrastructure developed with this new goal is commonly referred to as an Intelligent Transport System (ITS).
-
-
-Performance metrics
--------------------
-
-The operational efficiency of a road infrastructure can be observed through three main quantities:
-
-- *Vehicle Miles Travelled* (VMT)
-- *Vehicle Hours Travelled* (VHT)
-- *Travel Time variability*
-
-A good transport system exhibits a high VMT to VHT ratio and good predictability of travel time.
-
-
-Short-Term prediction problem
-=============================
-
-A common task in traffic forecasting is short-term prediction: given a time series of samples, make a forecast *k* steps into the future.
-Let `$V_t$` be a discrete time series of vehicular traffic flow rates at a specific detection station. The univariate short-term traffic flow prediction problem is:
-
-  `$\hat{V}_{t+k} = f(V_t, V_{t-1}, V_{t-2}, ...), \hspace{5ex} k = 1,2,3,...$`
-
-where `$\hat{V}_{t+k}$` is the prediction of `$V_{t+k}$` computed at time `$t$`.
-
-
-Data sources
-------------
-
-
-Fixed location roadway detection systems usually provide three basic traffic stream measurements: flow, speed and occupancy.
-Usually two of these quantities are measured and the third is computed.
-
-A common source of information is the magnetic loop: flow and occupancy are measured directly, while speed can be computed through the g-factor `\cite{gfactor01}` (an estimate of the unknown vehicle length). The g-factor itself can be estimated using other sources such as double loops or laser speed sensors.
-
-On toll highways the identity of the traveling vehicle is known, so it is possible to use exact counts and travel times and to estimate the occupancy from the vehicle class on the ticket.
-
-Forecasting methods
-===================
-
-Heuristics
-----------
-
-To build a reasonable benchmark we start by presenting some easy-to-understand heuristic techniques that require minimal configuration and provide the baseline performance for our predictors.
-
-Random Walk
-~~~~~~~~~~~
-
-If we consider the traffic condition data stream a 2D random walk, then the best forecast for the next observation is simply the most recent observation `$\hat{V}_{t+1} = V_t$`. This is not the case for real traffic flow, but changes over a short time span are often small, so this approach can give a reasonable prediction.
-
-Historical Average
-~~~~~~~~~~~~~~~~~~
-
-Considering the daily and weekly periodic patterns exhibited by the traffic condition data stream, another reasonable heuristic is a historical average with exponential smoothing.
-
-  `$\hat{V}_{t+T} = \alpha V_t + (1-\alpha) \hat{V}_{t}$`
-
-If the parameter `$\alpha$` is estimated from the data, the results will be similar to an ARIMA(0,1,1) model, so we use an expert-estimated value `$\alpha \approx 0.2$` (as in `\cite{williams:664}`).
-
-.. note::
-  This heuristic is of limited use: it is less powerful than an ARIMA model, and Williams arbitrarily decides not to optimize `$\alpha$` on the data...
-
-Deviation from Historical Average
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-This last method combines the previous two. The smoothed historical average is computed by
-
-  `$S_{t} = \alpha V_t + (1-\alpha) S_{t-T}$`
-
-and the one-step prediction is
-
-  `$\hat{V}_{t+1} = \frac{V_t}{S_t} \cdot S_{t-T}$`
-
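-A minimal sketch of the three heuristics (``v`` is a 1D NumPy array of flow samples, ``T`` the number of samples in one period; seeding the smoothed series with the first raw period is our own choice)::
-
-  import numpy as np
-
-  def random_walk(v):
-      # \hat{V}_{t+1} = V_t: forecast the most recent observation
-      return v[:-1], v[1:]                      # (forecasts, targets)
-
-  def smoothed_history(v, T, alpha=0.2):
-      # S_t = alpha*V_t + (1 - alpha)*S_{t-T}
-      s = np.array(v, dtype=float)
-      for t in range(T, len(v)):
-          s[t] = alpha * v[t] + (1 - alpha) * s[t - T]
-      return s
-
-  def deviation_forecast(v, T, alpha=0.2):
-      # \hat{V}_{t+1} = (V_t / S_t) * S_{t-T}; near-zero S_t samples are
-      # excluded anyway by the thresholded MAPE used for evaluation
-      s = smoothed_history(v, T, alpha)
-      t = np.arange(T, len(v) - 1)
-      return v[t] / s[t] * s[t - T], v[t + 1]
-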
-Seasonal ARIMA Process
-----------------------
-
-Williams in `\cite{williams:664}` shows that it is reasonable to model a univariate traffic condition data stream as a seasonal autoregressive integrated moving average model.
-
-  `$ARIMA (p,d,q) (P,D,Q)_s$`
-
-The relative goodness of fit of the models in the estimation period can be examined using either the Akaike Information Criterion (AIC) or the Bayesian Information Criterion (BIC). The two selection rules may be calculated as:
-
-  `$AIC = -2 \log(L) + 2m$`
-
-where `$L$` is the likelihood and `$m$` is the number of parameters of the model.
-
-  `$BIC = -2 \log(L) + m \log(n)$`
-
-where `$n$` is the number of observations in the series.
-
-Under the assumption that the model errors or disturbances are normally distributed, the log-likelihood term becomes
-
-  `$-2 \log(L) \approx n(1+\log(2\pi)) + n \log(s^2)$`
-
-where `$s^2$` is the variance of the residuals after fitting the model.
-
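-Combining the formulas above, a minimal sketch of the two criteria (``residuals`` holds the in-sample residuals of a fitted model with ``m`` parameters)::
-
-  import numpy as np
-
-  def information_criteria(residuals, m):
-      n = len(residuals)
-      s2 = np.var(residuals)                    # variance of the residuals
-      neg2logL = n * (1 + np.log(2 * np.pi)) + n * np.log(s2)
-      return neg2logL + 2 * m, neg2logL + m * np.log(n)   # (AIC, BIC)
-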
-Holidays effect
-~~~~~~~~~~~~~~~
-
-A day-of-week correlation plot shows a high cross-correlation among workdays (Mon-Fri) and a low cross-correlation between workdays and weekends.
-Given this, we decided to split the dataset into workdays and holidays and to consider only the daily period.
-
-In-sample and out-of-sample
-~~~~~~~~~~~~~~~~~~~~~~~~~~~
-
-Using these criteria it is possible to fit the model both in-sample and out-of-sample.
-The same holds in the forecasting phase: it is possible to make a one-step forecast (using the known past) or a recursive forecast (using previous estimates).
-
-SVR Regression
---------------
-
-Support Vector Regression (SVR) is a classical regression model in machine learning; the model is commonly validated out-of-sample to avoid overfitting.
-The kernel used in our tests is the RBF kernel.
-The prediction is usually done with a one-step forecast.
-
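-As an illustration, a one-step SVR forecaster can be sketched with scikit-learn (an assumption: not necessarily the library used in our experiments), reusing the ``lag_embed`` helper above; ``train_flow`` and ``test_flow`` are flow arrays and the hyper-parameter values are placeholders::
-
-  from sklearn.svm import SVR
-
-  model = SVR(kernel='rbf', C=2.0, gamma=0.1, epsilon=0.01)
-  X_train, y_train = lag_embed(train_flow, p=12, k=1)
-  model.fit(X_train, y_train)
-  X_test, y_test = lag_embed(test_flow, p=12, k=1)
-  y_hat = model.predict(X_test)               # one-step forecasts
-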
-Time-of-day kernel
-~~~~~~~~~~~~~~~~~~
-
-A first variation on the theme is to add a periodic time-of-day feature. This can be viewed as an exogenous input that guides the traffic flow evolution during the day.
-
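-One possible encoding (an assumption here, not necessarily the kernel actually used) maps the minute of the day onto the unit circle, so that midnight and 23:59 end up adjacent::
-
-  import numpy as np
-
-  def time_of_day_features(minute_of_day):
-      # map minute-of-day (0..1439) to a (sin, cos) pair on the unit circle
-      phase = 2 * np.pi * np.asarray(minute_of_day) / 1440.0
-      return np.column_stack([np.sin(phase), np.cos(phase)])
-
-  # appended to the lagged-flow features:
-  # X = np.hstack([X, time_of_day_features(minutes)])
-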
-Relational features
-~~~~~~~~~~~~~~~~~~~
-
-Other experiments exploit the relational nature of the traffic flow: what is happening at a particular location A is related to what is happening at a nearby location B at a nearby time (see the velocity fields in `\cite{948660}` for an intuitive view of this relation).
-A simple idea is to use the flow data of node B as additional features for node A, as sketched below.
-
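-A minimal sketch, assuming time-aligned flow arrays ``flow_a`` and ``flow_b`` for the two stations and the ``lag_embed`` helper above::
-
-  import numpy as np
-
-  X_a, y_a = lag_embed(flow_a, p=12, k=1)     # lags of station A
-  X_b, _ = lag_embed(flow_b, p=12, k=1)       # same lags of station B
-  X = np.hstack([X_a, X_b])                   # relational features for node A
-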
-Learning curve
-~~~~~~~~~~~~~~
-
-The traffic data stream is neither strictly nor weakly stationary, but the drift is not too fast. We can find an optimal dataset size that gives the model enough information without invalidating the stationarity hypothesis too much.
-
-Predictive Performances
-=======================
-
-Besides standard error functions like RMSE and MAD, MAPE is an error function that fits traffic flow data well.
-
-`$\mbox{MAPE} = \frac{1}{n}\sum_{t=1}^n \left| \frac{A_t-F_t}{A_t}\right|$`
-
-A commonly used variant of the plain formula excludes from the computation all the samples where `$A_t \le K$`. This avoids a possible division by zero and, considering the nature of traffic flow, removes from the error computation all the samples where the flow is below the given threshold.
-
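-A minimal sketch of the thresholded variant (the threshold ``K`` is a placeholder to be chosen per dataset)::
-
-  import numpy as np
-
-  def mape(actual, forecast, K=0.0):
-      a = np.asarray(actual, dtype=float)
-      f = np.asarray(forecast, dtype=float)
-      mask = a > K                      # drop samples where flow <= K
-      return np.mean(np.abs((a[mask] - f[mask]) / a[mask]))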
-
-`\bibliographystyle{plain}`
-`\bibliography{tf}`
-

old/doc/gdoc.py

-#!/usr/bin/env python
-# Copyright 2009 Matteo Bertini
-
-import re
-import urllib
-import subprocess
-import gdata.docs.service
-
-NULL = open("/dev/null", "w")
-
-# Create a client class which will make HTTP requests with Google Docs server.
-client = gdata.docs.service.DocsService()
-# get google password from keychain
-username = "matteo.bertini@gmail.com"
-stdout, stderr = subprocess.Popen(["security", "find-internet-password", "-a", username, "-g"], stdout=NULL, stderr=subprocess.PIPE).communicate()
-password = re.findall('password: "([^"]*)"', stderr)[0]
-# Authenticate using your Google Docs email address and password.
-client.ClientLogin(username, password)
-
-rst_path = 'Traffic-Forecast.txt'
-tex_path = rst_path.replace(".txt", "")
-print 'Downloading document to %s...' % (rst_path,)
-client.DownloadDocument('document:dg4jnjs4_381gmwtvvgb', rst_path)
-print "Downloading bibliografy..."
-urllib.urlretrieve("http://www.bibsonomy.org/bib/user/naufraghi/traffic-forecast-2009", "tf.bib")
-
-print "Generating latex..."
-subprocess.check_call(["rst2latex-2.6.py", "--output-encoding-error-handler", "replace", rst_path], stdout=open(tex_path+".tex", "w+"))
-print "Generating pdf..."
-subprocess.check_call(["/opt/local/bin/pdflatex", tex_path], stdout=NULL)
-subprocess.check_call(["/opt/local/bin/bibtex", tex_path], stdout=NULL)
-subprocess.check_call(["/opt/local/bin/pdflatex", tex_path], stdout=NULL)
-subprocess.check_call(["/opt/local/bin/pdflatex", tex_path], stdout=NULL)
-print "Done!"
-

old/doc/tf.bib

-@inproceedings{948660,
-        title = {A simple and effective method for predicting travel times on freeways},
-        author = {J. Rice and E. van Zwet},
-        booktitle = {Intelligent Transportation Systems, 2001. Proceedings. 2001 IEEE},
-        pages = {227-232},
-        year = 2001,
-        abstract = {We present a method to predict the time that will be needed to
-traverse a certain stretch of freeway when departure is at a certain
-time in the future. The prediction is done on the basis of the current
-traffic situation in combination with historical data. We argue that,
-for our purpose, the current situation of a stretch of freeway is well
-summarized by the 'current status travel time'. This is the travel time
-that would result if one were to depart immediately and no significant
-changes in the traffic would occur. This current status travel time can
-be estimated from single or double loop detectors, video data, probe
-vehicles or by any other means. Our prediction method arises from the
-empirical fact that there exists a linear relationship between any
-future travel time and the current status travel time. The slope and
-intercept of this relationship is observed to change subject to the time
-of day and the time until departure. This naturally leads to a
-prediction scheme by means of linear regression with time varying
-coefficients},
-        biburl = {http://www.bibsonomy.org/bibtex/25a00d723fa86b33a5e60a442cb28540f/naufraghi},
-        keywords = {analysis current coefficients freeway linear prediction regression road statistical status theory time traffic traffic-forecast-2009 travel varying},
-        doi = {10.1109/ITSC.2001.948660}}
-
-@article{williams:664,
-        title = {Modeling and Forecasting Vehicular Traffic Flow as a Seasonal ARIMA Process: Theoretical Basis and Empirical Results},
-        author = {Billy M. Williams and Lester A. Hoel},
-        journal = {Journal of Transportation Engineering},
-        volume = 129,
-        number = 6,
-        pages = {664-672},
-        publisher = {ASCE},
-        year = 2003,
-        url = {http://link.aip.org/link/?QTE/129/664/1},
-        biburl = {http://www.bibsonomy.org/bibtex/2d5e37f06cba05eb42337155fa11c00c6/naufraghi},
-        keywords = {data_analysis flow intelligent_structures road_traffic road_vehicles traffic-forecast-2009},
-        doi = {10.1061/(ASCE)0733-947X(2003)129:6(664)}}
-
-@article{gfactor01,
-        title = {The PeMS algorithms for accurate, real-time estimates of g-factors and speeds from single-loop detectors},
-        author = {Z. Jia and C. Chen and B. Coifman and P. Varaiya},
-        journal = {IEEE 4th International ITS Conference},
-        year = 2001,
-        month = Feb,
-        url = {http://pems.eecs.berkeley.edu/Papers/gfactoritsc.pdf},
-        abstract = {This paper presents the PeMS algorithms for the accurate, adaptive, real-time estimation of the g-factor and vehicle speeds from single-loop detector data.},
-        biburl = {http://www.bibsonomy.org/bibtex/2b9d121baa587863994748218d63dc2c7/naufraghi},
-        keywords = {estimation real-time traffic traffic-forecast-2009}}
-

old/error_location.py

-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-# Copyright 2009 by Matteo Bertini
-
-import os
-import sys
-from contextlib import contextmanager
-
-from tsutils import *
-import plots
-from task import mkdir, Node
-import dataset
-import learn
-import pems.dataset
-
-import features_selection
-import model_selection
-
-
-agg = 15
-step = 15
-delta = 15
-wsize = 180
-wstep = 15
-C = 2.0
-gamma = 0.1
-hour_gamma = 24
-epsilon = 0.01
-err = 'MAPE100'
-
-
-def get_periods():
-    train_period, test_period = features_selection.follow_periods(4,11)[0]
-    return train_period, test_period
-
-def get_stations(train_period, test_period, max_missing=0.1):
-    max_stations = 50
-    stations = set(pems.dataset.ObservedStations(train_period, agg, step, min_observed=75, max_missing=max_missing, max_stations=max_stations).load())
-    stations = stations.intersection(pems.dataset.ObservedStations(test_period, agg, step, min_observed=75, max_missing=max_missing, max_stations=max_stations).load())
-    return list(stations)
-
-def day_mean(station, train_period, test_period, weekdays='holidays', kernel='RBF'):
-    assert weekdays in ('holidays', 'workdays')
-    assert kernel in ('RBF', 'HoursRBF')
-    train_timeseries = pems.dataset.TimeSeries(station, 'flow', train_period, agg, step)
-    train_window = dataset.TimeSeriesWindow(train_timeseries, wsize=180, wstep=15)
-    train_features = learn.QuantityFeatures(train_window, delta=15)
-    test_timeseries = pems.dataset.TimeSeries(station, 'flow', test_period, agg, step)
-    test_window = dataset.TimeSeriesWindow(test_timeseries, wsize=180, wstep=15)
-    test_features = learn.QuantityFeatures(test_window, delta=15, stats=train_window.stats.load())
-
-    if kernel == 'RBF':
-        model = learn.LearnRBF(train_features, C, gamma, epsilon)
-    else:
-        model = learn.LearnHoursRBF(train_features, C, gamma, hour_gamma, epsilon)
-    predict = learn.PredictSVM(model, test_features)
-    dates, predicted_labels, test_labels = zip(*predict.load())
-
-    p = ts.time_series(predicted_labels, dates=dates, freq='T')
-    l = ts.time_series(test_labels, dates=dates, freq='T')
-    err = (p-l).fill_missing_dates()[::wstep]
-
-    if weekdays == 'holidays':
-        selected = err[err.weekday >= 5]
-    else:
-        selected = err[err.weekday < 5]
-    wdays = np.ma.masked_invalid([selected[i*96:(i+1)*96] for i in range(len(selected)//96)])
-    wmean = np.ma.abs(wdays).mean(axis=0)
-    return wmean
-
-
-if __name__ == "__main__":
-    import sys
-    args = sys.argv[1:]
-    if "__file__" in globals():
-        if "eval" in args[:1]:
-            eval(" ".join(args[1:]))
-        elif "profile" in args[:1]:
-            import cProfile, pstats
-            filename = '/tmp/{__file__}.profile'.format(**locals())
-            cProfile.run(" ".join(args[1:]), filename)
-            s = pstats.Stats(filename)
-            s.sort_stats('cumulative')
-            s.print_stats(0.1)
-        else:
-            print "Running %r doctests..." % __file__
-            import doctest
-            doctest.testmod()
-            print "DONE!"
-
-

old/euler-213-2.py

-import collections
-import decimal
-
-def init(size) :
-	d = collections.defaultdict(int)
-	one = decimal.Decimal(1)
-	for i in range(size) :
-		for j in range(size) :
-			d[i,j] = one
-	return d
-
-def neighbors_of(i, j, size) :
-	def legal(i, j) :
-		return (0 <= i < size) and (0 <= j < size)
-	results = []
-	for p in [(-1,0), (1,0), (0,-1), (0,1)] :
-		newi, newj = i + p[0], j + p[1]
-		if legal(newi, newj) :
-			results.append((newi, newj))
-	return results
-
-def odds_of(i, j, size) :
-	def iscorner(i, j) :
-		return i in [0, size-1] and j in [0, size-1]
-	def isedge(i, j) :
-		return i in [0, size-1] or j in [0, size-1]
-	if iscorner(i, j) : return decimal.Decimal("0.5")
-	elif isedge(i, j) : return decimal.Decimal(1)/decimal.Decimal(3)
-	else : return decimal.Decimal("0.25")
-
-def euler213(size, iterations) :
-	d = init(size)
-	neighbors = dict()
-	odds = dict()
-	for i in range(size) :
-		for j in range(size) :
-			neighbors[i, j] = neighbors_of(i, j, size)
-			odds[i, j] = odds_of(i, j, size)
-
-	for m in range(iterations) :
-		dst = dict()
-		for i in range(size) :
-			for j in range(size) :
-				dst[i, j] = sum(odds[n] * d[n] for n in neighbors[i, j])
-		d = dst
-	return sum(max(0, 1 - d[k]) for k in d)
-
-if __name__ == "__main__" :
-	import time
-
-	s = time.clock()
-	size = 30
-	trial_iters = 50
-	c = euler213(size, trial_iters)
-	print ("{0} {1}".format(size, c))
-	e = time.clock()
-	print ("Elapsed: {0}".format(e - s))
-

old/euler-213.py

-import collections
-import random
-
-def init(size) :
-	d = collections.defaultdict(int)
-	for i in range(size) :
-		for j in range(size) :
-			d[i,j] = 1
-	return d
-
-LEFT, RIGHT, UP, DOWN = range(4)
-alldirs = (LEFT, RIGHT, UP, DOWN)
-
-def directions(i, j, size) :
-	if i not in (0, size-1) and j not in (0, size-1) :
-		return alldirs
-	else :
-		results = set(alldirs)
-		if i == 0 :
-			results -= set([UP])
-		if i == size-1 :
-			results -= set([DOWN])
-		if j == 0 :
-			results -= set([LEFT])
-		if j == size-1 :
-			results -= set([RIGHT])
-		return tuple(ss for ss in results)
-
-def newPos(i, j, jump, size) :
-	if jump == LEFT :
-		j -= 1
-	elif jump == RIGHT :
-		j += 1
-	elif jump == UP :
-		i -= 1
-	elif jump == DOWN :
-		i += 1
-	return (i, j)
-
-def euler213(size, iterations) :
-	d = init(size)
-	for m in range(iterations) :
-		dst = collections.defaultdict(int)
-		for i in range(size) :
-			for j in range(size) :
-				dirs = directions(i, j, size)
-				for x in range(d[i,j]) :
-					jumpint = random.randint(0, len(dirs)-1)
-					newi, newj = newPos(i, j, dirs[jumpint], size)
-					dst[newi, newj] += 1
-		d = dst
-	return size*size - len(d)
-
-if __name__ == "__main__" :
-	import time
-	iters, trial_iters = 1000, 50
-	
-	for size in range(6, 24, 2) :
-		s = time.clock()
-		c = sum(euler213(size, trial_iters) for _ in range(iters))
-		print ("{0} {1} {2}".format(size, c, iters))
-		e = time.clock()
-		print ("Elapsed: {0}".format(e - s))
-

old/extract.py

-
-
-txt = """
-riga 1
-riga 2
-START
-riga 3
-riga 4
-riga 5
-END
-riga 6
-riga 7
-START
-riga 8
-END
-riga 9
-"""
-
-lines = txt.strip().splitlines()
-
-def fromSTART(ilist):
-    while 'START' not in ilist.next():
-        continue
-    return ilist
-
-def toEND(ilist):
-    for row in ilist:
-        if 'END' in row:
-            return
-        yield row
-
-ilines = iter(lines)
-print list(toEND(fromSTART(ilines)))
-print lines[lines.index('START')+1:lines.index('END')]
-
-def start2end(lines):
-    ilines = iter(lines)
-    while 1:
-        yield list(toEND(fromSTART(ilines)))
-
-print list(start2end(lines))
-
-import itertools
-def extract(lines):
-    tokens = ['START', 'END']
-    c = itertools.cycle(tokens)
-    token = 'END'
-    for l in lines:
-        if l in tokens:
-            token = c.next()
-        elif token == 'START':
-            yield l
-
-print list(extract(lines))

old/gp1.py

-from pymc.gp import *
-
-# Generate mean: a simple quadratic mean function
-def quadfun(x, a, b, c):
-    return a*x**2 + b*x + c
-
-M = Mean(eval_fun=quadfun, a=1., b=.5, c=2.)
-
-####-Plot-####
-if __name__ == '__main__':
-    from pylab import *
-    x = arange(-1., 1., 0.1)
-    clf()
-    plot(x, M(x), 'k-')
-    xlabel('x')
-    ylabel('M(x)')
-    axis('tight')
-    show()
-    show() 
-

old/move-downloads.sh

-mkdirhier pems_data/DownloadPeriod/715938 && ln -v pems_data/DownloadPeriod/station+715938/quantity+flow/period+2008-01/715938-2008-01.flow.gz pems_data/DownloadPeriod/715938/715938-2008-01.flow.gz
-mkdirhier pems_data/DownloadPeriod/715938 && ln -v pems_data/DownloadPeriod/station+715938/quantity+flow/period+2008-02/715938-2008-02.flow.gz pems_data/DownloadPeriod/715938/715938-2008-02.flow.gz
-mkdirhier pems_data/DownloadPeriod/715938 && ln -v pems_data/DownloadPeriod/station+715938/quantity+flow/period+2008-03/715938-2008-03.flow.gz pems_data/DownloadPeriod/715938/715938-2008-03.flow.gz
-mkdirhier pems_data/DownloadPeriod/715938 && ln -v pems_data/DownloadPeriod/station+715938/quantity+flow/period+2008-04/715938-2008-04.flow.gz pems_data/DownloadPeriod/715938/715938-2008-04.flow.gz
-mkdirhier pems_data/DownloadPeriod/715938 && ln -v pems_data/DownloadPeriod/station+715938/quantity+flow/period+2008-05/715938-2008-05.flow.gz pems_data/DownloadPeriod/715938/715938-2008-05.flow.gz
-mkdirhier pems_data/DownloadPeriod/715938 && ln -v pems_data/DownloadPeriod/station+715938/quantity+flow/period+2008-06/715938-2008-06.flow.gz pems_data/DownloadPeriod/715938/715938-2008-06.flow.gz
-mkdirhier pems_data/DownloadPeriod/715938 && ln -v pems_data/DownloadPeriod/station+715938/quantity+flow/period+2008-07/715938-2008-07.flow.gz pems_data/DownloadPeriod/715938/715938-2008-07.flow.gz
-mkdirhier pems_data/DownloadPeriod/715938 && ln -v pems_data/DownloadPeriod/station+715938/quantity+flow/period+2008-08/715938-2008-08.flow.gz pems_data/DownloadPeriod/715938/715938-2008-08.flow.gz
-mkdirhier pems_data/DownloadPeriod/715938 && ln -v pems_data/DownloadPeriod/station+715938/quantity+flow/period+2008-09/715938-2008-09.flow.gz pems_data/DownloadPeriod/715938/715938-2008-09.flow.gz
-mkdirhier pems_data/DownloadPeriod/715938 && ln -v pems_data/DownloadPeriod/station+715938/quantity+flow/period+2008-10/715938-2008-10.flow.gz pems_data/DownloadPeriod/715938/715938-2008-10.flow.gz
-mkdirhier pems_data/DownloadPeriod/715938 && ln -v pems_data/DownloadPeriod/station+715938/quantity+flow/period+2008-11/715938-2008-11.flow.gz pems_data/DownloadPeriod/715938/715938-2008-11.flow.gz
-mkdirhier pems_data/DownloadPeriod/715938 && ln -v pems_data/DownloadPeriod/station+715938/quantity+flow/period+2008-12/715938-2008-12.flow.gz pems_data/DownloadPeriod/715938/715938-2008-12.flow.gz
-mkdirhier pems_data/DownloadPeriod/715938 && ln -v pems_data/DownloadPeriod/station+715938/quantity+speed/period+2008-01/715938-2008-01.speed.gz pems_data/DownloadPeriod/715938/715938-2008-01.speed.gz
-mkdirhier pems_data/DownloadPeriod/715938 && ln -v pems_data/DownloadPeriod/station+715938/quantity+speed/period+2008-02/715938-2008-02.speed.gz pems_data/DownloadPeriod/715938/715938-2008-02.speed.gz
-mkdirhier pems_data/DownloadPeriod/715938 && ln -v pems_data/DownloadPeriod/station+715938/quantity+speed/period+2008-03/715938-2008-03.speed.gz pems_data/DownloadPeriod/715938/715938-2008-03.speed.gz
-mkdirhier pems_data/DownloadPeriod/715938 && ln -v pems_data/DownloadPeriod/station+715938/quantity+speed/period+2008-04/715938-2008-04.speed.gz pems_data/DownloadPeriod/715938/715938-2008-04.speed.gz
-mkdirhier pems_data/DownloadPeriod/715938 && ln -v pems_data/DownloadPeriod/station+715938/quantity+speed/period+2008-05/715938-2008-05.speed.gz pems_data/DownloadPeriod/715938/715938-2008-05.speed.gz
-mkdirhier pems_data/DownloadPeriod/715938 && ln -v pems_data/DownloadPeriod/station+715938/quantity+speed/period+2008-06/715938-2008-06.speed.gz pems_data/DownloadPeriod/715938/715938-2008-06.speed.gz
-mkdirhier pems_data/DownloadPeriod/715938 && ln -v pems_data/DownloadPeriod/station+715938/quantity+speed/period+2008-07/715938-2008-07.speed.gz pems_data/DownloadPeriod/715938/715938-2008-07.speed.gz
-mkdirhier pems_data/DownloadPeriod/715938 && ln -v pems_data/DownloadPeriod/station+715938/quantity+speed/period+2008-08/715938-2008-08.speed.gz pems_data/DownloadPeriod/715938/715938-2008-08.speed.gz
-mkdirhier pems_data/DownloadPeriod/715938 && ln -v pems_data/DownloadPeriod/station+715938/quantity+speed/period+2008-09/715938-2008-09.speed.gz pems_data/DownloadPeriod/715938/715938-2008-09.speed.gz
-mkdirhier pems_data/DownloadPeriod/715938 && ln -v pems_data/DownloadPeriod/station+715938/quantity+speed/period+2008-10/715938-2008-10.speed.gz pems_data/DownloadPeriod/715938/715938-2008-10.speed.gz
-mkdirhier pems_data/DownloadPeriod/715938 && ln -v pems_data/DownloadPeriod/station+715938/quantity+speed/period+2008-11/715938-2008-11.speed.gz pems_data/DownloadPeriod/715938/715938-2008-11.speed.gz
-mkdirhier pems_data/DownloadPeriod/715938 && ln -v pems_data/DownloadPeriod/station+715938/quantity+speed/period+2008-12/715938-2008-12.speed.gz pems_data/DownloadPeriod/715938/715938-2008-12.speed.gz
-mkdirhier pems_data/DownloadPeriod/715944 && ln -v pems_data/DownloadPeriod/station+715944/quantity+flow/period+2008-01/715944-2008-01.flow.gz pems_data/DownloadPeriod/715944/715944-2008-01.flow.gz
-mkdirhier pems_data/DownloadPeriod/715944 && ln -v pems_data/DownloadPeriod/station+715944/quantity+flow/period+2008-02/715944-2008-02.flow.gz pems_data/DownloadPeriod/715944/715944-2008-02.flow.gz
-mkdirhier pems_data/DownloadPeriod/715944 && ln -v pems_data/DownloadPeriod/station+715944/quantity+flow/period+2008-03/715944-2008-03.flow.gz pems_data/DownloadPeriod/715944/715944-2008-03.flow.gz
-mkdirhier pems_data/DownloadPeriod/715944 && ln -v pems_data/DownloadPeriod/station+715944/quantity+flow/period+2008-05/715944-2008-05.flow.gz pems_data/DownloadPeriod/715944/715944-2008-05.flow.gz
-mkdirhier pems_data/DownloadPeriod/715944 && ln -v pems_data/DownloadPeriod/station+715944/quantity+flow/period+2008-06/715944-2008-06.flow.gz pems_data/DownloadPeriod/715944/715944-2008-06.flow.gz
-mkdirhier pems_data/DownloadPeriod/715944 && ln -v pems_data/DownloadPeriod/station+715944/quantity+flow/period+2008-07/715944-2008-07.flow.gz pems_data/DownloadPeriod/715944/715944-2008-07.flow.gz
-mkdirhier pems_data/DownloadPeriod/715944 && ln -v pems_data/DownloadPeriod/station+715944/quantity+flow/period+2008-08/715944-2008-08.flow.gz pems_data/DownloadPeriod/715944/715944-2008-08.flow.gz
-mkdirhier pems_data/DownloadPeriod/715944 && ln -v pems_data/DownloadPeriod/station+715944/quantity+flow/period+2008-09/715944-2008-09.flow.gz pems_data/DownloadPeriod/715944/715944-2008-09.flow.gz
-mkdirhier pems_data/DownloadPeriod/715944 && ln -v pems_data/DownloadPeriod/station+715944/quantity+flow/period+2008-10/715944-2008-10.flow.gz pems_data/DownloadPeriod/715944/715944-2008-10.flow.gz
-mkdirhier pems_data/DownloadPeriod/715944 && ln -v pems_data/DownloadPeriod/station+715944/quantity+flow/period+2008-11/715944-2008-11.flow.gz pems_data/DownloadPeriod/715944/715944-2008-11.flow.gz
-mkdirhier pems_data/DownloadPeriod/715944 && ln -v pems_data/DownloadPeriod/station+715944/quantity+flow/period+2008-12/715944-2008-12.flow.gz pems_data/DownloadPeriod/715944/715944-2008-12.flow.gz
-mkdirhier pems_data/DownloadPeriod/715947 && ln -v pems_data/DownloadPeriod/station+715947/quantity+flow/period+2008-01/715947-2008-01.flow.gz pems_data/DownloadPeriod/715947/715947-2008-01.flow.gz
-mkdirhier pems_data/DownloadPeriod/715947 && ln -v pems_data/DownloadPeriod/station+715947/quantity+flow/period+2008-02/715947-2008-02.flow.gz pems_data/DownloadPeriod/715947/715947-2008-02.flow.gz
-mkdirhier pems_data/DownloadPeriod/715947 && ln -v pems_data/DownloadPeriod/station+715947/quantity+flow/period+2008-03/715947-2008-03.flow.gz pems_data/DownloadPeriod/715947/715947-2008-03.flow.gz
-mkdirhier pems_data/DownloadPeriod/715947 && ln -v pems_data/DownloadPeriod/station+715947/quantity+flow/period+2008-04/715947-2008-04.flow.gz pems_data/DownloadPeriod/715947/715947-2008-04.flow.gz
-mkdirhier pems_data/DownloadPeriod/715947 && ln -v pems_data/DownloadPeriod/station+715947/quantity+flow/period+2008-05/715947-2008-05.flow.gz pems_data/DownloadPeriod/715947/715947-2008-05.flow.gz
-mkdirhier pems_data/DownloadPeriod/715947 && ln -v pems_data/DownloadPeriod/station+715947/quantity+flow/period+2008-06/715947-2008-06.flow.gz pems_data/DownloadPeriod/715947/715947-2008-06.flow.gz
-mkdirhier pems_data/DownloadPeriod/715947 && ln -v pems_data/DownloadPeriod/station+715947/quantity+flow/period+2008-07/715947-2008-07.flow.gz pems_data/DownloadPeriod/715947/715947-2008-07.flow.gz
-mkdirhier pems_data/DownloadPeriod/715947 && ln -v pems_data/DownloadPeriod/station+715947/quantity+flow/period+2008-08/715947-2008-08.flow.gz pems_data/DownloadPeriod/715947/715947-2008-08.flow.gz
-mkdirhier pems_data/DownloadPeriod/715947 && ln -v pems_data/DownloadPeriod/station+715947/quantity+flow/period+2008-09/715947-2008-09.flow.gz pems_data/DownloadPeriod/715947/715947-2008-09.flow.gz
-mkdirhier pems_data/DownloadPeriod/715947 && ln -v pems_data/DownloadPeriod/station+715947/quantity+flow/period+2008-10/715947-2008-10.flow.gz pems_data/DownloadPeriod/715947/715947-2008-10.flow.gz
-mkdirhier pems_data/DownloadPeriod/715947 && ln -v pems_data/DownloadPeriod/station+715947/quantity+flow/period+2008-11/715947-2008-11.flow.gz pems_data/DownloadPeriod/715947/715947-2008-11.flow.gz
-mkdirhier pems_data/DownloadPeriod/715947 && ln -v pems_data/DownloadPeriod/station+715947/quantity+flow/period+2008-12/715947-2008-12.flow.gz pems_data/DownloadPeriod/715947/715947-2008-12.flow.gz
-mkdirhier pems_data/DownloadPeriod/715947 && ln -v pems_data/DownloadPeriod/station+715947/quantity+speed/period+2008-01/715947-2008-01.speed.gz pems_data/DownloadPeriod/715947/715947-2008-01.speed.gz
-mkdirhier pems_data/DownloadPeriod/715947 && ln -v pems_data/DownloadPeriod/station+715947/quantity+speed/period+2008-02/715947-2008-02.speed.gz pems_data/DownloadPeriod/715947/715947-2008-02.speed.gz
-mkdirhier pems_data/DownloadPeriod/715947 && ln -v pems_data/DownloadPeriod/station+715947/quantity+speed/period+2008-03/715947-2008-03.speed.gz pems_data/DownloadPeriod/715947/715947-2008-03.speed.gz
-mkdirhier pems_data/DownloadPeriod/715947 && ln -v pems_data/DownloadPeriod/station+715947/quantity+speed/period+2008-04/715947-2008-04.speed.gz pems_data/DownloadPeriod/715947/715947-2008-04.speed.gz
-mkdirhier pems_data/DownloadPeriod/715947 && ln -v pems_data/DownloadPeriod/station+715947/quantity+speed/period+2008-05/715947-2008-05.speed.gz pems_data/DownloadPeriod/715947/715947-2008-05.speed.gz
-mkdirhier pems_data/DownloadPeriod/715947 && ln -v pems_data/DownloadPeriod/station+715947/quantity+speed/period+2008-06/715947-2008-06.speed.gz pems_data/DownloadPeriod/715947/715947-2008-06.speed.gz
-mkdirhier pems_data/DownloadPeriod/715947 && ln -v pems_data/DownloadPeriod/station+715947/quantity+speed/period+2008-07/715947-2008-07.speed.gz pems_data/DownloadPeriod/715947/715947-2008-07.speed.gz
-mkdirhier pems_data/DownloadPeriod/715947 && ln -v pems_data/DownloadPeriod/station+715947/quantity+speed/period+2008-08/715947-2008-08.speed.gz pems_data/DownloadPeriod/715947/715947-2008-08.speed.gz
-mkdirhier pems_data/DownloadPeriod/715947 && ln -v pems_data/DownloadPeriod/station+715947/quantity+speed/period+2008-09/715947-2008-09.speed.gz pems_data/DownloadPeriod/715947/715947-2008-09.speed.gz
-mkdirhier pems_data/DownloadPeriod/715947 && ln -v pems_data/DownloadPeriod/station+715947/quantity+speed/period+2008-10/715947-2008-10.speed.gz pems_data/DownloadPeriod/715947/715947-2008-10.speed.gz
-mkdirhier pems_data/DownloadPeriod/715947 && ln -v pems_data/DownloadPeriod/station+715947/quantity+speed/period+2008-11/715947-2008-11.speed.gz pems_data/DownloadPeriod/715947/715947-2008-11.speed.gz
-mkdirhier pems_data/DownloadPeriod/715947 && ln -v pems_data/DownloadPeriod/station+715947/quantity+speed/period+2008-12/715947-2008-12.speed.gz pems_data/DownloadPeriod/715947/715947-2008-12.speed.gz
-mkdirhier pems_data/DownloadPeriod/716078 && ln -v pems_data/DownloadPeriod/station+716078/quantity+flow/period+2008-01/716078-2008-01.flow.gz pems_data/DownloadPeriod/716078/716078-2008-01.flow.gz
-mkdirhier pems_data/DownloadPeriod/716078 && ln -v pems_data/DownloadPeriod/station+716078/quantity+flow/period+2008-02/716078-2008-02.flow.gz pems_data/DownloadPeriod/716078/716078-2008-02.flow.gz
-mkdirhier pems_data/DownloadPeriod/716078 && ln -v pems_data/DownloadPeriod/station+716078/quantity+flow/period+2008-03/716078-2008-03.flow.gz pems_data/DownloadPeriod/716078/716078-2008-03.flow.gz
-mkdirhier pems_data/DownloadPeriod/716078 && ln -v pems_data/DownloadPeriod/station+716078/quantity+flow/period+2008-04/716078-2008-04.flow.gz pems_data/DownloadPeriod/716078/716078-2008-04.flow.gz
-mkdirhier pems_data/DownloadPeriod/716078 && ln -v pems_data/DownloadPeriod/station+716078/quantity+flow/period+2008-05/716078-2008-05.flow.gz pems_data/DownloadPeriod/716078/716078-2008-05.flow.gz
-mkdirhier pems_data/DownloadPeriod/716078 && ln -v pems_data/DownloadPeriod/station+716078/quantity+flow/period+2008-06/716078-2008-06.flow.gz pems_data/DownloadPeriod/716078/716078-2008-06.flow.gz