"""Paw Analysis: build a set of "eigenpaws" from training measurements and
use them to classify paw impacts (left front, right hind, etc.)."""

import numpy as np

import data
import analysis

def main():
    # Read through all data and select measurements where the trapezoid
    # pattern algorithm thinks it correctly classified the paw impacts,
    # then use these data to build up a set of "eigenpaws" (basis_vecs)
    paw_data, average_paw, codes, basis_vecs = make_training_dataset()

    # Project the standardized paw data into the space specified by the
    # basis_vecs (i.e. project them into eigenpaw-space)
    paw_scores = (paw_data - average_paw).dot(basis_vecs)
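    # Shapes, for reference: with n training paws flattened to d-dimensional
    # vectors, paw_data is (n, d) and basis_vecs is (d, num_basis_vecs), so
    # paw_scores comes out as (n, num_basis_vecs).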

    # In order for the distance classification to be equally sensitive to
    # all dimensions of the score vector, we need to rescale things by the
    # standard deviation in each dimension. 
    basis_stds = paw_scores.std(axis=0)
    paw_scores /= basis_stds
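    # E.g. if scores along the first eigenpaw vary ~10x more than along the
    # last, a raw Euclidean distance would be dominated by that first
    # dimension; dividing by the per-dimension std gives each score dimension
    # unit variance.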

    # Group the paw scores by code and determine what the average score vector
    # looks like for each leg (i.e. left front, right hind, etc)
    template_paws = []
    for i in range(4):
        leg_data = paw_scores[codes == i]
        template_paws.append(leg_data.mean(axis=0))
    template_paws = np.vstack(template_paws)
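    # template_paws is a (4, num_basis_vecs) array: one average, rescaled
    # score vector per leg. New paws are presumably classified by which of
    # these four templates their score vector falls closest to.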

    #-- Save the key data that we need to do this same classification
    #   on any given paw...
    np.save('average_paw.npy', average_paw)
    np.save('basis_stds.npy', basis_stds)
    np.save('basis_vecs.npy', basis_vecs)
    np.save('template_paws.npy', template_paws)

    #-- Optionally, we can save the raw training data for later analysis...
    np.save('training_paw_data.npy', paw_data)
    np.save('training_paw_codes.npy', codes)

    # Reload the classification data (analysis.classify presumably reads the
    # .npy files saved above)...

    # Check to see how many of the training dataset paws were identified
    matches = 0
    for code, paw in zip(codes, paw_data):
        pattern_result = analysis.PAW_CODE_LUT[code]
        eigenpaw_result = analysis.classify(paw)
        if eigenpaw_result == pattern_result:
            matches += 1
    print 'Classified {0} of the {1} training paws correctly'.format(matches, codes.size)

def make_training_dataset():
    paw_data, codes = [], []
    for run in data.measurements:
        print 'Processing:', run
        result = process_measurement_for_training(run)
        if result is not None:
            paws, run_codes = result
            paw_data.extend(paws)
            codes.extend(run_codes)
            print '    Using this measurement in the training dataset'
    paw_data = np.vstack(paw_data)
    codes = np.array(codes)
    num_basis_vecs = 200
    average_paw = paw_data.mean(axis=0)
    basis_vecs = make_eigenpaws(paw_data - average_paw, num_basis_vecs)
    return paw_data, average_paw, codes, basis_vecs

def process_measurement_for_training(run):
    data_slices = [impact.data_slice for impact in run.impacts]
    labels, problems = analysis.pattern_classification(data_slices)
    paws, codes = [], []
    if not problems:
        # Always discard the last impact, as we can't ID it reliably
        for impact, label in zip(run.impacts[:-1], labels[:-1]):
            # Discard impacts that touch the edge of the sensor pad
            # (assuming run.data holds the full sensor-plate array)
            if not touches_edges(impact, run.data):
                # Assumes the analysis module provides a helper that
                # flattens/rescales each impact to a fixed-size vector, and
                # that labels are the integer codes used by analysis.PAW_CODE_LUT
                paws.append(analysis.standardize_paw(impact))
                codes.append(label)
        return paws, codes
    return None

def touches_edges(impact, data):
    """Returns True if the impact touches an edge of the sensor pad."""
    nx, ny, nt = data.shape
    # Valid indices run 0..nx-1 and 0..ny-1, so compare against nx - 1, ny - 1
    xtouch = (impact.x.min() == 0) or (impact.x.max() == nx - 1)
    ytouch = (impact.y.min() == 0) or (impact.y.max() == ny - 1)
    return xtouch or ytouch
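
# For example, an impact whose column indices span 0..12 on a 64-column pad
# touches the x == 0 edge and is discarded from the training set, since a
# partially recorded paw would distort the eigenpaw basis.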

def make_eigenpaws(paw_data, num_basis_vecs=50):
    """Creates a set of eigenpaws based on paw_data.
    paw_data is a numdata by numdimensions matrix of all of the observations.
    The mean (average_paw) must have been removed from paw_data prior to 
    calling this function."""
    # Determine the eigenvectors of the covariance matrix of the data.
    # The covariance matrix is symmetric, so use eigh, which is faster
    # and guarantees real eigenvalues/eigenvectors.
    cov = np.cov(paw_data.T)
    eigvals, eigvecs = np.linalg.eigh(cov)

    # Sort the eigenvectors by ascending eigenvalue (largest is last)
    eig_idx = np.argsort(eigvals)
    sorted_eigvecs = eigvecs[:, eig_idx]
    sorted_eigvals = eigvals[eig_idx]

    # Now choose a cutoff number of eigenvectors to use 
    basis_vecs = sorted_eigvecs[:,-num_basis_vecs:]

    return basis_vecs
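
# A minimal sketch of what analysis.classify presumably does with the arrays
# saved in main() (the real implementation lives in the analysis module; the
# filenames match the ones written above):
#
#   average_paw = np.load('average_paw.npy')
#   basis_vecs = np.load('basis_vecs.npy')
#   basis_stds = np.load('basis_stds.npy')
#   template_paws = np.load('template_paws.npy')
#
#   def classify(paw):
#       scores = (paw - average_paw).dot(basis_vecs) / basis_stds
#       dists = np.sqrt(((template_paws - scores) ** 2).sum(axis=1))
#       return PAW_CODE_LUT[dists.argmin()]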

if __name__ == '__main__':
    main()