Anonymous committed 6bc8ff5

Initial commit of version 0.7.2 from SourceForge.

Files changed (1017)

+
+PyML -- a machine learning library in python.
+
+Copyright (C) 2004-2007 Asa Ben-Hur
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+
+Metadata-Version: 1.0
+Name: PyML
+Version: 0.7.2
+Summary: PyML - a Python machine learning package
+Home-page: http://pyml.sourceforge.net
+Author: Asa Ben-Hur
+Author-email: myfirstname@cs.colostate.edu
+License: GPL
+Description: UNKNOWN
+Platform: UNKNOWN

Binary file added.

+
+__version__ = '0.7.0'
+
+from PyML.containers import *
+from PyML.classifiers import *
+from PyML.preproc import *
+from PyML.demo import *
+from PyML.feature_selection import *

PyML/base/._pymlObject.py

Binary file added.

PyML/base/__init__.py

+
+#PyMLobject = __import__('base.pymlObject').pymlObject.PyMLobject

PyML/base/pymlObject.py

+
+class PyMLobject (object) :
+
+    def __init__(self, arg, **args) :
+        """
+        Handle keyword arguments that are declared in the 'attributes'
+        class attribute (a dict mapping attribute names to default values).
+        """
+        if not hasattr(self, 'attributes') : return
+        if self.__class__ == arg.__class__ :
+            for attribute in self.attributes :
+                setattr(self, attribute, getattr(arg, attribute))
+        else :
+            for attribute in self.attributes :
+                if attribute in args :
+                    setattr(self, attribute, args[attribute])
+                else :
+                    setattr(self, attribute, self.attributes[attribute])
+    
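The constructor above drives both keyword initialization and copy construction off the 'attributes' class attribute. A minimal sketch of how a subclass might use it (the Example class and its defaults are hypothetical, shown only to illustrate the mechanism):

from PyML.base.pymlObject import PyMLobject

class Example (PyMLobject) :
    # dict mapping attribute names to their default values
    attributes = {'C': 1.0, 'verbose': False}

    def __init__(self, arg = None, **args) :
        PyMLobject.__init__(self, arg, **args)

e1 = Example(None, C = 10.0)   # keyword argument overrides the default
e2 = Example(e1)               # copy construction: attributes copied from e1
print e1.C, e1.verbose         # 10.0 False
print e2.C                     # 10.0

Keyword arguments not listed in 'attributes' are silently ignored, and any attribute not passed falls back to the default stored in the dict.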

PyML/classifiers/._baseClassifiers.py

Binary file added.

PyML/classifiers/._composite.py

Binary file added.

PyML/classifiers/._knn.py

Binary file added.

PyML/classifiers/._libsvmWrap.py

Binary file added.

PyML/classifiers/._multi.py

Binary file added.

PyML/classifiers/._platt.py

Binary file added.

PyML/classifiers/._ridgeRegression.py

Binary file added.

PyML/classifiers/._svm.py

Binary file added.

PyML/classifiers/__init__.py

+
+SVM = __import__('PyML.classifiers.svm', fromlist=['']).SVM
+KNN = __import__('PyML.classifiers.knn', fromlist=['']).KNN
+RidgeRegression = __import__('PyML.classifiers.ridgeRegression', fromlist=['']).RidgeRegression
+import modelSelection
+
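The __import__(..., fromlist=['']) calls above simply bind selected submodule classes at the package level. Assuming the module layout implied by this commit, they are equivalent to the plain imports below (shown only for clarity, not part of the commit):

# equivalent plain-import form of the bindings above
from PyML.classifiers.svm import SVM
from PyML.classifiers.knn import KNN
from PyML.classifiers.ridgeRegression import RidgeRegression
from PyML.classifiers import modelSelection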

PyML/classifiers/baseClassifiers.py

+import numpy
+import time
+import copy
+
+from PyML.evaluators import assess
+from PyML.utils import misc
+from PyML.base.pymlObject import PyMLobject
+
+"""base class for for PyML classifiers"""
+
+__docformat__ = "restructuredtext en"
+
+containersRequiringProjection = ['VectorDataSet', 'PyVectorDataSet']
+
+
+class Classifier (PyMLobject) :
+
+    """base class for PyML classifiers, specifying the classifier API"""
+    
+    type = 'classifier'
+    deepcopy = False
+
+    # the type of Results object returned by testing a classifier:
+    resultsObject = assess.ClassificationResults
+    
+    test = assess.test
+    cv = assess.cv
+    stratifiedCV = assess.stratifiedCV
+    loo = assess.loo
+    trainTest = assess.trainTest
+    nCV = assess.nCV
+    
+    def __init__(self, arg = None, **args) :
+
+        PyMLobject.__init__(self, arg, **args)
+        if type(arg) == type('') :
+            self.load(arg)
+        self.log = misc.Container()
+
+    def logger(self) :
+
+        pass
+
+    def __repr__(self) :
+
+        return '<' + self.__class__.__name__ + ' instance>\n'
+
+    def project(self, data) :
+        """
+        project a test dataset to the training data features.
+        """
+
+        if data.__class__.__name__ not in containersRequiringProjection :
+            return
+        if misc.listEqual(self.featureID, data.featureID) :
+            return
+
+        raise ValueError, 'use SparseDataSet container when doing feature selection'
+
+        print 'featureID:'
+        print self.featureID
+        print self.featureDict
+        print 'projecting:'
+        if len(misc.intersect(self.featureID, data.featureID)) != len(self.featureID) :
+            raise ValueError, 'missing features in test data'
+
+        featuresToEliminate = [i for i in range(data.numFeatures)
+                               if data.featureID[i] not in self.featureDict]
+        print 'featuresToEliminate'
+        print featuresToEliminate
+        data.eliminateFeatures(featuresToEliminate)
+        print data.featureID
+            
+    def save(self, fileHandle) :
+
+        raise NotImplementedError, 'your classifier does not implement this function'
+
+    def train(self, data, **args) :
+
+        # store the current cpu time:
+        self._clock = time.clock()
+
+        if not data.labels.numericLabels :
+            # check if there is a class that is not represented in the training data:
+            if min(data.labels.classSize) == 0 :
+                raise ValueError, 'there is a class with no data'
+
+            # store just as much about the labels as is needed:
+            self.labels = misc.Container()
+            self.labels.addAttributes(data.labels, ['numClasses', 'classLabels'])
+        if data.__class__.__name__ in containersRequiringProjection :
+            self.featureID = data.featureID[:]
+            print 'base training'
+            print self.featureID
+            self.featureDict = data.featureDict.copy()
+            
+        data.train(**args)
+        # if there is some testing done on the data, it requires the training data:
+        if data.testingFunc is not None :
+            self.trainingData = data
+
+    def trainFinalize(self) :
+
+        self.log.trainingTime = self.getTrainingTime()
+
+    def getTrainingTime(self) :
+
+        return time.clock() - self._clock
+
+    def classify(self, data, i) :
+
+        raise NotImplementedError
+
+    def twoClassClassify(self, data, i) :
+
+        val = self.decisionFunc(data, i)
+        if val > 0 :
+            return (1, val)
+        else:
+            return (0, val)
+
+class IteratorClassifier (Classifier) :
+
+    def __iter__(self) :
+
+        self._classifierIdx = -1
+        return self
+
+    def getClassifier(self) :
+
+        if self._classifierIdx < 0 :
+            return None
+        return self.classifiers[self._classifierIdx]
+
+    def next(self) :
+
+        self._classifierIdx += 1
+        if self._classifierIdx == len(self.classifiers) :
+            raise StopIteration
+        func = getattr(self.classifiers[self._classifierIdx], self._method)
+
+        return func(self._data, **self._args)
+
+    def test(self, data, **args) :
+
+        self._method = 'test'
+        self._data = data
+        self._args = args
+        return iter(self)
+
+    def cv(self, data, **args) :
+
+        self._method = 'cv'
+        self._data = data
+        self._args = args
+        return iter(self)
+
+    def stratifiedCV(self, data, **args) :
+
+        self._method = 'stratifiedCV'
+        self._data = data
+        self._args = args
+        return iter(self)
+
+    def loo(self, data, **args) :
+
+        self._method = 'loo'
+        self._data = data
+        self._args = args
+        return iter(self)
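Classifier fixes the API that concrete PyML classifiers are expected to follow: call Classifier.train at the start of train, provide a per-pattern decisionFunc and classify, and the evaluation methods bound from assess (test, cv, stratifiedCV, loo, ...) come along for free. A minimal sketch of a subclass following that contract (the ThresholdClassifier is hypothetical, and the getPattern accessor is an assumption about the dataset containers, which are not shown in this diff):

from PyML.classifiers.baseClassifiers import Classifier

class ThresholdClassifier (Classifier) :
    """toy two-class classifier that thresholds the value of feature 0"""

    attributes = {'threshold': 0.0}

    def train(self, data, **args) :
        # let the base class record labels/features and start the training clock
        Classifier.train(self, data, **args)
        self.trainFinalize()

    def decisionFunc(self, data, i) :
        # assumes the container exposes getPattern(i) returning a feature vector
        return data.getPattern(i)[0] - self.threshold

    def classify(self, data, i) :
        # reuse the generic two-class rule defined in the base class
        return self.twoClassClassify(data, i)

Because the evaluation helpers only rely on train and classify, such a subclass can be assessed the same way as the built-in classifiers, for example ThresholdClassifier().cv(data), with fold options as defined in PyML.evaluators.assess.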

PyML/classifiers/composite.py

+
+import numpy
+import math
+
+from PyML.utils import misc
+from PyML.datagen import sample
+from PyML.evaluators import assess
+from baseClassifiers import Classifier, IteratorClassifier
+import svm
+
+__docformat__ = "restructuredtext en"
+
+class CompositeClassifier (Classifier) :
+
+    '''A base class for creating composite classifiers
+    
+    A composite classifier has an attribute called "classifier", and by default
+    requests are forwarded to the appropriate function of the classifier
+    (including the "test" function).
+    For logging purposes, use the log attribute of the classifier rather
+    than the composite log.
+    See for example the FeatureSelect object.'''
+
+    deepcopy = True
+
+    def __init__(self, classifier, **args) :
+
+        Classifier.__init__(self, classifier, **args)
+        if type(classifier) == type('') : return
+        if (not hasattr(classifier, 'type')) or classifier.type != 'classifier' :
+            raise ValueError, 'argument should be a classifier'
+        if classifier.__class__ == self.__class__ :
+            self.classifier = classifier.classifier.__class__(
+                classifier.classifier)
+        else :
+            self.classifier = classifier.__class__(classifier)
+         
+    def __repr__(self) :
+        rep = '<' + self.__class__.__name__ + ' instance>\n'
+        rep += 'Classifier:\n'
+        rep += self.classifier.__repr__()
+        
+        return rep
+
+    def preproject(self, data) :
+
+        self.classifier.preproject(data)
+
+    def decisionFunc(self, data, i) :
+
+        return self.classifier.decisionFunc(data, i)
+
+    def classify(self, data, i) :
+
+        return self.classifier.classify(data, i)
+
+
+    #def preprocess(self, data) :
+
+    #    self.classifier.preprocess(data)
+
+    def getTest(self) :
+
+        return self.classifier.test
+
+    def setTest(self) :
+
+        raise ValueError, 'do not call this function'
+
+    # if the classifier used by the composite implements a test function -
+    # use it rather than the default assess.test
+    test = property (getTest, setTest,
+                     None, 'the test function of the underlying classifier')
+
+    
+
+class Chain (CompositeClassifier) :
+    '''A chain is a list of actions to be performed on a dataset,
+    the last of which is assumed to be a classifier.
+    The actions can be, for example, a series of preprocessing steps or
+    a feature selection step (equivalent to using the FeatureSelect class).
+    Each action in the chain is assumed to have a "train" method and
+    a copy constructor.'''
+
+    deepcopy = True
+    
+    def __init__(self, arg) :
+        """
+        :Parameters:
+          - `arg` - a Chain object or a list of objects, each of which implements
+            'train' and 'test' methods and has a copy constructor
+        
+        """
+        Classifier.__init__(self)
+
+        if arg.__class__ == self.__class__ :
+            other = arg
+            self.classifier = other.classifier.__class__(other.classifier)
+            self.chain = [component.__class__(component)
+                          for component in other.chain]
+            
+        elif type(arg) == type([]) :
+            self.classifier = arg[-1].__class__(arg[-1])
+            self.chain = [arg[i].__class__(arg[i])
+                          for i in range(len(arg) - 1)]
+            
+
+    def train(self, data, **args) :
+
+        Classifier.train(self, data, **args)
+        
+        for component in self.chain :
+            component.train(data, **args)
+
+        self.classifier.train(data, **args)
+        self.log.trainingTime = self.getTrainingTime()
+        
+    def test(self, data, **args) :
+
+        for component in self.chain :
+            component.test(data, **args)
+
+        print 'I am testing',self.classifier
+        print 'testing function', self.classifier.test
+        print 'the data is :', data
+        return self.classifier.test(data, **args)
+
+class FeatureSelect (CompositeClassifier) :
+
+    """A method for combining a feature selector and classifier;
+    training consists of performing feature selection and afterwards training
+    the classifier on the selected features;
+    use this classifier to test the accuracy of a feature selector/classifier
+    combination.
+    USAGE:
+    construction :
+    featureSelect(classifier, featureSelector)
+    featureSelect(otherFeatureSelectInstance) - copy construction
+    """
+    
+    deepcopy = True
+    
+    def __init__(self, arg1, arg2 = None) :
+
+        Classifier.__init__(self)
+
+        if arg1.__class__ == self.__class__ :
+            other = arg1
+            self.classifier = other.classifier.__class__(other.classifier)
+            self.featureSelector = other.featureSelector.__class__(
+                other.featureSelector)
+        else :
+            for arg in (arg1, arg2) :
+                if arg.type == 'classifier' :
+                    self.classifier = arg.__class__(arg)
+                elif arg.type == 'featureSelector' :
+                    self.featureSelector = arg.__class__(arg)
+                else :
+                    raise ValueError, \
+                          'argument should be either classifier or featureSelector'
+
+
+    def __repr__(self) :
+        
+        rep = '<' + self.__class__.__name__ + ' instance>\n'
+        if hasattr(self, 'numFeatures') :
+            rep += 'number of features trained on:' + str(self.numFeatures) + '\n'
+        rep += 'Classifier:\n'
+        rep += self.classifier.__repr__()
+        rep += 'Feature Selector:\n'
+        rep += self.featureSelector.__repr__()
+            
+        return rep
+
+            
+    def train(self, data, **args) :
+
+        Classifier.train(self, data, **args)
+
+        self.featureSelector.select(data, **args)
+        #self.numFeatures = data.numFeatures
+        self.classifier.log.numFeatures = data.numFeatures
+        self.classifier.log.features = data.featureID[:]
+        
+        self.classifier.train(data, **args)
+        self.classifier.log.trainingTime = self.getTrainingTime()
+
+
+class FeatureSelectAll (IteratorClassifier) :
+
+    '''A method for combining a feature selector and classifier;
+    the difference from FeatureSelect is that it is specifically
+    designed for computing the accuracy while varying the 
+    number of features.
+    '''
+    
+    deepcopy = True
+
+    def __init__(self, arg1, arg2 = None) :
+
+        Classifier.__init__(self)
+
+        if arg1.__class__ == self.__class__ :
+            other = arg1
+            self.classifier = other.classifier.__class__(other.classifier)
+            self.featureSelector = other.featureSelector.__class__(
+                other.featureSelector)
+        else :
+            for arg in (arg1, arg2) :
+                if arg.type == 'classifier' :
+                    self.classifier = arg.__class__(arg)
+                elif arg.type == 'featureSelector' :
+                    self.featureSelector = arg.__class__(arg)
+                else :
+                    raise ValueError, \
+                          'argument should be either classifier or featureSelector'
+
+
+    def train(self, data, **args) :
+
+        Classifier.train(self, data, **args)
+
+        numFeatures = []
+        n = 1
+        while n < data.numFeatures :
+            numFeatures.append(n)
+            n *=2
+
+        self.classifiers = [self.classifier.__class__(self.classifier)
+                            for i in range(len(numFeatures))]
+
+        featureSelector = self.featureSelector.__class__(self.featureSelector)
+        rankedFeatures = featureSelector.rank(data)
+
+        for i in range(len(numFeatures)) :
+            selectedData = data.__class__(data)
+            selectedData.keepFeatures(rankedFeatures[:numFeatures[i]])
+            self.classifiers[i].train(selectedData)
+            self.classifiers[i].log.numFeatures = selectedData.numFeatures
+
+        self.classifier.log.trainingTime = self.getTrainingTime()
+
+
+
+class AggregateClassifier (Classifier) :
+
+    """
+    A classifier that combines the predictions of classifiers trained on
+    different datasets.
+    The datasets are presented as a DataAggregate dataset container.
+    """
+
+    def __init__ (self, arg) :
+
+        Classifier.__init__(self)
+        if arg.__class__ == self.__class__ :
+            self.classifiers = [classifier.__class__(classifier)
+                                for classifier in arg.classifiers]
+        elif type(arg) == type([]) :
+            self.classifiers = [classifier.__class__(classifier)
+                                for classifier in arg]
+
+    def train(self, data, **args) :
+
+        Classifier.train(self, data, **args)
+        if not data.__class__.__name__ == 'DataAggregate' :
+            raise ValueError, 'train requires a DataAggregate dataset'
+
+        for i in range(len(self.classifiers)) :
+            self.classifiers[i].train(data.datas[i], **args)
+        self.log.trainingTime = self.getTrainingTime()
+        
+    def classify(self, data, p) :
+
+        if not data.__class__.__name__ == 'DataAggregate' :
+            raise ValueError, 'classify requires a DataAggregate dataset'
+
+        decisionFuncs = [self.classifiers[i].decisionFunc(data.datas[i], p)
+                         for i in range(len(self.classifiers))]
+        #decisionFunc = numpy.sum(decisionFuncs)
+        #if decisionFunc > 0 :
+        #    return (1, decisionFunc)
+        #else :
+        #    return (0, decisionFunc)
+        if decisionFuncs[0] > 0 and decisionFuncs[1] > 0 :
+            return 1, numpy.sum(decisionFuncs)
+        else :
+            return 0, min(decisionFuncs)
+        
+            
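As the docstrings above explain, FeatureSelect pairs a feature selector with a classifier (feature selection happens inside train, and the classifier is then trained on the kept features), while Chain generalizes this to a list of steps ending in a classifier. A usage sketch with a stand-in selector (the TopKSelector below is hypothetical; real selectors live in PyML.feature_selection, which is not shown here):

from PyML.classifiers.svm import SVM
from PyML.classifiers.composite import FeatureSelect

class TopKSelector (object) :
    """stand-in selector: keeps the first k features; real selectors rank them"""
    type = 'featureSelector'   # composite.py dispatches on this attribute
    def __init__(self, arg = None, k = 10) :
        # support copy construction, as composite.py expects of its components
        self.k = arg.k if isinstance(arg, TopKSelector) else k
    def select(self, data, **args) :
        data.keepFeatures(range(min(self.k, data.numFeatures)))

featureSelect = FeatureSelect(SVM(), TopKSelector(k = 20))
results = featureSelect.cv(trainData)   # trainData: a labeled dataset, e.g. a
                                        # SparseDataSet; fold options per assess.cv

Chain([TopKSelector(k = 20), SVM()]) is built the same way, but note that Chain.train and Chain.test call train and test on every component, so chain steps need those methods rather than select.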

PyML/classifiers/ext/._KNN.cpp

Binary file added.

PyML/classifiers/ext/._KNN.h

Binary file added.

PyML/classifiers/ext/._KNN.i

Binary file added.

PyML/classifiers/ext/._Makefile

Binary file added.

PyML/classifiers/ext/._SMO.h

Binary file added.

PyML/classifiers/ext/._SMO.i

Binary file added.

PyML/classifiers/ext/._SVModel.cpp

Binary file added.

PyML/classifiers/ext/._SVModel.h

Binary file added.

PyML/classifiers/ext/._SVModel.i

Binary file added.

PyML/classifiers/ext/._libsvm.cpp

Binary file added.

PyML/classifiers/ext/._libsvm.h

Binary file added.

PyML/classifiers/ext/._libsvm.i

Binary file added.

PyML/classifiers/ext/._mylibsvm.cpp

Binary file added.