Commits

Aleš Erjavec committed 6265b5f

Added "Sample on any change" check box. Code cleanup.

Comments (0)

Files changed (1)

Orange/OrangeWidgets/Data/OWDataSampler.py

 import random
+
 import Orange
+from Orange.data import sample
+
 import OWGUI
 from OWWidget import *
 
 
 
 class OWDataSampler(OWWidget):
-    settingsList=["Stratified", "Repeat", "UseSpecificSeed", "RandomSeed",
-    "GroupSeed", "outFold", "Folds", "SelectType", "useCases", "nCases",
-    "selPercentage", "LOO",
-    "CVFolds", "CVFoldsInternal", "nGroups", "pGroups", "GroupText"]
-    
-    contextHandlers = {"":DomainContextHandler("", ["nCases","selPercentage"])}
+    settingsList = [
+        "Stratified", "Repeat", "UseSpecificSeed", "RandomSeed",
+        "GroupSeed", "outFold", "Folds", "SelectType", "useCases", "nCases",
+        "selPercentage", "CVFolds", "nGroups",
+        "pGroups", "GroupText", "autocommit"]
+
+    contextHandlers = {
+        "": DomainContextHandler("", ["nCases", "selPercentage"])
+    }
+
     def __init__(self, parent=None, signalManager=None):
-        OWWidget.__init__(self, parent, signalManager, 'SampleData', wantMainArea = 0)
+        OWWidget.__init__(self, parent, signalManager, 'SampleData',
+                          wantMainArea=0)
 
         self.inputs = [("Data", ExampleTable, self.setData)]
-        self.outputs = [("Data Sample", ExampleTable), ("Remaining Data", ExampleTable)]
+        self.outputs = [("Data Sample", ExampleTable),
+                        ("Remaining Data", ExampleTable)]
 
         # initialization of variables
         self.data = None                        # dataset (incoming stream)
         self.indices = None                     # indices that control sampling
-        self.ind = None                         # indices that control sampling
 
         self.Stratified = 1                     # use stratified sampling if possible?
         self.Repeat = 0                         # can elements repeat in a sample?
         self.useCases = 0                       # use a specific number of cases?
         self.nCases = 25                        # number of cases to use
         self.selPercentage = 30                 # sample size in %
-        self.LOO = 1                            # use LOO?
         self.CVFolds = 10                       # number of CV folds
-        self.CVFoldsInternal = 10               # number of CV folds (for internal use)
         self.nGroups = 3                        # number of groups
-        self.pGroups = [0.1,0.25,0.5]           # sizes of groups
+        self.pGroups = [0.1, 0.25, 0.5]         # sizes of groups
         self.GroupText = '0.1,0.25,0.5'         # assigned to Groups Control (for internal use)
+        self.autocommit = False
+
+        # Invalidated settings flag.
+        self.outputInvalidateFlag = False
 
         self.loadSettings()
+
         # GUI
-        
+
         # Info Box
         box1 = OWGUI.widgetBox(self.controlArea, "Information", addSpace=True)
+        # Input data set info
         self.infoa = OWGUI.widgetLabel(box1, 'No data on input.')
+        # Sampling type/parameters info
         self.infob = OWGUI.widgetLabel(box1, ' ')
+        # Output data set info
         self.infoc = OWGUI.widgetLabel(box1, ' ')
-        
+
         # Options Box
         box2 = OWGUI.widgetBox(self.controlArea, 'Options', addSpace=True)
-        OWGUI.checkBox(box2, self, 'Stratified', 'Stratified (if possible)', callback=self.settingsChanged)
-        OWGUI.checkWithSpin(box2, self, 'Set random seed:', 0, 32767, 'UseSpecificSeed', 'RandomSeed', checkCallback=self.settingsChanged, spinCallback=self.settingsChanged)
+        OWGUI.checkBox(box2, self, 'Stratified', 'Stratified (if possible)',
+                       callback=self.settingsChanged)
+
+        OWGUI.checkWithSpin(
+            box2, self, 'Set random seed:', 0, 32767,
+            'UseSpecificSeed',
+            'RandomSeed',
+            checkCallback=self.settingsChanged,
+            spinCallback=self.settingsChanged
+        )
 
         # Sampling Type Box
         self.s = [None, None, None, None]
-        self.sBox = OWGUI.widgetBox(self.controlArea, "Sampling type", addSpace=True)
+        self.sBox = OWGUI.widgetBox(self.controlArea, "Sampling type",
+                                    addSpace=True)
         self.sBox.buttons = []
 
         # Random Sampling
-        self.s[0] = OWGUI.appendRadioButton(self.sBox, self, "SelectType", 'Random sampling')
-        
-        # indent 
-        indent = sep=OWGUI.checkButtonOffsetHint(self.s[0])
+        self.s[0] = OWGUI.appendRadioButton(self.sBox, self, "SelectType",
+                                            'Random sampling')
+
+        # indent
+        indent = OWGUI.checkButtonOffsetHint(self.s[0])
         # repeat checkbox
-        self.h1Box = OWGUI.indentedBox(self.sBox, sep=indent, orientation = "horizontal")
-        OWGUI.checkBox(self.h1Box, self, 'Repeat', 'With replacement', callback=self.settingsChanged)
+        self.h1Box = OWGUI.indentedBox(self.sBox, sep=indent,
+                                       orientation="horizontal")
+        OWGUI.checkBox(self.h1Box, self, 'Repeat', 'With replacement',
+                       callback=self.settingsChanged)
 
         # specified number of elements checkbox
-        self.h2Box = OWGUI.indentedBox(self.sBox, sep=indent, orientation = "horizontal")
-        OWGUI.checkWithSpin(self.h2Box, self, 'Sample size (instances):', 1, 1000000000, 'useCases', 'nCases', checkCallback=[self.uCases, self.settingsChanged], spinCallback=self.settingsChanged)
-        OWGUI.rubber(self.h2Box)
-        
+        self.h2Box = OWGUI.indentedBox(self.sBox, sep=indent,
+                                       orientation="horizontal")
+        check, _ = OWGUI.checkWithSpin(
+            self.h2Box, self, 'Sample size (instances):', 1, 1000000000,
+            'useCases', 'nCases',
+            checkCallback=self.settingsChanged,
+            spinCallback=self.settingsChanged
+        )
+
         # percentage slider
-        self.h3Box = OWGUI.indentedBox(self.sBox, sep=indent, orientation = "horizontal")
+        self.h3Box = OWGUI.indentedBox(self.sBox, sep=indent)
         OWGUI.widgetLabel(self.h3Box, "Sample size:")
-        self.slidebox = OWGUI.indentedBox(self.sBox, sep=indent, orientation = "horizontal")
-        OWGUI.hSlider(self.slidebox, self, 'selPercentage', minValue=1, maxValue=100, step=1, ticks=10, labelFormat="   %d%%", callback=self.settingsChanged)
 
-        # Cross Validation
-        self.s[1] = OWGUI.appendRadioButton(self.sBox, self, "SelectType", 'Cross validation')
-        
-        box = OWGUI.indentedBox(self.sBox, sep=indent, orientation = "horizontal")
-        OWGUI.spin(box, self, 'CVFolds', 2, 100, step=1, label='Number of folds:  ', callback=[self.changeCombo, self.settingsChanged])
-        OWGUI.rubber(box)
+        self.slidebox = OWGUI.widgetBox(self.h3Box, orientation="horizontal")
+        OWGUI.hSlider(self.slidebox, self, 'selPercentage',
+                      minValue=1, maxValue=100, step=1, ticks=10,
+                      labelFormat="   %d%%",
+                      callback=self.settingsChanged)
+
+        # Sample size (instances) check disables the Percentage slider.
+        # TODO: Should be an exclusive option (radio buttons)
+        check.disables.extend([(-1, self.h3Box)])
+        check.makeConsistent()
+
+        # Cross Validation sampling options
+        self.s[1] = OWGUI.appendRadioButton(self.sBox, self, "SelectType",
+                                            "Cross validation")
+
+        box = OWGUI.indentedBox(self.sBox, sep=indent,
+                                orientation="horizontal")
+        OWGUI.spin(box, self, 'CVFolds', 2, 100, step=1,
+                   label='Number of folds:  ',
+                   callback=self.settingsChanged)
 
         # Leave-One-Out
-        self.s[2] = OWGUI.appendRadioButton(self.sBox, self, "SelectType", 'Leave-one-out')
+        self.s[2] = OWGUI.appendRadioButton(self.sBox, self, "SelectType",
+                                            "Leave-one-out")
 
         # Multiple Groups
-        self.s[3] = OWGUI.appendRadioButton(self.sBox, self, "SelectType", 'Multiple subsets')
-        gbox = OWGUI.indentedBox(self.sBox, sep=indent, orientation = "horizontal")
-        OWGUI.lineEdit(gbox, self, 'GroupText', label='Subset sizes (e.g. "0.1, 0.2, 0.5"):', callback=self.multipleChanged)
+        self.s[3] = OWGUI.appendRadioButton(self.sBox, self, "SelectType",
+                                            'Multiple subsets')
+        gbox = OWGUI.indentedBox(self.sBox, sep=indent,
+                                 orientation="horizontal")
+        OWGUI.lineEdit(gbox, self, 'GroupText',
+                       label='Subset sizes (e.g. "0.1, 0.2, 0.5"):',
+                       callback=self.multipleChanged)
 
         # Output Group Box
-        self.foldcombo = OWGUI.comboBox(self.controlArea, self, "outFold", 'Output Data for Fold / Group', 'Fold / group:', orientation = "horizontal", items = range(1,101), callback = self.foldChanged, sendSelectedValue = 1, valueType = int)
-        self.foldcombo.setEnabled(False)
+        box = OWGUI.widgetBox(self.controlArea, 'Output Data for Fold / Group',
+                              addSpace=True)
+        self.foldcombo = OWGUI.comboBox(
+            box, self, "outFold", items=range(1, 101),
+            label='Fold / group:', orientation="horizontal",
+            sendSelectedValue=1, valueType=int,
+            callback=self.invalidate
+        )
+        self.foldcombo.setEnabled(self.SelectType != 0)
 
-        # Select Data Button
+        # Sample Data box
         OWGUI.rubber(self.controlArea)
-        self.sampleButton = OWGUI.button(self.controlArea, self, 'Sample &Data', callback = self.process, addToLayout=False, default=True)
-        self.buttonBackground.layout().setDirection(QBoxLayout.TopToBottom)
-        self.buttonBackground.layout().insertWidget(0, self.sampleButton)
-        self.buttonBackground.show()
-        self.s[self.SelectType].setChecked(True)    # set initial radio button on (default sample type)
+        box = OWGUI.widgetBox(self.controlArea, "Sample Data")
+        cb = OWGUI.checkBox(box, self, "autocommit", "Sample on any change")
+        self.sampleButton = OWGUI.button(box, self, 'Sample &Data',
+                                         callback=self.sdata, default=True)
+        OWGUI.setStopper(self, self.sampleButton, cb, "outputInvalidateFlag",
+                         callback=self.sdata)
 
-        # CONNECTIONS
-        # set connections for RadioButton (SelectType)
-        self.dummy1 = [None]*len(self.s)
-        for i in range(len(self.s)):
-            self.dummy1[i] = lambda x, v=i: self.sChanged(x, v)
-            self.connect(self.s[i], SIGNAL("toggled(bool)"), self.dummy1[i])
+        # set initial radio button on (default sample type)
+        self.s[self.SelectType].setChecked(True)
 
-        # final touch
+        # Connect radio buttons (SelectType)
+        for i, button in enumerate(self.s):
+            button.toggled[bool].connect(
+                lambda state, i=i: self.samplingTypeChanged(state, i)
+            )
+
+        self.process()
+
         self.resize(200, 275)
 
     # CONNECTION TRIGGER AND GUI ROUTINES
     # enables RadioButton switching
-    def sChanged(self, value, id):
-        self.SelectType = id
-        self.process()
+    def samplingTypeChanged(self, value, i):
+        """Sampling type changed."""
+        self.SelectType = i
+        self.settingsChanged()
 
     def multipleChanged(self):
+        """Multiple subsets (Groups) changed."""
+        self.error(1)
         try:
             self.pGroups = [float(x) for x in self.GroupText.split(',')]
             self.nGroups = len(self.pGroups)
-            self.error(1)
         except:
-            self.error("Invalid specification for sizes of subsets.", 1)
-        self.changeCombo()
-        self.settingsChanged()
+            self.error(1, "Invalid specification for sizes of subsets.")
+        else:
+            self.settingsChanged()
 
-    # reflect user's actions that change combobox contents
-    def changeCombo(self):
-        # refill combobox
+    def updateFoldCombo(self):
+        """Update the 'Folds' combo box contents."""
+        fold = self.outFold
         self.Folds = 1
         if self.SelectType == 1:
             self.Folds = self.CVFolds
                 self.Folds = 1
         elif self.SelectType == 3:
             self.Folds = self.nGroups
+
         self.foldcombo.clear()
         for x in range(self.Folds):
-            self.foldcombo.addItem(str(x+1))
+            self.foldcombo.addItem(str(x + 1))
+        self.outFold = min(fold, self.Folds)
 
-    # triggered on change in output fold combobox
-    def foldChanged(self):
-        #self.outFold = int(ix+1)
-        if self.data:
-            self.sdata()
-
-    # switches between cases and percentage (random sampling)
-    def uCases(self):
-        if self.useCases == 1:
-            self.h3Box.setEnabled(False)
-            self.slidebox.setEnabled(False)
-        else:
-            self.h3Box.setEnabled(True)
-            self.slidebox.setEnabled(True)
-
-    # I/O STREAM ROUTINES
-    # handles changes of input stream
     def setData(self, dataset):
+        """Set the input data set."""
         self.closeContext()
-        if dataset:
-            self.infoa.setText('%d instances in input data set.' % len(dataset))
+        if dataset is not None:
+            self.infoa.setText('%d instances in input data set.' %
+                               len(dataset))
             self.data = dataset
             self.openContext("", dataset)
             self.process()
+            self.sdata()
         else:
             self.infoa.setText('No data on input.')
             self.infob.setText('')
 
     # feeds the output stream
     def sdata(self):
+        if not self.data:
+            return
+
         # select data
         if self.SelectType == 0:
             if self.useCases == 1 and self.Repeat == 1:
-                sample = orange.ExampleTable(self.data.domain)
-                for x in range(self.nCases):
-                    sample.append(self.data[random.randint(0,len(self.data)-1)])
+                indices = self.indices(self.data)
+                sample = [self.data[i] for i in indices]
+                sample = Orange.data.Table(self.data.domain, sample)
                 remainder = None
-                self.infob.setText('Random sampling with repetitions, %d instances.' % self.nCases)
             else:
-                sample = self.data.select(self.ind, 0)
-                remainder = self.data.select(self.ind, 1)
+                indices = self.indices(self.data)
+                sample = self.data.select(indices, 0)
+                remainder = self.data.select(indices, 1)
             self.infoc.setText('Output: %d instances.' % len(sample))
         elif self.SelectType == 3:
-            self.ind = self.indices(self.data, p0 = self.pGroups[self.outFold-1])
-            sample = self.data.select(self.ind, 0)
-            remainder = self.data.select(self.ind, 1)
-            self.infoc.setText('Output: subset %(outFold)d of %(folds)d, %(instances)d instance(s).' % {"outFold": self.outFold, "folds": self.Folds, "instances": len(sample)})
+            indices = self.indices(self.data, p0=self.pGroups[self.outFold - 1])
+            sample = self.data.select(indices, 0)
+            remainder = self.data.select(indices, 1)
+            self.infoc.setText(
+                'Output: subset %(fold)d of %(folds)d, %(len)d instance(s).' %
+                {"fold": self.outFold, "folds": self.Folds, "len": len(sample)}
+            )
         else:
-            sample = self.data.select(self.ind, self.outFold-1)
-            remainder = self.data.select(self.ind, self.outFold-1, negate=1)
-            self.infoc.setText('Output: fold %(outFold)d of %(folds)d, %(instances)d instance(s).' % {"outFold": self.outFold, "folds": self.Folds, "instances": len(sample)})
-        # set name (by PJ)
-        if sample:
+            # CV/LOO
+            indices = self.indices(self.data)
+            sample = self.data.select(indices, self.outFold - 1)
+            remainder = self.data.select(indices, self.outFold - 1, negate=1)
+            self.infoc.setText(
+                'Output: fold %(fold)d of %(folds)d, %(len)d instance(s).' %
+                {"fold": self.outFold, "folds": self.Folds, "len": len(sample)}
+            )
+
+        if sample is not None:
             sample.name = self.data.name
-        if remainder:
+        if remainder is not None:
             remainder.name = self.data.name
+
         # send data
         self.nSample = len(sample)
         self.nRemainder = len(remainder) if remainder is not None else 0
         self.send("Data Sample", sample)
         self.send("Remaining Data", remainder)
-        
-        self.sampleButton.setEnabled(False)
 
-    # MAIN SWITCH
-    # processes data after the user requests it
+        self.outputInvalidateFlag = False
+
     def process(self):
-        # reset errors, fold selected
         self.error(0)
-        self.outFold = 1
+        self.warning(0)
 
-        # check for data
-        if self.data == None:
-            return
-        else:
-            self.infob.setText('')
-            self.infoc.setText('')
+        self.infob.setText('')
 
-        # Random Selection
         if self.SelectType == 0:
-            # apply selected options
-            if self.useCases == 1 and self.Repeat != 1:
-                if self.nCases > len(self.data):
-                    self.error(0, "Sample size (w/o repetitions) larger than dataset.")
-                    return
-                self.indices = orange.MakeRandomIndices2(p0=int(self.nCases))
-                self.infob.setText('Random sampling, using exactly %d instances.' % self.nCases)
+            # Random Selection
+            if self.useCases == 1:
+                ncases = self.nCases
+                if self.Repeat == 0:
+                    ncases = self.nCases
+                    if self.data is not None and ncases > len(self.data):
+                        self.warning(0, "Sample size (w/o repetitions) larger than dataset.")
+                        ncases = len(self.data)
+                    p0 = ncases + 1e-7 if ncases == 1 else ncases
+                    self.indices = sample.SubsetIndices2(p0=p0)
+                    self.infob.setText('Random sampling, using exactly %d instances.' % ncases)
+                else:
+                    p0 = ncases + 1e-7 if ncases == 1 else ncases
+                    self.indices = sample.SubsetIndicesMultiple(p0=p0)
+                    self.infob.setText('Random sampling with repetitions, %d instances.' % ncases)
             else:
                 if self.selPercentage == 100:
-                    self.indices = orange.MakeRandomIndices2(p0=int(len(self.data)))
+                    p0 = len(self.data) if self.data is not None else 1.0
                 else:
-                    self.indices = orange.MakeRandomIndices2(p0=float(self.selPercentage/100.0))
+                    p0 = float(self.selPercentage) / 100.0
+                self.indices = sample.SubsetIndices2(p0=p0)
                 self.infob.setText('Random sampling, %d%% of input instances.' % self.selPercentage)
-            if self.Stratified == 1: self.indices.stratified = self.indices.StratifiedIfPossible
-            else:                    self.indices.stratified = self.indices.NotStratified
-            if self.UseSpecificSeed == 1: self.indices.randseed = self.RandomSeed
-            else:                         self.indices.randomGenerator = orange.RandomGenerator(random.randint(0,65536))
-
-            # call output stream handler to send data
-            self.ind = self.indices(self.data)
+            if self.Stratified == 1:
+                self.indices.stratified = self.indices.StratifiedIfPossible
+            else:
+                self.indices.stratified = self.indices.NotStratified
+            if self.UseSpecificSeed == 1:
+                self.indices.randseed = self.RandomSeed
+            else:
+                self.indices.randomGenerator = Orange.misc.Random(random.randint(0,65536))
 
         # Cross Validation / LOO
         elif self.SelectType == 1 or self.SelectType == 2:
             # apply selected options
             if self.SelectType == 2:
-                self.CVFoldsInternal = len(self.data)
+                folds = len(self.data) if self.data is not None else 1
                 self.infob.setText('Leave-one-out.')
             else:
-                self.CVFoldsInternal = self.CVFolds
+                folds = self.CVFolds
                 self.infob.setText('%d-fold cross validation.' % self.CVFolds)
-            self.indices = orange.MakeRandomIndicesCV(folds = self.CVFoldsInternal)
+            self.indices = sample.SubsetIndicesCV(folds=folds)
             if self.Stratified == 1:
                 self.indices.stratified = self.indices.StratifiedIfPossible
             else:
                 self.indices.stratified = self.indices.NotStratified
             if self.UseSpecificSeed == 1:
-                #self.indices.randomGenerator = orange.RandomGenerator(random.randint(0,65536))
                 self.indices.randseed = self.RandomSeed
             else:
-                #self.indices.randomGenerator = orange.RandomGenerator(random.randint(0,65536))
-                self.indices.randseed = random.randint(0,65536)
-
-            # call output stream handler to send data
-            self.ind = self.indices(self.data)
+                self.indices.randseed = random.randint(0, 65536)
 
         # MultiGroup
         elif self.SelectType == 3:
             self.infob.setText('Multiple subsets.')
-            #parse group specification string
+            #prepare indices generator
+            self.indices = sample.SubsetIndices2()
+            if self.Stratified == 1:
+                self.indices.stratified = self.indices.StratifiedIfPossible
+            else:
+                self.indices.stratified = self.indices.NotStratified
+            if self.UseSpecificSeed == 1:
+                self.indices.randseed = self.RandomSeed
+            else:
+                self.indices.randomGenerator = Orange.misc.Random(random.randint(0,65536))
 
-            #prepare indices generator
-            self.indices = orange.MakeRandomIndices2()
-            if self.Stratified == 1: self.indices.stratified = self.indices.StratifiedIfPossible
-            else:                    self.indices.stratified = self.indices.NotStratified
-            if self.UseSpecificSeed == 1: self.indices.randseed = self.RandomSeed
-            else:                         self.indices.randomGenerator = orange.RandomGenerator(random.randint(0,65536))
-
+    def settingsChanged(self):
         # enable fold selection and fill combobox if applicable
         if self.SelectType == 0:
             self.foldcombo.setEnabled(False)
         else:
             self.foldcombo.setEnabled(True)
-            self.changeCombo()
+            self.updateFoldCombo()
 
-        # call data output routine
-        self.sdata()
-        
-    def settingsChanged(self):
-        self.sampleButton.setEnabled(True)
+        self.process()
+        self.invalidate()
+
+    def invalidate(self):
+        """Invalidate current output."""
+        self.infoc.setText('...')
+        if self.autocommit:
+            self.sdata()
+        else:
+            self.outputInvalidateFlag = True
 
     def sendReport(self):
         if self.SelectType == 0:
         else:
             self.reportSettings("Data", [("Input", "None")])
 
-##############################################################################
-# Test the widget, run from prompt
 
-if __name__=="__main__":
+if __name__ == "__main__":
     appl = QApplication(sys.argv)
     ow = OWDataSampler()
     data = Orange.data.Table('iris.tab')
     ow.setData(data)
     ow.show()
     appl.exec_()
-    ow.saveSettings()
+    ow.saveSettings()