Commits

Aleš Erjavec committed 92c97c3

Moving _differentiation into orangecontrib.bio namespace

  • Participants
  • Parent commits 1918556

Comments (0)

Files changed (32)

 include *.rst *.txt
-include _differentiation/widgets/icons/*.{svg,png}
+recursive-include orangecontrib/bio/differentiation *.svg *.png

File _differentiation/__init__.py

-import difscale

File _differentiation/difscale.py

-from __future__ import absolute_import
-
-import random
-from math import log
-from operator import itemgetter
-
-import numpy
-
-import Orange
-from Orange.bio import obiGEO
-from Orange.bio.obiExpression import ExpressionSignificance_Test
-
-
-# Utility functions
-
-log2 = lambda x: log(x, 2.)
-
-def my_ratio(x, y):
-    """ compute the log-ratio """
-    return log2(x/y)
-
-def sign(x):
-    return cmp(x, 0)
-
-def common_domain(data1, data2):
-    """Use only attributes that are in both data sets"""
-    atts = sorted(set([a.name for a in data1.domain.attributes]).intersection(
-               [a.name for a in data2.domain.attributes]))
-    new1 = Orange.data.Table(Orange.data.Domain(atts + [data1.domain.classVar], data1.domain), data1)
-    new2 = Orange.data.Table(Orange.data.Domain(atts + [data2.domain.classVar], data2.domain), data2)
-    return new1, new2
-
-def common_genes(data1, data2, att='gene'):
-    common_set = list(set(ex[att].value for ex in data1).intersection(ex[att].value for ex in data2))
-    return data1.filter(**{att: common_set}), data2.filter(**{att: common_set})
-
-# Normalization
-
-def quantilenorm(data):
-    """normalization of microarray data to obtain the same distribution in all chips"""
-    chips = data.domain.attributes
-    genes = [str(d["gene"]) for d in data]
-    d_sort = {}
-    for c in chips:
-        dc = [(float(d[c]), str(d["gene"])) for d in data]
-        dc.sort()
-        d_sort[c.name] = dc
-    d_sort2 = dict([(str(d["gene"]),{}) for d in data])
-    for i in range(len(data)):
-        genes_c = [(d_sort[c.name][i][1], c.name) for c in chips]
-        mean_row = numpy.mean([d_sort[c.name][i][0] for c in chips])
-        for g, d in genes_c:
-            d_sort2.get(g, {}).update(dict([(d, mean_row)]))
-    data_norm = Orange.data.Table(data.domain)
-    for i, d in enumerate(data):
-        g = str(d["gene"])
-        ex = [d_sort2[g][c.name] for c in chips]
-        data_norm.append(ex)
-        data_norm[i]["gene"] = g
-    return data_norm
-
-def get_mediannorm(modeldata):
-    """returns the function for median scale normalization"""
-    globalmedian = numpy.median([float(d[c]) for d in modeldata for c in modeldata.domain.attributes])
-    def normalize(data):
-        chips = data.domain.attributes
-        medians = [numpy.median([float(d[c]) for d in data]) for c in chips]
-        data_norm = Orange.data.Table(data.domain)
-        data_norm.domain.add_metas(data.domain.get_metas())
-        for i, d in enumerate(data):
-            ex = [(d[c.name].value*globalmedian)/medians[k] for k,c in enumerate(chips)]
-            data_norm.append(ex)
-            for m in data.domain.get_metas():
-                data_norm[i][m] = data[i][m]
-        return data_norm
-    return normalize
-
-def medianscalenorm(data, newsamples=None, intersection=True):
-    """normalization of microarray data to center the distributions in all chips on the same global median"""
-    if not newsamples:
-        n = get_mediannorm(data)
-        return n(data), None
-    else:
-        if intersection:
-            idata, inewsamples = common_genes(data, newsamples)
-        n = get_mediannorm(idata)
-        return n(idata), n(inewsamples)
-
-def normalize(data1, data2=None, type='median'):
-    if type == 'quantile':
-        ndata1 = quantilenorm(data1)
-        return ndata1, medianscalenorm(ndata1, data2)[1] if data2 else None
-    elif type == 'median':
-        return medianscalenorm(data1, data2)
-    else:
-        return Error
-
-
-# Gene filtering
-
-def compute_diff_pairs(at_a, d):
-    """ computes the pairwise differences between the replicates """
-    differ = []
-    for i in range(len (at_a)-1):
-        for j in range(i+1, len(at_a)):
-            differ.append(log2(d[at_a[i]])-log2(d[at_a[j]]))
-    return differ
-
-def costruct_series(data, attr_set, differences=False):
-    """ Constructs the time series by averaging the replicates of the same time point """
-    serie = dict([(str(d["gene"]), [numpy.mean([d[at] for at in at_samples])
-                    for at_name, at_samples in attr_set]) for d in data])
-    if differences:
-        """store the differences between replicates while creating the time series"""
-        differences = []
-        r = [[differences.extend(compute_diff_pairs(at_samples, d))
-              for at_name, at_samples in attr_set] for d in data]
-        return serie, differences
-    else:
-        return serie
-
-def costruct_control_series(serie, num_points, control):
-    """ Creation of the control series (for each gene) as a constant profile equal to the mean expression (control == 'avg') or to the expression at the first time point (control == 't0') """
-    serie_0 = {}
-    if control == "avg":
-        serie_0.update(dict([(g,[numpy.mean(serie[g]) for i in range(num_points)]) for g in serie]))
-    elif control == "t0":
-        serie_0.update(dict([(g,[serie[g][0] for i in range(num_points)]) for g in serie]))
-    return serie_0
-
-def compute_area(vals, t, baseline=0.):
-    return sum(a_pair((t[i], vals[i]), (t[i+1], vals[i+1]), baseline) \
-        for i in xrange(len(vals)-1))
-
-def a_pair(p1, p2, baseline):
-    """Area between the baseline and the line segment joining two
-    points (x, y). If the segment crosses the baseline, the parts on
-    both sides of it are counted as positive."""
-    x1,y1 = p1
-    x2,y2 = p2
-    x2 = x2-x1 #same start
-    x1 = 0.0
-    a = y1-baseline
-    b = y2-baseline
-    if a*b >= 0: #both on one side
-        return abs(x2*(b+a)/2.0)
-    else:
-        xp = -a * x2 / float(y2-y1)
-        return (abs(xp * a) + abs((x2-xp)*b)) / 2.0
-
-def uniform_time_scale(attr_set):
-    """ Converts the time points to a single unit of measure (the smallest among [min, h, d] present in the data) """
-    ord_measures = ["min","h","d"]
-    converter = {"min":{"h":60, "d":24*60}, "h": {"d":24}}
-    measures = list(set([t.split(" ")[1] for t,s in attr_set]))
-    if len(measures) == 1:
-        time_points = [float(t.split(" ")[0]) for t,s in attr_set]
-    else:
-        first_measure = min([(ord_measures.index(m),m) for m in measures])[1]
-        time_points = [float(t.split(" ")[0]) for t, s in attr_set
-                       if t.split(" ")[1] == first_measure]
-        time_points.extend([float(t.split(" ")[0]) * converter[first_measure][t.split(" ")[1]]
-                            for t, s in attr_set if t.split(" ")[1] != first_measure])
-    return time_points
-
-def AREA(data, attr_set, control='t0', weighted=False, auto=False, perc=99):
-    """ AREA (Area Under the Curve) filtering method """
-    from matplotlib.mlab import prctile
-    if weighted:
-        time_points = uniform_time_scale(attr_set)
-    else:
-        time_points = range(len(attr_set))
-
-    # Monte Carlo approach to create the null distribution of the areas
-    if auto: # null distribution
-        serie, differences = costruct_series(data, attr_set, auto)
-        serie_campionata = {}
-        area = []
-        for i in range(20000):
-            serie_campionata = random.sample(differences, len(time_points))
-            area.append(compute_area(serie_campionata, time_points))
-        area_threshold = prctile(area, perc)
-    else:
-        serie = costruct_series(data, attr_set, auto)
-
-    serie_0 = costruct_control_series(serie, len(time_points), control)
-
-    # Gene filtering
-    areas = []
-    for g in serie:
-        diff_s = [log2(serie[g][i]) - log2(serie_0[g][i]) for i in range(len(time_points))]
-        area_diff = compute_area(diff_s, time_points);
-        if not auto or area_diff > area_threshold:
-            areas.append((g, area_diff))
-    return areas
-
-class ExpressionSignificance_AREA(ExpressionSignificance_Test):
-    def __call__(self, target=None):
-        attr_set = {}
-        for a in self.data.domain.attributes:
-            attr_set[a.attributes['time']] = attr_set.get(a.attributes['time'], []) + [a.name]
-        scores = AREA(self.data, sorted(attr_set.items()))
-        gene2ind = dict((g, i) for i,g in enumerate(ex['gene'].value for ex in self.data))
-        return [(gene2ind[g], s) for g, s in scores]
-
-def FC(data, attr_set, control='t0', thr=2, auto=False, p_thr=0.2):
-    """ Gene filtering based on the number of time points whose absolute log2 fold change (FC) against the control series is >= thr """
-    serie = costruct_series(data, attr_set, False)
-    num_points = len(serie.values()[0])
-    serie_0 = costruct_control_series(serie, num_points, control)
-    fc = [(g, len([0 for i in range(num_points) if abs(my_ratio(s[i], serie_0[g][i])) >= thr]))
-          for g, s in serie.items()]
-    if auto:
-        thr_points = round(p_thr*num_points)
-        fc = [(g, v) for g, v in fc if v >= thr_points]
-    return fc
-
-class ExpressionSignificance_FCts(ExpressionSignificance_Test):
-    def __call__(self, target=None):
-        attr_set = {}
-        for a in self.data.domain.attributes:
-            attr_set[a.attributes['time']] = attr_set.get(a.attributes['time'], []) + [a.name]
-        scores = FC(self.data, sorted(attr_set.items()))
-        return zip(self.keys, map(itemgetter(1), scores))
-
-def spearmanr_filter(data, limit=1000):
-    """ Spearman ranks gene filtering """
-    from scipy.stats import spearmanr
-    time = [a.attributes['time'] for a in data.domain.attributes]
-    exs = sorted(data, reverse=True, key=lambda ex: abs(spearmanr([a.value for a in ex], time)[0]))
-    return [str(ex['gene']) for ex in exs[:limit]]
-
-
-def compute_ssf(pcascores, thr):
-    pcascores.sort(reverse=True)
-    stot = sum([el[0] for el in pcascores])
-    pp=[a[0] / stot for a in pcascores]
-    x = numpy.cumsum(pp)
-    return [pcascores[i][1] for i, el in enumerate(x) if el <= thr]
-
-def ssf(data, samples, classifier, thr=0.995, merge=True):
-    stages = [s.attributes['time'] for s in samples]
-    st2smp = {}
-    for ss in set(stages):
-        st2smp[ss] = [sp for isp, sp in enumerate(samples) if stages[isp] == ss]
-    genelists = {}
-    weights = classifier.pca.projection[0]
-    for st, smpl in st2smp.items():
-        score_list = []
-        for s in smpl:
-            score_list.append([(abs(i[s].value * w), i['gene'].value) for i,w in zip(data, weights)])
-        if merge:
-            list_s = []
-            for i, t in enumerate(score_list[0]):
-                list_s.append((numpy.median([s[i][0] for s in score_list]), t[1]))
-            genelists[st] = compute_ssf(list_s, thr)
-        else:
-            for s, list_s in zip(smpl, score_list):
-                genelists[s] = compute_ssf(list_s, thr)
-    return genelists
-
-def ssf_table(genelists):
-    merge = not isinstance(genelists.keys()[0], Orange.feature.Descriptor)
-    dom = Orange.data.Domain([Orange.feature.String('gene')], False)
-    time = Orange.feature.String('time')
-    dom.add_meta(Orange.feature.Descriptor.new_meta_id(), time)
-    if not merge:
-        sample = Orange.feature.String('sample')
-        dom.add_meta(Orange.feature.Descriptor.new_meta_id(), sample)
-    dt = Orange.data.Table(dom)
-    for label, genes in genelists.items():
-        for gene in genes:
-            dt.append([gene])
-            if merge:
-                dt[-1][time] = str(label)
-            else:
-                dt[-1][time] = str(label.attributes['time'])
-                dt[-1][sample] = label.name
-    return dt
-
-
-def signed_PCA(data):
-    pca = Orange.projection.linear.PCA(data, standardize=False, max_components=1)
-    classifier = lambda X: [x[0].value for x in pca(X)]
-    predictions = classifier(data)
-    classes = [ex.getclass().value for ex in data]
-    n = 0
-    for i1,c1 in enumerate(classes):
-        for i2,c2 in enumerate(classes[:i1]):
-            n += cmp(c1,c2) * cmp(predictions[i1], predictions[i2])
-    if n < 0:
-        def invert(X):
-            y = classifier(X)
-            return -y if type(y) == float else [-x for x in y]
-        spca = invert
-    else:
-        spca = classifier
-    spca.pca = pca
-    return spca
-
-signed_PCA.name = 'PCA'
-
-
-def conttime(data, d):
-    for a in data.domain.attributes:
-        a.attributes['time'] = d[a.attributes['time']]
-
-def conv(attr_set, ticks=True):
-    """Map time labels to time points in a single unit of measure (the smallest among [min, h, d] present in the data); with ticks=True, map each label to its rank in time order instead"""
-    ord_measures = ["min","h","d"]
-    converter = {"min":{"h":60, "d":24*60}, "h": {"d":24}}
-    measures = list(set([t.split(" ")[1] for t in attr_set]))
-    if len(measures) == 1:
-        time_points = [(t, float(t.split(" ")[0])) for t in attr_set]
-    else:
-        first_measure = min([(ord_measures.index(m),m) for m in measures])[1]
-        time_points = [(t, float(t.split(" ")[0])) for t in attr_set if t.split(" ")[1] == first_measure]
-        time_points.extend([(t, float(t.split(" ")[0]) * converter[first_measure][t.split(" ")[1]])
-                            for t in attr_set if t.split(" ")[1] != first_measure])
-    time_points.sort(key=itemgetter(1))
-    if ticks:
-        time_points = [(t[0],float(i)) for i,t in enumerate(time_points)]
-    return dict(time_points)
-
-
-def get_projections(data1, data2=None):
-    labels1 = list(a.attributes['time'] for a in data1.domain.attributes)
-    tdata1 = obiGEO.transpose(data1)
-    if data2:
-        labels2 = list('[%s]' % a.attributes['time'] for a in data2.domain.attributes)
-        tdata2 = obiGEO.transpose(data2)
-        tdata1, tdata2 = common_domain(tdata1, tdata2)
-        classifier = signed_PCA(tdata1)
-        proj1 = classifier(tdata1)
-        proj2 = classifier(tdata2)
-    else:
-        classifier = signed_PCA(tdata1)
-        proj1 = classifier(tdata1)
-        proj2, labels2 = [], []
-    return proj1, labels1, proj2, labels2, classifier
-
-
-############
-
-if __name__ == '__main__':
-    # Data set 1
-    data1 = obiGEO.GDS('GDS2666').getdata(report_genes=True, transpose=False)
-    labels1 = list(a.attributes['time'] for a in data1.domain.attributes)
-    attr_set = list(set(a.attributes['time'] for a in data1.domain.attributes))
-    convd = conv(attr_set)
-    conttime(data1, convd)
-    # Data set 2
-    data2 = obiGEO.GDS('GDS2667').getdata(report_genes=True, transpose=False)
-    labels2 = list(a.attributes['time'] for a in data2.domain.attributes)
-    attr_set = list(set(a.attributes['time'] for a in data2.domain.attributes))
-    convd = conv(attr_set)
-    conttime(data2, convd)
-    # Normalize data set 1
-    ndata1, _ = normalize(data1, type='quantile')
-    # Filtering
-    attr_set = {}
-    for a in ndata1.domain.attributes:
-        attr_set[a.attributes['time']] = attr_set.get(a.attributes['time'], []) + [a.name]
-    scores = AREA(ndata1, sorted(attr_set.items()))
-    genes = map(itemgetter(0), sorted(scores, key=itemgetter(1), reverse=1)[:1000])
-    fdata1 = ndata1.filter(gene=genes)
-    # Rescale data set 2
-    ttrain, ttest = normalize(fdata1, data2)
-    # Model construction and prediction
-    train = obiGEO.transpose(ttrain)
-    # test = obiGEO.transpose(ttest)
-    # cdtrain, cdtest = common_domain(train, test)
-    classifier = signed_PCA(train)
-    # proj1 = classifier(cdtrain)
-    # proj2 = classifier(cdtest)
-
-    samples = fdata1.domain.features[:6]
-    genes_sel = ssf(fdata1, samples, classifier, merge=False)
-
-
-
-

File _differentiation/widgets/OWDifferentiationScale.py

-"""
-<name>Differentiation Scale</name>
-<description></description>
-<priority>100</priority>
-<icon>icons/Differentiation Scale.svg</icon>
-
-"""
-
-from __future__ import absolute_import
-
-import os
-import sys
-
-from collections import defaultdict
-from operator import itemgetter, add
-
-import numpy
-
-import Orange
-from Orange.OrangeWidgets import OWGUI
-from Orange.OrangeWidgets.OWWidget import *
-
-from Orange.bio.differentiation import difscale
-
-
-class OWDifferentiationScale(OWWidget):
-    settingsList = ["auto_commit", "merge"]
-
-    def __init__(self, parent=None, signalManager=None,
-                 title="Differentiation Scale"):
-        OWWidget.__init__(self, parent, signalManager, title,
-                          wantGraph=True)
-
-        self.inputs = [("Gene Expression Samples", Orange.data.Table,
-                        self.set_data),
-                       ("Additional Expression Samples", Orange.data.Table,
-                        self.set_additional_data)]
-
-        self.outputs = [("Selected Time Points", Orange.data.Table),
-                        ("Additional Selected Time Points", Orange.data.Table),
-                        ("Informative genes", Orange.data.Table),
-                        ("Additional informative genes", Orange.data.Table)]
-
-        self.selected_time_label = 0
-        self.auto_commit = 0
-        self.merge = 1
-
-        self.loadSettings()
-
-        self.selection_changed_flag = False
-
-        #####
-        # GUI
-        #####
-        box = OWGUI.widgetBox(self.controlArea, "Info")
-        self.info_label = OWGUI.widgetLabel(box, "No data on input")
-        self.info_label.setWordWrap(True)
-        self.info_label.setSizePolicy(QSizePolicy.Expanding,
-                                      QSizePolicy.Expanding)
-
-        OWGUI.rubber(self.controlArea)
-
-        box = OWGUI.widgetBox(self.controlArea, "Genes for selected samples")
-        cb = OWGUI.checkBox(box, self, "merge", "Merge samples by stage",
-                            tooltip="Output genes for selected stages",
-                            callback=self.commit_if)
-
-        box = OWGUI.widgetBox(self.controlArea, "Selection")
-
-        cb = OWGUI.checkBox(box, self, "auto_commit", "Commit on any change",
-                            tooltip="Send updated selections automatically",
-                            callback=self.commit_if)
-
-        b = OWGUI.button(box, self, "Commit",
-                         callback=self.commit,
-                         tooltip="Send selections on output signals")
-
-        OWGUI.setStopper(self, b, cb, "selection_changed_flag",
-                         callback=self.commit)
-
-        self.connect(self.graphButton, SIGNAL("pressed()"), self.save_graph)
-
-        self.scene = DiffScaleScene()
-        self.scene_view = DiffScaleView(self.scene, self.mainArea)
-        self.scene_view.setRenderHint(QPainter.Antialiasing)
-        self.scene_view.setMinimumWidth(300)
-
-        self.mainArea.layout().addWidget(self.scene_view)
-        self.connect(self.scene, SIGNAL("selectionChanged()"),
-                     self.on_selection_changed)
-        self.connect(self.scene_view, SIGNAL("view_resized(QSize)"),
-                     self.on_view_resized)
-
-        self.data = None
-        self.additional_data = None
-        self.projections1 = []
-        self.projections2 = []
-        self.labels1 = []
-        self.labels2 = []
-        self.pca = None
-
-        self.selected_time_samples = [], []
-
-        self.controlArea.setMaximumWidth(300)
-        self.resize(600, 480)
-
-    def clear(self):
-        """ Clear the widget state
-        """
-        self.projections1 = []
-        self.projections2 = []
-        self.labels1 = []
-        self.labels2 = []
-        self.clear_selection()
-        self.scene.clear()
-
-    def clear_selection(self):
-        """ Clear the time point selection.
-        """
-        self.selected_time_samples = [], []
-
-    def set_data(self, data=None):
-        """ Set the data for the widget.
-        """
-        self.clear()
-        self.data = data
-
-    def set_additional_data(self, data=None):
-        """ Set an additional data set.
-        """
-        self.clear()
-        self.additional_data = data
-
-    def handleNewSignals(self):
-        if self.data is not None:
-            self.run_projections()
-            self.projection_layout()
-            self.update_graph()
-
-            info_text = """\
-Data with {0} genes
-and {1} samples on input.\n""".format(
-                len(self.data),
-                len(self.data.domain.attributes)
-                )
-
-            if self.additional_data is not None:
-                info_text += """\
-Additional data with {0} genes
-and  {1} samples on input.""".format(
-                len(self.additional_data),
-                len(self.additional_data.domain.attributes)
-                )
-
-            self.info_label.setText(info_text)
-        else:
-            self.send("Selected Time Points", None)
-            self.send("Additional Selected Time Points", None)
-            self.send("Informative genes", None)
-            self.send("Additional informative genes", None)
-            self.info_label.setText("No data on input\n")
-
-    def run_projections(self):
-        """ Run difscale.get_projections with the current inputs.
-        """
-        self.error()
-
-        try:
-            (self.projections1, self.labels1,
-             self.projections2, self.labels2, self.pca) = \
-                difscale.get_projections(self.data, data2=self.additional_data)
-        except Exception, ex:
-            self.error("Failed to obtain the projections due to: %r" % ex)
-            self.clear()
-            return
-
-    def projection_layout(self):
-        """ Compute the layout for the projections.
-        """
-        if self.projections1:
-            projections = self.projections1 + self.projections2
-            projections = numpy.array(projections)
-
-            x_min = numpy.min(projections)
-            x_max = numpy.max(projections)
-
-            # Scale projections
-            projections = (projections - x_min) / ((x_max - x_min) or 1.0)
-            projections = list(projections)
-
-            labels = self.labels1 + self.labels2
-
-            samples = [(attr, self.data) \
-                       for attr in self.data.domain.attributes]
-            if self.additional_data is not None:
-                samples += [(attr, self.additional_data) \
-                            for attr in self.additional_data.domain.attributes]
-
-            # TODO: handle samples with the same projection
-            # the point_layout should return the proj to sample mapping instead
-            proj_to_sample = dict([((label, proj), sample) \
-                                   for label, proj, sample \
-                                   in zip(labels, projections, samples)]
-                                  )
-            self.proj_to_sample = proj_to_sample
-
-            time_points = point_layout(labels, projections)
-            self.time_points = time_points
-
-            all_points = numpy.array(reduce(add, [p for _, p in time_points], []))
-            self.all_points = all_points
-
-            # samples for time label (same order as in self.time_points)
-            self.time_samples = []
-
-            point_i = 0
-            for label, points, in time_points:
-                samples = []
-                for x, y in points:
-                    samples.append(proj_to_sample.get((label, x), None))
-                self.time_samples.append((label, samples))
-
-    def update_graph(self):
-        """ Populate the Graphics Scene with the current projections.
-        """
-        scene_size_hint = self.scene_view.viewport().size()
-        scene_size_hint = QSizeF(max(scene_size_hint.width() - 50, 100),
-                                 scene_size_hint.height())
-        self.scene.clear()
-
-        if self.projections1:
-            level_height = 20
-            all_points = self.all_points.copy()
-            all_points[:, 0] *= scene_size_hint.width()
-            all_points[:, 1] *= -level_height
-
-            point_i = 0
-            centers = []
-            z_value = 0
-            for label, samples in self.time_samples:
-                # Points
-                p1 = all_points[point_i]
-                points = all_points[point_i: point_i + len(samples), :]
-                for (x, y), sample in zip(points, samples):
-                    item = GraphicsTimePoint(QRectF(QPointF(x-3, y-3), QSizeF(6, 6)))
-                    item.setBrush(QBrush(Qt.black))
-                    item.sample = sample
-                    item.setToolTip(sample[0].name if sample else "")
-                    item.setZValue(z_value)
-                    self.scene.addItem(item)
-                    point_i += 1
-                p2 = all_points[point_i - 1]
-
-                # Line over all points
-                line = QGraphicsLineItem(QLineF(*(tuple(p1) + tuple(p2))))
-                line.setPen(QPen(Qt.black, 2))
-                line.setZValue(z_value - 1)
-                self.scene.addItem(line)
-
-                # Time label on top of the median
-                n_points = len(points)
-                if n_points % 2:
-                    center = points[n_points / 2]
-                else:
-                    center = (points[n_points / 2] + points[n_points / 2 + 1]) / 2.0
-                centers.append(center)
-                x, y = center
-                text = QGraphicsSimpleTextItem(label)
-                w = text.boundingRect().width()
-                text.setPos(x - w / 2.0, y - 17.5)
-                self.scene.addItem(text)
-
-            self.scene.addLine(QLineF(0.0, 0.0, scene_size_hint.width(), 0.0))
-
-            polygon = QPolygonF([QPointF(3.0, 0.0),
-                                 QPointF(-2.0, -2.0),
-                                 QPointF(0.0, 0.0),
-                                 QPointF(-2.0, 2.0),
-                                 QPointF(3.0, 0.0)])
-
-            arrow = QGraphicsPolygonItem(polygon)
-            arrow.setBrush(QBrush(Qt.black))
-            arrow.setPos(scene_size_hint.width(), 0.0)
-            arrow.scale(2, 2)
-            self.scene.addItem(arrow)
-
-            title = QGraphicsSimpleTextItem("Development (time)")
-            font = self.font()
-            font.setPointSize(10)
-            title.setFont(font)
-            w = title.boundingRect().width()
-            title.setPos(scene_size_hint.width() - w, -15)
-            self.scene.addItem(title)
-
-            rects = []
-            ticks = []
-            axis_label_items = []
-            labels = [(center, label) for center, (label, _) in \
-                      zip(centers, self.time_samples)]
-            labels = sorted(labels, key=lambda (c, l): c[0])
-            for center, label in labels:
-                x, y = center
-                item = QGraphicsSimpleTextItem(label)
-                w = item.boundingRect().width()
-                item.setPos(x - w / 2.0, 4.0)
-                rects.append(item.sceneBoundingRect().normalized())
-                ticks.append(QPointF(x - w / 2.0, 4.0))
-                axis_label_items.append(item)
-
-            rects = greedy_scale_label_layout(ticks, rects, spacing=5)
-
-            for (tick, label), rect, item in zip(labels, rects,
-                                                 axis_label_items):
-                x, y = tick
-                self.scene.addLine(x, -2, x, 2)
-                if rect.top() - item.pos().y() > 5:
-                    self.scene.addLine(x, 2, rect.center().x(), 14.0)
-                if rect.top() - item.pos().y() > 15:
-                    self.scene.addLine(rect.center().x(), 14.0,
-                                       rect.center().x(), rect.top())
-
-                item.setPos(rect.topLeft())
-                self.scene.addItem(item)
-
-            rect = self.scene.itemsBoundingRect()
-            self.scene.setSceneRect(rect.adjusted(-10, -10, 10, 10))
-
-    def on_view_resized(self, *args):
-        self.update_graph()
-
-    def on_selection_changed(self):
-        try:
-            selected = self.scene.selectedItems()
-        except RuntimeError:
-            return
-
-        selected_attrs1 = []
-        selected_attrs2  =[]
-        for point in selected:
-            attr, data = point.sample if point.sample else (None, None)
-            if data is self.data:
-                selected_attrs1.append(attr)
-            elif data is self.additional_data:
-                selected_attrs2.append(attr)
-
-        self.selected_time_samples = selected_attrs1, selected_attrs2
-        self.commit_if()
-
-    def commit_if(self):
-        if self.auto_commit:
-            self.commit()
-        else:
-            self.selection_changed_flag = True
-
-    def commit(self):
-        if self.data is not None:
-            selected1, selected2 = self.selected_time_samples
-            attrs1 = [a for a in self.data.domain.attributes \
-                      if a in selected1]
-            domain = Orange.data.Domain(attrs1, self.data.domain.class_var)
-            domain.add_metas(self.data.domain.get_metas())
-            data = Orange.data.Table(domain, self.data)
-            self.send("Selected Time Points", data)
-            if attrs1:
-                genelists = difscale.ssf(self.data, attrs1, self.pca, merge=self.merge)
-                genes_table = difscale.ssf_table(genelists)
-                self.send("Informative genes", genes_table)
-            else:
-                self.send("Informative genes", None)
-
-            if self.additional_data is not None:
-                attrs2 = [a for a in self.additional_data.domain.attributes \
-                          if a in selected2]
-                domain = Orange.data.Domain(attrs2, self.additional_data.domain.class_var)
-                domain.add_metas(self.additional_data.domain.get_metas())
-                data = Orange.data.Table(domain, self.additional_data)
-                self.send("Additional Selected Time Points", data)
-                if attrs2:
-                    genelists = difscale.ssf(self.additional_data, attrs2, self.pca, merge=self.merge)
-                    genes_table = difscale.ssf_table(genelists)
-                    self.send("Additional informative genes", genes_table)
-                else:
-                    self.send("Additional informative genes", None)
-        else:
-            self.send("Selected Time Points", None)
-            self.send("Additional Selected Time Points", None)
-            self.send("Informative genes", None)
-            self.send("Additional informative genes", None)
-        self.selection_changed_flag = False
-
-    def save_graph(self):
-        from Orange.OrangeWidgets.OWDlgs import OWChooseImageSizeDlg
-        dlg = OWChooseImageSizeDlg(self.scene, parent=self)
-        dlg.exec_()
-
-
-class GraphicsTimePoint(QGraphicsEllipseItem):
-    def __init__(self, *args):
-        QGraphicsEllipseItem.__init__(self, *args)
-        self.setFlags(QGraphicsItem.ItemIsSelectable)
-        self.setAcceptsHoverEvents(True)
-        self._is_hovering = False
-
-    def paint(self, painter, option, widget=0):
-        if self.isSelected():
-            brush = QBrush(Qt.red)
-            pen = QPen(Qt.red, 1)
-        else:
-            brush = QBrush(Qt.darkGray)
-            pen = QPen(Qt.black, 1)
-        if self._is_hovering:
-            brush = QBrush(brush.color().darker(200))
-        painter.save()
-        painter.setBrush(brush)
-        painter.setPen(pen)
-        painter.drawEllipse(self.rect())
-        painter.restore()
-
-    def hoverEnterEvent(self, event):
-        self._is_hovering = True
-        self.update()
-        return QGraphicsEllipseItem.hoverEnterEvent(self, event)
-
-    def hoverLeaveEvent(self, event):
-        self._is_hovering = False
-        self.update()
-        return QGraphicsEllipseItem.hoverLeaveEvent(self, event)
-
-
-class DiffScaleView(QGraphicsView):
-    """Graphics view that emits ``view_resized(QSize)`` when it is resized."""
-    def resizeEvent(self, event):
-        # Let the base class handle the resize first, then announce the new
-        # size through the old-style ``view_resized(QSize)`` signal.
-        QGraphicsView.resizeEvent(self, event)
-        self.emit(SIGNAL("view_resized(QSize)"), event.size())
-
-
-class DiffScaleScene(QGraphicsScene):
-    """Graphics scene with rubber-band selection of ``GraphicsTimePoint``
-    items.
-    """
-
-    def __init__(self, *args):
-        QGraphicsScene.__init__(self, *args)
-        # Rubber-band item; created on the first left-button drag move and
-        # removed again on mouse release.
-        self._selection_rect = None
-
-    def mousePressEvent(self, event):
-        """Ignore Ctrl+press on an empty spot; defer everything else."""
-        item = self.itemAt(event.scenePos())
-        if item or not (event.modifiers() & Qt.ControlModifier):
-            # Let the default implementation handle it
-            return QGraphicsScene.mousePressEvent(self, event)
-        # The default implementation would clear the selection on a ctrl
-        # press over an empty spot, so the event is not forwarded.
-        return
-
-    def _new_selection_rect(self):
-        """Create the styled rubber-band rectangle item in this scene."""
-        band = QGraphicsRectItem(scene=self)
-        band.setPen(
-            QPen(QBrush(QColor(51, 153, 255, 192)),
-                 0.4, Qt.SolidLine, Qt.RoundCap)
-        )
-        band.setBrush(QBrush(QColor(168, 202, 236, 192)))
-        band.setZValue(-100)
-        return band
-
-    def mouseMoveEvent(self, event):
-        """Update the rubber band and select the time points it touches."""
-        if not (event.buttons() & Qt.LeftButton):
-            return QGraphicsScene.mouseMoveEvent(self, event)
-
-        if not self._selection_rect:
-            self._selection_rect = self._new_selection_rect()
-        band = self._selection_rect
-
-        origin = event.buttonDownScenePos(Qt.LeftButton)
-        band.setRect(QRectF(origin, event.scenePos()).normalized())
-        band.show()
-
-        # Without Ctrl the rubber band replaces the current selection.
-        if not event.modifiers() & Qt.ControlModifier:
-            self.clearSelection()
-
-        covered = self.items(band.rect(),
-                             Qt.IntersectsItemShape,
-                             Qt.AscendingOrder)
-        for item in covered:
-            if not isinstance(item, GraphicsTimePoint):
-                continue
-            if item.flags() & GraphicsTimePoint.ItemIsSelectable:
-                item.setSelected(True)
-
-        return QGraphicsScene.mouseMoveEvent(self, event)
-
-    def mouseReleaseEvent(self, event):
-        """Dispose of the rubber band (if any) and defer to the default."""
-        band = self._selection_rect
-        if band:
-            band.hide()
-            band.setRect(QRectF())
-            self.removeItem(band)
-            self._selection_rect = None
-        return QGraphicsScene.mouseReleaseEvent(self, event)
-
-
-def point_layout(labels, points, label_size_hints=None):
-    """Assign a level to every label group so groups on one level do not
-    overlap.
-
-    Points are grouped by label; a group spans from its smallest to its
-    largest point. Groups are visited in ascending order and each is put on
-    the lowest level (starting at 1) whose last placed span ends at or
-    before the start of the group.
-
-    :param labels: label for each point (parallel to `points`).
-    :param points: positions, one per label.
-    :param label_size_hints: currently unused.
-    :return: list of ``(label, [(point, level), ...])`` with the points of
-        each group sorted and `level` given as a float.
-    """
-    by_label = defaultdict(list)
-    for label, point in zip(labels, points):
-        by_label[label].append(point)
-
-    for label in list(by_label):
-        xs = sorted(by_label[label])
-        # TODO: Use label_size_hints for min, max
-        by_label[label] = (xs, (xs[0], xs[-1]))
-
-    # Reverse order, so the next group to place is always the last element.
-    pending = sorted(by_label.items(), key=itemgetter(1), reverse=True)
-    level_end = {}
-    assigned = {}
-    level = 1
-    while pending:
-        label, (xs, (lo, hi)) = pending[-1]
-        if lo < level_end.get(level, lo):
-            # Occupied at this position: retry the same group one level up.
-            level += 1
-            continue
-        pending.pop()
-        assigned[label] = level
-        level_end[level] = hi
-        level = 1
-
-    for label, (xs, _) in list(by_label.items()):
-        height = float(assigned[label])
-        by_label[label] = [(x, height) for x in xs]
-
-    return list(by_label.items())
-
-
-def greedy_scale_label_layout(ticks, rects, spacing=3):
-    """Lay out the labels at ticks on a linear scale, by raising the
-    overlapping labels.
-
-    Labels are processed in order of increasing ``tick.x()``. A label that
-    fits between the previous group and the next label is kept as is;
-    otherwise following labels are added to the work set until the whole
-    set can be packed (and moved down by 10 or 20 units) inside the free
-    span. If the labels run out before the set fits, the remaining labels
-    are returned at their original positions.
-
-    :param ticks: tick positions (objects with an ``x()`` method).
-    :param rects: label rectangles, parallel to `ticks`.
-    :param spacing: horizontal gap between labels packed together.
-    :return: list of new ``QRectF`` objects in processing order; the input
-        rectangles are not modified.
-    """
-    def adjust_interval(start, end, min_v, max_v):
-        """ Shift the (start, end) interval to fit inside (min_v, max_v),
-        keeping its length.
-        """
-        if start < min_v:
-            return (min_v, min_v + (end - start))
-        elif end > max_v:
-            return (max_v - (end - start), max_v)
-        else:
-            return (start, end)
-
-    def centered(center, span):
-        """ Return a centered interval with span.
-        """
-        return (center - span / 2.0, center + span / 2.0)
-
-    def contains(outer, inner):
-        """ Is the `inner` interval inside the `outer` interval.
-        """
-        return outer[0] <= inner[0] and outer[1] >= inner[1]
-
-    def place(interval, min_x, max_x):
-        """ Return `interval` moved inside (min_x, max_x), or None if it
-        cannot be made to fit.
-        """
-        if not contains((min_x, max_x), interval):
-            interval = adjust_interval(*(interval + (min_x, max_x)))
-        if contains((min_x, max_x), interval):
-            return interval
-        return None
-
-    def fit(work, min_x, max_x):
-        """ Fit copies of the `work` rects between min_x and max_x, centered
-        on their ticks if possible. Return [] when they do not fit.
-        """
-        work_set = [QRectF(r) for r in work]
-        first, last = work_set[0], work_set[-1]
-        center = (first.center().x() + last.center().x()) / 2.0
-
-        if len(work_set) == 1:
-            if first.left() >= min_x and first.right() <= max_x:
-                return work_set
-            return []
-
-        if len(work_set) == 2:  # TODO: Merge this with the > 2 case
-            w_sum = first.width() + last.width() + spacing
-            if not w_sum < max_x - min_x:
-                return []
-            interval = place(centered(center, w_sum), min_x, max_x)
-            if interval is None:
-                return []
-            first.moveLeft(interval[0])
-            last.moveLeft(interval[1] - last.width())
-            first.moveTop(first.top() + 10)
-            last.moveTop(last.top() + 10)
-            return work_set
-
-        # More than two labels: the outer two are raised by 10, the inner
-        # ones by 20 and packed next to each other.
-        inner = work_set[1:-1]
-        w_sum = first.width() / 2.0 + last.width() / 2.0 + spacing
-        for r in inner:
-            w_sum += r.width() + spacing
-        interval = place(centered(center, w_sum), min_x, max_x)
-        if interval is None:
-            return []
-
-        istart, iend = interval
-        first.moveLeft(istart)
-        first.moveTop(first.top() + 10)
-        last.moveLeft(iend - last.width())
-        last.moveTop(last.top() + 10)
-        istart += first.width() / 2.0
-        for r in inner:
-            r.moveLeft(istart)
-            r.moveTop(r.top() + 20)
-            istart += r.width() + spacing
-        return work_set
-
-    # Reverse order, so the left-most label is popped from the end.
-    queue = sorted(zip(ticks, rects),
-                   key=lambda item: item[0].x(),
-                   reverse=True)
-    placed = []
-    min_x = -1e30
-
-    while queue:
-        work_set = [queue.pop()]
-        while True:
-            # The free span ends where the next unprocessed label begins.
-            max_x = queue[-1][1].left() if queue else 1e30
-            new_rects = fit([rect for _, rect in work_set], min_x, max_x)
-            if new_rects:  # Can the work set be fit.
-                placed.extend(new_rects)
-                break
-            if not queue:
-                # No labels left to extend the work set with; keep the
-                # remaining labels where they are instead of failing.
-                placed.extend([QRectF(rect) for _, rect in work_set])
-                break
-            # Extend the work set with one more label rect
-            work_set.append(queue.pop())
-        # NOTE(review): the bound is taken from the original (unmoved) rect
-        # of the last label, as before - confirm this is intended.
-        min_x = work_set[-1][1].right()
-    return placed
-
-
-if __name__ == "__main__":
-    # Manual run: show the widget stand-alone with a data table loaded from
-    # a hard-coded path in the user's home directory.
-    app = QApplication(sys.argv)
-    w = OWDifferentiationScale()
-    data = Orange.data.Table(os.path.expanduser("~/Documents/GDS2666n"))
-    w.show()
-    w.set_data(data)
-    # Trigger the processing that normally follows delivery of new inputs.
-    w.handleNewSignals()
-    app.exec_()
-    w.saveSettings()

File _differentiation/widgets/OWFeatureSelection.py

-"""
-<name>Gene Selection</name>
-<description>Gene differential expression scoring and selection.</description>
-<priority>50</priority>
-<icon>icons/Gene Selection.svg</icon>
-"""
-
-from __future__ import absolute_import, with_statement
-
-from collections import defaultdict
-from functools import wraps
-from operator import add
-
-import numpy as np
-import numpy.ma as ma
-
-import Orange
-
-from Orange.OrangeWidgets import OWGUI
-from Orange.OrangeWidgets.OWGraph import *
-from Orange.OrangeWidgets.OWHist import OWInteractiveHist
-from Orange.OrangeWidgets.OWWidget import *
-
-from Orange.bio.obiExpression import *
-
-from Orange.bio.widgets.OWGenotypeDistances import SetContextHandler
-
-from Orange.bio.differentiation.difscale import \
-        ExpressionSignificance_AREA, ExpressionSignificance_FCts, normalize
-
-
-class ScoreHist(OWInteractiveHist):
-    """Interactive score histogram that reports boundary changes to the
-    owning widget (`master`).
-    """
-    def __init__(self, master, parent=None, type="hiTail"):
-        OWInteractiveHist.__init__(self, parent, type=type)
-        # `master` must provide update_selected_info_label() and commit_if(),
-        # both are called from setBoundary().
-        self.master = master
-        self.setAxisTitle(QwtPlot.xBottom, "Score")
-        self.setAxisTitle(QwtPlot.yLeft, "Frequency")
-        self.activateSelection()
-
-    def setBoundary(self, low, hi):
-        # Update the histogram first, then let the master refresh its
-        # selection label and (auto) commit.
-        OWInteractiveHist.setBoundary(self, low, hi)
-        self.master.update_selected_info_label(low, hi)
-        self.master.commit_if()
-
-
-def disable_controls(method):
-    """Decorator: keep the widget's control area disabled while `method`
-    runs.
-
-    The control area is enabled again when the call finishes, also when it
-    raises.
-    """
-    @wraps(method)
-    def wrapper(self, *args, **kwargs):
-        self.controlArea.setDisabled(True)
-        # Process pending events before the (possibly long) call starts;
-        # presumably so the disabled state gets painted.
-        qApp.processEvents()
-        try:
-            result = method(self, *args, **kwargs)
-        finally:
-            self.controlArea.setDisabled(False)
-        return result
-    return wrapper
-
-
-def one_tail_test_high(array, low, hi):
-    """Return the result of ``array >= hi``; `low` is ignored.
-
-    Used as the selection test for the scoring methods (element-wise mask
-    when `array` is a numpy array).
-    """
-    return array >= hi
-
-
-class OWFeatureSelection(OWWidget):
-    settingsList = ["normalization_method_index", "scoring_method_index",
-                    "compute_null", "permutations_count", "select_p_value",
-                    "n_best", "auto_commit"]
-
-    contextHandlers = {"": SetContextHandler("", ["label_index"])}
-
-    NORMALIZATION_METHODS = ["Quantile", "Median"]
-
-    SCORING_METHODS = \
-        [("AREA (timeseries)", ExpressionSignificance_AREA,
-          one_tail_test_high, False),
-         ("FC (timeseries)", ExpressionSignificance_FCts,
-          one_tail_test_high, False)
-         ]
-
-    def __init__(self, parent=None, signalManager=None,
-                 name="Differentiation Preprocessing"):
-        """Declare the widget's inputs/outputs, set default settings and
-        build the GUI (histogram in the main area, controls on the side).
-        """
-        OWWidget.__init__(self, parent, signalManager, name, wantGraph=True,
-                          showSaveGraph=True)
-
-        self.inputs = [("Data Set 1", Orange.data.Table, self.set_data_1),
-                       ("Data Set 2", Orange.data.Table, self.set_data_2)]
-
-        self.outputs = [("Data Set 1", Orange.data.Table),
-                        ("Data Set 2", Orange.data.Table)]
-
-        # Default values; loadSettings() below is called after these are set.
-        self.normalization_method_index = 0
-        self.scoring_method_index = 0
-        self.label_index = 0
-        self.compute_null = False
-        self.permutations_count = 10
-        self.auto_commit = False
-        self.n_best = 20
-        self.select_p_value = 0.01
-        self.add_scores_to_output = True
-        self.data_changed_flag = False
-
-        self.loadSettings()
-
-        #####
-        # GUI
-        #####
-        box_histogram = OWGUI.widgetBox(self.mainArea)
-        self.histogram = ScoreHist(self, box_histogram)
-        self.histogram.type = "hiTail"
-        box_histogram.layout().addWidget(self.histogram)
-        self.histogram.show()
-
-        box = OWGUI.widgetBox(self.controlArea, "Info")
-        self.data_info_label = OWGUI.widgetLabel(box, "\n\n")
-        self.data_info_label.setWordWrap(True)
-        self.selectedInfoLabel = OWGUI.widgetLabel(box, "")
-
-        box = OWGUI.widgetBox(self.controlArea, "Normalization")
-        OWGUI.comboBox(box, self, "normalization_method_index",
-                       items=self.NORMALIZATION_METHODS,
-                       callback=[self.on_normalization_method_changed],
-                       tooltip="Select Normalization Method")
-
-        box = OWGUI.widgetBox(self.controlArea, "Scoring Method")
-        OWGUI.comboBox(box, self, "scoring_method_index",
-                       items=[sm[0] for sm in self.SCORING_METHODS],
-                       callback=[self.update_scores]
-                       )
-
-        # The items of this combo box are filled in by update_labels().
-        box = OWGUI.widgetBox(self.controlArea, "Target Labels")
-        self.labels_cb = OWGUI.comboBox(box, self, "label_index",
-                                        callback=self.on_target_changed,
-                                        tooltip="Select target time label")
-
-        box = OWGUI.widgetBox(self.controlArea, "Selection")
-        box.layout().setSpacing(0)
-
-        # Bound directly to the histogram's upperBoundary attribute.
-        self.upper_boundary_spin = \
-                OWGUI.doubleSpin(box, self, "histogram.upperBoundary",
-                                 min=-1e6, max=1e6, step=1e-6,
-                                 label="Upper threshold:",
-                                 labelWidth=120,
-                                 callback=self.update_boundary,
-                                 callbackOnReturn=True)
-
-        check = OWGUI.checkBox(box, self, "compute_null",
-                               "Compute null distribution",
-                               callback=self.update_scores)
-
-        perm_count_spin = \
-                OWGUI.spin(box, self, "permutations_count",
-                           min=1, max=10, step=1,
-                           label="Permutations:",
-                           labelWidth=120,
-                           callback=self.update_scores,
-                           callbackOnReturn=True)
-
-        # Controls registered in check.disables follow the state of the
-        # "Compute null distribution" check box.
-        check.disables.append(perm_count_spin)
-
-        box1 = OWGUI.widgetBox(box, orientation="horizontal")
-
-        pvalue_spin = OWGUI.doubleSpin(box1, self, "select_p_value",
-                                       min=2e-7, max=1.0, step=1e-7,
-                                       label="P-value:")
-
-        check.disables.append(pvalue_spin)
-
-        p_value_button = OWGUI.button(box1, self, "Select",
-                                      callback=self.select_p_best)
-        check.disables.append(p_value_button)
-        check.makeConsistent()
-
-        box1 = OWGUI.widgetBox(box, orientation='horizontal')
-        OWGUI.spin(box1, self, "n_best", 0, 10000, step=1,
-                   label="Best Ranked:")
-
-        OWGUI.button(box1, self, "Select", callback=self.select_n_best)
-
-        box = OWGUI.widgetBox(self.controlArea, "Output")
-        b = OWGUI.button(box, self, "&Commit", callback=self.commit)
-        cb = OWGUI.checkBox(box, self, "auto_commit", "Commit on change")
-
-        OWGUI.setStopper(self, b, cb, "data_changed_flag", self.commit)
-        OWGUI.checkBox(box, self, "add_scores_to_output",
-                       "Add gene scores to output",
-                       callback=self.commit_if)
-
-        OWGUI.rubber(self.controlArea)
-
-        self.connect(self.graphButton, SIGNAL("clicked()"),
-                     self.histogram.saveToFile)
-
-        # Input data and its normalized versions.
-        self.data_1 = self.data_2 = None
-        self.normalized_data_1 = self.normalized_data_2 = None
-        self.normalized_data = None
-
-        self.have_new_data_1 = False
-        self.have_new_data_2 = False
-        # Filled in by update_labels().
-        self.attribute_targets = []
-        self.attribute_labels = []
-
-        # cuts: scoring method index -> (lower, upper) boundary;
-        # scores: filled from the (key, score) pairs in update_scores().
-        self.cuts = {}
-        self.null_dist = []
-        self.targets = []
-        self.scores = {}
-
-        self.resize(800, 600)
-
-    def clear(self):
-        """Reset the input data, normalized data and labels, and clear the
-        histogram plot.
-        """
-        self.data_1 = None
-        self.data_2 = None
-        self.normalized_data_1 = None
-        self.normalized_data_2 = None
-        self.normalized_data = None
-        self.have_new_data_1 = False
-        self.have_new_data_2 = False
-
-        self.attribute_targets = []
-        self.attribute_labels = []
-        self.clear_plot()
-
-    def clear_plot(self):
-        """Clear the histogram plot.
-        """
-        # Remove the drawn curves, clear the plot and redraw it (empty).
-        self.histogram.removeDrawingCurves()
-        self.histogram.clear()
-        self.histogram.replot()
-
-    def update_labels(self, data):
-        """Update possible target labels from `data`.
-
-        Collects the ``attributes`` (label -> value) dictionaries of all
-        features in ``data.domain.attributes`` and keeps the labels that
-        have at least two distinct values. The result is stored in
-        ``attribute_targets`` (``(label, sorted values)`` pairs) and
-        ``attribute_labels``, and the labels combo box is refilled.
-
-        :param data: data table, or None to clear the labels.
-        """
-        values_by_label = defaultdict(set)
-        if data is not None:
-            # Iterate the label dictionaries directly instead of
-            # concatenating them into one list first (which was quadratic).
-            for attr in data.domain.attributes:
-                for label, value in attr.attributes.items():
-                    values_by_label[label].add(value)
-
-        targets = [(label, sorted(values))
-                   for label, values in values_by_label.items()
-                   if len(values) >= 2]
-        labels = [label for label, _ in targets]
-
-        self.attribute_targets = targets
-        self.attribute_labels = labels
-
-        self.labels_cb.clear()
-        self.labels_cb.addItems(labels)
-
-    def set_data_1(self, data=None):
-        """Set input 'Data Set 1'
-
-        Resets the widget state derived from the previous input, stores
-        `data` and clears error states 0, 1 and warning 0.
-        """
-        # clear() also resets the second input, which is only ever restored
-        # by set_data_2(). Keep it, otherwise replacing 'Data Set 1' alone
-        # (or receiving it after 'Data Set 2') would drop 'Data Set 2'.
-        data_2 = self.data_2
-        have_new_data_2 = self.have_new_data_2
-        self.clear()
-        self.data_2 = data_2
-        self.have_new_data_2 = have_new_data_2
-
-        self.data_1 = data
-        self.have_new_data_1 = True
-
-        self.error([0, 1])
-        self.warning(0)
-
-    def set_data_2(self, data=None):
-        """Set input 'Data Set 2'
-        """
-        # Only stored here; it is used when handleNewSignals() runs the
-        # normalization.
-        self.data_2 = data
-        self.have_new_data_2 = True
-
-    def handleNewSignals(self):
-        """Process the inputs: normalize, score and (auto) commit when
-        'Data Set 1' is present, otherwise send None on both outputs.
-        """
-        if self.data_1 is None:
-            self.send("Data Set 1", None)
-            self.send("Data Set 2", None)
-        else:
-            self.run_normalization(self.data_1, self.data_2)
-            self.set_normalized_data(self.normalized_data_1)
-            self.update_scores()
-
-            self.commit_if()
-
-        self.have_new_data_1 = False
-        self.have_new_data_2 = False
-
-    def run_normalization(self, data_1, data_2=None):
-        """Normalize the inputs with the selected method and store the
-        results in ``normalized_data_1`` / ``normalized_data_2`` (both None
-        when `data_1` is None).
-        """
-        method = self.NORMALIZATION_METHODS[self.normalization_method_index]
-        if data_1 is None:
-            self.normalized_data_1 = None
-            self.normalized_data_2 = None
-            return
-
-        normalized = normalize(data_1, data_2, type=method.lower())
-        self.normalized_data_1, self.normalized_data_2 = normalized
-
-    def set_normalized_data(self, data):
-        """Set the normalized data for scoring and selection.
-
-        Closes the current settings context, refreshes the available target
-        labels from `data` and, when `data` is given, opens the context for
-        the new labels. Error 1 is shown when `data` has no usable
-        attribute labels.
-        """
-        self.closeContext("")
-        self.error([1])
-        self.normalized_data = data
-        self.update_labels(data)
-
-        if data is not None and not self.attribute_labels:
-            # If attr. labels are missing, show an error
-            self.error(1, "Cannot compute gene scores! Requires a data-set "
-                          "with attribute labels!")
-
-        if data is not None:
-            # Load context selection
-            items = [(label, v) \
-                     for label, values in self.attribute_targets \
-                     for v in values]
-
-            self.openContext("", set(items))  # Load selections from context
-            # Keep the index inside the label list; never go below 0, since
-            # a negative index (no labels) would later be used to index an
-            # empty list.
-            self.label_index = max(0, min(self.label_index,
-                                          len(self.attribute_labels) - 1))
-
-    def set_label(self, label):
-        """Set the target label for score computation.
-        """
-        # NOTE(review): update_scores() picks the label through label_index;
-        # target_label is only stored here.
-        self.target_label = label
-        self.update_scores()
-
-    def compute_scores(self, data, score_func, use_attribute_labels,
-                       target=None, advance=lambda: None):
-        """Score `data` with `score_func` and return the ``(key, score)``
-        pairs whose score is not masked.
-
-        `advance` is called once, after the scorer has been constructed.
-        """
-        scorer = score_func(data, use_attribute_labels)
-        advance()
-        return [(key, val) for key, val in scorer(target=target)
-                if val is not ma.masked]
-
-    def compute_null_distribution(self, data, score_func, use_attributes,
-                                  target=None, perm_count=10,
-                                  advance=lambda: None):
-        """Return the flat list of unmasked scores from all runs of the
-        scorer's ``null_distribution``.
-        """
-        scorer = score_func(data, use_attributes)
-        runs = scorer.null_distribution(perm_count, target, advance=advance)
-        null_scores = []
-        for run in runs:
-            for _, score in run:
-                if score is not ma.masked:
-                    null_scores.append(score)
-        return null_scores
-
-    @disable_controls
-    def update_scores(self):
-        """Compute the scores and update the histogram.
-
-        Scores the normalized data for the selected target label with the
-        selected scoring method, optionally computes the null distribution,
-        and redraws the histogram with the boundary reset to the maximum.
-        Does nothing (besides clearing the plot and error 0) when there is
-        no data or no valid target label.
-        """
-        self.clear_plot()
-        self.error(0)
-
-        # Check both ends of the range: a negative index would silently
-        # wrap around, or fail on an empty label list.
-        target_label = None
-        if 0 <= self.label_index < len(self.attribute_labels):
-            target_label = self.attribute_labels[self.label_index]
-
-        if not self.normalized_data or target_label is None:
-            return
-
-        _, score_func, _, _ = \
-                self.SCORING_METHODS[self.scoring_method_index]
-
-        # advance() is called 4 times below (once inside compute_scores);
-        # the null distribution presumably adds one call per permutation.
-        pb_tick_count = 4 + self.permutations_count if self.compute_null else 4
-        pb = OWGUI.ProgressBar(self, pb_tick_count)
-
-        scores = self.compute_scores(self.normalized_data, score_func,
-                                     True,
-                                     target_label,
-                                     advance=pb.advance)
-        self.scores = dict(scores)
-
-        pb.advance()
-        if self.compute_null:
-            self.null_dist = \
-                self.compute_null_distribution(
-                        self.normalized_data, score_func, True,
-                        target_label, self.permutations_count,
-                        advance=pb.advance)
-        else:
-            self.null_dist = []
-
-        pb.advance()
-
-        if self.scores:
-            self.histogram.setValues(self.scores.values())
-            minx, maxx = self.histogram.minx, self.histogram.maxx
-            self.histogram.setBoundary(maxx, maxx)
-
-            if self.compute_null and self.null_dist:
-                nullY, nullX = np.histogram(self.null_dist,
-                                            bins=self.histogram.xData)
-                # Normalize null histogram so it has the same surface as
-                # scores histogram. The counts are integers, so divide by a
-                # float to avoid truncating the result.
-                nullY = nullY / float(self.permutations_count)
-                self.histogram.nullCurve = self.histogram.addCurve(
-                        "nullCurve", Qt.black, Qt.black, 6,
-                        symbol=QwtSymbol.NoSymbol,
-                        style=QwtPlotCurve.Steps,
-                        xData=nullX,
-                        yData=nullY
-                        )
-
-                # Rescale the axes to show both histograms, with a 5% margin.
-                minx = min(min(nullX), self.histogram.minx)
-                maxx = max(max(nullX), self.histogram.maxx)
-                miny = min(min(nullY), self.histogram.miny)
-                maxy = max(max(nullY), self.histogram.maxy)
-
-                self.histogram.setAxisScale(QwtPlot.xBottom,
-                                            minx - (0.05 * (maxx - minx)),
-                                            maxx + (0.05 * (maxx - minx)))
-
-                self.histogram.setAxisScale(QwtPlot.yLeft,
-                                            miny - (0.05 * (maxy - miny)),
-                                            maxy + (0.05 * (maxy - miny)))
-
-            self.warning(0)
-        else:
-            self.warning(0, "No scores obtained.")
-
-        self.histogram.replot()
-        pb.advance()
-        pb.finish()
-        self.update_data_info_label()
-
-    def update_data_info_label(self):
-        """Show the number of samples and genes and the sample target in
-        the info label.
-        """
-        if self.normalized_data is not None:
-            data = self.normalized_data
-            genes = len(data)
-            samples = len(data.domain.attributes)
-            target_labels = [t[1] for t in self.targets]
-            # `targets` is never filled by this widget; fall back to the
-            # currently selected target label so the info is not empty.
-            if not target_labels and \
-                    0 <= self.label_index < len(self.attribute_labels):
-                target_labels = [self.attribute_labels[self.label_index]]
-
-            text = "%i samples, %i genes\n" % (samples, genes)
-            text += "Sample target: '%s'" % (",".join(target_labels))
-        else:
-            text = "No data on input\n"
-        self.data_info_label.setText(text)
-
-    def update_selected_info_label(self, cutOffLower=0, cutOffUpper=0):
-        """Remember the cut-offs for the current scoring method and show
-        how many genes pass the method's selection test.
-        """
-        self.cuts[self.scoring_method_index] = (cutOffLower, cutOffUpper)
-        if not self.normalized_data:
-            self.selectedInfoLabel.setText("0 selected genes")
-            return
-
-        test = self.SCORING_METHODS[self.scoring_method_index][2]
-        passed = test(np.array(self.scores.values()),
-                      cutOffLower, cutOffUpper)
-        (positive, ) = np.nonzero(passed)
-        self.selectedInfoLabel.setText("%i selected genes" % len(positive))
-
-    def select_n_best(self):
-        """Select N best ranked genes.
-
-        Sets the histogram boundary to the lowest of the `n_best` highest
-        scores. Does nothing when there are no scores or `n_best` is not
-        positive.
-        """
-        if self.n_best <= 0:
-            # scores[-0:] would be the whole list, i.e. select every gene.
-            return
-
-        scores = sorted(self.scores.values())
-        scores = scores[-self.n_best:]
-        if not scores:
-            return
-
-        boundary = scores[0]
-        self.histogram.setBoundary(boundary, boundary)
-
-    def update_boundary(self):
-        # Callback of the "Upper threshold" spin box: re-apply the edited
-        # upper boundary as both the lower and upper histogram boundary.
-        if self.normalized_data is not None and self.scores:
-            self.histogram.setBoundary(self.histogram.upperBoundary,
-                                       self.histogram.upperBoundary)
-
-    def select_p_best(self):
-        """Set the boundary to the null-distribution score at the selected
-        p-value (the highest null score when the count rounds down to 0).
-        """
-        if not self.null_dist:
-            return
-
-        ordered = sorted(self.null_dist)
-        count = min(int(len(ordered) * self.select_p_value), len(ordered))
-        if count:
-            cut = ordered[-count]
-        else:
-            cut = ordered[-1]
-        self.histogram.setBoundary(cut, cut)
-
-    def commit_if(self):
-        # Commit right away when "Commit on change" is checked, otherwise
-        # only flag that there are uncommitted changes.
-        if self.auto_commit:
-            self.commit()
-        else:
-            self.data_changed_flag = True
-
-    def commit(self):
-        """Send the selected genes on the outputs.
-
-        Genes whose score passes the high-tail test against the histogram
-        boundary are selected from the normalized data and sent as
-        'Data Set 1' (optionally with the score added as a meta attribute).
-        If a second normalized data set is available, it is re-normalized
-        against the selection and sent as 'Data Set 2'. None is sent on both
-        outputs when nothing is selected. Does nothing without data or
-        scores.
-        """
-        if not self.normalized_data or not self.scores:
-            return
-
-        cut_off = self.histogram.upperBoundary
-        cut_off_lower = self.histogram.lowerBoundary
-
-        # Keep keys and scores apart, so neither the keys nor the boolean
-        # test result get coerced to a common array dtype.
-        keys = list(self.scores.keys())
-        values = np.array([self.scores[key] for key in keys], dtype=float)
-        passed = one_tail_test_high(values, cut_off_lower, cut_off)
-
-        # selected is a set of indices into data.
-        selected = set([int(key) for key, hit in zip(keys, passed) if hit])
-
-        newdata_1 = None
-        newdata_2 = None
-
-        if self.normalized_data and selected:
-            selected = sorted(selected)
-            newdata_1 = select_instances(self.normalized_data, selected)
-
-            # If second data set is available we need to 'renormalize' it
-            # since the global median has changed in the selected gene subset.
-            if self.normalized_data_2 is not None:
-                _, newdata_2 = normalize(
-                            newdata_1, self.normalized_data_2, type="median")
-
-            # Add scores to Data Set 1
-            if self.add_scores_to_output:
-                name = self.SCORING_METHODS[self.scoring_method_index][0]
-                score_attr = Orange.feature.Continuous(name)
-                # NOTE(review): `orange` is not imported explicitly in this
-                # module; presumably provided by one of the star imports.
-                mid = orange.newmetaid()
-
-                newdata_1.domain.addmeta(mid, score_attr)
-                for ex, key in zip(newdata_1, selected):
-                    ex[mid] = self.scores[key]
-
-        self.send("Data Set 1", newdata_1)
-        self.send("Data Set 2", newdata_2)
-
-        self.data_changed_flag = False
-
-    def on_normalization_method_changed(self):
-        """Re-run the normalization and, if it produced data, rescore."""
-        self.run_normalization(self.data_1, self.data_2)
-        if not self.normalized_data_1:
-            return
-        self.set_normalized_data(self.normalized_data_1)
-        self.update_scores()
-
-    def on_target_changed(self):
-        """Apply the label the user selected in the target labels combo box.
-        """
-        self.set_label(self.attribute_labels[self.label_index])
-
-    def settingsFromWidgetCallbackTargetSelection(self, handler, context):
-        # Store the current label selection in the settings context.
-        context.label_index = self.label_index
-
-    def settingsToWidgetCallbackTargetSelection(self, handler, context):
-        # Restore the label selection from the context; keep the current
-        # value when the context has none stored.
-        self.label_index = getattr(context, "label_index", self.label_index)
-
-
-def select_instances(table, indices):
-    """Return a new table, on a new domain built from `table`'s domain, with
-    the instances of `table` at `indices` (in the given order).
-    """
-    rows = [table[index] for index in indices]
-    domain = Orange.data.Domain(table.domain)
-    return Orange.data.Table(domain, rows)
-
-
-if __name__ == "__main__":
-    # Manual run: show the widget stand-alone with two data tables loaded
-    # from hard-coded paths in the user's home directory.
-    import sys
-    app = QApplication(sys.argv)
-    w = OWFeatureSelection()
-    w.show()
-    data_1 = Orange.data.Table(os.path.expanduser("~/Documents/GDS2666.tab"))
-    data_2 = Orange.data.Table(os.path.expanduser("~/Documents/GDS2667.tab"))
-    w.set_data_1(data_1)
-    w.set_data_2(data_2)
-#    w.set_data(None)
-#    w.set_data(data)
-    # Trigger the processing that normally follows delivery of new inputs.
-    w.handleNewSignals()
-    app.exec_()
-    w.saveSettings()

File _differentiation/widgets/OWGEODatasets.py

-"""
-<name>GEO Time Series Data Sets</name>
-<description>Access to Gene Expression Omnibus time series data sets.</description>
-<priority>20</priority>
-<contact>Ales Erjavec (ales.erjavec(@at@)fri.uni-lj.si)</contact>
-<icon>icons/GEO Time Data.svg</icon>
-"""
-
-from __future__ import absolute_import, with_statement
-
-import os
-import sys
-import glob
-
-from collections import defaultdict
-from functools import partial
-
-from Orange.orng import orngServerFiles
-from Orange.orng.orngDataCaching import data_hints
-from Orange.OrangeWidgets import OWGUI, OWGUIEx
-from Orange.OrangeWidgets.OWWidget import *
-
-from Orange.bio import obiGEO
-
-LOCAL_GDS_COLOR = Qt.darkGreen
-
-TextFilterRole = OWGUI.OrangeUserRole.next()
-
-class TreeModel(QAbstractItemModel):
-    """Flat (single level) item model over a table of values.
-
-    Values are stored per role as ``role -> row -> column``; display values
-    come from `data`, other roles are filled in through ``setRoleData``,
-    ``setData`` and ``setColumnLinks``.
-    """
-
-    def __init__(self, data, header, parent):
-        QAbstractItemModel.__init__(self, parent)
-        self._data = [[QVariant(s) for s in row] for row in data]
-        self._dataDict = {}
-
-        # Three level store; a missing innermost entry is an empty QVariant.
-        dataStore = partial(
-            defaultdict,
-            partial(defaultdict, partial(defaultdict, QVariant))
-        )
-        horizontal = dict([(i, {Qt.DisplayRole: h})
-                           for i, h in enumerate(header)])
-        self._roleData = dataStore({Qt.DisplayRole: self._data})
-        self._header = dataStore({Qt.Horizontal: horizontal})
-
-    def setColumnLinks(self, column, links):
-        """Store `links` for the rows of `column` and style the cells as
-        links (underlined, blue).
-        """
-        underlined = QFont()
-        underlined.setUnderline(True)
-        underlined = QVariant(underlined)
-        for row, link in enumerate(links):
-            self._roleData[LinkRole][row][column] = QVariant(link)
-            self._roleData[Qt.FontRole][row][column] = underlined
-            self._roleData[Qt.ForegroundRole][row][column] = \
-                QVariant(QColor(Qt.blue))
-
-    def setRoleData(self, role, row, col, data):
-        self._roleData[role][row][col] = data
-
-    def setData(self, index, value, role=Qt.EditRole):
-        self._roleData[role][index.row()][index.column()] = value
-        self.emit(SIGNAL("dataChanged(QModelIndex, QModelIndex)"), index, index)
-
-    def data(self, index, role):
-        return self._roleData[role][index.row()][index.column()]
-
-    def index(self, row, col, parent=QModelIndex()):
-        return self.createIndex(row, col, 0)
-
-    def parent(self, index):
-        # Flat model: no item has a parent.
-        return QModelIndex()
-
-    def rowCount(self, index=QModelIndex()):
-        # Only the (invalid) root index has rows.
-        return 0 if index.isValid() else len(self._data)
-
-    def columnCount(self, index):
-        return len(self._header[Qt.Horizontal])
-
-    def headerData(self, section, orientation, role):
-        try:
-            value = self._header[orientation][section][role]
-        except KeyError:
-            return QVariant()
-        return QVariant(value)
-
-    def setHeaderData(self, section, orientation, value, role=Qt.EditRole):
-        self._header[orientation][section][role] = value
-        
-from Orange.utils import lru_cache
-
-class MySortFilterProxyModel(QSortFilterProxyModel):
-    """Sort/filter proxy accepting rows that contain every active filter string.
-
-    ``_cache`` maps each active filter string to a per-row list of booleans
-    (does the row's filter text contain the string). ``_cache_fixed`` maps
-    row numbers to the combined verdict and is what filterAcceptsRow reads.
-    """
-
-    def __init__(self, parent=None):
-        QSortFilterProxyModel.__init__(self, parent)
-        self._filter_strings = []
-        self._cache = {}
-        self._cache_fixed = {}
-        self._cache_prefix = {}
-        self._row_text = {}
-
-        # Create a cached version of _filteredRows
-        self._resetFilteredRowsCache()
-
-    def _resetFilteredRowsCache(self):
-        """ Install a fresh memoized wrapper around _filteredRows.
-
-        The wrapper is stored on the instance and shadows the method. Building
-        a new one discards every previously memoized result.
-        """
-        # Look the method up on the class: the instance attribute of the same
-        # name may already hold an older wrapper.
-        compute = type(self)._filteredRows
-
-        def filtered_rows(filter_strings):
-            return compute(self, filter_strings)
-
-        self._filteredRows = lru_cache(100)(filtered_rows)
-
-    def setSourceModel(self, model):
-        """ Set the source model for the filter. All filter strings and
-        caches are dropped.
-        """
-        self._filter_strings = []
-        self._cache = {}
-        self._cache_fixed = {}
-        self._cache_prefix = {}
-        self._row_text = {}
-        # Memoized row maps were computed against the previous model and
-        # must not be served for the new one.
-        self._resetFilteredRowsCache()
-        QSortFilterProxyModel.setSourceModel(self, model)
-
-    def addFilterFixedString(self, string, invalidate=True):
-        """ Add `string` filter to the list of filters. If invalidate is
-        True the filter cache will be recomputed.
-        """
-        self._filter_strings.append(string)
-        all_rows = range(self.sourceModel().rowCount())
-        row_text = [self.rowFilterText(row) for row in all_rows]
-        self._cache[string] = [string in text for text in row_text]
-        if invalidate:
-            self.updateCached()
-            self.invalidateFilter()
-
-    def removeFilterFixedString(self, index=-1, invalidate=True):
-        """ Remove the `index`-th filter string. If invalidate is True the
-        filter cache will be recomputed.
-        """
-        string = self._filter_strings.pop(index)
-        # The same string may have been added more than once; keep its row
-        # cache for as long as one occurrence is still active.
-        if string not in self._filter_strings:
-            del self._cache[string]
-        if invalidate:
-            self.updateCached()
-            self.invalidateFilter()
-
-    def setFilterFixedStrings(self, strings):
-        """ Set a list of string to be the new filters.
-
-        Only the difference to the current filters is applied; the combined
-        cache is recomputed once at the end.
-        """
-        current = set(self._filter_strings)
-        wanted = set(strings)
-        for stale in current - wanted:
-            self.removeFilterFixedString(self._filter_strings.index(stale),
-                                         invalidate=False)
-
-        for fresh in wanted - current:
-            self.addFilterFixedString(fresh, invalidate=False)
-        self.updateCached()
-        self.invalidateFilter()
-
-    def _filteredRows(self, filter_strings):
-        """ Return a dictionary mapping row indexes to True False values.
-        A row maps to True when every string in `filter_strings` matched it.
-
-        .. note:: This helper function is wrapped in a memoizing cache by
-                  _resetFilteredRowsCache, so `filter_strings` must be
-                  hashable (updateCached passes a sorted tuple).
-        """
-        all_rows = range(self.sourceModel().rowCount())
-        cache = self._cache
-        return dict((row, all(cache[string][row] for string in filter_strings))
-                    for row in all_rows)
-
-    def updateCached(self):
-        """ Update the combined filter cache.
-        """
-        self._cache_fixed = self._filteredRows(tuple(sorted(self._filter_strings)))
-
-    def setFilterFixedString(self, string):
-        """ Forward to QSortFilterProxyModel.setFilterFixedString. This does
-        not touch the filter strings managed by this class.
-        """
-        QSortFilterProxyModel.setFilterFixedString(self, string)
-
-    def rowFilterText(self, row):
-        """ Return text for `row` to filter on. The text is read from the
-        source model at the filter key column using the filter role.
-        """
-        f_role = self.filterRole()
-        f_column = self.filterKeyColumn()
-        s_model = self.sourceModel()
-        data = s_model.data(s_model.index(row, f_column), f_role)
-        if isinstance(data, QVariant):
-            data = unicode(data.toString(), errors="ignore")
-        else:
-            data = unicode(data, errors="ignore")
-        return data
-
-    def filterAcceptsRow(self, row, parent):
-        """ Accept `row` unless the combined cache marks it as rejected.
-        Rows missing from the cache are accepted.
-        """
-        return self._cache_fixed.get(row, True)
-
-    def lessThan(self, left, right):
-        """ Compare two source indexes for sorting. Column 1 is compared by
-        the integer that remains after stripping leading "G", "D" and "S"
-        characters; everything else uses the default comparison.
-        """
-        # TODO: Remove fixed column handling
-        if left.column() == 1 and right.column() == 1:
-            left_gds = str(left.data(Qt.DisplayRole).toString())
-            right_gds = str(right.data(Qt.DisplayRole).toString())
-            # lstrip removes any leading run of the characters G, D and S.
-            left_gds = left_gds.lstrip("GDS")
-            right_gds = right_gds.lstrip("GDS")
-            try:
-                return int(left_gds) < int(right_gds)
-            except ValueError:
-                # Not a plain number; fall back to the default comparison.
-                pass
-        return QSortFilterProxyModel.lessThan(self, left, right)
-
-
-from Orange.OrangeWidgets.OWGUI import LinkStyledItemDelegate, LinkRole
-
-
-def childiter(item):
-    """ Yield the children of a QTreeWidgetItem instance in index order.
-    """
-    count = item.childCount()
-    position = 0
-    while position < count:
-        yield item.child(position)
-        position += 1
-
-
-def has_time_annotations(gds):
-    """Tell whether `gds` lists at least three subsets of type "time".
-
-    Subset types are compared case-insensitively, ignoring surrounding
-    whitespace.
-    """
-    time_count = 0
-    for subset in gds["subsets"]:
-        if subset["type"].lower().strip() == "time":
-            time_count += 1
-    return time_count >= 3
-
-
-class OWGEODatasets(OWWidget):
-    settingsList = ["outputRows", "mergeSpots", "gdsSelectionStates",
-                    "splitterSettings", "currentGds", "autoCommit"]
-
-    def __init__(self, parent=None, signalManager=None,
-                 name="GEO Data Sets"):
-        """Set default settings, load stored ones, build the GUI and schedule
-        the first updateTable call.
-        """
-        OWWidget.__init__(self, parent, signalManager, name)
-
-        # Single output channel carrying an ExampleTable.
-        self.outputs = [("Expression Data", ExampleTable)]
-
-        ## Settings
-#        self.selectedSubsets = []
-#        self.sampleSubsets = []
-        self.selectedAnnotation = 0
-        self.includeIf = False
-        self.minSamples = 3
-        self.autoCommit = False
-        self.outputRows = 0
-        self.mergeSpots = True
-        self.filterString = ""
-        self.currentGds = None
-        self.selectionChanged = False
-        # NOTE(review): autoCommit was already set to False a few lines up;
-        # this second assignment looks redundant.
-        self.autoCommit = False
-        self.gdsSelectionStates = {}
-        # Default splitter states, passed to QSplitter.restoreState below
-        # (one entry per splitter, in the order of self.splitters).
-        self.splitterSettings = ['\x00\x00\x00\xff\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x01\xea\x00\x00\x00\xd7\x01\x00\x00\x00\x07\x01\x00\x00\x00\x02',
-                                 '\x00\x00\x00\xff\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x01\xb5\x00\x00\x02\x10\x01\x00\x00\x00\x07\x01\x00\x00\x00\x01']
-
-        # Presumably replaces the defaults above with stored values for the
-        # names in settingsList -- TODO confirm against OWWidget.loadSettings.
-        self.loadSettings()
-
-        ## GUI
-        # Control area: info label, output options, commit controls.
-        self.infoBox = OWGUI.widgetLabel(OWGUI.widgetBox(self.controlArea, "Info", addSpace=True), "\n\n")
-
-        box = OWGUI.widgetBox(self.controlArea, "Output", addSpace=True)
-        OWGUI.radioButtonsInBox(box, self, "outputRows", ["Genes or spots", "Samples"], "Rows", callback=self.commitIf)
-        OWGUI.checkBox(box, self, "mergeSpots", "Merge spots of same gene", callback=self.commitIf)
-
-        # NOTE(review): this second box is also titled "Output"; confirm the
-        # duplicate title is intended.
-        box = OWGUI.widgetBox(self.controlArea, "Output", addSpace=True)
-        self.commitButton = OWGUI.button(box, self, "Commit", callback=self.commit)
-        cb = OWGUI.checkBox(box, self, "autoCommit", "Commit on any change")
-        OWGUI.setStopper(self, self.commitButton, cb, "selectionChanged", self.commit)
-        OWGUI.rubber(self.controlArea)
-
-        # Main area: filter line edit above a vertical splitter.
-        self.filterLineEdit = OWGUIEx.lineEditHint(self.mainArea, self, "filterString", "Filter",
-                                   caseSensitive=False, matchAnywhere=True, 
-                                   #listUpdateCallback=self.filter, callbackOnType=False, 
-                                   callback=self.filter,  delimiters=" ")
-        
-        splitter = QSplitter(Qt.Vertical, self.mainArea)
-        self.mainArea.layout().addWidget(splitter)
-        # Despite its name, treeWidget is a QTreeView (model based).
-        self.treeWidget = QTreeView(splitter)
-        
-        self.treeWidget.setSelectionMode(QAbstractItemView.SingleSelection)
-        self.treeWidget.setRootIsDecorated(False)
-        self.treeWidget.setSortingEnabled(True)
-        self.treeWidget.setAlternatingRowColors(True)
-        self.treeWidget.setUniformRowHeights(True)
-        self.treeWidget.setItemDelegate(LinkStyledItemDelegate(self.treeWidget))
-        # Column 0 gets its own indicator delegate instead of the link delegate.
-        self.treeWidget.setItemDelegateForColumn(0, OWGUI.IndicatorItemDelegate(self.treeWidget, role=Qt.DisplayRole))
-        
-        # NOTE(review): itemSelectionChanged() is a QTreeWidget signal and
-        # treeWidget is a QTreeView; confirm this connection ever fires.
-        self.connect(self.treeWidget, SIGNAL("itemSelectionChanged ()"), self.updateSelection)
-        self.treeWidget.viewport().setMouseTracking(True)
-        
-        # Lower part of the vertical splitter: description | sample annotations.
-        splitterH = QSplitter(Qt.Horizontal, splitter) 
-        
-        box = OWGUI.widgetBox(splitterH, "Description")
-        self.infoGDS = OWGUI.widgetLabel(box, "")
-        self.infoGDS.setWordWrap(True)
-        OWGUI.rubber(box)
-        
-        box = OWGUI.widgetBox(splitterH, "Sample Annotations")
-        self.annotationsTree = QTreeWidget(box)
-        self.annotationsTree.setHeaderLabels(["Type (Sample annotations)", "Sample count"])
-        self.annotationsTree.setRootIsDecorated(True)
-        box.layout().addWidget(self.annotationsTree)
-        self.connect(self.annotationsTree, SIGNAL("itemChanged(QTreeWidgetItem * , int)"), self.annotationSelectionChanged)
-        self._annotationsUpdating = False
-        self.splitters = splitter, splitterH
-        self.connect(splitter, SIGNAL("splitterMoved(int, int)"), self.splitterMoved)
-        self.connect(splitterH, SIGNAL("splitterMoved(int, int)"), self.splitterMoved)
-        
-        # Restore each splitter from its entry in splitterSettings.
-        for sp, setting in zip(self.splitters, self.splitterSettings):
-            sp.restoreState(setting)
-            
-        self.searchKeys = ["dataset_id", "title", "platform_organism", "description"]
-        self.cells = []
-
-        # Defer the first updateTable call by 50 ms, after the constructor
-        # has returned.
-        QTimer.singleShot(50, self.updateTable)
-        self.resize(1000, 600)
-
-    def updateInfo(self):
-        """Refresh the info label with dataset counts.
-
-        Shows the number of entries in self.gds and the number of paths
-        matching "GDS*" under the local "GEO" server-files location. When the
-        view's model has a different row count than self.cells, that row
-        count is appended as the filtered total.
-        """
-        total = len(self.gds)
-        cached = len(glob.glob(orngServerFiles.localpath("GEO") + "/GDS*"))
-        summary = "%i datasets\n%i datasets cached\n" % (total, cached)
-
-        visible_rows = self.treeWidget.model().rowCount()
-        if visible_rows != len(self.cells):
-            summary = summary + "%i after filtering" % visible_rows
-        self.infoBox.setText(summary)
-
-    def updateTable(self):
-        self.treeItems = []
-        self.progressBarInit()
-        with orngServerFiles.DownloadProgress.setredirect(self.progressBarSet):
-