Commits

Anonymous committed 2876f28

GO term finder widget and GOlib - GO handling library

Comments (0)

Files changed (2)

+## the problem is that a gene must be counted several times, once for each Evidence type
+import cPickle, math
+
+### misc
+# need to replace with a "standard" function
+binoms = {}
+def binom(n,k):
+    global binoms
+
+    r = binoms.get( str( (n, k)), None)
+    if r:
+        return r
+
+    if n==0 or k==0 or n==k:
+        binoms[ str( (n, k))] = 1
+        return 1
+    else: 
+        r = binom(n-1,k-1) + binom(n-1,k)
+        binoms[ str( (n,k))] = r
+        return r
+###
+
+evidenceTypesOrdered = [
+'IMP',
+'IGI',
+'IPI',
+'ISS',
+'IDA',
+'IEP',
+'IEA',
+'TAS',
+'NAS',
+'ND',
+'IC'
+]
+
+evidenceTypes = {
+'IMP': 'inferred from mutant phenotype',
+'IGI': 'inferred from genetic interaction', ## [with <database:gene_symbol[allele_symbol]>]',
+'IPI': 'inferred from physical interaction', ## [with <database:protein_name>]',
+'ISS': 'inferred from sequence similarity', ## [with <database:sequence_id>] ',
+'IDA': 'inferred from direct assay',
+'IEP': 'inferred from expression pattern',
+'IEA': 'inferred from electronic annotation', ## [to <database:id>]',
+'TAS': 'traceable author statement',
+'NAS': 'non-traceable author statement',
+'ND': 'no biological data available ',
+'IC': 'inferred by curator'
+}
+
+## returns DAG (dictionary) of maxdepth
+def DAGfilterForDepth(dag, node, cdepth):
+    filteredDAG = {}
+    if cdepth == 0:
+        filteredDAG[node] = []
+
+    if cdepth > 0:
+        filteredDAG[node] = dag[node]
+        for (childNode, rtype) in dag.get(node, []):
+            tmpd = DAGfilterForDepth(dag, childNode, cdepth - 1)
+            for (key, items) in tmpd.items():
+                filteredDAG[key] = items
+    return filteredDAG
+
+## creates DAG from GO and two lists of GOIDs:
+##   allGOIDs: GOIDS that can be included in DAG (because have genes assigned to them)
+##   sigGOIDs: GOIDs that were marked as significant
+##             DAG must show only the significant GOIDs and all other GOIDs needed to display a minimal and correct(full) GO
+##             correct/full - without any missing paths from bottom of DAG to root
+##
+## creates a minimal GO displaying all significant GOIDs with genes annotated to them
+##
+def createGODAGtoDisplay(Lgo, allGOIDs, sigGOIDs):
+    DAG = {}
+    ## select significant GOIDs (sigGOIDs) and their children that are also in allGOIDs (all terms with genes from cluster set)
+    for goid in allGOIDs:
+        ## select significant nodes
+        sig = goid in sigGOIDs
+        ## or those that have a significant parent
+        if not(sig):
+            nodeparents = Lgo['rGO'].get(goid, []) ## all parents of the node with goid
+            ## look if any of the parents significant
+            for sigID in sigGOIDs:
+                sig = sig or (sigID in nodeparents)
+                if sig: break ## found one significant parent, we must include this goid into DAG
+        
+        if sig:
+            ## select only those children that are in allGOIDs
+            nodechildren = Lgo['GO'].get(goid, [])
+            DAG[goid] = [(childgoid, rtype) for (childgoid, rtype) in nodechildren if childgoid in allGOIDs]
+
+    ## now we must fill in the the rest of the DAG with all the connections root -> children worth displaying (the ones already in the DAG)
+    worthDisp = DAG.keys()
+    goidsToFillWith = []
+    for goid in worthDisp: ## find all parents of those allGOIDs nodes worthDisplaying that are not yet present in DAG
+        nodeparents = Lgo['rGO'].get(goid, []) ## all parents
+        goidsToFillWith.extend( [parentgoid for parentgoid in nodeparents if (parentgoid in allGOIDs and parentgoid not in worthDisp and parentgoid not in goidsToFillWith)])
+
+    ## fill DAG
+    for goid in goidsToFillWith:
+        nodechildren = Lgo['GO'].get(goid, [])
+        assert( DAG.get(goid, None) == None) ## should not be already present
+        DAG[goid] = [(childgoid, rtype) for (childgoid, rtype) in nodechildren if (childgoid in worthDisp + goidsToFillWith)]
+    
+    ## connect 'root' to all top nodes in DAG
+    rootchildren = Lgo['GO'].get('root', [])
+    DAG['root'] = [(goid, rtype) for (goid, rtype) in rootchildren if goid in DAG.keys()] ## make root->node connections only for those terms that have significant children
+
+    return DAG
+
+def printNode(dag, goid2term, node, level, allgs, sigs):
+    print "\t"*level + str(int(node in allgs)) + " " + str(int(node in sigs)) + " " + str(node) + " " + str(goid2term.get(node, '?'))
+    for (childNode, rtype) in dag.get(node, []):
+        printNode(dag, goid2term, childNode, level + 1, allgs, sigs)
+
+def printDAG(dag, goid2term, allgs, sigs):
+    printNode(dag, goid2term, 'root', 0, allgs, sigs)
+
+def nodeDepth(dag, node, level):
+    depth = level
+    for (childNode, rype) in dag.get(node, []):
+        depth = max(depth, nodeDepth(dag, childNode, level + 1))
+    return depth
+
+def DAGdepth(dag):
+    return nodeDepth(dag, 'root', 0)
+
+### populate GO with genes
+##
+## for a given list of genes, annotation and GO it returns a list of all direct and indirect annotations to GO terms
+## optional constraint: a list of GO term only for which to return results
+##
+## used: for cluster and reference frequencies of GO terms
+##
+def populateGO(geneList, Lann, Lgo, LonlyGOIDs=None, progressBar = None, progressStart = 0.0, progressPart = 100.0):
+    if LonlyGOIDs and len(LonlyGOIDs) == 0:
+        return {}, {}, {}
+
+    genesGOIDdirect = {}
+    genesGOIDindirect = {}
+    genesGOIDboth = {}
+
+    pcn = 0.0
+    if LonlyGOIDs:
+        ## go over only the GOIDs in list
+        for daGOID in LonlyGOIDs:
+            if progressBar:
+                progressBar(progressStart + progressPart * pcn / len(LonlyGOIDs))
+                pcn += 1.0
+            geneAnn = Lann['GOID2gene'].get(daGOID, None)
+            if not(geneAnn): continue
+
+            for (daGene, daNOT, daEvidence, daAspect, daDB_Object_Type) in geneAnn:
+                if daAspect <> Lgo['aspect']: continue ## skip annotations different from the loaded GO aspect
+                if daGene not in geneList:
+                    continue
+
+                ## first include the direct annotation
+                tmpl = genesGOIDdirect.get(daGOID, [])
+                if (daGene, daEvidence) not in tmpl:
+                    genesGOIDdirect[daGOID] = tmpl + [(daGene, daEvidence)] ## update only if GO term in list of terms to return, or list None
+
+                ## update both
+                tmpl = genesGOIDboth.get(daGOID, [])
+                if (daGene, daEvidence) not in tmpl:
+                    genesGOIDboth[daGOID] = tmpl + [(daGene, daEvidence)] ## update only if GO term in list of terms to return, or list None
+
+                ## then all the indirect: by going over all parents of the daGOID, and make indirect annotations to those GO terms
+                for GOID in Lgo['rGO'].get(daGOID, []): ##.get(daGOID, []): ## use the reverse GO info to go to all parents
+    ##                if GOID == 'root': continue
+
+                    if GOID not in LonlyGOIDs:
+                        continue
+
+                    tmpl = genesGOIDindirect.get(GOID, [])
+                    if (daGene, daEvidence) not in tmpl:
+                        genesGOIDindirect[GOID] = tmpl + [(daGene, daEvidence)]
+
+                    tmpl = genesGOIDboth.get(GOID, [])
+                    if (daGene, daEvidence) not in tmpl:
+                        genesGOIDboth[GOID] = tmpl + [(daGene, daEvidence)]
+
+    else:
+        ## go over all genes and find the apropriate GOIDs
+        for gene in geneList: ## go over genes
+            if progressBar:
+                progressBar(progressStart + progressPart * pcn / len(geneList))
+                pcn += 1.0
+
+            geneAnn = Lann['gene2GOID'].get(gene, None)
+            if not(geneAnn): continue
+
+            for (daGOID, daNOT, daEvidence, daAspect, daDB_Object_Type) in geneAnn:
+                if daAspect <> Lgo['aspect']: continue ## skip annotations different from the loaded GO aspect
+    ##            if daNOT <> '': continue ## should we skip those annotations that tell when a gene is not part of a specific GO term?
+
+                ## first include the direct annotation
+                tmpl = genesGOIDdirect.get(daGOID, [])
+                if (gene, daEvidence) not in tmpl:
+                    genesGOIDdirect[daGOID] = tmpl + [(gene, daEvidence)] ## update only if GO term in list of terms to return, or list None
+
+                ## update both
+                tmpl = genesGOIDboth.get(daGOID, [])
+                if (gene, daEvidence) not in tmpl:
+                    genesGOIDboth[daGOID] = tmpl + [(gene, daEvidence)] ## update only if GO term in list of terms to return, or list None
+
+                ## then all the indirect: by going over all parents of the daGOID, and make indirect annotations to those GO terms
+                for GOID in Lgo['rGO'].get(daGOID, []): ##.get(daGOID, []): ## use the reverse GO info to go to all parents
+    ##                if GOID == 'root': continue
+
+                    tmpl = genesGOIDindirect.get(GOID, [])
+                    if (gene, daEvidence) not in tmpl:
+                        genesGOIDindirect[GOID] = tmpl + [(gene, daEvidence)]
+
+                    tmpl = genesGOIDboth.get(GOID, [])
+                    if (gene, daEvidence) not in tmpl:
+                        genesGOIDboth[GOID] = tmpl + [(gene, daEvidence)]
+
+    return genesGOIDdirect, genesGOIDindirect, genesGOIDboth
+
+
+### main function
+## for a given set of genes and reference it recreates the results made with the GO Term Finder Web interface
+##
+## inputs:
+##    clusteSet:    genes for which we want to find significant GO terms
+##    referenceSet: list of genes that define our reference, if None or empty list then all genes in the genome are considered for reference
+##    evidences:    list of evidence codes to consider from annotation when calculating the reference values for each GO term
+##
+##    annotation    must always be availabe to this function
+##
+##
+## outputs:
+##    sorted list: list of GOterms: (pval, number of genes in term, term GOID), sorted by the increasing p value
+##                 used when filtering GO terms by pvalue and number of genes
+##
+##    GO:          DAG made out of all necessary GO terms needed to display all the genes annotated to significant GO terms
+##                                       = they need to be (grand)children of a significant GO term and (grand)parents of a GO term
+##                                         that has a direct annotation to a gene in a significant GO term
+##
+##    GOtermValues:  dictionary of GO terms, giving the calculated values:
+##                    term description
+##                    cluster frequency
+##                    reference/genome frequency
+##                    p value
+##                    genes annotated to the term directly and indirectly
+##                    genes annotated to the term directly
+##
+##    genesInClusterSet
+##    genesInReferenceSet
+##    (the last two are used to calculate the relative frequencies)
+##
+lastFindTermsReference = [0, None, None]
+def findTerms(annotation, GO, clusterSet, referenceSet = None, evidences = None, progressBar = None, progressStart = 0.0, progressPart = 100.0):
+    global lastFindTermsReference
+    if evidences and len(evidences) == 0:
+        evidences = None
+
+    ## CLUSTER GENE FREQUENCIES
+    ## count the direct and indirect annotations for cluster genes
+    n = len(clusterSet) ## number of genes in cluster; used in the calculation of the p value
+    clusterGenesGOIDdirect, clusterGenesGOIDindirect, clusterGenesGOID = populateGO(clusterSet, annotation, GO, None, progressBar, progressStart, progressPart / 5.0)
+    ## when calculating the p value we use both, the direct and indirect count
+    ## but when selecting a node or a subtree we can use the direct and/or indirect
+
+
+    ## REFERENCE GO TERM (ALL GENES) FREQUENCIES
+    ## calculate the reference values only in GO terms that belong to cluster genes only, last parameter in populateGO
+    ## if we already made this calculation than reuse it, otherwise do it and remember the results
+
+    ## referenceSet can be None: whole genome
+    ##              can be a list of genes that need to be used for reference
+    resID = str(id(clusterSet)) + str(id(referenceSet)) + str(id(annotation)) + str(id(GO))
+    prevID, prevRefGenesGOID, prevReferenceSet = lastFindTermsReference
+    if resID == prevID:
+        refGenesGOID = prevRefGenesGOID ## same inputs, we can use old results
+        referenceSet = prevReferenceSet
+    else:
+        ## input changed, new references need to be calculated
+        if referenceSet == None:
+            referenceSet = annotation['gene2GOID'].keys() ## use all genes in the annotation
+        ## calculate frequencies for all GO terms
+        ## so we don't have to do it next time, the clusterGenes set changes, because it takes a lot of time anyway
+        refGenesGOIDdirect, refGenesGOIDindirect, refGenesGOID = populateGO(referenceSet, annotation, GO, clusterGenesGOID.keys(), progressBar, progressStart + progressPart / 2.0, 4.0 * progressPart / 5.0)
+        lastFindTermsReference = [resID, refGenesGOID, referenceSet]
+
+    N = len(referenceSet) ## number of genes in reference; used in the calculation of the p value
+    ## the reference set for the whole genome should be calculated at the time of data loading (GO or annotation)
+    ## and kept in memory, otherwise it will be too slow and unbearable to the user
+
+    GOtermValues = {}
+    sortedGOIDs = []
+    if N > 0:
+        for (GOID, genesInGOID) in clusterGenesGOID.items(): ## calculate values
+            ## count the number of different genes in reference
+            lst = refGenesGOID.get(GOID, [])
+            genesInRef = []
+            for (gene, daEvidence) in lst:
+                if (not(evidences) or daEvidence in evidences) and gene not in genesInRef:
+                    genesInRef.append( gene)
+
+            ## count the number of different genes in cluster; direct and indirect annotation type
+            genesInCluster = []
+            for (gene, daEvidence) in genesInGOID:
+                if (not(evidences) or daEvidence in evidences) and gene not in genesInCluster:
+                    genesInCluster.append( gene)
+            ##
+            ## count the number of different genes in cluster; direct annotation only
+            genesInClusterDirect = []
+            genesInGOIDdirect = clusterGenesGOIDdirect.get(GOID, [])
+            for (gene, daEvidence) in genesInGOIDdirect:
+                if (not(evidences) or daEvidence in evidences) and gene not in genesInClusterDirect:
+                    genesInClusterDirect.append( gene)
+
+            G = len(genesInRef) ## reference frequency = all genes in reference for the selected GO term
+            p = float(G) / float(N)
+            x = len(genesInCluster) ## cluster frequency
+            pval = sum([(binom(n, j) * math.pow(p, j) * math.pow(1.0-p, n-j)) for j in range(x, n+1, 1)])
+            GOtermValues[GOID] = (GO['term'].get(GOID, '?')[0], x, G, pval, genesInCluster, genesInClusterDirect)
+            sortedGOIDs.append( (pval, x, GOID))
+        sortedGOIDs.sort()
+
+    if progressBar:
+        progressBar(progressStart + progressPart)
+    return sortedGOIDs, GOtermValues, clusterSet, referenceSet
+
+
+## Command-line self test: loads a pickled annotation and GO aspect, runs
+## findTerms on a fixed list of genes and prints the significant terms
+## followed by the DAG built for them.
+if __name__=="__main__":
+    ### load the annotation and GO
+    ### all of this is done somewhere inside the widget
+    annotationFile = r"Annotations\Saccharomyces cerevisiae.annotation"
+    GOfile = r"GO\200312-biological_process.go"
+
+    ## read in the annotation
+    ## NOTE(review): unpickling executes whatever the file describes; only load trusted files
+    testAnnotation = cPickle.load(open(annotationFile, 'r'))
+    ## annotation is a dictionary: {'GOID2gene': GOID2gene, 'gene2GOID': gene2GOID, 'evidenceTypes': evidenceTypes}
+
+    ## read in the GO info
+    testGO = cPickle.load(open(GOfile, 'r'))
+    ## GO = {'aspect': aspect, 'term': term, 'relationTypes': termRelTypes, 'GO': GOIDtoGOID[aspect], 'rGO': rGOIDtoGOID[aspect]}
+    ###
+
+    print "GO and annotation loaded"
+
+    ## test inputs
+    genes = ['YPD1', 'WHI4', 'SHS1', 'GCS1', 'HO', 'YDL228C', 'SSB1', 'PTP1', 'BRE4', 'OST4', 'YDL233W', 'GYP7']
+    ## an empty list means: consider all evidence codes
+    evidencesToConsider = [] ##['TAS']
+
+    ## reference set None: all genes in the annotation are the reference
+    sortedGOIDs, GOtermValues, clusterSet, referenceSet = findTerms(testAnnotation, testGO, genes, None, evidencesToConsider)
+
+    ## display
+    alpha = 0.1 ## p value threshold for a term to count as significant
+    n = len(clusterSet)
+    N = len(referenceSet)
+
+    sigGOIDs = [] ## significant GOID to display in GO
+    for (p, x, GOID) in sortedGOIDs:
+        if p > alpha: break ## end of significant GO terms reached
+        if x <= 1:
+            continue ## not worth mentioning
+        sigGOIDs.append( GOID)
+        GOterm, x, G, pval, genesInGOID, genesInGOIDdirect = GOtermValues[GOID]
+        print GOID, "\t", GOterm, "\t", len(genesInGOID), "out of", n, "\t", G, "out of", N, "\t", pval, "\t", genesInGOID
+
+    ## build and print the DAG needed to show the significant terms
+    DAG = createGODAGtoDisplay(testGO, GOtermValues.keys(), sigGOIDs)
+    printDAG(DAG, testGO['term'], GOtermValues.keys(), sigGOIDs)
+
+    print
+    print "filtered DAG"
+    ## a depth limit of 1000 is meant to keep the whole DAG
+    fDAG = DAGfilterForDepth(DAG, 'root', 1000)
+    print "fDAG:", fDAG
+    printDAG(fDAG, testGO['term'], GOtermValues.keys(), sigGOIDs)
+

OWGOTermFinder.py

+"""
+<name>GO Term Finder</name>
+<description>GO Term Finder</description>
+<icon>icons/GO.png</icon>
+<priority>100</priority>
+"""
+
+import orange, math
+import GOlib ## function needed to handle the GO and annotation
+import OWGUI
+from qt import *
+from qtcanvas import *
+from OWWidget import *
+from OWOptions import *
+from qttable import *
+from qwt import *
+
+class OWGOTermFinder(OWWidget):	
+    # Names of the widget attributes listed for the settings mechanism
+    # (presumably the ones loadSettings() restores -- confirm against OWWidget).
+    settingsList = ["AnnotationFileName", "RecentAnnotations", "ReferenceType", "RecentGOaspects",
+                    "FilterNumEnabled", "FilterNumValue", "FilterPvalEnabled", "FilterPvalue", "FilterDepthEnabled", "FilterDepthValue",
+                    "SelectMode", "SelectDisjoint"]
+
+    def __init__(self, parent=None, name='OWGoTermFinder'):
+        self.callbackDeposit = [] # deposit for OWGUI callback functions
+        OWWidget.__init__(self, parent, name, 'GO Term Finder', FALSE, FALSE) 
+
+        self.inputs = [("Cluster Examples", ExampleTable, self.clusterDataset, 0), ("Reference Examples", ExampleTable, self.referenceDataset, 0)]
+        self.outputs = [("Classified Examples", ExampleTableWithClass)]
+
+        #set default settings
+        # annotation
+        self.AnnotationFileName = self.GOaspectFileName = None # these are names of files
+        self.RecentAnnotations = []
+        self.BAnnotationIndx = 0
+        # reference
+        self.ReferenceType = 0 ## get the reference from the annotation
+        # GO
+        self.RecentGOaspects = []
+        self.BGOaspectIndx = 0
+        #
+        self.FilterNumEnabled = False
+        self.FilterNumValue = 1
+        self.FilterPvalEnabled = True
+        self.FilterPvalue = 0.05
+        self.FilterDepthEnabled = False
+        self.FilterDepthValue = 8
+        self.SelectMode = 0 # sub graph
+        self.SelectDisjoint = False # output inclusive
+        # check usage of all evidences
+        for etype in GOlib.evidenceTypesOrdered:
+            varName = "UseEvidence"+etype 
+##            self.settingsList.append( varName)
+            code = compile("self.%s = True" % (varName), ".", "single")
+            exec(code)
+
+        self.loadSettings()
+        self.data = None
+        # check if files exist and remove those that don't
+        self.RecentAnnotations = filter(os.path.exists, self.RecentAnnotations)
+        self.RecentGOaspects = filter(os.path.exists, self.RecentGOaspects)
+
+        # tmp structures - loaded by user
+        self.annotation = None
+        self.GO = None
+
+        # received by signals
+        self.geneNameAttr = 'GeneName'
+        # should read from 'GeneName' column in input signal "Examples"
+        self.clusterGenes = [] ## ['YPD1', 'WHI4', 'SHS1', 'GCS1', 'HO', 'YDL228C', 'SSB1', 'PTP1', 'BRE4', 'OST4', 'YDL233W', 'GYP7']
+        self.clusterData = None
+        # should read from 'GeneName' column in input signal "Examples Reference"
+        self.referenceGenes = []
+
+        # calculated from tmp structures and received signals
+        # updated by filters
+        self.GOIDsFound = [] # sorted by p value, so we know when to stop, if filtering by p. value
+        self.significantGOIDs = [] # selected by filters
+        self.GOtermValues = {}
+        self.dag = None
+        self.goLVitem2GOID = {}
+
+        # GUI definition
+        self.tabs = QTabWidget(self.controlArea, 'tabWidget')
+
+        # INPUT TAB
+        self.inputTab = QVGroupBox(self)
+        box = QVButtonGroup("Annotation", self.inputTab)
+        box2 = QHButtonGroup(box)
+        box2.setMaximumSize(250,50)
+        # annotation
+        self.annotationCombo = OWGUI.comboBox(box2, self, "BAnnotationIndx", items=[], callback=self.loadAnnotation)
+        self.annotationCombo.setMaximumSize(150, 20)
+        self.setFilelist(self.annotationCombo, self.RecentAnnotations)
+##        box2.hide()
+##        box2.show()
+        self.annotationBrowse = OWGUI.button(box2, self, 'Browse', callback=self.browseAnnotation)
+        self.annotationBrowse.setMaximumSize(50, 30)
+        self.evidencesBox = QVButtonGroup("Evidence codes in annotation", box)
+        self.evidenceCheckBoxes = {}
+        for etype in GOlib.evidenceTypesOrdered:
+            varName = "UseEvidence"+etype
+            tmpCB = OWGUI.checkBox(self.evidencesBox, self, varName, etype, box='', tooltip=GOlib.evidenceTypes.get(etype, '?unknown?'), callback=self.findTermsBuildDAG)
+            tmpCB.setEnabled(False)
+            self.evidenceCheckBoxes[etype] = tmpCB
+
+        # reference
+        OWGUI.radioButtonsInBox(self.inputTab, self, 'ReferenceType', ['From Annotation', 'From Signal'], box='Reference', callback=self.findTermsBuildDAG)
+        # GO aspects
+        box = QHButtonGroup("GO Aspect", self.inputTab)
+        box.setMaximumSize(250, 50)
+        self.GOaspectCombo = OWGUI.comboBox(box, self, 'BGOaspectIndx', items=[], callback=self.loadGOaspect)
+        self.GOaspectCombo.setMaximumSize(160, 20)
+        self.setFilelist(self.GOaspectCombo, self.RecentGOaspects)
+        self.GOaspectBrowse = OWGUI.button(box, self, 'Browse', callback=self.browseGOaspect)
+        self.GOaspectBrowse.setMaximumSize(50, 30)
+        self.tabs.insertTab(self.inputTab, "Input")
+
+        # FILTER TAB
+        filterTab = QVGroupBox(self)
+        box = QVButtonGroup("Filter GO Term Nodes", filterTab)
+        #
+        OWGUI.checkBox(box, self, 'FilterNumEnabled', "Number of instances", callback=self.setFilterNumEnabled)
+        self.sliderFilterNumValue = OWGUI.qwtHSlider(box, self, 'FilterNumValue', label='#:', labelWidth=33, minValue=1, maxValue=1000, step=1.0, precision=1, ticks=0, maxWidth=80, callback=self.runFilters)
+        if not self.FilterNumEnabled:
+            self.sliderFilterNumValue.box.setDisabled(1)
+        #
+        OWGUI.checkBox(box, self, 'FilterPvalEnabled', "p. value", callback=self.setFilterPvalEnabled)
+        self.sliderFilterPvalue = OWGUI.qwtHSlider(box, self, 'FilterPvalue', label='p:', labelWidth=33, minValue=0.0, maxValue=1.0, step=0.001, precision=3.0, ticks=0, maxWidth=80, callback=self.runFilters)
+        if not self.FilterPvalEnabled:
+            self.sliderFilterPvalue.box.setDisabled(1)
+        #
+        OWGUI.checkBox(box, self, 'FilterDepthEnabled', "GO depth", callback=self.setFilterDepthEnabled)
+        self.sliderFilterDepthValue = OWGUI.qwtHSlider(box, self, 'FilterDepthValue', label='p:', labelWidth=33, minValue=0.0, maxValue=100, step=1.0, precision=1.0, ticks=0, maxWidth=80, callback=self.runFilters)
+        if not self.FilterDepthEnabled:
+            self.sliderFilterDepthValue.box.setDisabled(1)
+        self.tabs.insertTab(filterTab, "Filter")
+
+        # SELECT TAB
+        selectTab = QVGroupBox(self)
+        OWGUI.radioButtonsInBox(selectTab, self, 'SelectMode', ['Subgraph', 'Node specific'], box='Mode', callback=self.viewSelectionChanged)
+        box = QVButtonGroup('Output', selectTab)
+        OWGUI.checkBox(box, self, 'SelectDisjoint', 'Disjoint/Inclusive', callback=self.viewSelectionChanged)
+        self.tabs.insertTab(selectTab, "Select")
+
+        # ListView for DAG, and table for significant GOIDs
+        self.DAGcolumns = ['GO term', 'Cluster frequency', 'Reference frequency', 'p value', 'Genes']
+        self.layout=QVBoxLayout(self.mainArea)
+        self.splitter = QSplitter(QSplitter.Vertical, self.mainArea)
+        self.layout.add(self.splitter)
+
+        # list view
+        self.goLV = QListView(self.splitter)
+        self.goLV.setMultiSelection(1)
+        self.goLV.setAllColumnsShowFocus(1)
+        self.goLV.addColumn(self.DAGcolumns[0])
+        self.goLV.setColumnWidth(0, 300)
+        self.goLV.setColumnWidthMode(0, QListView.Manual)
+        self.goLV.setColumnAlignment(0, QListView.AlignLeft)
+        for dagColumnTitle in self.DAGcolumns[1:]:
+            col = self.goLV.addColumn(dagColumnTitle)
+            self.goLV.setColumnWidth(col, 100)
+            self.goLV.setColumnWidthMode(col, QListView.Manual)
+            self.goLV.setColumnAlignment(col, QListView.AlignCenter)
+        self.connect(self.goLV, SIGNAL("selectionChanged()"), self.viewSelectionChanged)
+
+        # table of significant GO terms
+        self.sigTermsTable = QTable(self.splitter)
+        self.sigTermsTable.setNumCols(5)
+        self.sigTermsTable.setNumRows(0)
+        ## hide the vertical header
+        self.sigTermsTable.verticalHeader().hide()
+        self.sigTermsTable.setLeftMargin(0)
+        self.sigTermsTable.setSelectionMode(QTable.NoSelection)
+        self.sigTermsTable.setColumnWidth(0, 300)
+        for col in range(1, self.sigTermsTable.numCols()):
+            self.sigTermsTable.setColumnWidth(col, 100)
+        self.header = self.sigTermsTable.horizontalHeader()
+        for i in range(len(self.DAGcolumns)):
+            self.header.setLabel(i, self.DAGcolumns[i])
+        self.splitter.show()
+
+        self.resize(1000, 800)
+        self.layout.activate() # this is needed to scale the widget correctly
+        self.loadAnnotation()
+        self.loadGOaspect()
+
+    def setFilelist(self, filecombo, fileList):
+        filecombo.clear()
+        if fileList != []:
+            for file in fileList:
+                (dir, filename) = os.path.split(file)
+                #leave out the path
+                fnToDisp = filename
+                filecombo.insertItem(fnToDisp)
+            filecombo.setDisabled(False)
+        else:
+            filecombo.insertItem("(none)")
+            filecombo.setDisabled(True)
+
+    ##########################################################################
+    # handling of input/output signals
+    def clusterDataset(self, data, id):
+        self.clusterGenes = []
+        self.clusterData = data
+        if data:
+            dattrs = [str(a.name) for a in data.domain.attributes]
+            if self.geneNameAttr in dattrs:
+                for e in data:
+                    g = str(e[self.geneNameAttr].value)
+                    if g not in self.clusterGenes:
+                        self.clusterGenes.append( g)
+        self.findTermsBuildDAG()
+
+    def referenceDataset(self, data, id):
+        self.referenceGenes = []
+        if data:
+            dattrs = [str(a.name) for a in data.domain.attributes]
+            if self.geneNameAttr in dattrs:
+                for e in data:
+                    g = str(e[self.geneNameAttr].value)
+                    if g not in self.referenceGenes:
+                        self.referenceGenes.append( g)
+        self.findTermsBuildDAG()
+
+    def viewSelectionChanged(self):
+        geneToGOterm = {}
+        allGOterms = []
+        for li in self.goLVitem2GOID.keys():
+            if li.isSelected():
+                GOID = self.goLVitem2GOID.get(li, None)
+                GOterm, x, G, pval, genesInGOID, genesInGOIDdirect = self.GOtermValues.get(GOID, (GOID+'?', '', '', '', [], []))
+                if GOID == 'root': ## put real aspect instead of 'root'
+                    GOterm = self.GO.get('aspect', GOID+'?')
+                if GOterm not in allGOterms:
+                    allGOterms.append( GOterm)
+
+                ## make gene -> GOterm annotations only for some genes; depending on the selection type
+                if self.SelectMode == 1: 
+                    geneList = genesInGOIDdirect # node specific: use just the direct annotations
+                else:
+                    geneList = genesInGOID # subgraph: use both directly and indirectly annotated genes
+
+                for gene in geneList:
+                    tmpl = geneToGOterm.get(gene, [])
+                    if GOterm not in tmpl:
+                        tmpl.append(GOterm)
+                        geneToGOterm[gene] = tmpl
+        if self.clusterData:
+            # class value; GO terms
+            newclass = orange.EnumVariable("GO class", values=allGOterms)
+            # new domain
+            newdomain = orange.Domain( self.clusterData.domain.attributes + [newclass])
+            # new exampletable into where to put the filtered examples
+            newdata = orange.ExampleTable(newdomain)
+            for e in self.clusterData:
+                g = str(e[self.geneNameAttr].value)
+                geneTermList = geneToGOterm.get(g, [])
+                if self.SelectDisjoint and len(geneTermList) > 1: ## this gene should be omitted, because belongs to many GOterms
+                    continue
+                for goterm in geneTermList:
+                    ne = [str(e[a]) for a in self.clusterData.domain.attributes] + [goterm]
+                    newdata.append( orange.Example(newdomain, ne))
+            self.send("Classified Examples", newdata)
+        else:
+            self.send("Classified Examples", None)
+
+    ##########################################################################
+    # callback functions
+    def browseRemember(self, lst, indx, loadMethod, dialogText, dialogTitle):
+        if lst == []:
+            startfile = "."
+        else:
+            startfile = lst[0]
+        filename = QFileDialog.getOpenFileName(startfile, dialogText, None, dialogTitle)
+        fn = str(filename)
+        if fn in lst: # if already in list, remove it
+            lst.remove(fn)
+        lst.insert(0, fn)
+        indx = 0
+        loadMethod()
+
+    def loadRemember(self, lst, filecombo, indx):
+        loadedData = None
+        if indx < len(lst):
+            fn = lst[indx]
+            if fn != "(none)":
+                # remember the recent file list
+                if fn in lst: # if already in list, remove it
+                    lst.remove(fn)
+                lst.insert(0, fn) # add to beginning of list
+                self.setFilelist(filecombo, lst) # update combo
+                loadedData = cPickle.load(open(fn, 'r'))
+        return loadedData
+
+    def browseAnnotation(self):
+        self.browseRemember(self.RecentAnnotations, self.BAnnotationIndx, self.loadAnnotation, 'Annotation files (*.annotation)\nAll files(*.*)', 'Annotation Pickle File')
+
+    def loadAnnotation(self):
+        self.annotation = self.loadRemember(self.RecentAnnotations, self.annotationCombo, self.BAnnotationIndx)
+        self.updateEvidences()
+        self.findTermsBuildDAG()
+
+    def browseGOaspect(self):
+        self.browseRemember(self.RecentGOaspects, self.BGOaspectIndx, self.loadGOaspect, 'GO files (*.go)\nAll files(*.*)', 'Gene Ontology Pickle File')
+
+    def loadGOaspect(self):
+        self.GO = self.loadRemember(self.RecentGOaspects, self.GOaspectCombo, self.BGOaspectIndx)
+        self.updateEvidences()
+        self.findTermsBuildDAG()
+
+    def updateEvidences(self):
+        if not(self.annotation) or not(self.GO): ## if data missing, just disable everything
+            for (etype, tmpCB) in self.evidenceCheckBoxes.items():
+                tmpCB.setText(etype)
+                tmpCB.setEnabled(False)
+            return
+
+        # count the number of evidence in each type and number of genes with evidence; update the checkboxes
+        evidenceTypeCn = {}
+        for (gene, geneAnns) in self.annotation['gene2GOID'].items():
+            for (daGOID, daNOT, daEvidence, daAspect, daDB_Object_Type) in geneAnns:
+                if daAspect <> self.GO['aspect']: continue # skip annotations that are not for the loaded aspect
+                (cn, lst) = evidenceTypeCn.get(daEvidence, (0, []))
+                if gene not in lst:
+                    lst = lst + [gene]
+                evidenceTypeCn[daEvidence] = (cn + 1, lst)
+
+        for (etype, tmpCB) in self.evidenceCheckBoxes.items():
+            eCnLst = evidenceTypeCn.get(etype, None)
+            if eCnLst:
+                cn, lst = eCnLst
+                tmpCB.setEnabled(True)
+                tmpCB.setText('%s: %d annots (%d genes)' % (etype, cn, len(lst)))
+            else:
+                tmpCB.setEnabled(False)
+                tmpCB.setText(etype)
+
+    def setFilterNumEnabled(self):
+        self.sliderFilterNumValue.box.setDisabled(not self.FilterNumEnabled)
+        self.runFilters()
+
+    def setFilterPvalEnabled(self):
+        self.sliderFilterPvalue.box.setDisabled(not self.FilterPvalEnabled)
+        self.runFilters()
+
+    def setFilterDepthEnabled(self):
+        self.sliderFilterDepthValue.box.setDisabled(not self.FilterDepthEnabled)
+        self.runFilters()
+
+    ##########################################################################
+    # GO DAG calculations and filtering
+    def runFilters(self):
+        self.significantGOIDs = [] ## significant GOID to display in GO
+        for (p, x, GOID) in self.GOIDsFound:
+            if self.FilterPvalEnabled and p > self.FilterPvalue:
+                break ## end of significant GO terms reached
+            if self.FilterNumEnabled and x < self.FilterNumValue:
+                continue ## not worth mentioning
+            self.significantGOIDs.append( GOID)
+
+        self.dag = GOlib.createGODAGtoDisplay(self.GO, self.GOtermValues.keys(), self.significantGOIDs)
+        if self.FilterDepthEnabled:
+            self.dag = GOlib.DAGfilterForDepth(self.dag, 'root', self.FilterDepthValue)
+        self.updateDAG()
+
+    def findTermsBuildDAG(self):
+        if self.annotation and self.GO:
+            self.progressBarInit()
+            evidences = [etype for (etype, tmpCB) in self.evidenceCheckBoxes.items() if tmpCB.isChecked()]
+            if self.ReferenceType == 0: # from annotation
+                ## for reference use the whole genome
+                self.GOIDsFound, self.GOtermValues, clusterSet, referenceSet = GOlib.findTerms(self.annotation, self.GO, self.clusterGenes, None, evidences, self.progressBarSet, 0.0, 75.0)
+            else: # from the given set of genes - received by signal
+                ## for reference use genes in the reference list
+                self.GOIDsFound, self.GOtermValues, clusterSet, referenceSet = GOlib.findTerms(self.annotation, self.GO, self.clusterGenes, self.referenceGenes, evidences, self.progressBarSet, 0.0, 75.0)
+##            n = len(clusterSet); N = len(referenceSet) # needed if relative frequencies need to be displayed
+
+            ## find the max number of cluster gene istances in a GO term
+            maxNumIstances = max( [1] + [x for (GOterm, x, G, pval, genesInGOID, genesInGOIDdirect) in self.GOtermValues.values()])
+            ## create a DAG with all the possible nodes
+            ## and find the max depth of the DAG
+            sigGOIDs = [goid for (_, _, goid) in self.GOIDsFound]
+            tmpdag = GOlib.createGODAGtoDisplay(self.GO, self.GOtermValues.keys(), sigGOIDs)
+            maxDepth = GOlib.DAGdepth(tmpdag)
+            ## update the filter controls
+            self.sliderFilterNumValue.setRange(1, maxNumIstances, 1)
+            self.sliderFilterDepthValue.setRange(0, maxDepth, 1)
+
+            self.runFilters()
+            self.progressBarSet(95)
+            self.updateDAG()
+            self.progressBarFinished()
+
+    ##########################################################################
+    # drawing
+    def updateDAG(self, updateonly=0):
+        def walkupdate(listviewitem):
+            GOID = self.goLVitem2GOID[listviewitem]
+            GOterm, x, G, pval, genesInGOID, genesInGOIDdirect = self.GOtermValues.get(GOID, (GOID+'?', '', '', '', [], []))
+            if GOID == 'root': ## put real aspect instead of 'root'
+                GOterm = self.GO.get('aspect', GOID+'?')
+            if len(genesInGOID):
+                genesInGOIDstr = str(genesInGOID[0])
+            else:
+                genesInGOIDstr = ''
+            for gene in genesInGOID[1:]:
+                genesInGOIDstr += ", " + str(gene)
+
+            if pval: pval = "%1.4f" % pval
+            vals = [GOterm, x, G, pval, genesInGOIDstr]
+            for i in range(len(vals)):
+                listviewitem.setText(i, str(vals[i]))
+
+            child = listviewitem.firstChild()
+            while child:
+                walkupdate(child)
+                child = child.nextSibling()
+
+        def walkcreate(node, parent):
+            for (childNode, rtype) in self.dag.get(node, []):
+                bd = str(childNode)
+                li = QListViewItem(parent, bd)
+                li.setOpen(1)
+                self.goLVitem2GOID[li] = childNode
+                walkcreate(childNode, li)
+
+        if not(self.dag):
+            self.goLV.clear()
+            return
+
+        self.goLV.setRootIsDecorated(1)
+        if not updateonly:
+            self.goLV.clear()
+            self.goLVitem2GOID = {}
+            self.GOid2LVitem = {}
+            li = QListViewItem(self.goLV, 'root')
+            li.setOpen(1)
+            self.goLVitem2GOID[li] = 'root'
+            walkcreate('root', li)
+        walkupdate(self.goLV.firstChild())
+        self.goLV.show()
+
+        # update table of significant/filtered Terms
+        self.sigTermsTable.setNumRows(len(self.significantGOIDs))
+        for i in range(len(self.significantGOIDs)): ## sorted by the p value
+            GOID = self.significantGOIDs[i]
+            GOterm, x, G, pval, genesInGOID, genesInGOIDdirect = self.GOtermValues.get(GOID, (GOID+'?', '', '', '', [], []))
+            if GOID == 'root': ## put real aspect instead of 'root'
+                GOterm = self.GO.get('aspect', GOID+'?')
+
+            if len(genesInGOID):
+                genesInGOIDstr = str(genesInGOID[0])
+            else:
+                genesInGOIDstr = ''
+            for gene in genesInGOID[1:]:
+                genesInGOIDstr += ", " + str(gene)
+
+            if pval: pval = "%1.4f" % pval
+            vals = [GOterm, x, G, pval, genesInGOIDstr]
+            for j in range(len(vals)):
+                self.sigTermsTable.setText(i, j, str(vals[j]))
+
+if __name__=="__main__":
+    import orange
+    a = QApplication(sys.argv)
+    ow = OWGOTermFinder()
+    a.setMainWidget(ow)
+
+    d = orange.ExampleTable('testClusterSet.tab')
+    ow.clusterDataset(d, 0)
+    ow.show()
+    a.exec_loop()
+    ow.saveSettings()