Flashpoint avatar Flashpoint committed 54ca2a1 Merge

Separated the row and column clustering in OWHeatMap.py

Comments (0)

Files changed (1)

_bioinformatics/widgets/OWHeatMap.py

         groups[key(item)].append(item)
     return groups.items()
 
-def hierarchical_cluster_ordering(data, group_domains=None, opt_order=False, progress_callback=None):
+
+# Cluster the rows (examples) separately from the columns (attributes).
+def hierarchical_cluster_ordering_data(data, group_domains=None, opt_order=False, progress_callback=None):
+    """Hierarchically cluster the rows of `data`, one clustering per group.
+
+    When the class variable of `data` is an orange.EnumVariable, rows are
+    grouped by class value and each group is clustered on its own;
+    otherwise all rows form a single group.
+
+    Parameters:
+        data -- table whose rows are clustered; must support `len()`,
+            `.domain.classVar` and `.select(indices)`.
+        group_domains -- optional list of domains; only used to build
+            `stacked`, which this function never reads (see note below).
+        opt_order -- forwarded as `order` to
+            orngClustering.hierarchicalClustering.
+        progress_callback -- optional callable taking a progress value;
+            a falsy value disables progress reporting.
+
+    Returns a tuple `(data_ordering, data_clusters)` of two lists with one
+    entry per group: the row indices into `data` in `cluster.mapping`
+    order, and the cluster object returned for that group.
+    """
+    classVar = data.domain.classVar
+    if classVar and isinstance(classVar, orange.EnumVariable):
+        # One selection per class value.
+        # presumably each selection is a per-row 0/1 flag list (it is used
+        # as such by indices_map below) -- verify select_by_class_indices.
+        class_data = [select_by_class_indices(data, val) for val in data.domain.classVar.values]
+    else:
+        # No discrete class: a single selection that keeps every row.
+        class_data = [[1] * len(data)]
+        
+    # NOTE(review): one slot more than the number of groups. The loop below
+    # only uses slots 1..len(class_data), so slot 0 is never reported by
+    # this function -- confirm this is intended now that attribute
+    # clustering lives in a separate function.
+    parts = len(class_data) + 1 
+    
+    def pp_callback(part):
+        # Build a callback that maps a value reported for slot `part` onto
+        # the overall progress range shared by all `parts` slots.
+        def callback(value):
+            return progress_callback(value / parts + 100.0 * part / parts)
+        if progress_callback:
+            # Report once immediately when the slot starts.
+            callback(100.0 * part / parts)
+            return callback
+        else:
+            # Falsy progress_callback is passed through unchanged.
+            return progress_callback
+        
+    # NOTE(review): `stacked` is assigned here but not used anywhere else in
+    # this function; row clustering below works on `data` directly.
+    if group_domains is not None and len(group_domains) > 1:
+        stacked = vstack_by_subdomain(data, group_domains)
+    else:
+        stacked = data    
+               
+    def indices_map(indices):
+        # Map position-within-selection -> position in the full table, for
+        # the entries of `indices` that are truthy.
+        map = zip(range(len(indices)), indices)
+        map = [i for i, test in map if test]
+        return dict(enumerate(map))
+    
+    data_ordering = []
+    data_clusters = []
+    for i, indices in enumerate(class_data):
+        sub_data = data.select(indices)
+        cluster = orngClustering.hierarchicalClustering(sub_data, order=opt_order, progressCallback=pp_callback(i + 1))
+        # cluster.mapping indexes into sub_data; translate back to indices
+        # into the original `data`.
+        ind_map = indices_map(indices)
+        data_ordering.append([ind_map[m] for m in cluster.mapping])
+        data_clusters.append(cluster)
+        
+    return data_ordering, data_clusters  
+
+# Cluster the columns (attributes) separately from the rows (examples).
+def hierarchical_cluster_ordering_attr(data, group_domains=None, opt_order=False, progress_callback=None):
     classVar = data.domain.classVar
     if classVar and isinstance(classVar, orange.EnumVariable):
         class_data = [select_by_class_indices(data, val) for val in data.domain.classVar.values]
         
     attr_cluster = orngClustering.hierarchicalClustering_attributes(stacked, order=opt_order, progressCallback=pp_callback(0))
     attr_ordering = list(attr_cluster.mapping)
-        
-    def indices_map(indices):
-        map = zip(range(len(indices)), indices)
-        map = [i for i, test in map if test]
-        return dict(enumerate(map))
-    
-    data_ordering = []
-    data_clusters = []
-    for i, indices in enumerate(class_data):
-        sub_data = data.select(indices)
-        cluster = orngClustering.hierarchicalClustering(sub_data, order=opt_order, progressCallback=pp_callback(i + 1))
-        ind_map = indices_map(indices)
-        data_ordering.append([ind_map[m] for m in cluster.mapping])
-        data_clusters.append(cluster)
-        
-    return attr_ordering, attr_cluster, data_ordering, data_clusters  
-        
+           
+    return attr_ordering, attr_cluster
+
 
 ##############################################################################
 # parameters that determine the canvas layout
         self.BShowColumnID = 1; self.BShowSpotIndex = 1; self.BShowAnnotation = 1; self.BShowGeneExpression = 1
         self.BSpotVar = None; self.BAnnotationVar = None  # these are names of variables
         self.BSpotIndx = None; self.BAnnotationIndx = None # these are id's of the combo boxes
-        self.SortGenes = 1
         self.ShowClustering = 1
         self.SelectionType = 0         # selection on a single data set
         self.setColorPalette()
         self.data = []
         self.maxHSize = 30; self.maxVSize = 15
 
+        self.SortExamples = 0
+        self.SortAttributes = 0
+        
 
         # GUI definition
         self.connect(self.graphButton, SIGNAL("clicked()"), self.saveFig)
         button = OWGUI.button(box, self, "Edit colors", callback=self.openColorDialog, tooltip="Edit the heatmap color palette", debuggingEnabled=0)
         
         OWGUI.separator(settingsTab)
-
-        OWGUI.comboBox(settingsTab, self, "SortGenes", "Sort genes",
-                       items=["No sorting", "Sort genes", "Clustering",
-                              "Clustering with leaf ordering"],
-                               callback=self.update_sorting)
+        
+        # For examples
+        OWGUI.comboBox(settingsTab, self, "SortExamples", "Sort Examples",
+                        items=["No sorting", "Sort examples", "Clustering",
+                               "Clustering with leaf ordering"],
+                               callback=self.update_sorting_examples)
+        
+        # For attributes
+        OWGUI.comboBox(settingsTab, self, "SortAttributes", "Sort Attributes",
+                        items=["No sorting", "Clustering",
+                               "Clustering with leaf ordering"],
+                               callback=self.update_sorting_attributes)
+        
         OWGUI.rubber(settingsTab)
         
         # FILTER TAB
         self.sorted_data = None
         
         self._ordering_cache = {}
+        self._ordering_cache_examples = {}
+        self._ordering_cache_attributes = {}
         
         self.resize(800,400)
 
             by_keys = defaultdict(list)
             for (key, value), attrs in groups.items():
                 by_keys[key].append(attrs)
-            
+           
             # Find the keys for which all values have the same number of attributes.
             candidates = []
             for key, groups in by_keys.items():
         
     def set_dataset(self, data=None, id=None):
         self.closeContext("Selection")
+        
         self._ordering_cache.clear()
+        self._ordering_cache_examples.clear()
+        self._ordering_cache_attributes.clear()
+        
         self.clear()
         self.data = data
         if data is not None:
             groups = [("", data.domain)]
             
         group_domains = [dom for _, dom in groups]
-        
-        if self.SortGenes > 1:
+
+        # Both rows and columns
+        if self.SortExamples > 1 and self.SortAttributes > 0:
             self.progressBarInit()
-            
-            args_key = tuple(tuple(d) for d in group_domains), self.SortGenes == 3
+
+            args_key = tuple(tuple(d) for d in group_domains), self.SortExamples == 3, self.SortAttributes == 2
             cluster_ordering = self._ordering_cache.get(args_key, None)
             if cluster_ordering is None:
-                attr_ordering, attr_cluster, data_ordering, data_clusters = \
-                        hierarchical_cluster_ordering(data, group_domains,
-                                      opt_order=self.SortGenes == 3,
+
+                # Rows separately
+                data_ordering, data_clusters = \
+                        hierarchical_cluster_ordering_data(data, group_domains,
+                                      opt_order=self.SortExamples == 3,
                                       progress_callback=self.progressBarSet)
+
+                # Columns separately
+                attr_ordering, attr_cluster = \
+                        hierarchical_cluster_ordering_attr(data, group_domains,
+                                      opt_order=self.SortAttributes == 2,
+                                      progress_callback=self.progressBarSet)
+
                 # Cache the clusters
                 self._ordering_cache[args_key] = (attr_ordering, attr_cluster,
                                                   data_ordering, data_clusters)
                     
             sorted_data = [data[i] for i in itertools.chain(*data_ordering)]
             self.progressBarFinished()
+        
+        # Only rows
+        elif self.SortExamples > 1:
+            self.progressBarInit()
+
+            args_key = tuple(tuple(d) for d in group_domains), self.SortExamples == 3
+            cluster_ordering_examples = self._ordering_cache_examples.get(args_key, None)
+            if cluster_ordering_examples is None:
+
+                # Rows separately
+                data_ordering, data_clusters = \
+                        hierarchical_cluster_ordering_data(data, group_domains,
+                                      opt_order=self.SortExamples == 3,
+                                      progress_callback=self.progressBarSet)
+
+                # Cache the clusters
+                self._ordering_cache_examples[args_key] = (data_ordering, data_clusters)
+            else:
+                 data_ordering, data_clusters = cluster_ordering_examples
             
+            attr_ordering = range(len(group_domains[0][1].attributes))
+            attr_cluster = None
+            sorted_data = [data[i] for i in itertools.chain(*data_ordering)]
+            self.progressBarFinished()
+        
+        # Only columns
+        elif self.SortAttributes > 0:
+            self.progressBarInit()
+
+            args_key = tuple(tuple(d) for d in group_domains), self.SortAttributes == 2
+            cluster_ordering_attributes = self._ordering_cache_attributes.get(args_key, None)
+            if cluster_ordering_attributes is None:
+
+                # Columns separately
+                attr_ordering, attr_cluster = \
+                        hierarchical_cluster_ordering_attr(data, group_domains,
+                                      opt_order=self.SortAttributes == 2,
+                                      progress_callback=self.progressBarSet)
+
+                # Cache the clusters
+                self._ordering_cache_attributes[args_key] = (attr_ordering, attr_cluster)
+            else:
+                 attr_ordering, attr_cluster = cluster_ordering_attributes
+            
+            data_ordering = []
+            data_clusters = [None]
+            sorted_data = data
+            self.progressBarFinished()
+
         else:
             attr_ordering = range(len(group_domains[0][1].attributes))
             attr_cluster = None
                 
             group_data = orange.ExampleTable(group_domain, sorted_data)
             self._group_data.append((group_data, group_domain)) # Crashes at accessing the heatmap.examples[0] without this 
-            if self.SortGenes == 1:
+            if self.SortExamples == 1:
                 hc = orangene.HeatmapConstructor(group_data)
             else:
                 hc = orangene.HeatmapConstructor(group_data, None)
         self.data_clusters = data_clusters
         self.sorted_data = sorted_data
         self.group_domains = groups
-            
+             
     def create_heatmaps(self, constructors):
         self.lowerBound = 1000
         self.upperBound = -1000
     def update_sorting(self):
+        """Call update_heatmaps() when self.data is truthy."""
+        # NOTE(review): the "SortGenes" combo box that used this callback is
+        # removed in this diff; check whether anything still calls it.
         if self.data:
             self.update_heatmaps()
-            
+        
+    def update_sorting_examples(self):
+        """Callback of the "Sort Examples" combo box.
+
+        Calls update_heatmaps() when self.data is truthy; does nothing
+        otherwise.
+        """
+        if self.data:
+            self.update_heatmaps()
+
+    def update_sorting_attributes(self):
+        """Callback of the "Sort Attributes" combo box.
+
+        Calls update_heatmaps() when self.data is truthy; does nothing
+        otherwise.
+        """
+        if self.data:
+            self.update_heatmaps()
+
     def update_legend(self):
         for item in self.heatmap_scene.items():
             if isinstance(item, GraphicsLegendWidget):
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.