Commits

Anonymous committed e379b72 Draft

Visualize the 'created' and 'updated' times of all notebooks using an area plot

Comments (0)

Files changed (4)

 import json
 from VizEvernote import EvernoteAnalyzer, EvernoteVisualizer
 from VizNotebooks import NotebooksAnalyzer
+from VizNotebooks import NotebooksVisualizer
 import matplotlib.pyplot as plt
 
 
 
 def test_notebooks():
     na = NotebooksAnalyzer('/home/wangjing/Public/EvernoteJSON/')
-    na._print()
+    print(na)
+    na.precentage('created', 'month')
+    na.dump(open('./Evernote-notebooks-ana.json', 'w'))
+
+    nv = NotebooksVisualizer()
+    nv.load(open('./Evernote-notebooks-ana.json', 'r'))
+    nv.plot_precentage('created', 'month')
+
 
 if __name__ == "__main__":
     # count()
     groups = itertools.groupby(items, group_key)
     return dict((k, len(list(group))) for k, group in groups)
 
+class Analyzer(object):
 
-class EvernoteAnalyzer(object):
+    def dump(self, fp):
+        json.dump(self.stat, fp)
+
+    def load(self, fp):
+        self.stat = json.load(fp)
+
+class EvernoteAnalyzer(Analyzer):
     """  Analyze Evernote Data
 
     Parameters
             a list of notes
         t_type : {'created', 'updated'}
             type of time. Only notes with this field are presented
-        tag : {0, str}
-            if tag == 0: tag field is note checked. Otherwise, only notes
+        tag : {None, str}
+            if tag == None: tag field is not checked. Otherwise, only notes
                 whose tag field equals **tag** are preserved.
 
         Returns
         --------------
         selected_notes : list
         """
-        if tag == '':
+        if tag == None:
             return [note for note in notes if note.get(t_type)]
         else:
             return [note for note in notes
             tag_t[stat_key] = [note[t_type] for note in tag_notes if note.get(t_type)]
         self.stat['tag_t'] = tag_t
 
-    def dump(self, fp):
-        json.dump(self.stat, fp)
-
     def strptime(self, t_str, resolution):
         """  convert **t_str** to datetime.time_struct
 
 
 class EvernoteVisualizer(EvernoteAnalyzer):
 
-    def load(self, fp):
-        self.stat = json.load(fp)
+    @staticmethod
+    def sort_pair(d):
+        return zip(*sorted(d.items()))
 
     def plot_count(self, t_type, resolution, width, tick_num, rotation):
         """  visualize the counts for notes for a duration.
         --------------
         """
 
-        def sort_pair(d):
-            return zip(*sorted(d.items()))
-        keys, values = sort_pair(self.stat['%s-%s' % (t_type, resolution)])
+
+        keys, values = self.sort_pair(self.stat['%s-%s' % (t_type, resolution)])
 
         start = self.strptime(keys[0], resolution)
         ind = np.array([get_time_diff(start, self.strptime(k, resolution), resolution)
 from __future__ import print_function, division, absolute_import
 from VizEvernote import EvernoteAnalyzer
+from VizEvernote import EvernoteVisualizer
 import os
 import os.path
 import json
+import matplotlib.pyplot as plt
+import numpy as np
+from util import stackplot
 
 
-class NotebooksAnalyzer(object):
+class NotebooksAnalyzer(EvernoteAnalyzer):
     def __init__(self, folder):
-        self.notebooks = dict()
+        self.notebooks = []
+        self.nb_data = dict()
         self.analyzers = dict()
+        self.stat = dict()
         self._load_notes(folder)
 
     def _load_notes(self, folder):
                 with open(f_path, 'r') as note_f:
                     notes = json.load(note_f)
                     nb_name = os.path.basename(f)
-                    self.notebooks[nb_name] = notes
+                    self.notebooks.append(nb_name)
+                    self.nb_data[nb_name] = notes
                     self.analyzers[nb_name] = EvernoteAnalyzer(notes)
-    def _print(self):
-        for k, v in self.notebooks.iteritems():
-            print('notebook: ', k)
+        self.stat['notebooks'] = self.notebooks
+
+    def __str__(self):
+        return 'Notebooks:\n%s' % ('\n'.join(self.notebooks))
+
+    def precentage(self, t_type, resolution):
+        p_data = []
+        for nb in self.notebooks:
+            gc = self.analyzers[nb].count(t_type, resolution)
+            p_data.append(gc)
+
+        # get all possible dates
+        dates = set()
+        for d in p_data:
+            dates |= set(d.keys())
+
+        stat_data = dict(zip(dates, [[] for k in dates]))
+        for pd in p_data:
+            for dt in dates:
+                stat_data[dt].append(pd.get(dt, 0))
+
+        self.stat['p_data_%s-%s'%(t_type, resolution)] = stat_data
+
+class NotebooksVisualizer(EvernoteVisualizer):
+    def plot_precentage(self, t_type, resolution):
+        k =  'p_data_%s-%s'%(t_type, resolution)
+        dates, precentages = self.sort_pair(self.stat[k])
+        stackplot(plt.gca(), np.arange(len(precentages)), zip(*precentages))
+        plt.show()
+
         plt.xticks(tick_pos + width / 2., ticks, rotation=rotation)
     else:
         plt.xticks(np.array(left) + width / 2., names, rotation=rotation)
+
+
+"""
+Stacked area plot for 1D arrays inspired by Douglas Y'barbo's stackoverflow
+answer:
+http://stackoverflow.com/questions/2225995/how-can-i-create-stacked-line-graph-with-matplotlib
+
+(http://stackoverflow.com/users/66549/doug)
+
+"""
+import numpy as np
+
+__all__ = ['stackplot']
+
+
+def stackplot(axes, x, *args, **kwargs):
+    """Draws a stacked area plot.
+
+    *x* : 1d array of dimension N
+
+    *y* : 2d array of dimension MxN, OR any number of 1d arrays, each of
+          dimension 1xN. The data is assumed to be unstacked. Each of the
+          following calls is legal::
+
+            stackplot(axes, x, y)               # where y is MxN
+            stackplot(axes, x, y1, y2, y3, y4)  # where y1, y2, y3, y4 are all 1xN
+
+    Keyword arguments:
+
+    *colors* : A list or tuple of colors. These will be cycled through and
+               used to colour the stacked areas.
+               All other keyword arguments are passed to
+               :func:`~matplotlib.Axes.fill_between`
+
+    Returns *r* : A list of
+    :class:`~matplotlib.collections.PolyCollection`, one for each
+    element in the stacked area plot.
+    """
+
+    if len(args) == 1:
+        y = np.atleast_2d(*args)
+    elif len(args) > 1:
+        y = np.row_stack(args)
+
+    colors = kwargs.pop('colors', None)
+    if colors is not None:
+        axes.set_color_cycle(colors)
+
+    # Assume data passed has not been 'stacked', so stack it here.
+    y_stack = np.cumsum(y, axis=0)
+
+    r = []
+
+    # Color between x = 0 and the first array.
+    r.append(axes.fill_between(x, 0, y_stack[0, :],
+             facecolor=axes._get_lines.color_cycle.next(), **kwargs))
+
+    # Color between array i-1 and array i
+    for i in xrange(len(y) - 1):
+        r.append(axes.fill_between(x, y_stack[i, :], y_stack[i + 1, :],
+                 facecolor=axes._get_lines.color_cycle.next(), **kwargs))
+    return r