Commits

Anonymous committed e379b72 Draft

Visualize the 'created' and 'updated' times of all notebooks using an area plot

  • Participants
  • Parent commits a406518

Comments (0)

Files changed (4)

 import json
 from VizEvernote import EvernoteAnalyzer, EvernoteVisualizer
 from VizNotebooks import NotebooksAnalyzer
+from VizNotebooks import NotebooksVisualizer
 import matplotlib.pyplot as plt
 
 
 
 def test_notebooks():
+    """Analyze the notebooks, dump the stats to JSON, reload and plot them."""
     na = NotebooksAnalyzer('/home/wangjing/Public/EvernoteJSON/')
-    na._print()
+    print(na)
+    na.precentage('created', 'month')
+    # Close the dump file explicitly so the JSON is fully flushed to disk
+    # before it is read back below (do not rely on garbage collection).
+    with open('./Evernote-notebooks-ana.json', 'w') as dump_f:
+        na.dump(dump_f)
+
+    nv = NotebooksVisualizer()
+    with open('./Evernote-notebooks-ana.json', 'r') as load_f:
+        nv.load(load_f)
+    nv.plot_precentage('created', 'month')
+
 
 if __name__ == "__main__":
     # count()

File VizEvernote.py

     groups = itertools.groupby(items, group_key)
     return dict((k, len(list(group))) for k, group in groups)
 
+class Analyzer(object):
 
-class EvernoteAnalyzer(object):
+    def dump(self, fp):
+        """Write ``self.stat`` as JSON to the writable file object *fp*."""
+        stat = self.stat
+        json.dump(stat, fp)
+
+    def load(self, fp):
+        """Replace ``self.stat`` with the JSON document read from *fp*."""
+        loaded = json.load(fp)
+        self.stat = loaded
+
+class EvernoteAnalyzer(Analyzer):
     """  Analyze Evernote Data
 
     Parameters
             a list of notes
         t_type : {'created', 'updated'}
             type of time. Only notes with this field is presented
-        tag : {0, str}
-            if tag == 0: tag field is note checked. Otherwise, only notes
+        tag : {None, str}
+            if tag == None: tag field is not checked. Otherwise, only notes
                 whose tag field equals are preserved.
 
         Returns
         --------------
         selected_notes : list
         """
-        if tag == '':
+        if tag == None:
             return [note for note in notes if note.get(t_type)]
         else:
             return [note for note in notes
             tag_t[stat_key] = [note[t_type] for note in tag_notes if note.get(t_type)]
         self.stat['tag_t'] = tag_t
 
-    def dump(self, fp):
-        json.dump(self.stat, fp)
-
     def strptime(self, t_str, resolution):
         """  convert **t_str** to datetime.time_struct
 
 
 class EvernoteVisualizer(EvernoteAnalyzer):
 
-    def load(self, fp):
-        self.stat = json.load(fp)
+    @staticmethod
+    def sort_pair(d):
+        """Sort the items of dict *d* and return them as (keys, values).
+
+        The result is whatever ``zip`` yields for the sorted items, so
+        callers unpack it as ``keys, values = sort_pair(d)``.
+        """
+        ordered = sorted(d.items())
+        return zip(*ordered)
 
     def plot_count(self, t_type, resolution, width, tick_num, rotation):
         """  visualize the counts for notes for a duration.
         --------------
         """
 
-        def sort_pair(d):
-            return zip(*sorted(d.items()))
-        keys, values = sort_pair(self.stat['%s-%s' % (t_type, resolution)])
+
+        keys, values = self.sort_pair(self.stat['%s-%s' % (t_type, resolution)])
 
         start = self.strptime(keys[0], resolution)
         ind = np.array([get_time_diff(start, self.strptime(k, resolution), resolution)

File VizNotebooks.py

 from __future__ import print_function, division, absolute_import
 from VizEvernote import EvernoteAnalyzer
+from VizEvernote import EvernoteVisualizer
 import os
 import os.path
 import json
+import matplotlib.pyplot as plt
+import numpy as np
+from util import stackplot
 
 
-class NotebooksAnalyzer(object):
+class NotebooksAnalyzer(EvernoteAnalyzer):
     def __init__(self, folder):
+        """Create empty containers and fill them via ``_load_notes(folder)``."""
-        self.notebooks = dict()
+        self.notebooks = []  # notebook names; _load_notes appends to this list
+        self.nb_data = dict()  # filled by _load_notes: notebook name -> parsed JSON notes
         self.analyzers = dict()
+        self.stat = dict()
         self._load_notes(folder)
 
     def _load_notes(self, folder):
                 with open(f_path, 'r') as note_f:
                     notes = json.load(note_f)
                     nb_name = os.path.basename(f)
-                    self.notebooks[nb_name] = notes
+                    self.notebooks.append(nb_name)
+                    self.nb_data[nb_name] = notes
                     self.analyzers[nb_name] = EvernoteAnalyzer(notes)
-    def _print(self):
-        for k, v in self.notebooks.iteritems():
-            print('notebook: ', k)
+        self.stat['notebooks'] = self.notebooks
+
+    def __str__(self):
+        """Return a 'Notebooks:' header followed by one notebook name per line."""
+        listing = '\n'.join(self.notebooks)
+        return 'Notebooks:\n' + listing
+
+    def precentage(self, t_type, resolution):
+        """Collect per-notebook note counts for every date into ``self.stat``.
+
+        For each notebook in ``self.notebooks`` the matching analyzer's
+        ``count(t_type, resolution)`` result is taken. The union of all
+        dates is then mapped to a list of counts, one entry per notebook in
+        ``self.notebooks`` order, with 0 where a notebook has no entry for
+        that date. The mapping is stored under the key
+        ``'p_data_<t_type>-<resolution>'``.
+        """
+        counts = [self.analyzers[name].count(t_type, resolution)
+                  for name in self.notebooks]
+
+        # Union of every date that appears in any notebook.
+        all_dates = set()
+        for per_nb in counts:
+            all_dates.update(per_nb.keys())
+
+        table = {}
+        for dt in all_dates:
+            table[dt] = [per_nb.get(dt, 0) for per_nb in counts]
+
+        stat_key = 'p_data_%s-%s' % (t_type, resolution)
+        self.stat[stat_key] = table
+
+class NotebooksVisualizer(EvernoteVisualizer):
+    """Plot per-notebook statistics held in ``self.stat``."""
+
+    def plot_precentage(self, t_type, resolution):
+        """Show a stacked area plot of the ``p_data_<t_type>-<resolution>`` data.
+
+        The entry read from ``self.stat`` maps each date to a list with one
+        value per notebook. Dates are sorted and laid out along the x axis
+        at positions 0..N-1; each notebook becomes one stacked band.
+        """
+        k = 'p_data_%s-%s' % (t_type, resolution)
+        dates, precentages = self.sort_pair(self.stat[k])
+        # Rows of ``precentages`` are dates; transpose so each row is one
+        # notebook. An explicit array is used because a bare ``zip(...)`` is
+        # a lazy iterator on Python 3 and cannot be stacked by NumPy.
+        series = np.array(precentages).T
+        stackplot(plt.gca(), np.arange(len(precentages)), series)
+        plt.show()
+
         plt.xticks(tick_pos + width / 2., ticks, rotation=rotation)
     else:
         plt.xticks(np.array(left) + width / 2., names, rotation=rotation)
+
+
+"""
+Stacked area plot for 1D arrays inspired by Douglas Y'barbo's stackoverflow
+answer:
+http://stackoverflow.com/questions/2225995/how-can-i-create-stacked-line-graph-with-matplotlib
+
+(http://stackoverflow.com/users/66549/doug)
+
+"""
+import numpy as np
+
+__all__ = ['stackplot']
+
+
+def stackplot(axes, x, *args, **kwargs):
+    """Draw a stacked area plot on *axes*.
+
+    *x* : 1d array of dimension N
+
+    *y* : 2d array of dimension MxN, OR any number of 1d arrays each of
+          dimension 1xN. The data is assumed to be unstacked. Each of the
+          following calls is legal::
+
+            stackplot(axes, x, y)               # where y is MxN
+            stackplot(axes, x, y1, y2, y3, y4)  # where y1..y4 are all 1xN
+
+    Keyword arguments:
+
+    *colors* : A list or tuple of colors. These will be cycled through and
+               used to colour the stacked areas.
+               All other keyword arguments are passed to
+               :func:`~matplotlib.Axes.fill_between`
+
+    Returns *r* : A list of
+    :class:`~matplotlib.collections.PolyCollection`, one for each
+    element in the stacked area plot.
+
+    Raises :class:`TypeError` if no *y* data is given.
+    """
+    if not args:
+        # Without this guard the function failed later on an unbound ``y``.
+        raise TypeError('stackplot() requires at least one y array')
+    if len(args) == 1:
+        y = np.atleast_2d(*args)
+    else:
+        # np.vstack is the canonical name; np.row_stack is a deprecated alias.
+        y = np.vstack(args)
+
+    colors = kwargs.pop('colors', None)
+    if colors is not None:
+        _set_color_cycle(axes, colors)
+
+    # Assume data passed has not been 'stacked', so stack it here.
+    y_stack = np.cumsum(y, axis=0)
+
+    r = []
+    # The first band is filled up from the baseline 0; every later band is
+    # filled between two consecutive cumulative rows.
+    lower = 0
+    for upper in y_stack:
+        r.append(axes.fill_between(x, lower, upper,
+                 facecolor=_next_color(axes), **kwargs))
+        lower = upper
+    return r
+
+
+def _set_color_cycle(axes, colors):
+    """Install *colors* as the colour cycle of *axes*."""
+    # Prefer the public property-cycle API when the Axes object offers it;
+    # fall back to the older ``set_color_cycle`` otherwise.
+    if hasattr(axes, 'set_prop_cycle'):
+        axes.set_prop_cycle(color=colors)
+    else:
+        axes.set_color_cycle(colors)
+
+
+def _next_color(axes):
+    """Return the next colour from the line colour cycle of *axes*."""
+    lines = axes._get_lines
+    # NOTE(review): ``_get_lines`` is private matplotlib state; older
+    # releases expose a ``color_cycle`` iterator, newer ones a
+    # ``get_next_color()`` method - confirm against the installed version.
+    if hasattr(lines, 'color_cycle'):
+        return next(lines.color_cycle)
+    return lines.get_next_color()