enzo-dev/src/performance_tools/performance_tools.py

#!/usr/bin/env python
### performance_tools.py
### Date: 10.13.11
### Authors: Cameron Hummels and Sam Skillman
### Description: 

### performance_tools is a module for plotting the performance information 
### which comes out of Enzo.  It consists of a few universal helper functions
### and a new class: perform.  You can use it in one of two ways.  You can
### import this library in python, create a perform object, and then
### create a few plots using the plot_quantity, plot_stack and plot_maxmin 
### functions, like this:

### $ python
### >>> import performance_tools as pt
### >>> p = pt.perform('performance.out')
### >>> p.plot_quantity('Total','Mean Time')
### >>> p.plot_stack([],'Mean Time',repeated_field='Level')
### >>> p.plot_maxmin([],repeated_field='Level',fractional=True)

### Or you can just call this python file from the command line after
### editing the source file to have it generate whatever plots you want
### (some defaults are included at the bottom of this file), like this:

### $ python performance_tools.py performance.out
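
### The -s/--smooth flag (parsed by the option parser at the bottom of this
### file) smooths the plotted curves over N cycles, e.g.:

### $ python performance_tools.py -s 7 performance.out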

import matplotlib as mpl
import pylab as pl
import numpy as np
from matplotlib import cm

def err_handler(type, flag):
    print("Floating point error (%s), with flag %s, probably due to one of "
          "your timers being 0.0.  You can probably ignore this." % (type, flag))
np.seterrcall(err_handler)
np.seterr(all='call')

def is_listlike(obj):
    """
    Checks whether an object is list-like (i.e. anything that is not a string)
    """
    return not isinstance(obj, basestring)
    
def preserve_extrema(extrema, xdata, ydata):
    """
    Keep track of the universal extrema over multiple x-y datasets

    Parameters
    ----------
    extrema : 5-element list
        This keeps track of the current min/max for the x and y datasets.  It
        has a 5th element to keep track of whether the extrema have yet been
        set: [xmin, xmax, ymin, ymax, already_set_bit].
        For already_set_bit, 0=no, 1=yes.
    xdata, ydata : array-like
        The datasets whose values you wish to fold into the running
        minima/maxima
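
    Examples
    --------
    A minimal sketch with made-up data; zeros in ydata are ignored when
    computing the y extrema:

    >>> ext = [0, 0, 0, 0, 0]
    >>> ext = preserve_extrema(ext, np.array([1., 2., 3.]),
    ...                        np.array([0., 0.5, 4.]))
    >>> ext   # -> [1.0, 3.0, 0.5, 4.0, 1]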
    """
    ### If setting extrema for the first time, just use xdata/ydata min/max
    if extrema[4] == 0:
        minx = np.min(xdata)
        maxx = np.max(xdata)
        miny = np.min(ydata[np.nonzero(ydata)])
        maxy = np.max(ydata[np.nonzero(ydata)])
    ### Otherwise, preserve the existing extrema
    else:
        minx = np.min([extrema[0], np.min(xdata)])
        maxx = np.max([extrema[1], np.max(xdata)])
        miny = np.min([extrema[2], np.min(ydata[np.nonzero(ydata)])])
        maxy = np.max([extrema[3], np.max(ydata[np.nonzero(ydata)])])
    return [minx,maxx,miny,maxy,1]

def smooth(x, window_len=11, window='hanning'):
    """
    Smooth a 1D array using a window with requested size.

    This function was taken from http://www.scipy.org/Cookbook/SignalSmooth

    CBH modified it to cut off the (window_len-1)/2 values at each end of the
    final 1D array (the remnants of the reflected portions), so that the final
    array size is the same as the original array size.

    This method is based on the convolution of a scaled window with the signal.
    The signal is prepared by introducing reflected copies of the signal
    (with the window size) at both ends so that transient parts are minimized
    at the beginning and end of the output signal.
    
    Parameters
    ----------
    x : 1D array_like
        The input signal 
    window_len: int, optional
        The dimension of the smoothing window; should be an odd integer
    window : string, optional
        The type of smoothing window to use: 'flat', 'hanning', 'hamming',
        'bartlett', 'blackman'.  A 'flat' window produces a moving-average
        smoothing.

    Returns
    -------
    out : 1D array
        The smoothed version of input x

    See Also
    --------
    np.hanning, np.hamming, np.bartlett, np.blackman, np.convolve
    scipy.signal.lfilter
 
    Examples
    --------
    >>> t = np.linspace(-4,4,100)
    >>> x = np.sin(t)
    >>> x_noisy = x + pl.randn(len(t))*0.1
    >>> x_smooth = smooth(x_noisy)
    """
    if x.ndim != 1:
        raise ValueError, "smooth only accepts 1 dimension arrays."

    if x.size < window_len:
        raise ValueError, "Input vector needs to be bigger than window size."

    if window_len<3:
        return x

    if window_len%2 == 0:
        raise ValueError, "smooth requires an odd-valued window_len." 

    if window not in ['flat', 'hanning', 'hamming', 'bartlett', 'blackman']:
        raise ValueError("window must be one of 'flat', 'hanning', 'hamming', "
                         "'bartlett', 'blackman'")

    s = np.r_[x[window_len-1:0:-1], x, x[-1:-window_len:-1]]
    if window == 'flat': # moving average
        w = np.ones(window_len, 'd')
    else:
        w = getattr(np, window)(window_len)  # e.g. np.hanning(window_len)

    y = np.convolve(w/w.sum(), s, mode='valid')
    clip_len = (window_len-1)//2  # integer division; window_len is odd
    return y[clip_len:len(y)-clip_len]

class perform:
    """
    This simple class stores the performance information collected by Enzo.
    Enzo puts this information in an ASCII file typically called 
    "performance.out".  That file is called by the constructor for this 
    class and converted into the internal structure called "data".  
    
    "data" is a dictionary, where each key is one of the field labels 
    from "performance.out" (i.e. the first word from a line, e.g. "Level 0").  
    The payload for each key is a NumPy record array of length N,
    where N is the number of cycles for that key.

    For each cycle, there are M entries. In the case of the "Level X" and 
    "Total" keys, these M entries are:

    "Cycle"
    "Mean Time", 
    "Stddev Time", 
    "Min Time", 
    "Max Time", 
    "Cell Updates", 
    "Num Grids", 
    "Updates/processor/sec"

    In all other cases, these M entries are:
    "Cycle"
    "Mean Time", 
    "Stddev Time", 
    "Min Time", 
    "Max Time"

    Since this is a record array, you can use these entries as the indices
    when indexing the array (e.g. data['Total']['Cycle']).
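
    For example, a hypothetical session (assuming an Enzo output file named
    "performance.out" in the working directory) might look like:

    >>> p = perform('performance.out')
    >>> p.fields                           # all labels found in the file
    >>> p.data['Total']['Mean Time']       # mean time per cycle, in seconds
    >>> p.data['Level 0']['Cell Updates']  # cell updates per cycle on level 0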
    """
    def __init__(self, filename):
        self.filename = filename
        self.data = self.build_struct(filename)
        self.fields = self.data.keys()
 
    def build_struct(self, filename):
        """
        Build the internal data structure which holds all of the data from
        your external text file output by Enzo.  It allocates the memory
        all at once instead of building the structure line by line.

        Parameters
        ----------
        filename : string
            The name of the file used as input

        Returns
        -------
        out : dictionary
            A dictionary of recarrays.  Each "label" (e.g. "Total") within the 
            input text file is represented as a key in the dictionary.  The 
            payload for each key is a record array of length N, where N is the
            number of cycles over which that label appeared. See the
            docstrings for the perform class for more information.
        """

        ### Open the file, and pull out the data.
        ### Comments are ignored.  Each block of multiline text is treated
        ### individually.  The first line of each text block is expected to
        ### be the cycle number, which is recorded in the "Cycle" column of
        ### every key for that block.  Each subsequent line uses its first
        ### string as a dictionary key, with the remaining columns as the
        ### data for that key.  A blank line signifies the end of a text block.
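        ###
        ### A hypothetical block (the exact entries depend on your run; this
        ### is only an illustration of the format) might look like:
        ###
        ### Cycle_Number 10
        ### Total 1.2e+00 3.0e-02 1.1e+00 1.3e+00 2.6e+05 64 5.4e+04
        ### Level_0 8.0e-01 2.0e-02 7.5e-01 8.5e-01 2.0e+05 32 4.1e+04
        ### RebuildHierarchy 1.0e-01 5.0e-03 9.0e-02 1.2e-01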
        key_list = []
        data = {}
        num_cycles = 0

        input = open(filename, "r")
        input_list = input.readlines()
        input.close()

        ### First run through the file contents identifying all possible keys
        ### and counting the number of cycles so as to allocate sufficiently
        ### big arrays in the structure to house the data.
        for line in input_list:
            if not line.startswith("#") and line.strip():
                line_list = line.split()
                line_key = line_list[0]
                line_key = " ".join(line_key.split('_'))
                if line_key not in key_list:
                    key_list.append(line_key)
                if line_key == 'Cycle Number':
                    num_cycles += 1
        key_list.remove('Cycle Number') # Don't want it as a key

        ### Now build the dictionary with the appropriate memory and 
        ### retraverse the input file to fill it with the input information
        for key in key_list:
            if key == "Total" or key.startswith('Level'):
                records = [('Cycle', 'float'), ('Mean Time', 'float'),
                           ('Stddev Time', 'float'), ('Min Time', 'float'),
                           ('Max Time', 'float'), ('Cell Updates', 'float'),
                           ('Num Grids', 'float'), 
                           ('Updates/processor/sec', 'float')]
            else:
                records = [('Cycle', 'float'), ('Mean Time', 'float'),
                           ('Stddev Time', 'float'), ('Min Time', 'float'),
                           ('Max Time', 'float')]

            data[key] = np.zeros(num_cycles, dtype=records)

        i = -1 
        for line in input_list:
            if not line.startswith("#") and line.strip():
                line_list = line.split()
   
                if line_list[0] == 'Cycle_Number':
                    cycle = int(line_list[1])
                    i += 1
                    continue
    
                ### If we've made it this far, cycle is defined and we're
                ### populating our dictionary with data for cycle #x.
                line_key = line_list[0]
    
                ### Convert line_key's underscores to spaces
                line_key = " ".join(line_key.split('_'))
        
                line_value = np.array([cycle] + line_list[1:],dtype='float64') 
                line_value = np.nan_to_num(line_value)  # error checking
    
                data[line_key][i] = line_value

        ### Make sure all cycles are set for all keys, even those that 
        ### didn't output every cycle
        for key in key_list:
            data[key]["Cycle"] = data["Total"]["Cycle"]
        return data

    def plot_quantity(self, field_label, y_field_index, 
                      y_field_axis_label="", x_field_index='Cycle', 
                      x_field_axis_label="Cycle Number",
                      filename="performance.png", repeated_field="", 
                      log_y_axis="Auto", smooth_len=0, bounds="Off",
                      fractional=False, xlim=[], ylim=[]):
        """
        Produce a plot of the given quantity (or quantities) from the performance data.
    
        Parameters
        ----------
        field_label : string or array_like of strings
            The label of the field you wish to plot.  If you wish to plot
            multiple fields, enumerate them in an array or tuple. Ex: "Level 0"
        y_field_index : string or array_like of strings
            The index of the field you wish to plot on the y axis. 
            Ex: "Cycle", "Mean Time", "Stddev Time", "Min Time", "Max Time",
            "Cell Updates", "Num Grids", "Updates/processor/sec".
            If you have a single value for many field_labels, it is assumed
            that such a value will index all of them.  If you have an array_like
            structure of strings, each one will index the corresponding key in 
            field_label.
        y_field_axis_label : string, optional
            The y axis label on the resulting plot. Default = the y_field_index
            of the recarray.
        x_field_index : string, optional
            The index of the field you wish to plot on the x axis.
            Default = "Cycle"
        x_field_axis_label : string, optional
            The x axis label on the resulting plot. Default = "Cycle Number"
        filename : string, optional
            The filename to which the plot will be saved.  Default = "performance.png".
        repeated_field : string, optional
            If you have a regularly named set of fields you wish to plot 
            against each other (e.g. "Level 0", "Level 1", "Level 2"), then
            include the string here and they will all be included automatically
            and in order (e.g. "Level").  There are two special cases to this
            parameter.  "Non-Level" includes all fields without "Level" in the 
            name (or "Total"), and "All" includes all fields.
        log_y_axis : string, optional
            This controls whether the plot will use logarithmic units for the
            y axis.  Valid settings are "Auto", "On", and "Off".  When "Auto" is
            used, the code automatically recognizes when you have a maximum 
            y value more than 3 orders of magnitude greater than your minimum y
            value (for non-zero values) at which point it plots the y axis in 
            log units.
        smooth_len : int, optional
            The number of consecutive cycles of data over which smoothing
            occurs.  Default = 0 (i.e. no smoothing).
            Must be an odd number (recommended 5-11)
        bounds : string, optional
            This controls whether to overplot additional bounding data over
            the existing plotted quantities.  Valid values of this variable
            are "minmax", "sigma" and "Off".  "minmax" overplots the minima and
            maxima bounds, whereas "sigma" plots the mean +/- 1 sigma bounds.
        fractional : bool, optional
            When set to true, the plotted values are shown as fractions of the
            equivalent field in "Total".
        xlim, ylim : array_like, optional
            Set these variables to 2-element lists/arrays in order to
            explicitly set your plot limits
    
        See Also
        --------
        plot_stack, plot_maxmin
    
        Examples
        --------
        To produce a simple plot of the mean time taken over the course of 
        the simulation to run the RebuildHierarchy section of code, and
        save this plot to performance.png:
    
        >>> plot_quantity("RebuildHierarchy", "Mean Time")
    
        To produce a plot comparing the RebuildHierarchy and SolveHydroEquations
        maximum time taken over the course of the simulation and save it 
        to file "test.png":
    
        >>> plot_quantity(["RebuildHierarchy", "SolveHydroEquations"],
        "Max Time", "Maximum Time (sec)", filename="test.png")
    
        To produce a plot comparing the maximum time from RebuildHierarchy and
        the minimum time from SolveHydroEquations taken over the course of the 
        simulation and save it to file "test.png":
    
        >>> plot_quantity(["RebuildHierarchy", "SolveHydroEquations"],
        ["Max Time", "Min Time"], "Time (sec)", filename="test.png")
    
        To produce a plot comparing the mean time taken by all of the different
        levels over the course of the simulation and save it to file "test.png": 
        >>> plot_quantity([], "Mean Time", "Mean Time (sec)", 
        filename="test.png", repeated_field="Level")
        """
        ax = pl.subplot(111)
        data = self.data
        extrema = np.zeros(5)
    
        ### Convert plots of single quantities to list format for homogeneous
        ### processing.
        if not is_listlike(field_label):
            field_label = [field_label]
    
        ### If there is a repeated_field, figure out how many fields 
        ### there are including any that were defined in the original 
        ### field_label argument.
        if repeated_field:
            key_list = data.keys()
            if repeated_field == "All":
                field_label = key_list

            elif repeated_field == "Non-Level":
                for key in key_list:
                    if not key.startswith("Level") and \
                       not key.startswith("Total"):
                        field_label.append(key)
            else:
                for key in key_list:
                    if key.startswith(repeated_field):
                        field_label.append(key)
        num_fields = len(field_label)
        field_label.sort()
    
        ### If y_field_index is a single index, then replace it with a list of
        ### identical indices
        if not is_listlike(y_field_index):
            y_field_index = num_fields*[y_field_index]
    
        ### Create a normalization vector to use on each vector
        ### before plotting.  In non-fractional case, this vector is 1.
        if fractional:
            norm = data['Total']
        else:
            records = [('Cycle', 'float'), ('Mean Time', 'float'),
                       ('Stddev Time', 'float'), ('Min Time', 'float'),
                       ('Max Time', 'float'), ('Cell Updates', 'float'),
                       ('Num Grids', 'float'), 
                       ('Updates/processor/sec', 'float')]
            norm = np.ones(data['Total'].shape, dtype=records)    

        ### Loop through the y datasets to figure out the extrema
        for i in range(len(field_label)):
            xdata = data[field_label[i]][x_field_index]
            ydata = data[field_label[i]][y_field_index[i]] / \
                    norm[y_field_index[i]]
            if smooth_len:
                ydata = smooth(ydata,smooth_len)
            extrema = preserve_extrema(extrema,xdata,ydata)
        if log_y_axis=="Auto":
            if extrema[3]/extrema[2] > 1e3:
                log_y_axis="On"
            else:
                log_y_axis="Off"
    
        ### Now for the actual plotting
        for i in range(len(field_label)):
            color = cm.jet(1.*i/num_fields)
            xdata = data[field_label[i]][x_field_index]
            ydata = data[field_label[i]][y_field_index[i]] / \
                    norm[y_field_index[i]]
            if smooth_len:
                ydata = smooth(ydata,smooth_len)
            if log_y_axis=="On":
                pl.semilogy(xdata,ydata,color=color,label=field_label[i])
            else:
                pl.plot(xdata,ydata,color=color,label=field_label[i])
            if not bounds == "Off":
                zerodata = np.zeros(len(ydata))
                if bounds == "minmax":
                    min_bound = data[field_label[i]]["Min Time"] / \
                    norm["Min Time"]
                    max_bound = data[field_label[i]]["Max Time"] / \
                    norm["Max Time"]
                else:
                    min_bound = ydata - data[field_label[i]]["Stddev Time"] / \
                    norm["Stddev Time"]
                    max_bound = ydata + data[field_label[i]]["Stddev Time"] / \
                    norm["Stddev Time"]
                if smooth_len:
                    min_bound = smooth(min_bound, smooth_len)
                    max_bound = smooth(max_bound, smooth_len)
                fillin = pl.fill_between(xdata,min_bound,max_bound,
                        facecolor=color)
                fillin.set_alpha(0.5)
    
        ### Use xlim and ylim if set explicitly.  If not, use smart defaults
        ### based on the extrema
        if xlim:
            pl.xlim(xlim)
        else:
            pl.xlim(extrema[0:2])
        if ylim:
            pl.ylim(ylim)
        else:
            if log_y_axis=="On":
                y_log_range = 1.2*np.log10(extrema[3]/extrema[2])
                ### To assure there is a labeled tick mark on the y-axis
                if y_log_range < 1.:
                    y_log_range = 1.
                pl.ylim([extrema[2],extrema[2]*10**y_log_range])
            else:
                pl.ylim([0.,1.2*extrema[3]])


        ### Make a legend
        ### Shrink current plot by 20% to make room for external legend
        box = ax.get_position()
        ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])

        ### Put a legend to the right of the current axis
        ### Reverse the order of the entries, so colors match order plotted
        handles, labels = ax.get_legend_handles_labels()
        legend = ax.legend(handles[::-1], labels[::-1],
                          loc='center left', bbox_to_anchor=(1, 0.5))

        ### Make the legend small
        ltext  = legend.get_texts()
        pl.setp(ltext, fontsize='xx-small')

        ### Set the axis labels and save
        pl.xlabel(x_field_axis_label)
        if not y_field_axis_label:
            y_field_axis_label=y_field_index[0]
        if fractional:
            pl.ylabel(y_field_axis_label + ", as Fraction of Total")
        else:
            pl.ylabel(y_field_axis_label)
        pl.suptitle("Non-Stacked Quantities for " + self.filename)
        pl.savefig(filename)
        pl.clf()
        
    def plot_stack(self, field_label, y_field_index, 
                   y_field_axis_label="", x_field_index='Cycle', 
                   x_field_axis_label="Cycle Number", 
                   filename="performance.png", repeated_field="", 
                   log_y_axis="Auto", smooth_len=0, fractional=False,
                   xlim=[], ylim=[]):
        """
        Produce a plot for the given label/indices where each quantity is 
        stacked on top of the previous quantity.
    
        Parameters
        ----------
        field_label : array_like of strings
            The labels of the fields you wish to plot.  If you wish to plot
            a single field, then use plot_quantity instead.
        y_field_index : string or array_like of strings
            The index of the field you wish to plot on the y axis. 
            Ex: "Mean Time", "Stddev Time", "Min Time", "Max Time",
            "Cell Updates", "Num Grids", "Updates/processor/sec".
            If you have a single value for many field_labels, it is assumed
            that such a value will index all of them.  If you have an array_like
            structure of strings, each one will index the corresponding key in 
            field_label.
        y_field_axis_label : string, optional
            The y axis label on the resulting plot. Default = the y_field_index
            of the recarray.
        x_field_index : string, optional
            The index of the field you wish to plot on the x axis.
            Default = "Cycle"
        x_field_axis_label : string, optional
            The x axis label on the resulting plot. Default = "Cycle Number"
        filename : string, optional
            The filename to which the plot will be saved.  Default = "performance.png".
        repeated_field : string, optional
            If you have a regularly named set of fields you wish to plot 
            against each other (e.g. "Level 0", "Level 1", "Level 2"), then
            include the string here and they will all be included automatically
            and in order (e.g. "Level").  There are two special cases to this
            parameter.  "Non-Level" includes all fields without "Level" in the 
            name (or "Total"), and "All" includes all fields.
        log_y_axis : string, optional
            This controls whether the plot will use logarithmic units for the
            y axis.  Valid settings are "Auto", "On", and "Off".  When "Auto" is
            used, the code automatically recognizes when you have a maximum 
            y value more than 3 orders of magnitude greater than your minimum y
            value (for non-zero values) at which point it plots the y axis in 
            log units.
        smooth_len : int, optional
            The number of consecutive cycles of data over which smoothing
            occurs.  Default = 0 (i.e. no smoothing).
            Must be an odd number (recommended 5-11)
        fractional : bool, optional
            When set to true, the plotted values are shown as fractions of the
            equivalent field in "Total".
        xlim, ylim : array_like, optional
            Set these variables to 2-element lists/arrays in order to
            explicitly set your plot limits
    
        See Also
        --------
        plot_quantity, plot_maxmin
    
        Examples
        --------
        >>> plot_stack(["Level 0", "Level 1", "Level 2"], "Mean Time")
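
        A hypothetical call stacking all levels as fractions of the total
        time and saving the result to "stack.png":

        >>> plot_stack([], "Mean Time", repeated_field="Level",
        ...            fractional=True, filename="stack.png")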
        """
        ax = pl.subplot(111)
        data = self.data
        extrema = np.zeros(5)

        ### Convert plots of single quantities to list format for homogeneous
        ### processing.
        if not is_listlike(field_label):
            field_label = [field_label]
        
        ### If there is a repeated_field, figure out how many repeated fields
        ### there are, including any defined in the original field_label arg.
        if repeated_field:
            key_list = data.keys()
            if repeated_field == "All":
                field_label = key_list

            elif repeated_field == "Non-Level":
                for key in key_list:
                    if not key.startswith("Level") and \
                       not key.startswith("Total"):
                        field_label.append(key)
            else:
                for key in key_list:
                    if key.startswith(repeated_field):
                        field_label.append(key)
        num_fields = len(field_label)
        field_label.sort()

        ### Group WriteAllData is usually very bumpy, so place it at the
        ### top of the stack rather than at the bottom, where it would make
        ### the other quantities difficult to read
        if "Group WriteAllData" in field_label:
            field_label.remove("Group WriteAllData")
            field_label.append("Group WriteAllData")
    
        ### If y_field_index is a single index, then replace it with a list of
        ### identical indices
        if not is_listlike(y_field_index):
            y_field_index = num_fields*[y_field_index]
    
        ### Since we're stacking (and plotting from a bottom bound to an
        ### upper bound for each y value), we need to accumulate the values
        ### as we stack.  ydata_cum is the same length as xdata, but it has
        ### two rows, one for the bottom bound of each stacked quantity and
        ### one for the top bound.  It is cumulatively added to as we loop
        ### through the plotted quantities.
        xdata = data[field_label[0]][x_field_index]
        ydata_cum = np.zeros((2,len(xdata)))

        ### Create a normalization vector to use on each vector
        ### before plotting.  In non-fractional case, this vector is 1.
        if fractional:
            norm = data['Total']
        else:
            records = [('Cycle', 'float'), ('Mean Time', 'float'),
                       ('Stddev Time', 'float'), ('Min Time', 'float'),
                       ('Max Time', 'float'), ('Cell Updates', 'float'),
                       ('Num Grids', 'float'), 
                       ('Updates/processor/sec', 'float')]
            norm = np.ones(data['Total'].shape, dtype=records)    
    
        ### Loop through the y datasets to figure out the extrema
        for i in range(len(field_label)):
            xdata = data[field_label[i]][x_field_index]
            ydata = data[field_label[i]][y_field_index[i]] / \
                    norm[y_field_index[i]]
            if smooth_len:
                ydata = smooth(ydata, smooth_len)
            ydata_cum[1] += ydata
            extrema = preserve_extrema(extrema,xdata,ydata_cum[1])
        if log_y_axis=="Auto":
            if extrema[3]/extrema[2] > 1e3:
                log_y_axis="On"
            else:
                log_y_axis="Off"
    
        ### Reset the cumulative ydata array to have the lowest value found
        ### or zero (for linear plots)
        ydata_cum = np.zeros((2,len(xdata)))
        if log_y_axis == "On":
            ydata_cum[0] += extrema[2]
    
        for i in range(0,num_fields):
            ydata = data[field_label[i]][y_field_index[i]] / \
                    norm[y_field_index[i]]
            if smooth_len:
                ydata = smooth(ydata, smooth_len)
            ydata_cum[1] += ydata
            color = cm.jet(1.*i/num_fields)
            pl.fill_between(xdata,ydata_cum[0],ydata_cum[1],color=color)
            if log_y_axis=="On":
                pl.semilogy(xdata,ydata_cum[1],color=color,label=field_label[i])
            else:
                pl.plot(xdata,ydata_cum[1],color=color,label=field_label[i])
            # Move our top bound to the bottom bound for our next iteration
            ydata_cum[0] = ydata_cum[1]
    
        ### Use xlim and ylim if set explicitly.  If not, use smart defaults
        ### based on the extrema
        if xlim:
            pl.xlim(xlim)
        else:
            pl.xlim(extrema[0:2])
        if ylim:
            pl.ylim(ylim)
        else:
            if log_y_axis=="On":
                y_log_range = 1.2*np.log10(extrema[3]/extrema[2])
                ### To assure there is a labeled tick mark on the y-axis
                if y_log_range < 1.:
                    y_log_range = 1.
                pl.ylim([extrema[2],extrema[2]*10**y_log_range])
            else:
                pl.ylim([0.,1.2*extrema[3]])

        ### Make a legend
        ### Shrink current plot by 20% to make room for external legend
        box = ax.get_position()
        ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])

        ### Put a legend to the right of the current axis
        ### Reverse the order of the entries, so colors match order plotted
        handles, labels = ax.get_legend_handles_labels()
        legend = ax.legend(handles[::-1], labels[::-1],
                          loc='center left', bbox_to_anchor=(1, 0.5))

        ### Make the legend small
        ltext  = legend.get_texts()
        pl.setp(ltext, fontsize='xx-small')

        ### Set the axis labels and save
        pl.xlabel(x_field_axis_label)
        if not y_field_axis_label:
            y_field_axis_label=y_field_index[0]
        if fractional:
            pl.ylabel(y_field_axis_label + ", as Fraction of Total")
        else:
            pl.ylabel(y_field_axis_label)
        pl.suptitle("Stacked Quantities for " + self.filename)
        pl.savefig(filename)
        pl.clf()

    def plot_maxmin(self, field_label, 
                    y_field_axis_label="Max Time - Min Time (sec)",
                    x_field_index='Cycle', x_field_axis_label="Cycle Number",
                    filename="performance.png", repeated_field="", 
                    log_y_axis="Auto", smooth_len=0, fractional=False, 
                    xlim=[], ylim=[]):
        """
        Produce a plot showing how well load balancing is working across 
        different sub-processes or levels.  For each task, it subtracts the
        minimum time taken by any processor from the maximum time taken by
        any processor.
    
        Parameters
        ----------
        field_label : string or array_like of strings
            The label of the field you wish to plot.  If you wish to plot
            multiple fields, enumerate them in an array or tuple. Ex: "Level 0"
        y_field_axis_label : string, optional
            The y axis label on the resulting plot. 
            Default = "Max Time - Min Time (sec)"
        x_field_index : string, optional
            The index of the field you wish to plot on the x axis.
            Default = "Cycle"
        x_field_axis_label : string, optional
            The x axis label on the resulting plot. Default = "Cycle Number"
        filename : string, optional
            The filename to which the plot will be saved.  Default = "performance.png".
        repeated_field : string, optional
            If you have a regularly named set of fields you wish to plot 
            against each other (e.g. "Level 0", "Level 1", "Level 2"), then
            include the string here and they will all be included automatically
            and in order (e.g. "Level").  There are two special cases to this
            parameter.  "Non-Level" includes all fields without "Level" in the 
            name (or "Total"), and "All" includes all fields.
        log_y_axis : string, optional
            This controls whether the plot will use logarithmic units for the
            y axis.  Valid settings are "Auto", "On", and "Off".  When "Auto" is
            used, the code automatically recognizes when you have a maximum 
            y value more than 3 orders of magnitude greater than your minimum y
            value (for non-zero values) at which point it plots the y axis in 
            log units.
        smooth_len : int, optional
            The number of consecutive cycles of data over which smoothing
            occurs.  Default = 0 (i.e. no smoothing).
            Must be an odd number (recommended 5-11)
        fractional : bool, optional
            When set to true, the plotted values are shown as fractions of the
            mean time for that field_label.
        xlim, ylim : array_like, optional
            Set these variables to 2-element lists/arrays in order to
            explicitly set your plot limits
    
        See Also
        --------
        plot_quantity, plot_stack
    
        Examples
        --------
        To produce a plot showing how well the load is balanced on each
        level over the course of the simulation, and save this to 
        performance.png:
    
        >>> plot_maxmin([], repeated_field="All")
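
        A hypothetical call restricted to two levels, normalized by each
        field's mean time and saved to "balance.png":

        >>> plot_maxmin(["Level 0", "Level 1"], fractional=True,
        ...             filename="balance.png")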
        """
        ax = pl.subplot(111)
        data = self.data
        extrema = np.zeros(5)
    
        ### Convert plots of single quantities to list format for homogeneous
        ### processing.
        if not is_listlike(field_label):
            field_label = [field_label]
    
        ### If there is a repeated_field, figure out how many fields 
        ### there are including any that were defined in the original 
        ### field_label argument.
        if repeated_field:
            key_list = data.keys()
            if repeated_field == "All":
                field_label = key_list

            elif repeated_field == "Non-Level":
                for key in key_list:
                    if not key.startswith("Level") and \
                       not key.startswith("Total"):
                        field_label.append(key)
            else:
                for key in key_list:
                    if key.startswith(repeated_field):
                        field_label.append(key)
        num_fields = len(field_label)
        field_label.sort()
    
        ### Loop through the y datasets to figure out the extrema
        for i in range(len(field_label)):
            xdata = data[field_label[i]][x_field_index]
            ydata = data[field_label[i]]["Max Time"] - \
                    data[field_label[i]]["Min Time"]
            if fractional:
                ydata /= data[field_label[i]]["Mean Time"]
                ydata[np.isnan(ydata)] = 0.0  # zero out NaNs from 0/0 divisions
            if smooth_len:
                ydata = smooth(ydata,smooth_len)
            extrema = preserve_extrema(extrema,xdata,ydata)
        if log_y_axis=="Auto":
            if extrema[3]/extrema[2] > 1e3:
                log_y_axis="On"
            else:
                log_y_axis="Off"
    
        ### Now for the actual plotting
        for i in range(len(field_label)):
            color = cm.jet(1.*i/num_fields)
            xdata = data[field_label[i]][x_field_index]
            ydata = data[field_label[i]]["Max Time"] - \
                    data[field_label[i]]["Min Time"]
            if fractional:
                ydata /= data[field_label[i]]["Mean Time"]
                ydata[np.isnan(ydata)] = 0.0  # zero out NaNs from 0/0 divisions
            if smooth_len:
                ydata = smooth(ydata,smooth_len)
            if log_y_axis=="On":
                try:
                    pl.semilogy(xdata,ydata,color=color,label=field_label[i])
                except:
                    pl.plot(xdata,ydata,color=color,label=field_label[i])
            else:
                pl.plot(xdata,ydata,color=color,label=field_label[i])
    
        ### Use xlim and ylim if set explicitly.  If not, use smart defaults
        ### based on the extrema
        if xlim:
            pl.xlim(xlim)
        else:
            pl.xlim(extrema[0:2])
        if ylim:
            pl.ylim(ylim)
        else:
            if log_y_axis=="On":
                y_log_range = 1.2*np.log10(extrema[3]/extrema[2])
                ### To assure there is a labeled tick mark on the y-axis
                if y_log_range < 1.:
                    y_log_range = 1.
                pl.ylim([extrema[2],extrema[2]*10**y_log_range])
            else:
                pl.ylim([0.,1.2*extrema[3]])


        ### Make a legend
        ### Shrink current plot by 20% to make room for external legend
        box = ax.get_position()
        ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])

        ### Put a legend to the right of the current axis
        ### Reverse the order of the entries, so colors match order plotted
        handles, labels = ax.get_legend_handles_labels()
        legend = ax.legend(handles[::-1], labels[::-1],
                          loc='center left', bbox_to_anchor=(1, 0.5))

        ### Make the legend small
        ltext  = legend.get_texts()
        pl.setp(ltext, fontsize='xx-small')

        ### Set the axis labels and save
        pl.xlabel(x_field_axis_label)
        ### There is no y_field_index in plot_maxmin, so fall back to the
        ### default label
        if not y_field_axis_label:
            y_field_axis_label = "Max Time - Min Time (sec)"
        if fractional:
            pl.ylabel(y_field_axis_label + ", as Fraction of Mean Time")
        else:
            pl.ylabel(y_field_axis_label)
        pl.suptitle("Variance of Load Balance for " + self.filename)
        pl.savefig(filename)
        pl.clf()
        
        
### *** DEFAULT COMMAND LINE BEHAVIOR ***

### If performance_tools.py is invoked from the command line, these are its 
### default behaviors:
###
### -- Build a perform object from the provided filename
### -- Make some handy plots and save them

if __name__ == "__main__":
    from optparse import OptionParser
    usage = "usage: %prog <.out file>"
    parser = OptionParser(usage)
    parser.add_option("-s","--smooth",dest="nsmooth",type='int',
                      default=0,
                      help="Set number of cycles over which to smooth (odd)")
    (opts, args) = parser.parse_args()
    if len(args) != 1:
        parser.error("incorrect number of arguments")
    filename = args[0]

    ### Build a perform object from the data and generate some default plots.
    ### If you want to generate your own plots of different quantities, 
    ### add them below.
    p = perform(filename)

    ### Plot the mean time taken per processor on each level and on the 
    ### simulation as a whole (Total).  Overplotted in lighter tones are the
    ### minimum and maximum time taken on a processor for each of these 
    ### quantities.
    p.plot_quantity('Total', 'Mean Time', y_field_axis_label="Mean Time (sec)", 
                    repeated_field="Level", filename='p1.png',
                    smooth_len=opts.nsmooth, bounds='minmax')

    ### Plot the mean time taken per processor on each level and on the 
    ### simulation as a whole (Total).  Overplotted in lighter tones are the
    ### minimum and maximum time taken on a processor for each of these 
    ### quantities.  Scale everything as a fraction of the total
    ### time taken.
    p.plot_quantity('Total', 'Mean Time', y_field_axis_label="Mean Time (sec)", 
                    repeated_field="Level", filename='p2.png',
                    smooth_len=opts.nsmooth, bounds='minmax', fractional=True)

    ### Plot the mean time taken per processor on each level.  Stack each 
    ### level on the previous layer cumulatively.  
    p.plot_stack([], 'Mean Time', y_field_axis_label="Mean Time (sec)", 
                 repeated_field="Level", filename='p3.png', 
                 smooth_len=opts.nsmooth)

    ### Plot the mean time taken per processor on each non-level task
    ### (e.g. RebuildHierarchy, SolveHydroEquations).  Stack each task on
    ### the previous one cumulatively.  Scale everything as a fraction of
    ### the total time taken.
    p.plot_stack([], 'Mean Time', y_field_axis_label="Mean Time", 
                 repeated_field="Non-Level", filename='p4.png', 
                 smooth_len=opts.nsmooth, fractional=True, ylim=[0.0,1.0])

    ### Plot the number of cell updates generated at each level and stack them
    ### cumulatively.
    p.plot_stack([], 'Cell Updates', y_field_axis_label='Number of Cell Updates', 
                 repeated_field="Level", filename='p5.png', 
                 smooth_len=opts.nsmooth)

    ### Plot the efficiency (updates/processor/sec) for each level and for
    ### the simulation as a whole versus time.
    p.plot_quantity(['Total'], 'Updates/processor/sec', 
                    y_field_axis_label='Efficiency (cell updates/sec/processor)', 
                    repeated_field='Level', filename='p6.png', 
                    smooth_len=opts.nsmooth)

    ### Plot the load balancing (Max Time - Min Time) for all subprocesses
    ### and levels, and for the simulation as a whole, versus time.
    p.plot_maxmin([], repeated_field="All", filename='p7.png', fractional=False,
                  smooth_len=opts.nsmooth)

    ### Plot the load balancing (Max Time - Min Time) for all subprocesses 
    ### and levels, and for the simulation as a whole, versus time.  Normalize them
    ### by the mean time taken for each process.
    p.plot_maxmin([], repeated_field="All", filename='p8.png', fractional=True,
                  smooth_len=opts.nsmooth)