1. Shlomi Fish
  2. perl-Statistics-Descriptive

Commits

Shlomi Fish  committed e29792b

Returning undef() or empty list upon lack of data.

Thanks to Shawn Laffan for the patch - fixes:

https://rt.cpan.org/Ticket/Display.html?id=74693

  • Participants
  • Parent commits 6653104
  • Branches default

Comments (0)

Files changed (3)

File Statistics-Descriptive/Changes

View file
  • Ignore whitespace
 Revision history for Perl extension Statistics::Descriptive.
 
+3.0300      February 11, 2012
+    - Now mean() and median() and other routines return undef() if there are
+    no data.
+    - Somewhat incompatible change: some methods that returned undef() in
+    list context now return an empty list (so the result tests as false).
+        - it is generally not recommended to call such methods in list context
+        as they should always be called in scalar context.
+    - Resolves https://rt.cpan.org/Ticket/Display.html?id=74693
+        - thanks to Shawn Laffan for the report and the patch.
+
 3.0203      November 17, 2011
     - Fix https://rt.cpan.org/Ticket/Display.html?id=72495 .
         - percentile should not die and should return undef if there are

File Statistics-Descriptive/lib/Statistics/Descriptive.pm

View file
  • Ignore whitespace
 ##Define the fields to be used as methods
 %fields = (
   count			=> 0,
-  mean			=> 0,
-  sum			=> 0,
-  sumsq			=> 0,
+  mean			=> undef,
+  sum			=> undef,
+  sumsq			=> undef,
   min			=> undef,
   max			=> undef,
   mindex		=> undef,
   maxdex		=> undef,
   sample_range		=> undef,
-  variance => undef,
+  variance              => undef,
   );
 
 __PACKAGE__->_make_accessors( [ grep { $_ ne "variance" } keys(%fields) ] );
   $self->count($count);
   ##Indicate that the value is not cached.  Variance isn't used commonly
   ##enough to be recomputed on every single data add.
-  $self->_variance(undef());
+  $self->_variance(undef);
   return 1;
 }
 
 sub standard_deviation {
   my $self = shift;  ##Myself
-  return undef if (!$self->count());
+  return if (!$self->count());
   return sqrt($self->variance());
 }
 
 ##Return variance; if needed, compute and cache it.
 sub variance {
   my $self = shift;  ##Myself
+
+  return if (!$self->count());
+  
   my $div = @_ ? 0 : 1;
   my $count = $self->count();
   if ($count < 1 + $div) {
 
     if ((! $count) || ($percentile < 100 / $count))
     {
-        return undef;
+        return;  #  allow for both scalar and list context
     }
 
     $self->sort_data();
 sub median {
     my $self = shift;
 
+    return if !$self->count;    
+    
     ##Cached?
     if (! defined($self->_median()))
     {
        return;
     }
     
+    #  check data count after the args are checked - should help debugging
+    return if !$self->count;  
+    
     $self->sort_data();
 
     return $self->_data->[0] if ( $QuantileNumber == 0 );
         ($lower,$upper) = ($_[0],$_[1]);
     }
 
+    #  check data count after the args
+    return if !$self->count;    
+
     ##Cache
     my $thistm = join ':',$lower,$upper;
     my $cache = $self->_trimmed_mean_cache();
 
 sub geometric_mean {
     my $self = shift;
+    
+    return if !$self->count;
 
     if (!defined($self->_geometric_mean()))
     {

File Statistics-Descriptive/t/descr.t

View file
  • Ignore whitespace
 use strict;
 use warnings;
 
-use Test::More tests => 29;
+use Test::More tests => 50;
 
 use Benchmark;
 use Statistics::Descriptive;
     my $stat = Statistics::Descriptive::Full->new();
     my @results = $stat->least_squares_fit();
     # TEST
-    ok (!scalar(@results), "Results on an non-filled object are empty.");
+    ok (!scalar(@results), "Least-squares results on a non-filled object are empty.");
 
     # test #2
     # data are y = 2*x - 1
     # TEST
     ok (!defined($ret), 'Returns undef and does not die.');
 }
+
+
+
+#  test stats when no data have been added
+{
+    my $stat = Statistics::Descriptive::Full->new();
+    my ($result, $str);
+
+    #  An accessor method for _permitted would be handy,
+    #  or one to get all the stats methods
+    my @methods = qw {
+        mean sum variance standard_deviation
+        min mindex max maxdex sample_range
+        skewness kurtosis median
+        harmonic_mean geometric_mean
+        mode least_squares_fit
+        percentile frequency_distribution 
+    };
+    #  least_squares_fit is handled in an earlier test, so it is actually a duplicate here
+    
+    #diag 'Results are undef when no data added';
+    #  need to update next line when new methods are tested here
+    # TEST:$method_count=18
+    foreach my $method (sort @methods) {  
+        $result = $stat->$method;
+        # TEST*$method_count
+        ok (!defined ($result), "$method is undef when object has no data.");
+    }
+
+    #  quantile and trimmed_mean require valid args, so don't test in the method loop
+    my $method = 'quantile';
+    $result = $stat->$method(1);
+    # TEST
+    ok (!defined ($result), "$method is undef when object has no data.");
+    
+    $method = 'trimmed_mean';
+    $result = $stat->$method(0.1);
+    # TEST
+    ok (!defined ($result), "$method is undef when object has no data.");    
+}
+
+#  test SD when only one value added
+{
+    my $stat = Statistics::Descriptive::Full->new();
+    $stat->add_data( 1 );
+
+    my $result = $stat->standard_deviation();
+    # TEST
+    ok ($result == 0, "SD is zero when object has one record.");
+}