multovl  1.3
Multiple overlaps of genomic regions
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Pages
stat.hh
1 /* <LICENSE>
2 License for the MULTOVL multiple genomic overlap tools
3 
4 Copyright (c) 2007-2012, Dr Andras Aszodi,
5 Campus Science Support Facilities GmbH (CSF),
6 Dr-Bohr-Gasse 3, A-1030 Vienna, Austria, Europe.
7 All rights reserved.
8 
9 Redistribution and use in source and binary forms, with or without
10 modification, are permitted provided that the following conditions are
11 met:
12 
13  * Redistributions of source code must retain the above copyright notice,
14  this list of conditions and the following disclaimer.
15  * Redistributions in binary form must reproduce the above copyright notice,
16  this list of conditions and the following disclaimer in the documentation
17  and/or other materials provided with the distribution.
18  * Neither the name of the Campus Science Support Facilities GmbH
19  nor the names of its contributors may be used to endorse
20  or promote products derived from this software without specific prior
21  written permission.
22 
23 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
24 AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
25 INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
26 AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
27 THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
28 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
29 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
30 USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
31 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
32 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
33 THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 </LICENSE> */
35 #ifndef MULTOVL_PROB_STAT_HEADER
36 #define MULTOVL_PROB_STAT_HEADER
37 
38 // == HEADER stat.hh ==
39 
43 
44 // -- Library headers --
45 
46 #include "empirdistr.hh"
47 
48 // -- Boost headers --
49 
50 #include "boost/lexical_cast.hpp"
51 
52 // -- Standard headers --
53 
54 #include <limits>
55 #include <map>
56 
57 // == CLASSES ==
58 
59 namespace multovl {
60 namespace prob {
61 
67 class Stat
68 {
69 public:
70 
71  // -- Member classes --
72 
76  {
77  public:
78  explicit NotfoundException(
79  unsigned int multiplicity
80  ):
81  _msg("Multiplicity " +
82  boost::lexical_cast<std::string>(multiplicity) +
83  " not found"
84  ) {}
85  const std::string error_message() const
86  {
87  return _msg;
88  }
89 
90  private:
91  std::string _msg;
92 
93  };
94  // END of member class NotFoundException
95 
96  // utility to keep together the actual total overlap length
97  // and the empirically estimated null distribution
98  // for a given multiplicity
99  class Distr
100  {
101  public:
102 
103  // Init. For the time being the default /ncell/ will be used
104  explicit Distr(unsigned int ncell = 0):
105  _actual(0.0),
106  _nulldistr(ncell),
107  _pvalue(0.0),
108  _zscore(0.0),
109  _valid(false)
110  {}
111 
112  // Init straight away by invoking add(val, is_actual)
113  Distr(double val, bool is_actual):
114  _actual(0.0),
115  _nulldistr(),
116  _pvalue(0.0),
117  _zscore(0.0)
118  {
119  add(val, is_actual);
120  }
121 
122  // Adds a value
123  // \param val the value to be added
124  // \param act if true, then the /actual/ member
125  // will be set to /val/, otherwise it will be added
126  // to the /nulldistr/ estimate
127  void add(double val, bool is_actual)
128  {
129  if (is_actual) _actual = val;
130  else _nulldistr.add(val);
131  _valid = false;
132  }
133 
134  // Evaluates the nulldistr and the actual/nulldistr
135  // comparison quantities such as p-value, z-score...
136  // If there were enough data, then is_valid() will return /true/ afterwards
137  void evaluate()
138  {
139  _nulldistr.evaluate();
140  try {
141  double c = _nulldistr.cdf(_actual);
142  _pvalue = (c >= 0.5)? 1.0 - c: c;
143  _zscore = (_actual - _nulldistr.mean())/_nulldistr.std_dev();
144  _valid = true;
145  } catch (const EmpirDistr::Exception&) {
146  _valid = false;
147  }
148  }
149 
150  // accessors
151  bool is_valid() const { return _valid; }
152  double actual() const { return _actual; }
153  const EmpirDistr& nulldistr() const { return _nulldistr; }
154  double p_value() const { return _pvalue; }
155  double z_score() const { return _zscore; }
156 
157  private:
158  // data
159  double _actual;
160  EmpirDistr _nulldistr;
161  double _pvalue;
162  double _zscore;
163  bool _valid;
164  };
165 
167  Stat():
168  _distrs(),
169  _minmult(std::numeric_limits<unsigned int>::max()),
170  _maxmult(0)
171  {}
172 
180  void add(unsigned int multiplicity,
181  double val,
182  bool is_actual = false);
183 
185  void evaluate();
186 
191  const Distr& distr(unsigned int multiplicity) const throw(NotfoundException);
192 
194  unsigned int min_mult() const { return _minmult; }
195 
197  unsigned int max_mult() const { return _maxmult; }
198 
199 private:
200 
201  // multiplicity => data distribution
202  typedef std::map<unsigned int, Distr> distrs_t;
203  typedef distrs_t::iterator diter_t;
204 
205  distrs_t _distrs;
206  unsigned int _minmult, _maxmult;
207 };
208 
209 } // namespace prob
210 } // namespace multovl
211 
212 #endif // MULTOVL_PROB_STAT_HEADER
213 
double std_dev() const
void add(unsigned int multiplicity, double val, bool is_actual=false)
Stat()
Init to default (empty).
Definition: stat.hh:167
EmpirDistr & add(double x)
void evaluate()
evaluate(): normalise the counts and make the CDF.
Definition: empirdistr.hh:69
const Distr & distr(unsigned int multiplicity) const
unsigned int min_mult() const
Definition: stat.hh:194
void evaluate()
Evaluates the distributions inside.
Definition: stat.hh:67
Definition: empirdistr.hh:58
double cdf(double x) const
Definition: stat.hh:99
unsigned int max_mult() const
Definition: stat.hh:197