multovl  1.3
Multiple overlaps of genomic regions
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Pages
multioverlap.hh
Go to the documentation of this file.
1 /* <LICENSE>
2 License for the MULTOVL multiple genomic overlap tools
3 
4 Copyright (c) 2007-2012, Dr Andras Aszodi,
5 Campus Science Support Facilities GmbH (CSF),
6 Dr-Bohr-Gasse 3, A-1030 Vienna, Austria, Europe.
7 All rights reserved.
8 
9 Redistribution and use in source and binary forms, with or without
10 modification, are permitted provided that the following conditions are
11 met:
12 
13  * Redistributions of source code must retain the above copyright notice,
14  this list of conditions and the following disclaimer.
15  * Redistributions in binary form must reproduce the above copyright notice,
16  this list of conditions and the following disclaimer in the documentation
17  and/or other materials provided with the distribution.
18  * Neither the name of the Campus Science Support Facilities GmbH
19  nor the names of its contributors may be used to endorse
20  or promote products derived from this software without specific prior
21  written permission.
22 
23 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS
24 AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
25 INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
26 AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
27 THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
28 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
29 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
30 USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
31 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
32 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
33 THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34 </LICENSE> */
35 #ifndef MULTOVL_MULTIOVERLAP_HEADER
36 #define MULTOVL_MULTIOVERLAP_HEADER
37 
38 // == Header multioverlap.hh ==
39 
44 
45 // -- System headers --
46 
47 #include <string>
48 #include <map>
49 #include <set>
50 #include <vector>
51 #include <iostream>
52 #include <algorithm>
53 #include <iterator>
54 
55 // -- Boost headers --
56 
57 #include "boost/operators.hpp"
58 #include "boost/serialization/vector.hpp"
59 
60 // -- Own headers --
61 
62 #include "multireglimit.hh"
63 #include "multiregion.hh"
64 
65 // == Classes ==
66 
67 namespace multovl {
68 
76 {
77 private:
78 
79  typedef std::pair<unsigned int, unsigned int> uintpair_t;
80  typedef std::vector<uintpair_t> uintpairvec_t;
81 
82  // -- internal classes --
83 
89  class Filter
90  {
91  public:
92 
105  Filter(unsigned int ovlen, unsigned int minmult,
106  unsigned int maxmult, bool checksoli, bool intrack = true);
107 
120  bool accept_new_region(unsigned int mrstart, unsigned int mrend,
121  const ancregset_t& ancestors, unsigned int& mult) const;
122 
123  private:
124 
125  static unsigned int distinct_track_count(const ancregset_t& ancestors);
126 
127  unsigned int _ovlen, _minmult, _maxmult;
128  bool _solitary, _intrack;
129 
130  }; // class Filter
131 
132 public:
133 
139  class Counter
140  {
141  public:
142 
144  Counter(): _histo(), _total(0) {}
145 
147  void count(const MultiRegion& mr);
148 
150  unsigned int total() const { return _total; }
151 
153  Counter& operator+=(const Counter& other);
154 
159  std::string to_string() const;
160 
161  private:
162 
163  static
164  std::string get_key(
165  const std::vector<int>::const_iterator& from,
166  const std::vector<int>::const_iterator& to
167  );
168 
169  typedef std::map<std::string, unsigned int> histo_t;
170  histo_t _histo;
171  unsigned int _total;
172 
173  }; // class Counter
174 
175  typedef std::vector<MultiRegion> multiregvec_t;
176 
177  // -- methods --
178 
180  MultiOverlap(): MultiRegLimit(), _multiregions() {}
181 
183  MultiOverlap(const Region& region, unsigned int trackid):
184  MultiRegLimit(region, trackid), _multiregions()
185  {}
186 
203  unsigned int find_overlaps(unsigned int ovlen,
204  unsigned int minmult = 1, unsigned int maxmult = 0, bool intrack = true);
205 
216  unsigned int find_unionoverlaps(unsigned int ovlen,
217  unsigned int minmult = 2, unsigned int maxmult = 0);
218 
222  const multiregvec_t& overlaps() const { return _multiregions; }
223 
227  void overlap_stats(Counter& counter) const;
228 
229 private:
230 
231  // -- data
232  multiregvec_t _multiregions;
233 
234  // serialization
235  friend class boost::serialization::access;
236  template <class Archive>
237  void serialize(Archive& ar, const unsigned int version)
238  {
239  ar & boost::serialization::base_object<multovl::MultiRegLimit>(*this);
240  ar & _multiregions;
241  }
242 
243 };
244 
245 } // namespace multovl
246 
247 #endif // MULTOVL_MULTIOVERLAP_HEADER
Counter & operator+=(const Counter &other)
Merge the contents of another Counter object into the calling object.
MultiOverlap()
Init to empty.
Definition: multioverlap.hh:180
std::multiset< AncestorRegion > ancregset_t
Definition: ancregion.hh:134
MultiOverlap(const Region &region, unsigned int trackid)
Init to contain a region and trackid.
Definition: multioverlap.hh:183
void count(const MultiRegion &mr)
Updates the counts based on the ancestor IDs in mr.
Region representing multiple overlaps for file-based "multovl".
Definition: multiregion.hh:62
Definition: multireglimit.hh:64
Counter()
Default ctor.
Definition: multioverlap.hh:144
const multiregvec_t & overlaps() const
Definition: multioverlap.hh:222
Instances of the Region class represent regions on a sequence. They have first and last coordinates...
Definition: region.hh:65
unsigned int find_overlaps(unsigned int ovlen, unsigned int minmult=1, unsigned int maxmult=0, bool intrack=true)
unsigned int total() const
Return the total count.
Definition: multioverlap.hh:150
void overlap_stats(Counter &counter) const
Definition: multioverlap.hh:75
unsigned int find_unionoverlaps(unsigned int ovlen, unsigned int minmult=2, unsigned int maxmult=0)
std::string to_string() const
Collection of multiple region limits.
Definition: multioverlap.hh:139