libStatGen Software  1
BamIndex.h
1 /*
2  * Copyright (C) 2010-2012 Regents of the University of Michigan
3  *
4  * This program is free software: you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation, either version 3 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program. If not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #ifndef __BAM_INDEX_H__
19 #define __BAM_INDEX_H__
20 
21 #include <stdint.h>
22 #include <vector>
23 #include <map>
24 #include <stdlib.h>
25 
26 #include "IndexBase.h"
27 
28 #include "InputFile.h"
29 #include "SamStatus.h"
30 /// Reads a BAM index file and answers chunk, file-offset and read-count queries against it.
31 class BamIndex : public IndexBase
32 {
33 public:
34 
35  BamIndex();
36  virtual ~BamIndex();
37 
38  /// Reset the member data for a new index file.
39  virtual void resetIndex();
40 
41  /// Read & parse the specified index file.
42  /// \param filename the bam index file to be read.
43  /// \return the status of the read.
44  SamStatus::Status readIndex(const char* filename);
45 
46  /// Get the list of chunks associated with this region.
47  /// For an entire reference ID, set start and end to -1.
48  /// To start at the beginning of the region, set start to 0 or -1.
49  /// To go to the end of the region, set end to -1.
50  bool getChunksForRegion(int32_t refID, int32_t start, int32_t end,
51  SortedChunkList& chunkList);
52 
53  uint64_t getMaxOffset() const; // NOTE(review): undocumented; presumably the largest file offset seen in the index - confirm in BamIndex.cpp.
54 
55  /// Get the minimum and maximum file offsets for the specified reference ID.
56  /// \param refID the reference ID to locate in the file.
57  /// \param minOffset returns the min file offset for the specified reference
58  /// \param maxOffset returns the max file offset for the specified reference
59  /// \return whether or not the reference was found in the file
60  bool getReferenceMinMax(int32_t refID,
61  uint64_t& minOffset,
62  uint64_t& maxOffset) const;
63 
64  /// Get the number of mapped reads for this reference id. Returns -1 for
65  /// out of range refIDs.
66  /// \param refID reference ID for which to extract the number of mapped reads.
67  /// \return number of mapped reads for the specified reference id.
68  int32_t getNumMappedReads(int32_t refID);
69 
70  /// Get the number of unmapped reads for this reference id. Returns -1 for
71  /// out of range refIDs.
72  /// \param refID reference ID for which to extract the number of unmapped reads.
73  /// \return number of unmapped reads for the specified reference id
74  int32_t getNumUnMappedReads(int32_t refID);
75 
76  /// Print the index information.
77  /// \param refID reference ID for which to print info for. -1 means print for all references.
78  /// \param summary whether or not to just print a summary (defaults to false). The summary just contains summary info for each reference and not every bin/chunk.
79  void printIndex(int32_t refID, bool summary = false);
80 
81  // NOTE(review): a stale "Number of reference sequences." comment stood here; no such member is declared in this class.
82  /// The number used for an unknown number of reads.
83  static const int32_t UNKNOWN_NUM_READS = -1;
84 
85  /// The number used for the reference id of unmapped reads.
86  static const int32_t REF_ID_UNMAPPED = -1;
87 
88  /// The number used to indicate that all reference ids should be used.
89  static const int32_t REF_ID_ALL = -2;
90 
91 private:
92  uint64_t maxOverallOffset; // NOTE(review): presumably the value reported by getMaxOffset() - confirm in BamIndex.cpp.
93 
94  int32_t myUnMappedNumReads; // NOTE(review): presumably the count of reads with no reference (REF_ID_UNMAPPED) - confirm.
95 };
96 
97 
98 #endif
SortedChunkList
Definition: IndexBase.h:49
BamIndex
Definition: BamIndex.h:32
BamIndex::getChunksForRegion
bool getChunksForRegion(int32_t refID, int32_t start, int32_t end, SortedChunkList &chunkList)
Get the list of chunks associated with this region.
Definition: BamIndex.cpp:218
BamIndex::UNKNOWN_NUM_READS
static const int32_t UNKNOWN_NUM_READS
The number used for an unknown number of reads.
Definition: BamIndex.h:83
BamIndex::getNumMappedReads
int32_t getNumMappedReads(int32_t refID)
Get the number of mapped reads for this reference id.
Definition: BamIndex.cpp:355
BamIndex::getReferenceMinMax
bool getReferenceMinMax(int32_t refID, uint64_t &minOffset, uint64_t &maxOffset) const
Get the minimum and maximum file offsets for the specified reference ID.
Definition: BamIndex.cpp:337
BamIndex::REF_ID_ALL
static const int32_t REF_ID_ALL
The number used to indicate that all reference ids should be used.
Definition: BamIndex.h:89
BamIndex::printIndex
void printIndex(int32_t refID, bool summary=false)
Print the index information.
Definition: BamIndex.cpp:398
BamIndex::getNumUnMappedReads
int32_t getNumUnMappedReads(int32_t refID)
Get the number of unmapped reads for this reference id.
Definition: BamIndex.cpp:377
IndexBase
Definition: IndexBase.h:63
StatGenStatus::Status
Status
Return value enum for StatGenFile methods.
Definition: StatGenStatus.h:32
InputFile.h
BamIndex::readIndex
SamStatus::Status readIndex(const char *filename)
Definition: BamIndex.cpp:45
BamIndex::resetIndex
virtual void resetIndex()
Reset the member data for a new index file.
Definition: BamIndex.cpp:35
BamIndex::REF_ID_UNMAPPED
static const int32_t REF_ID_UNMAPPED
The number used for the reference id of unmapped reads.
Definition: BamIndex.h:86