libStatGen Software  1
SamStatistics.cpp
1 /*
2  * Copyright (C) 2010 Regents of the University of Michigan
3  *
4  * This program is free software: you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation, either version 3 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program. If not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #include "SamStatistics.h"
19 #include <iomanip>
20 #include "SamFlag.h"
21 
22 SamStatistics::SamStatistics()
23 {
24  reset();
25 }
26 
27 SamStatistics::~SamStatistics()
28 {
29  reset();
30 }
31 
32 void SamStatistics::reset()
33 {
34  myReadCount = 0;
35  myMappedReadCount = 0;
36  myPairedReadCount = 0;
37  myProperPairedReadCount = 0;
38  myBaseCount = 0;
39  myMappedReadBases = 0;
40  myDupReadCount = 0;
41  myQCFailureReadCount = 0;
42 }
43 
44 
45 bool SamStatistics::updateStatistics(SamRecord& samRecord)
46 {
47  // Each record has one read, so update the read count.
48  ++myReadCount;
49 
50  int32_t readLen = samRecord.getReadLength();
51 
52  // Get the flag to determine the type or
53  // read (mapped, paired, proper paired).
54  uint16_t flag = samRecord.getFlag();
55 
56  // If the read is mapped, update the mapped c
57  if(SamFlag::isMapped(flag))
58  {
59  ++myMappedReadCount;
60  myMappedReadBases += readLen;
61  }
62  if(SamFlag::isPaired(flag))
63  {
64  ++myPairedReadCount;
65  if(SamFlag::isProperPair(flag))
66  {
67  ++myProperPairedReadCount;
68  }
69  }
70  if(SamFlag::isDuplicate(flag))
71  {
72  ++myDupReadCount;
73  }
74  if(SamFlag::isQCFailure(flag))
75  {
76  ++myQCFailureReadCount;
77  }
78 
79  // Increment the total number of bases.
80  myBaseCount += readLen;
81 
82  return(true);
83 }
84 
85 
86 void SamStatistics::print()
87 {
88  double DIVIDE_UNITS = 1000000;
89  std::string units = "(e6)";
90 
91  std::cerr << std::fixed << std::setprecision(2);
92 
93  // If total reads is less than DIVIDE_UNITS, just show the straight number.
94  if(myReadCount < DIVIDE_UNITS)
95  {
96  DIVIDE_UNITS = 1;
97  units.clear();
98  }
99 
100  // Read Counts
101  std::cerr << "TotalReads" << units << "\t"
102  << myReadCount/DIVIDE_UNITS << std::endl;
103  std::cerr << "MappedReads" << units << "\t"
104  << myMappedReadCount/DIVIDE_UNITS << std::endl;
105  std::cerr << "PairedReads" << units << "\t"
106  << myPairedReadCount/DIVIDE_UNITS << std::endl;
107  std::cerr << "ProperPair" << units << "\t"
108  << myProperPairedReadCount/DIVIDE_UNITS << std::endl;
109  std::cerr << "DuplicateReads" << units << "\t"
110  << myDupReadCount/DIVIDE_UNITS << std::endl;
111  std::cerr << "QCFailureReads" << units << "\t"
112  << myQCFailureReadCount/DIVIDE_UNITS << std::endl;
113  std::cerr << std::endl;
114 
115  // Read Percentages
116  std::cerr << "MappingRate(%)\t"
117  << 100 * myMappedReadCount/(double)myReadCount << std::endl;
118  std::cerr << "PairedReads(%)\t"
119  << 100 * myPairedReadCount/(double)myReadCount << std::endl;
120  std::cerr << "ProperPair(%)\t"
121  << 100 * myProperPairedReadCount/(double)myReadCount << std::endl;
122  std::cerr << "DupRate(%)\t"
123  << 100 * myDupReadCount/(double)myReadCount << std::endl;
124  std::cerr << "QCFailRate(%)\t"
125  << 100 * myQCFailureReadCount/(double)myReadCount << std::endl;
126  std::cerr << std::endl;
127 
128  // Base Counts
129  std::cerr << "TotalBases" << units << "\t"
130  << myBaseCount/DIVIDE_UNITS << std::endl;
131  std::cerr << "BasesInMappedReads" << units << "\t"
132  << myMappedReadBases/DIVIDE_UNITS << std::endl;
133 }
SamRecord::getFlag
uint16_t getFlag()
Get the flag (FLAG).
Definition: SamRecord.cpp:1372
SamRecord::getReadLength
int32_t getReadLength()
Get the length of the read.
Definition: SamRecord.cpp:1379
SamRecord
Class providing an easy to use interface to get/set/operate on the fields in a SAM/BAM record.
Definition: SamRecord.h:52