libStatGen Software  1
TestFilter.cpp
1 /*
2  * Copyright (C) 2011 Regents of the University of Michigan
3  *
4  * This program is free software: you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation, either version 3 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program. If not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #include "TestFilter.h"
19 #include "TestValidate.h"
20 #include "SamFilter.h"
21 #include <assert.h>
22 
23 void testFilter()
24 {
25  // Call generic test which since the sam and bam are identical, should
26  // contain the same results.
27  FilterTest::testFilter(FilterTest::SAM);
28 #ifdef __ZLIB_AVAILABLE__
29  FilterTest::testFilter(FilterTest::BAM);
30 #endif
31 }
32 
33 
34 void FilterTest::testFilter(FileType inputType)
35 {
36  SamFile inSam;
37 
38  if(inputType == SAM)
39  {
40  assert(inSam.OpenForRead("testFiles/testSam.sam"));
41  }
42  else
43  {
44  assert(inSam.OpenForRead("testFiles/testBam.bam"));
45  }
46 
47  // Read the SAM Header.
48  SamFileHeader samHeader;
49  assert(inSam.ReadHeader(samHeader));
50  validateHeader(samHeader);
51 
52  SamRecord samRecord;
53  assert(inSam.ReadRecord(samHeader, samRecord) == true);
54  validateRead1(samRecord);
55 
56  // Clip the read, 2 from the front and 2 from the back, which causes 2D to
57  // be dropped.
58  assert(SamFilter::softClip(samRecord, 2, 2) == SamFilter::CLIPPED);
59  assert(samRecord.get0BasedPosition() == TestValidate::READ1_POS + 2);
60  std::string expectedCigar = "2S1M2S";
61  assert(samRecord.getCigar() == expectedCigar);
62  assert(samRecord.getSequence() == TestValidate::READ1_SEQ);
63  assert(samRecord.getQuality() == TestValidate::READ1_QUAL);
64  // Only 1 base, so the end is the same as start
65  assert(samRecord.get0BasedAlignmentEnd() == TestValidate::READ1_POS + 2);
66  assert(samRecord.getAlignmentLength() == 1);
67  assert(samRecord.get0BasedUnclippedStart() == TestValidate::READ1_UNCLIP_START);
68  // The new unclipped end is not the same as the original end because the
69  // 2 deletions are lost.
70  assert(samRecord.get0BasedUnclippedEnd() == TestValidate::READ1_UNCLIP_END - 2);
71 
72 
73  assert(inSam.ReadRecord(samHeader, samRecord) == true);
74  validateRead2(samRecord);
75 
76  assert(inSam.ReadRecord(samHeader, samRecord) == true);
77  validateRead3(samRecord);
78 
79  assert(inSam.ReadRecord(samHeader, samRecord) == true);
80  validateRead4(samRecord);
81 
82  assert(inSam.ReadRecord(samHeader, samRecord) == true);
83  validateRead5(samRecord);
84 
85  assert(inSam.ReadRecord(samHeader, samRecord) == true);
86  validateRead6(samRecord);
87 
88  // Clip the read 2 more from the front and 2 from the back.
89  assert(SamFilter::softClip(samRecord, 5, 2) == SamFilter::CLIPPED);
90  assert(samRecord.get0BasedPosition() == TestValidate::READ6_POS + 2);
91  expectedCigar = "2H5S1M2S";
92  assert(samRecord.getCigar() == expectedCigar);
93  assert(samRecord.getSequence() == TestValidate::READ6_SEQ);
94  assert(samRecord.getQuality() == TestValidate::READ6_QUAL);
95  // Only 1 base, so the end is the same as start
96  assert(samRecord.get0BasedAlignmentEnd() == TestValidate::READ6_POS + 2);
97  assert(samRecord.getAlignmentLength() == 1);
98  assert(samRecord.get0BasedUnclippedStart() == TestValidate::READ6_UNCLIP_START);
99  assert(samRecord.get0BasedUnclippedEnd() == TestValidate::READ6_UNCLIP_END);
100 
101  assert(inSam.ReadRecord(samHeader, samRecord) == true);
102  validateRead7(samRecord);
103 
104  // Clip the read 2 more from the front and 2 morefrom the back.
105  assert(SamFilter::softClip(samRecord, 5, 3) == SamFilter::CLIPPED);
106  assert(samRecord.get0BasedPosition() == TestValidate::READ7_POS + 2);
107  expectedCigar = "5S1M3S3H";
108  assert(samRecord.getCigar() == expectedCigar);
109  assert(samRecord.getSequence() == TestValidate::READ7_SEQ);
110  assert(samRecord.getQuality() == TestValidate::READ7_QUAL);
111  // Only 1 base, so the end is the same as start
112  assert(samRecord.get0BasedAlignmentEnd() == TestValidate::READ7_POS + 2);
113  assert(samRecord.getAlignmentLength() == 1);
114  assert(samRecord.get0BasedUnclippedStart() == TestValidate::READ7_UNCLIP_START);
115  assert(samRecord.get0BasedUnclippedEnd() == TestValidate::READ7_UNCLIP_END);
116 
117  assert(inSam.ReadRecord(samHeader, samRecord) == true);
118  validateRead8(samRecord);
119 
120  assert(inSam.ReadRecord(samHeader, samRecord) == true);
121  validateRead9(samRecord);
122 
123  assert(inSam.ReadRecord(samHeader, samRecord) == true);
124  validateRead10(samRecord);
125 }
126 
SamRecord::get0BasedPosition
int32_t get0BasedPosition()
Get the 0-based(BAM) leftmost position of the record.
Definition: SamRecord.cpp:1307
SamRecord::get0BasedAlignmentEnd
int32_t get0BasedAlignmentEnd()
Returns the 0-based inclusive rightmost position of the clipped sequence.
Definition: SamRecord.cpp:1455
SamFilter::CLIPPED
@ CLIPPED
Filtering clipped the read.
Definition: SamFilter.h:31
SamFile::ReadRecord
bool ReadRecord(SamFileHeader &header, SamRecord &record)
Reads the next record from the file & stores it in the passed in record.
Definition: SamFile.cpp:501
SamRecord::get0BasedUnclippedStart
int32_t get0BasedUnclippedStart()
Returns the 0-based inclusive left-most position adjusted for clipped bases.
Definition: SamRecord.cpp:1494
SamRecord::getAlignmentLength
int32_t getAlignmentLength()
Returns the length of the clipped sequence, returning 0 if the cigar is '*'.
Definition: SamRecord.cpp:1481
SamRecord::getQuality
const char * getQuality()
Returns the SAM formatted quality string (QUAL).
Definition: SamRecord.cpp:1626
SamRecord::get0BasedUnclippedEnd
int32_t get0BasedUnclippedEnd()
Returns the 0-based inclusive right-most position adjusted for clipped bases.
Definition: SamRecord.cpp:1514
FileType
Definition: FileType.h:24
SamRecord::getSequence
const char * getSequence()
Returns the SAM formatted sequence string (SEQ), translating the base as specified by setSequenceTran...
Definition: SamRecord.cpp:1556
SamFileHeader
This class allows a user to get/set the fields in a SAM/BAM Header.
Definition: SamFileHeader.h:35
SamRecord::getCigar
const char * getCigar()
Returns the SAM formatted CIGAR string.
Definition: SamRecord.cpp:1543
SamRecord
Class providing an easy to use interface to get/set/operate on the fields in a SAM/BAM record.
Definition: SamRecord.h:52
SamFile::OpenForRead
bool OpenForRead(const char *filename, SamFileHeader *header=NULL)
Open a sam/bam file for reading with the specified filename, determing the type of file and SAM/BAM b...
Definition: SamFile.cpp:93
SamFile::ReadHeader
bool ReadHeader(SamFileHeader &header)
Reads the header section from the file and stores it in the passed in header.
Definition: SamFile.cpp:437
SamFile
Allows the user to easily read/write a SAM/BAM file.
Definition: SamFile.h:36
SamFilter::softClip
static FilterStatus softClip(SamRecord &record, int32_t numFrontClips, int32_t numBackClips)
Soft clip the record from the front and/or the back.
Definition: SamFilter.cpp:155