libStatGen Software  1
SmithWaterman.cpp
1 /*
2  * Copyright (C) 2010 Regents of the University of Michigan
3  *
4  * This program is free software: you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation, either version 3 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program. If not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include <stdint.h>
21 #include "SmithWaterman.h"
22 
23 // put TEST below here, so that makedepend will see the .h, so that we
24 // can get a clean dependency for SmithWaterman.o, so that we can at least
25 // compile the header when we change it.
26 
27 #if defined(TEST)
28 
29 #include <getopt.h>
30 #include "Generic.h"
31 
32 // g++ -g -o testSW -DTEST SmithWaterman.cpp
33 //
34 // Smith-Waterman - test code uses a 256x256 array of int16
35 //
36 int swat(
37  bool showAllCases,
38  const char *A,
39  const char *qualities,
40  const char *B,
41  int direction,
42  const char *expectedCigarString,
43  int expectedSumQ
44 )
45 {
46  int allowedInsertDelete = 1024;
47  int errors = 0;
48 
49  // read length 256
50  // reference length 1024
51  SmithWaterman<256, 1024, uint16_t, const char *, const char *, const char *, uint32_t, uint32_t > sw(&A, &qualities, &B, strlen(A), strlen(B), allowedInsertDelete, direction);
52 
53  //
54  // now we align the read:
55  //
56  sw.populateH();
57  sw.populateAlignment();
58 
59  int sumQ = sw.getSumQ();
60 
61  CigarRoller cigar;
62  cigar.clear();
63  sw.rollCigar(cigar);
64 
65  const char *cigarStr = cigar.getString();
66 
67  //
68  // now we pretty print the results
69  //
70 
71  bool badCigar = false, badQuality = false;
72 
73  if (strcmp(cigarStr, expectedCigarString)!=0)
74  {
75  badCigar = true;
76  errors ++;
77  }
78 
79  if (sumQ != expectedSumQ)
80  {
81  badQuality = true;
82  errors ++;
83  }
84 
85 
86 
87  if (showAllCases || errors>0)
88  {
89  cout << "=============" << endl;
90  cout << " Read: " << A << endl;
91  cout << " Reference: " << B << endl;
92  cout << " Direction: " << direction << endl;
93  cout << "Max Cell: " << sw.maxCostValue << " located at " << sw.maxCostPosition << endl;
94  cout << "M: " << sw.m << " N: " << sw.n << endl;
95  cout << "Cigar String: " << cigarStr ;
96  if (badCigar)
97  cout << " (EXPECTED: " << expectedCigarString << ")";
98  cout << endl;
99  cout << " sumQ:" << sumQ;
100  if (badQuality)
101  cout << " (EXPECTED: " << expectedSumQ << ")";
102  cout << endl;
103 
104  if (strlen(B) < 100 || showAllCases)
105  sw.printH(false);
106 
107  for (vector<pair<int,int> >::iterator i = sw.alignment.begin(); i != sw.alignment.end(); i++) cout << *i << endl;
108 
109  cout << "=============" << endl << endl;
110  }
111 
112  delete cigarStr;
113 
114  return errors;
115 }
116 
117 
118 // test with Sequence 1 = ACACACTA
119 // Sequence 2 = AGCACACA
120 int main(int argc, const char **argv)
121 {
122  int errors = 0;
123 
124  bool showAllCasesFlag = false;
125  int opt;
126 
127  while ((opt = getopt(argc, (char **) argv, "v")) != -1)
128  {
129  switch (opt)
130  {
131  case 'v':
132  showAllCasesFlag = true;
133  break;
134  default:
135  cerr << "usage: testSW [-v]" << std::endl;
136  exit(1);
137  }
138  }
139 
140 
141  // CIGAR explanation - for backward SW runs, the corresponding
142  // CIGAR string is generated from the back of the string to the
143  // front. Recall that the soft clipping is only done at the
144  // "end" of the string, taking direction into account.
145 
146  // forwards - simple
147  errors += swat(showAllCasesFlag, "1234", "\"#$-", "1235", 1, "3M1S", 0);
148 
149  // backwards - simple
150  errors += swat(showAllCasesFlag, "1234", "\"#$-", "1235", -1, "4M", 12);
151 
152  // backwards - soft left clip
153  errors += swat(showAllCasesFlag, "1234", "\"#$-", "0234", -1, "1S3M", 0);
154 
155  // delete in read (arg 1) - forward
156  errors += swat(showAllCasesFlag, "123467890", "\"#$%^&*()-", "1234567890", +1, "4M1D5M", 50);
157 
158  // insert in read (arg 1) - forward
159  errors += swat(showAllCasesFlag, "1234556789", "\"#$%^&*()-", "1234567890", +1, "5M1I4M", 50);
160 
161  // delete in read (arg 1) - backward
162  errors += swat(showAllCasesFlag, "X123467890", "#\"#$%^&*()-", "1234567890", -1, "1S4M1D5M", 50);
163 
164  // insert in read (arg 1) - backward
165  errors += swat(showAllCasesFlag, "1234556789", "\"#$%^&*()-", "0123456789", -1, "4M1I5M", 50);
166 
167  // insert in read (arg 1) - backward
168  errors += swat(showAllCasesFlag, "X1223456789", "00000000000", "00123456789", -1, "1S1M1I8M", 50);
169 
170  // insert in read (arg 1) - backward
171  errors += swat(showAllCasesFlag, "XY1223456789", "000000000000", "000123456789", -1, "2S1M1I8M", 50);
172 
173  // forward - soft right clip of 2 bases - sumQ should be 0
174  errors += swat(showAllCasesFlag, "123456700", "\"#$%^&*()-", "123456789", +1, "7M2S", 0);
175 
176  // insert in read (arg 1) - forward w/2 mismatches at end
177  errors += swat(showAllCasesFlag, "1023456700", "\"#$%^&*()-", "123456789", +1, "1M1I6M2S", 50);
178 
179  // insert in read (arg 1) - forward w/2 mismatches at end
180  errors += swat(showAllCasesFlag, "CTCCACCTCCCGGTT", "111111111111111", "TCCACCTCCCAGGTT", -1, "1S10M1D4M", 50);
181 
182  //
183  errors += swat(showAllCasesFlag, "1234", "0000", "12345", +1, "4M", 0);
184 
185  //
186  errors += swat(showAllCasesFlag, "1234X", "00000", "12345", +1, "4M1S", 0);
187 
188  //
189  errors += swat(showAllCasesFlag, "4321", "0000", "7654321", -1, "4M", 0);
190 
191  //
192  errors += swat(showAllCasesFlag, "X4321", "00000", "7654321", -1, "1S4M", 0);
193 
194  //
195  errors += swat(showAllCasesFlag, "X432A10", "0000000", "76543210", -1, "1S3M1I2M", 50);
196 
197  //
198  errors += swat(showAllCasesFlag, "1345", "0000", "12345", -1, "1M1D3M", 50);
199 
200  errors += swat(showAllCasesFlag, "45689", "00000", "1234567890", -1, "3M1D2M", 50);
201 
202 // errors += swat(showAllCasesFlag, "AATAATTTTTTATATACAGATCGCTGTAGAGTGTAGTTATAGTATGATTCCAACTTTTATTTCTTTCATGACTAATTATATGTATACATGTGCCATGTTGGTGTGCTG", "000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", "TCCAATGTAGGGCTGTTATAAACAGTGTTGATACATATGTTTTTGTATAAGTCTTTGTTGAATACATGCTTTCATTTTTGTAGGGTATATGTCCAGGAATTAAATTTTTGCATTATTGGGGAAGTTCAAACGTAGATCAGTAGATGTTCCCAAATGATTTTCAGGATATGTATCCATGTAAATTCCTACCAGCAATGCAGGAGAATTCCAATTGCCCATGTTCTAATCAGAATATTGTTATATCCTAAGACTAATTTTAAATATTCTGATGGGTGTAGAGTGGAGGCATAGTATGATTTCAACTTGTATTTCTTTCATGACTAATTATCTTCTATGTTAATTGTTATTTTGTATGTTTATTGCAAAGTGCCTATCCAGAATTTTTGTCTATAATTTTGTTGTGCTGTCTCTTGCTTTATGAATTTTATAGGATTCTTAATATTATAATTGAGTTATCTTTCTTTTTTATTATTATTATTATACTTTAAGTTTTAGGGTATATGTGCACAACGTGCAAGTTTGTCACATATGTATACATGTGCCATGTTGGTGTGCTGCACCCATTAACTCATCATTTAGCATTAGGTATATCTCCTAATGCTATCCCTTCCTCCTCCCCCCACCCCACAACAGTCCCCGGTGTGTGATGTTCCCCTGCCTTTGTCCTCTTTCTTATACTTGCATGAGCAATCTCCTCAAACTGATACTTGCCTTTTTTGTCCTTGGTGTGGTTTGGCTCTGTGTTCCCACCCAAATCTTCATAATACCCATGTGCCAAGGGTGGGACTGGGTGGAGGTAATTGGGTCATGGGGATGGTTTCCCTCATACTATTATGATAGTGAGTGTTTTCACGAGACCTGATGGTTTTATAACTGTGTGGCATTTCCCTTGCTTCCACTCACTCCATCCTGCCACCCTGTGAAGAAGGTGCCTGCTTCTCCTTTGGTTACTGCTATGATTGTAAGTTTCCTGAGGCCTCCCCAGCAACGCAAAACTGTGAATCAATTAAACCTTTTTCCTTTATAAATTACTAAGTCTTGGGTATTTCTTCATAGTGTTGTGAGCATAGACTAAAACAGTAAGTTGTTACCAGGAGTGGGGTACTGCTGTAAGATAACTGAGAATGTGAAAGTGACTTAGGAACTAGGTAATGAGCAGAGGTTGGAACAGTTTAAAAGGCTCAGAAGAAGACAGAAAGATGTGGGAAAGTTTGGA", -1, "77M200D31M", 50);
203 
204  errors += swat(showAllCasesFlag, "TTAGAATGCTATTGTGTTTGGAGATTTGAGGAAAGTGGGCGTGAAGACTTAGTGTTCATTTCCTCAACCTCTCTCTGTGTGAACATACGTCATCGGTCAGAAATTGGG", "000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000", "CCGAGATTGTGCCATTGCACTCCTGCCTGGGTAACAGAGTCAGACCCTGTCTCAAAAAAAAAAAAAAAAAAAAAAAAGATTAGGTTTTATAGATGGAAAATTCACAGCTCTCTCCAGATCAGAAATCTCCAAGAGTAAATTAGTGTCTTAAAGGGGTTGTAATAACTTTCCTATGTGACTAAGTGCATTATTAATCAATTTTTCTATGATCAAGTACTCCTTTACATACCTGCTAATACAATTTTTGATATGAAATCAGTCCTAGAGGGAATCAATGTAAGATACAGACTTGATGAGTGCTTGCAGTTTTTTATTGACAATCTGAAGAATGACTTGACTCTAAATTGCAGCTCAAGGCTTAGAATGCTATTGTGTTTGGAGATTTGAGGAAAGTGGGCGTGAAGACTTAGTGTTCATTTCCTCAACCTCTCTCTGTGTGAACATACAGGAATCAAATCTGTCTAGCCTCTCTTTTTGGCAAGGTTAAGAACAATTCCACTTCATCCTAATCCCAATGATTCCTGCCGACCCTCTTCCAAAAACTATTTAAAGACATGTTCTTCAAAGTTATATTTGTCTTTCCTTCAGGGAGAAAAAGAATACCAATCACTTATAATATGGAAACTAGCAGAAATGGGTCACATAAGTCATCTGTCAGAAATTGGGAAAATAGAGTAGGTCAGTCTTTCCAGTCATGGTACTTTTACCTTCAATCA", -1, "88M200D20M", 50);
205 
206  // prefix TTAGAATGCTATTGTGTTTGGAGATTTGAGGAAAGTGGGCGTGAAGACTTAGTGTTCATTTCCTCAACCTCTCTCTGTGTGAACATAC
207  // suffix GTCATCTGTCAGAAATTGGGA
208  cout << endl << "Total Errors found: " << errors << endl;
209 }
210 #endif
CigarRoller::clear
void clear()
Clear this object so that it has no Cigar Operations.
Definition: CigarRoller.cpp:325
CigarRoller::getString
const char * getString()
Get the string reprentation of the Cigar operations in this object, caller must delete the returned v...
Definition: CigarRoller.cpp:272
CigarRoller
The purpose of this class is to provide accessors for setting, updating, modifying the CIGAR object....
Definition: CigarRoller.h:67
SmithWaterman
Definition: SmithWaterman.h:128