libStatGen Software  1
SamFile.cpp
1 /*
2  * Copyright (C) 2010 Regents of the University of Michigan
3  *
4  * This program is free software: you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation, either version 3 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program. If not, see <http://www.gnu.org/licenses/>.
16  */
17 #include <stdexcept>
18 #include "SamFile.h"
19 #include "SamFileHeader.h"
20 #include "SamRecord.h"
21 #include "BamInterface.h"
22 #include "SamInterface.h"
23 #include "BgzfFileType.h"
24 
25 // Constructor, init variables.
27  : myStatus()
28 {
29  init();
30  resetFile();
31 }
32 
33 
34 // Constructor, init variables.
36  : myStatus(errorHandlingType)
37 {
38  init();
39  resetFile();
40 }
41 
42 
43 // Constructor, init variables and open the specified file based on the
44 // specified mode (READ/WRITE).
45 SamFile::SamFile(const char* filename, OpenType mode)
46  : myStatus()
47 {
48  init(filename, mode, NULL);
49 }
50 
51 
52 // Constructor, init variables and open the specified file based on the
53 // specified mode (READ/WRITE). Default is READ..
54 SamFile::SamFile(const char* filename, OpenType mode,
55  ErrorHandler::HandlingType errorHandlingType)
56  : myStatus(errorHandlingType)
57 {
58  init(filename, mode, NULL);
59 }
60 
61 
62 // Constructor, init variables and open the specified file based on the
63 // specified mode (READ/WRITE).
64 SamFile::SamFile(const char* filename, OpenType mode, SamFileHeader* header)
65  : myStatus()
66 {
67  init(filename, mode, header);
68 }
69 
70 
71 // Constructor, init variables and open the specified file based on the
72 // specified mode (READ/WRITE). Default is READ..
73 SamFile::SamFile(const char* filename, OpenType mode,
74  ErrorHandler::HandlingType errorHandlingType,
75  SamFileHeader* header)
76  : myStatus(errorHandlingType)
77 {
78  init(filename, mode, header);
79 }
80 
81 
83 {
84  resetFile();
85  if(myStatistics != NULL)
86  {
87  delete myStatistics;
88  }
89 }
90 
91 
92 // Open a sam/bam file for reading with the specified filename.
93 bool SamFile::OpenForRead(const char * filename, SamFileHeader* header)
94 {
95  // Reset for any previously operated on files.
96  resetFile();
97 
98  int lastchar = 0;
99 
100  while (filename[lastchar] != 0) lastchar++;
101 
102  // If at least one character, check for '-'.
103  if((lastchar >= 1) && (filename[0] == '-'))
104  {
105  // Read from stdin - determine type of file to read.
106  // Determine if compressed bam.
107  if(strcmp(filename, "-.bam") == 0)
108  {
109  // Compressed bam - open as bgzf.
110  // -.bam is the filename, read compressed bam from stdin
111  filename = "-";
112 
113  myFilePtr = new InputFile;
114  // support recover mode - this switches in a reader
115  // capable of recovering from bad BGZF compression blocks.
116  myFilePtr->setAttemptRecovery(myAttemptRecovery);
117  myFilePtr->openFile(filename, "rb", InputFile::BGZF);
118 
119  myInterfacePtr = new BamInterface;
120 
121  // Read the magic string.
122  char magic[4];
123  ifread(myFilePtr, magic, 4);
124  }
125  else if(strcmp(filename, "-.ubam") == 0)
126  {
127  // uncompressed BAM File.
128  // -.ubam is the filename, read uncompressed bam from stdin.
129  // uncompressed BAM is still compressed with BGZF, but using
130  // compression level 0, so still open as BGZF since it has a
131  // BGZF header.
132  filename = "-";
133 
134  // Uncompressed, so do not require the eof block.
135 #ifdef __ZLIB_AVAILABLE__
136  BgzfFileType::setRequireEofBlock(false);
137 #endif
138  myFilePtr = ifopen(filename, "rb", InputFile::BGZF);
139 
140  myInterfacePtr = new BamInterface;
141 
142  // Read the magic string.
143  char magic[4];
144  ifread(myFilePtr, magic, 4);
145  }
146  else if((strcmp(filename, "-") == 0) || (strcmp(filename, "-.sam") == 0))
147  {
148  // SAM File.
149  // read sam from stdin
150  filename = "-";
151  myFilePtr = ifopen(filename, "rb", InputFile::UNCOMPRESSED);
152  myInterfacePtr = new SamInterface;
153  }
154  else
155  {
156  std::string errorMessage = "Invalid SAM/BAM filename, ";
157  errorMessage += filename;
158  errorMessage += ". From stdin, can only be '-', '-.sam', '-.bam', or '-.ubam'";
159  myStatus.setStatus(SamStatus::FAIL_IO, errorMessage.c_str());
160  delete myFilePtr;
161  myFilePtr = NULL;
162  return(false);
163  }
164  }
165  else
166  {
167  // Not from stdin. Read the file to determine the type.
168 
169  myFilePtr = new InputFile;
170 
171  // support recovery mode - this conditionally enables a reader
172  // capable of recovering from bad BGZF compression blocks.
173  myFilePtr->setAttemptRecovery(myAttemptRecovery);
174  bool rc = myFilePtr->openFile(filename, "rb", InputFile::DEFAULT);
175 
176  if (rc == false)
177  {
178  std::string errorMessage = "Failed to Open ";
179  errorMessage += filename;
180  errorMessage += " for reading";
181  myStatus.setStatus(SamStatus::FAIL_IO, errorMessage.c_str());
182  delete myFilePtr;
183  myFilePtr = NULL;
184  return(false);
185  }
186 
187  char magic[4];
188  ifread(myFilePtr, magic, 4);
189 
190  if (magic[0] == 'B' && magic[1] == 'A' && magic[2] == 'M' &&
191  magic[3] == 1)
192  {
193  myInterfacePtr = new BamInterface;
194  // Set that it is a bam file open for reading. This is needed to
195  // determine if an index file can be used.
196  myIsBamOpenForRead = true;
197  }
198  else
199  {
200  // Not a bam, so rewind to the beginning of the file so it
201  // can be read.
202  ifrewind(myFilePtr);
203  myInterfacePtr = new SamInterface;
204  }
205  }
206 
207  // File is open for reading.
208  myIsOpenForRead = true;
209 
210  // Read the header if one was passed in.
211  if(header != NULL)
212  {
213  return(ReadHeader(*header));
214  }
215 
216  // Successfully opened the file.
218  return(true);
219 }
220 
221 
222 // Open a sam/bam file for writing with the specified filename.
223 bool SamFile::OpenForWrite(const char * filename, SamFileHeader* header)
224 {
225  // Reset for any previously operated on files.
226  resetFile();
227 
228  int lastchar = 0;
229  while (filename[lastchar] != 0) lastchar++;
230  if (lastchar >= 4 &&
231  filename[lastchar - 4] == 'u' &&
232  filename[lastchar - 3] == 'b' &&
233  filename[lastchar - 2] == 'a' &&
234  filename[lastchar - 1] == 'm')
235  {
236  // BAM File.
237  // if -.ubam is the filename, write uncompressed bam to stdout
238  if((lastchar == 6) && (filename[0] == '-') && (filename[1] == '.'))
239  {
240  filename = "-";
241  }
242 
243  myFilePtr = ifopen(filename, "wb0", InputFile::BGZF);
244 
245  myInterfacePtr = new BamInterface;
246  }
247  else if (lastchar >= 3 &&
248  filename[lastchar - 3] == 'b' &&
249  filename[lastchar - 2] == 'a' &&
250  filename[lastchar - 1] == 'm')
251  {
252  // BAM File.
253  // if -.bam is the filename, write compressed bam to stdout
254  if((lastchar == 5) && (filename[0] == '-') && (filename[1] == '.'))
255  {
256  filename = "-";
257  }
258  myFilePtr = ifopen(filename, "wb", InputFile::BGZF);
259 
260  myInterfacePtr = new BamInterface;
261  }
262  else
263  {
264  // SAM File
265  // if - (followed by anything is the filename,
266  // write uncompressed sam to stdout
267  if((lastchar >= 1) && (filename[0] == '-'))
268  {
269  filename = "-";
270  }
271  myFilePtr = ifopen(filename, "wb", InputFile::UNCOMPRESSED);
272 
273  myInterfacePtr = new SamInterface;
274  }
275 
276  if (myFilePtr == NULL)
277  {
278  std::string errorMessage = "Failed to Open ";
279  errorMessage += filename;
280  errorMessage += " for writing";
281  myStatus.setStatus(SamStatus::FAIL_IO, errorMessage.c_str());
282  return(false);
283  }
284 
285  myIsOpenForWrite = true;
286 
287  // Write the header if one was passed in.
288  if(header != NULL)
289  {
290  return(WriteHeader(*header));
291  }
292 
293  // Successfully opened the file.
295  return(true);
296 }
297 
298 
299 // Read BAM Index file.
300 bool SamFile::ReadBamIndex(const char* bamIndexFilename)
301 {
302  // Cleanup a previously setup index.
303  if(myBamIndex != NULL)
304  {
305  delete myBamIndex;
306  myBamIndex = NULL;
307  }
308 
309  // Create a new bam index.
310  myBamIndex = new BamIndex();
311  SamStatus::Status indexStat = myBamIndex->readIndex(bamIndexFilename);
312 
313  if(indexStat != SamStatus::SUCCESS)
314  {
315  std::string errorMessage = "Failed to read the bam Index file: ";
316  errorMessage += bamIndexFilename;
317  myStatus.setStatus(indexStat, errorMessage.c_str());
318  delete myBamIndex;
319  myBamIndex = NULL;
320  return(false);
321  }
323  return(true);
324 }
325 
326 
327 // Read BAM Index file.
329 {
330  if(myFilePtr == NULL)
331  {
332  // Can't read the bam index file because the BAM file has not yet been
333  // opened, so we don't know the base filename for the index file.
334  std::string errorMessage = "Failed to read the bam Index file -"
335  " the BAM file needs to be read first in order to determine"
336  " the index filename.";
337  myStatus.setStatus(SamStatus::FAIL_ORDER, errorMessage.c_str());
338  return(false);
339  }
340 
341  const char* bamBaseName = myFilePtr->getFileName();
342 
343  std::string indexName = bamBaseName;
344  indexName += ".bai";
345 
346  bool foundFile = true;
347  try
348  {
349  if(ReadBamIndex(indexName.c_str()) == false)
350  {
351  foundFile = false;
352  }
353  }
354  catch (std::exception&)
355  {
356  foundFile = false;
357  }
358 
359  // Check to see if the index file was found.
360  if(!foundFile)
361  {
362  // Not found - try without the bam extension.
363  // Locate the start of the bam extension
364  size_t startExt = indexName.find(".bam");
365  if(startExt == std::string::npos)
366  {
367  // Could not find the .bam extension, so just return false since the
368  // call to ReadBamIndex set the status.
369  return(false);
370  }
371  // Remove ".bam" and try reading the index again.
372  indexName.erase(startExt, 4);
373  return(ReadBamIndex(indexName.c_str()));
374  }
375  return(true);
376 }
377 
378 
379 // Sets the reference to the specified genome sequence object.
381 {
382  myRefPtr = reference;
383 }
384 
385 
386 // Set the type of sequence translation to use when reading the sequence.
388 {
389  myReadTranslation = translation;
390 }
391 
392 
393 // Set the type of sequence translation to use when writing the sequence.
395 {
396  myWriteTranslation = translation;
397 }
398 
399 // Close the file if there is one open.
401 {
402  // Resetting the file will close it if it is open, and
403  // will reset all other variables.
404  resetFile();
405 }
406 
407 
408 // Returns whether or not the file has been opened.
409 // return: int - true = open; false = not open.
411 {
412  if (myFilePtr != NULL)
413  {
414  // File Pointer is set, so return if it is open.
415  return(myFilePtr->isOpen());
416  }
417  // File pointer is not set, so return false, not open.
418  return false;
419 }
420 
421 
422 // Returns whether or not the end of the file has been reached.
423 // return: int - true = EOF; false = not eof.
425 {
426  if (myFilePtr != NULL)
427  {
428  // File Pointer is set, so return if eof.
429  return(ifeof(myFilePtr));
430  }
431  // File pointer is not set, so return true, eof.
432  return true;
433 }
434 
435 
436 // Read the header from the currently opened file.
438 {
440  if(myIsOpenForRead == false)
441  {
442  // File is not open for read
444  "Cannot read header since the file is not open for reading");
445  return(false);
446  }
447 
448  if(myHasHeader == true)
449  {
450  // The header has already been read.
452  "Cannot read header since it has already been read.");
453  return(false);
454  }
455 
456  if(myInterfacePtr->readHeader(myFilePtr, header, myStatus))
457  {
458  // The header has now been successfully read.
459  myHasHeader = true;
460  return(true);
461  }
462  return(false);
463 }
464 
465 
466 // Write the header to the currently opened file.
468 {
470  if(myIsOpenForWrite == false)
471  {
472  // File is not open for write
473  // -OR-
474  // The header has already been written.
476  "Cannot write header since the file is not open for writing");
477  return(false);
478  }
479 
480  if(myHasHeader == true)
481  {
482  // The header has already been written.
484  "Cannot write header since it has already been written");
485  return(false);
486  }
487 
488  if(myInterfacePtr->writeHeader(myFilePtr, header, myStatus))
489  {
490  // The header has now been successfully written.
491  myHasHeader = true;
492  return(true);
493  }
494 
495  // return the status.
496  return(false);
497 }
498 
499 
500 // Read a record from the currently opened file.
502  SamRecord& record)
503 {
505 
506  if(myIsOpenForRead == false)
507  {
508  // File is not open for read
510  "Cannot read record since the file is not open for reading");
511  throw(std::runtime_error("SOFTWARE BUG: trying to read a SAM/BAM record prior to opening the file."));
512  return(false);
513  }
514 
515  if(myHasHeader == false)
516  {
517  // The header has not yet been read.
518  // TODO - maybe just read the header.
520  "Cannot read record since the header has not been read.");
521  throw(std::runtime_error("SOFTWARE BUG: trying to read a SAM/BAM record prior to reading the header."));
522  return(false);
523  }
524 
525  // Check to see if a new region has been set. If so, determine the
526  // chunks for that region.
527  if(myNewSection)
528  {
529  if(!processNewSection(header))
530  {
531  // Failed processing a new section. Could be an
532  // order issue like the file not being open or the
533  // indexed file not having been read.
534  // processNewSection sets myStatus with the failure reason.
535  return(false);
536  }
537  }
538 
539  // Read until a record is not successfully read or there are no more
540  // requested records.
541  while(myStatus == SamStatus::SUCCESS)
542  {
543  record.setReference(myRefPtr);
544  record.setSequenceTranslation(myReadTranslation);
545 
546  // If reading by index, this method will setup to ensure it is in
547  // the correct position for the next record (if not already there).
548  // Sets myStatus if it could not move to a good section.
549  // Just returns true if it is not setup to read by index.
550  if(!ensureIndexedReadPosition())
551  {
552  // Either there are no more records in the section
553  // or it failed to move to the right section, so there
554  // is nothing more to read, stop looping.
555  break;
556  }
557 
558  // File is open for reading and the header has been read, so read the
559  // next record.
560  myInterfacePtr->readRecord(myFilePtr, header, record, myStatus);
562  {
563  // Failed to read the record, so break out of the loop.
564  break;
565  }
566 
567  // Successfully read a record, so check if we should filter it.
568  // First check if it is out of the section. Returns true
569  // if not reading by sections, returns false if the record
570  // is outside of the section. Sets status to NO_MORE_RECS if
571  // there is nothing left ot read in the section.
572  if(!checkRecordInSection(record))
573  {
574  // The record is not in the section.
575  // The while loop will detect if NO_MORE_RECS was set.
576  continue;
577  }
578 
579  // Check the flag for required/excluded flags.
580  uint16_t flag = record.getFlag();
581  if((flag & myRequiredFlags) != myRequiredFlags)
582  {
583  // The record does not conatain all required flags, so
584  // continue looking.
585  continue;
586  }
587  if((flag & myExcludedFlags) != 0)
588  {
589  // The record contains an excluded flag, so continue looking.
590  continue;
591  }
592 
593  //increment the record count.
594  myRecordCount++;
595 
596  if(myStatistics != NULL)
597  {
598  // Statistics should be updated.
599  myStatistics->updateStatistics(record);
600  }
601 
602  // Successfully read the record, so check the sort order.
603  if(!validateSortOrder(record, header))
604  {
605  // ValidateSortOrder sets the status on a failure.
606  return(false);
607  }
608  return(true);
609 
610  } // End while loop that checks if a desired record is found or failure.
611 
612  // Return true if a record was found.
613  return(myStatus == SamStatus::SUCCESS);
614 }
615 
616 
617 
618 // Write a record to the currently opened file.
620  SamRecord& record)
621 {
622  if(myIsOpenForWrite == false)
623  {
624  // File is not open for writing
626  "Cannot write record since the file is not open for writing");
627  return(false);
628  }
629 
630  if(myHasHeader == false)
631  {
632  // The header has not yet been written.
634  "Cannot write record since the header has not been written");
635  return(false);
636  }
637 
638  // Before trying to write the record, validate the sort order.
639  if(!validateSortOrder(record, header))
640  {
641  // Not sorted like it is supposed to be, do not write the record
643  "Cannot write the record since the file is not properly sorted.");
644  return(false);
645  }
646 
647  if(myRefPtr != NULL)
648  {
649  record.setReference(myRefPtr);
650  }
651 
652  // File is open for writing and the header has been written, so write the
653  // record.
654  myStatus = myInterfacePtr->writeRecord(myFilePtr, header, record,
655  myWriteTranslation);
656 
658  {
659  // A record was successfully written, so increment the record count.
660  myRecordCount++;
661  return(true);
662  }
663  return(false);
664 }
665 
666 
667 // Set the flag to validate that the file is sorted as it is read/written.
668 // Must be called after the file has been opened.
670 {
671  mySortedType = sortType;
672 }
673 
674 
675 // Return the number of records that have been read/written so far.
677 {
678  return(myRecordCount);
679 }
680 
681 
682 // Sets what part of the SamFile should be read.
683 bool SamFile::SetReadSection(int32_t refID)
684 {
685  // No start/end specified, so set back to default -1.
686  return(SetReadSection(refID, -1, -1));
687 }
688 
689 
690 
691 // Sets what part of the SamFile should be read.
692 bool SamFile::SetReadSection(const char* refName)
693 {
694  // No start/end specified, so set back to default -1.
695  return(SetReadSection(refName, -1, -1));
696 }
697 
698 
699 // Sets what part of the BAM file should be read.
700 bool SamFile::SetReadSection(int32_t refID, int32_t start, int32_t end,
701  bool overlap)
702 {
703  // If there is not a BAM file open for reading, return failure.
704  // Opening a new file clears the read section, so it must be
705  // set after the file is opened.
706  if(!myIsBamOpenForRead)
707  {
708  // There is not a BAM file open for reading.
710  "Cannot set section since there is no bam file open");
711  return(false);
712  }
713 
714  myNewSection = true;
715  myOverlapSection = overlap;
716  myStartPos = start;
717  myEndPos = end;
718  myRefID = refID;
719  myRefName.clear();
720  myChunksToRead.clear();
721  // Reset the end of the current chunk. We are resetting our read, so
722  // we no longer have a "current chunk" that we are reading.
723  myCurrentChunkEnd = 0;
725 
726  // Reset the sort order criteria since we moved around in the file.
727  myPrevCoord = -1;
728  myPrevRefID = 0;
729  myPrevReadName.Clear();
730 
731  return(true);
732 }
733 
734 
735 // Sets what part of the BAM file should be read.
736 bool SamFile::SetReadSection(const char* refName, int32_t start, int32_t end,
737  bool overlap)
738 {
739  // If there is not a BAM file open for reading, return failure.
740  // Opening a new file clears the read section, so it must be
741  // set after the file is opened.
742  if(!myIsBamOpenForRead)
743  {
744  // There is not a BAM file open for reading.
746  "Cannot set section since there is no bam file open");
747  return(false);
748  }
749 
750  myNewSection = true;
751  myOverlapSection = overlap;
752  myStartPos = start;
753  myEndPos = end;
754  if((strcmp(refName, "") == 0) || (strcmp(refName, "*") == 0))
755  {
756  // No Reference name specified, so read just the "-1" entries.
757  myRefID = BamIndex::REF_ID_UNMAPPED;
758  }
759  else
760  {
761  // save the reference name and revert the reference ID to unknown
762  // so it will be calculated later.
763  myRefName = refName;
764  myRefID = BamIndex::REF_ID_ALL;
765  }
766  myChunksToRead.clear();
767  // Reset the end of the current chunk. We are resetting our read, so
768  // we no longer have a "current chunk" that we are reading.
769  myCurrentChunkEnd = 0;
771 
772  // Reset the sort order criteria since we moved around in the file.
773  myPrevCoord = -1;
774  myPrevRefID = 0;
775  myPrevReadName.Clear();
776 
777  return(true);
778 }
779 
780 
781 void SamFile::SetReadFlags(uint16_t requiredFlags, uint16_t excludedFlags)
782 {
783  myRequiredFlags = requiredFlags;
784  myExcludedFlags = excludedFlags;
785 }
786 
787 
788 // Get the number of mapped reads in the specified reference id.
789 // Returns -1 for out of range refIDs.
791 {
792  // The bam index must have already been read.
793  if(myBamIndex == NULL)
794  {
796  "Cannot get num mapped reads from the index until it has been read.");
797  return(false);
798  }
799  return(myBamIndex->getNumMappedReads(refID));
800 }
801 
802 
803 // Get the number of unmapped reads in the specified reference id.
804 // Returns -1 for out of range refIDs.
806 {
807  // The bam index must have already been read.
808  if(myBamIndex == NULL)
809  {
811  "Cannot get num unmapped reads from the index until it has been read.");
812  return(false);
813  }
814  return(myBamIndex->getNumUnMappedReads(refID));
815 }
816 
817 
818 // Get the number of mapped reads in the specified reference id.
819 // Returns -1 for out of range references.
820 int32_t SamFile::getNumMappedReadsFromIndex(const char* refName,
821  SamFileHeader& header)
822 {
823  // The bam index must have already been read.
824  if(myBamIndex == NULL)
825  {
827  "Cannot get num mapped reads from the index until it has been read.");
828  return(false);
829  }
830  int32_t refID = BamIndex::REF_ID_UNMAPPED;
831  if((strcmp(refName, "") != 0) && (strcmp(refName, "*") != 0))
832  {
833  // Reference name specified, so read just the "-1" entries.
834  refID = header.getReferenceID(refName);
835  }
836  return(myBamIndex->getNumMappedReads(refID));
837 }
838 
839 
840 // Get the number of unmapped reads in the specified reference id.
841 // Returns -1 for out of range refIDs.
842 int32_t SamFile::getNumUnMappedReadsFromIndex(const char* refName,
843  SamFileHeader& header)
844 {
845  // The bam index must have already been read.
846  if(myBamIndex == NULL)
847  {
849  "Cannot get num unmapped reads from the index until it has been read.");
850  return(false);
851  }
852  int32_t refID = BamIndex::REF_ID_UNMAPPED;
853  if((strcmp(refName, "") != 0) && (strcmp(refName, "*") != 0))
854  {
855  // Reference name specified, so read just the "-1" entries.
856  refID = header.getReferenceID(refName);
857  }
858  return(myBamIndex->getNumUnMappedReads(refID));
859 }
860 
861 
862 // Returns the number of bases in the passed in read that overlap the
863 // region that is currently set.
864 uint32_t SamFile::GetNumOverlaps(SamRecord& samRecord)
865 {
866  if(myRefPtr != NULL)
867  {
868  samRecord.setReference(myRefPtr);
869  }
870  samRecord.setSequenceTranslation(myReadTranslation);
871 
872  // Get the overlaps in the sam record for the region currently set
873  // for this file.
874  return(samRecord.getNumOverlaps(myStartPos, myEndPos));
875 }
876 
877 
878 void SamFile::GenerateStatistics(bool genStats)
879 {
880  if(genStats)
881  {
882  if(myStatistics == NULL)
883  {
884  // Want to generate statistics, but do not yet have the
885  // structure for them, so create one.
886  myStatistics = new SamStatistics();
887  }
888  }
889  else
890  {
891  // Do not generate statistics, so if myStatistics is not NULL,
892  // delete it.
893  if(myStatistics != NULL)
894  {
895  delete myStatistics;
896  myStatistics = NULL;
897  }
898  }
899 
900 }
901 
902 
904 {
905  return(myBamIndex);
906 }
907 
908 
909 // initialize.
910 void SamFile::init()
911 {
912  myFilePtr = NULL;
913  myInterfacePtr = NULL;
914  myStatistics = NULL;
915  myBamIndex = NULL;
916  myRefPtr = NULL;
917  myReadTranslation = SamRecord::NONE;
918  myWriteTranslation = SamRecord::NONE;
919  myAttemptRecovery = false;
920  myRequiredFlags = 0;
921  myExcludedFlags = 0;
922 }
923 
924 
925 void SamFile::init(const char* filename, OpenType mode, SamFileHeader* header)
926 {
927  init();
928 
929  resetFile();
930 
931  bool openStatus = true;
932  if(mode == READ)
933  {
934  // open the file for read.
935  openStatus = OpenForRead(filename, header);
936  }
937  else
938  {
939  // open the file for write.
940  openStatus = OpenForWrite(filename, header);
941  }
942  if(!openStatus)
943  {
944  // Failed to open the file - print error and abort.
945  fprintf(stderr, "%s\n", GetStatusMessage());
946  std::cerr << "FAILURE - EXITING!!!" << std::endl;
947  exit(-1);
948  }
949 }
950 
951 
952 // Reset variables for each file.
954 {
955  // Close the file.
956  if (myFilePtr != NULL)
957  {
958  // If we already have an open file, close it.
959  ifclose(myFilePtr);
960  myFilePtr = NULL;
961  }
962  if(myInterfacePtr != NULL)
963  {
964  delete myInterfacePtr;
965  myInterfacePtr = NULL;
966  }
967 
968  myIsOpenForRead = false;
969  myIsOpenForWrite = false;
970  myHasHeader = false;
971  mySortedType = UNSORTED;
972  myPrevReadName.Clear();
973  myPrevCoord = -1;
974  myPrevRefID = 0;
975  myRecordCount = 0;
977 
978  // Reset indexed bam values.
979  myIsBamOpenForRead = false;
980  myRefID = BamIndex::REF_ID_ALL;
981  myStartPos = -1;
982  myEndPos = -1;
983  myNewSection = false;
984  myOverlapSection = true;
985  myCurrentChunkEnd = 0;
986  myChunksToRead.clear();
987  if(myBamIndex != NULL)
988  {
989  delete myBamIndex;
990  myBamIndex = NULL;
991  }
992 
993  // If statistics are being generated, reset them.
994  if(myStatistics != NULL)
995  {
996  myStatistics->reset();
997  }
998 
999  myRefName.clear();
1000 }
1001 
1002 
1003 // Validate that the record is sorted compared to the previously read record
1004 // if there is one, according to the specified sort order.
1005 // If the sort order is UNSORTED, true is returned.
1007 {
1008  if(myRefPtr != NULL)
1009  {
1010  record.setReference(myRefPtr);
1011  }
1012  record.setSequenceTranslation(myReadTranslation);
1013 
1014  bool status = false;
1015  if(mySortedType == UNSORTED)
1016  {
1017  // Unsorted, so nothing to validate, just return true.
1018  status = true;
1019  }
1020  else
1021  {
1022  // Check to see if mySortedType is based on the header.
1023  if(mySortedType == FLAG)
1024  {
1025  // Determine the sorted type from what was read out of the header.
1026  mySortedType = getSortOrderFromHeader(header);
1027  }
1028 
1029  if(mySortedType == QUERY_NAME)
1030  {
1031  // Validate that it is sorted by query name.
1032  // Get the query name from the record.
1033  const char* readName = record.getReadName();
1034 
1035  // Check if it is sorted either in samtools way or picard's way.
1036  if((myPrevReadName.Compare(readName) > 0) &&
1037  (strcmp(myPrevReadName.c_str(), readName) > 0))
1038  {
1039  // return false.
1040  String errorMessage = "ERROR: File is not sorted by read name at record ";
1041  errorMessage += myRecordCount;
1042  errorMessage += "\n\tPrevious record was ";
1043  errorMessage += myPrevReadName;
1044  errorMessage += ", but this record is ";
1045  errorMessage += readName;
1047  errorMessage.c_str());
1048  status = false;
1049  }
1050  else
1051  {
1052  myPrevReadName = readName;
1053  status = true;
1054  }
1055  }
1056  else
1057  {
1058  // Validate that it is sorted by COORDINATES.
1059  // Get the leftmost coordinate and the reference index.
1060  int32_t refID = record.getReferenceID();
1061  int32_t coord = record.get0BasedPosition();
1062  // The unmapped reference id is at the end of a sorted file.
1063  if(refID == BamIndex::REF_ID_UNMAPPED)
1064  {
1065  // A new reference ID that is for the unmapped reads
1066  // is always valid.
1067  status = true;
1068  myPrevRefID = refID;
1069  myPrevCoord = coord;
1070  }
1071  else if(myPrevRefID == BamIndex::REF_ID_UNMAPPED)
1072  {
1073  // Previous reference ID was for unmapped reads, but the
1074  // current one is not, so this is not sorted.
1075  String errorMessage = "ERROR: File is not coordinate sorted at record ";
1076  errorMessage += myRecordCount;
1077  errorMessage += "\n\tPrevious record was unmapped, but this record is ";
1078  errorMessage += header.getReferenceLabel(refID) + ":" + coord;
1080  errorMessage.c_str());
1081  status = false;
1082  }
1083  else if(refID < myPrevRefID)
1084  {
1085  // Current reference id is less than the previous one,
1086  //meaning that it is not sorted.
1087  String errorMessage = "ERROR: File is not coordinate sorted at record ";
1088  errorMessage += myRecordCount;
1089  errorMessage += "\n\tPrevious record was ";
1090  errorMessage += header.getReferenceLabel(myPrevRefID) + ":" + myPrevCoord;
1091  errorMessage += ", but this record is ";
1092  errorMessage += header.getReferenceLabel(refID) + ":" + coord;
1094  errorMessage.c_str());
1095  status = false;
1096  }
1097  else
1098  {
1099  // The reference IDs are in the correct order.
1100  if(refID > myPrevRefID)
1101  {
1102  // New reference id, so set the previous coordinate to -1
1103  myPrevCoord = -1;
1104  }
1105 
1106  // Check the coordinates.
1107  if(coord < myPrevCoord)
1108  {
1109  // New Coord is less than the previous position.
1110  String errorMessage = "ERROR: File is not coordinate sorted at record ";
1111  errorMessage += myRecordCount;
1112  errorMessage += "\n\tPreviousRecord was ";
1113  errorMessage += header.getReferenceLabel(myPrevRefID) + ":" + myPrevCoord;
1114  errorMessage += ", but this record is ";
1115  errorMessage += header.getReferenceLabel(refID) + ":" + coord;
1117  errorMessage.c_str());
1118  status = false;
1119  }
1120  else
1121  {
1122  myPrevRefID = refID;
1123  myPrevCoord = coord;
1124  status = true;
1125  }
1126  }
1127  }
1128  }
1129 
1130  return(status);
1131 }
1132 
1133 
1134 SamFile::SortedType SamFile::getSortOrderFromHeader(SamFileHeader& header)
1135 {
1136  const char* tag = header.getSortOrder();
1137 
1138  // Default to unsorted since if it is not specified in the header
1139  // that is the value that should be used.
1140  SortedType headerSortOrder = UNSORTED;
1141  if(strcmp(tag, "queryname") == 0)
1142  {
1143  headerSortOrder = QUERY_NAME;
1144  }
1145  else if(strcmp(tag, "coordinate") == 0)
1146  {
1147  headerSortOrder = COORDINATE;
1148  }
1149  return(headerSortOrder);
1150 }
1151 
1152 
1153  bool SamFile::ensureIndexedReadPosition()
1154  {
1155  // If no sections are specified, return true.
1156  if(myRefID == BamIndex::REF_ID_ALL)
1157  {
1158  return(true);
1159  }
1160 
1161  // Check to see if we have more to read out of the current chunk.
1162  // By checking the current position in relation to the current
1163  // end chunk. If the current position is >= the end of the
1164  // current chunk, then we must see to the next chunk.
1165  uint64_t currentPos = iftell(myFilePtr);
1166  if(currentPos >= myCurrentChunkEnd)
1167  {
1168  // If there are no more chunks to process, return failure.
1169  if(myChunksToRead.empty())
1170  {
1172  return(false);
1173  }
1174 
1175  // There are more chunks left, so get the next chunk.
1176  Chunk newChunk = myChunksToRead.pop();
1177 
1178  // Move to the location of the new chunk if it is not adjacent
1179  // to the current chunk.
1180  if(newChunk.chunk_beg != currentPos)
1181  {
1182  // New chunk is not adjacent, so move to it.
1183  if(ifseek(myFilePtr, newChunk.chunk_beg, SEEK_SET) != true)
1184  {
1185  // seek failed, cannot retrieve next record, return failure.
1187  "Failed to seek to the next record");
1188  return(false);
1189  }
1190  }
1191  // Seek succeeded, set the end of the new chunk.
1192  myCurrentChunkEnd = newChunk.chunk_end;
1193  }
1194  return(true);
1195  }
1196 
1197 
1198 bool SamFile::checkRecordInSection(SamRecord& record)
1199 {
1200  bool recordFound = true;
1201  if(myRefID == BamIndex::REF_ID_ALL)
1202  {
1203  return(true);
1204  }
1205  // Check to see if it is in the correct reference/position.
1206  if(record.getReferenceID() != myRefID)
1207  {
1208  // Incorrect reference ID, return no more records.
1210  return(false);
1211  }
1212 
1213  // Found a record.
1214  recordFound = true;
1215 
1216  // If start/end position are set, verify that the alignment falls
1217  // within those.
1218  // If the alignment start is greater than the end of the region,
1219  // return NO_MORE_RECS.
1220  // Since myEndPos is Exclusive 0-based, anything >= myEndPos is outside
1221  // of the region.
1222  if((myEndPos != -1) && (record.get0BasedPosition() >= myEndPos))
1223  {
1225  return(false);
1226  }
1227 
1228  // We know the start is less than the end position, so the alignment
1229  // overlaps the region if the alignment end position is greater than the
1230  // start of the region.
1231  if((myStartPos != -1) && (record.get0BasedAlignmentEnd() < myStartPos))
1232  {
1233  // If it does not overlap the region, so go to the next
1234  // record...set recordFound back to false.
1235  recordFound = false;
1236  }
1237 
1238  if(!myOverlapSection)
1239  {
1240  // Needs to be fully contained. Not fully contained if
1241  // 1) the record start position is < the region start position.
1242  // or
1243  // 2) the end position is specified and the record end position
1244  // is greater than or equal to the region end position.
1245  // (equal to since the region is exclusive.
1246  if((record.get0BasedPosition() < myStartPos) ||
1247  ((myEndPos != -1) &&
1248  (record.get0BasedAlignmentEnd() >= myEndPos)))
1249  {
1250  // This record is not fully contained, so move on to the next
1251  // record.
1252  recordFound = false;
1253  }
1254  }
1255 
1256  return(recordFound);
1257 }
1258 
1259 
1260 bool SamFile::processNewSection(SamFileHeader &header)
1261 {
1262  myNewSection = false;
1263 
1264  // If there is no index file, return failure.
1265  if(myBamIndex == NULL)
1266  {
1267  // No bam index has been read.
1269  "Cannot read section since there is no index file open");
1270  throw(std::runtime_error("SOFTWARE BUG: trying to read a BAM record by section prior to opening the BAM Index file."));
1271  return(false);
1272  }
1273 
1274  // If there is not a BAM file open for reading, return failure.
1275  if(!myIsBamOpenForRead)
1276  {
1277  // There is not a BAM file open for reading.
1279  "Cannot read section since there is no bam file open");
1280  throw(std::runtime_error("SOFTWARE BUG: trying to read a BAM record by section without opening a BAM file."));
1281  return(false);
1282  }
1283 
1284  if(myHasHeader == false)
1285  {
1286  // The header has not yet been read.
1288  "Cannot read record since the header has not been read.");
1289  throw(std::runtime_error("SOFTWARE BUG: trying to read a BAM record by section prior to opening the header."));
1290  return(false);
1291  }
1292 
1293  // Indexed Bam open for read, so disable read buffering because iftell
1294  // will be used.
1295  // Needs to be done here after we already know that the header has been
1296  // read.
1297  myFilePtr->disableBuffering();
1298 
1299  myChunksToRead.clear();
1300  // Reset the end of the current chunk. We are resetting our read, so
1301  // we no longer have a "current chunk" that we are reading.
1302  myCurrentChunkEnd = 0;
1303 
1304  // Check to see if the read section was set based on the reference name
1305  // but not yet converted to reference id.
1306  if(!myRefName.empty())
1307  {
1308  myRefID = header.getReferenceID(myRefName.c_str());
1309  // Clear the myRefName length so this code is only executed once.
1310  myRefName.clear();
1311 
1312  // Check to see if a reference id was found.
1313  if(myRefID == SamReferenceInfo::NO_REF_ID)
1314  {
1316  return(false);
1317  }
1318  }
1319 
1320  // Get the chunks associated with this reference region.
1321  if(myBamIndex->getChunksForRegion(myRefID, myStartPos, myEndPos,
1322  myChunksToRead) == true)
1323  {
1325  }
1326  else
1327  {
1328  String errorMsg = "Failed to get the specified region, refID = ";
1329  errorMsg += myRefID;
1330  errorMsg += "; startPos = ";
1331  errorMsg += myStartPos;
1332  errorMsg += "; endPos = ";
1333  errorMsg += myEndPos;
1335  errorMsg);
1336  }
1337  return(true);
1338 }
1339 
1340 //
1341 // When the caller to SamFile::ReadRecord() catches an
1342 // exception, it may choose to call this method to resync
1343 // on the underlying binary stream.
1344 //
1345 // Arguments: a callback function that will requires length bytes
1346 // of data to validate a record header.
1347 //
1348 // The expected use case is to re-sync on the next probably valid
1349 // BAM record, so that we can resume reading even after detecting
1350 // a corrupted BAM file.
1351 //
1352 bool SamFile::attemptRecoverySync(bool (*checkSignature)(void *data) , int length)
1353 {
1354  if(myFilePtr==NULL) return false;
1355  // non-recovery aware objects will just return false:
1356  return myFilePtr->attemptRecoverySync(checkSignature, length);
1357 }
1358 
1359 // Default Constructor.
1361  : SamFile()
1362 {
1363 }
1364 
1365 
1366 // Constructor that opens the specified file for read.
1367 SamFileReader::SamFileReader(const char* filename)
1368  : SamFile(filename, READ)
1369 {
1370 }
1371 
1372 
1373 
1374 
1375 // Constructor that opens the specified file for read.
1376 SamFileReader::SamFileReader(const char* filename,
1377  ErrorHandler::HandlingType errorHandlingType)
1378  : SamFile(filename, READ, errorHandlingType)
1379 {
1380 }
1381 
1382 
1383 // Constructor that opens the specified file for read.
1384 SamFileReader::SamFileReader(const char* filename,
1385  SamFileHeader* header)
1386  : SamFile(filename, READ, header)
1387 {
1388 }
1389 
1390 
1391 // Constructor that opens the specified file for read.
1392 SamFileReader::SamFileReader(const char* filename,
1393  ErrorHandler::HandlingType errorHandlingType,
1394  SamFileHeader* header)
1395  : SamFile(filename, READ, errorHandlingType, header)
1396 {
1397 }
1398 
1399 
1400 SamFileReader::~SamFileReader()
1401 {
1402 }
1403 
1404 
1405 // Default Constructor.
1407  : SamFile()
1408 {
1409 }
1410 
1411 
1412 // Constructor that opens the specified file for write.
1413 SamFileWriter::SamFileWriter(const char* filename)
1414  : SamFile(filename, WRITE)
1415 {
1416 }
1417 
1418 
1419 // Constructor that opens the specified file for write.
1420 SamFileWriter::SamFileWriter(const char* filename,
1421  ErrorHandler::HandlingType errorHandlingType)
1422  : SamFile(filename, WRITE, errorHandlingType)
1423 {
1424 }
1425 
1426 
1427 // Constructor that opens the specified file for write.
1428 SamFileWriter::SamFileWriter(const char* filename,
1429  SamFileHeader* header)
1430  : SamFile(filename, WRITE, header)
1431 {
1432 }
1433 
1434 
1435 // Constructor that opens the specified file for write.
1436 SamFileWriter::SamFileWriter(const char* filename,
1437  ErrorHandler::HandlingType errorHandlingType,
1438  SamFileHeader* header)
1439  : SamFile(filename, WRITE, errorHandlingType, header)
1440 {
1441 }
1442 
1443 
1444 SamFileWriter::~SamFileWriter()
1445 {
1446 }
SamFile::myIsOpenForWrite
bool myIsOpenForWrite
Flag to indicate if a file is open for writing.
Definition: SamFile.h:396
BamInterface
Definition: BamInterface.h:24
SamRecord::setSequenceTranslation
void setSequenceTranslation(SequenceTranslation translation)
Set the type of sequence translation to use when getting the sequence.
Definition: SamRecord.cpp:187
SamFile::COORDINATE
@ COORDINATE
file is sorted by coordinate.
Definition: SamFile.h:49
SamRecord::get0BasedPosition
int32_t get0BasedPosition()
Get the 0-based(BAM) leftmost position of the record.
Definition: SamRecord.cpp:1307
BamIndex
Definition: BamIndex.h:32
SamFile::FLAG
@ FLAG
SO flag from the header indicates the sort type.
Definition: SamFile.h:48
SamFile::IsEOF
bool IsEOF()
Returns whether or not the end of the file has been reached.
Definition: SamFile.cpp:424
SamRecord::SequenceTranslation
SequenceTranslation
Enum containing the settings on how to translate the sequence if a reference is available.
Definition: SamRecord.h:57
SamRecord::get0BasedAlignmentEnd
int32_t get0BasedAlignmentEnd()
Returns the 0-based inclusive rightmost position of the clipped sequence.
Definition: SamRecord.cpp:1455
SamFile::ReadBamIndex
bool ReadBamIndex()
Read the bam index file using the BAM filename as a base.
Definition: SamFile.cpp:328
InputFile::setAttemptRecovery
void setAttemptRecovery(bool flag=false)
Enable (default) or disable recovery.
Definition: InputFile.h:485
SamFile::myPrevCoord
int32_t myPrevCoord
Previous values used for checking if the file is sorted.
Definition: SamFile.h:404
ifseek
bool ifseek(IFILE file, int64_t offset, int origin)
Seek to the specified position (result from an iftell), but cannot be done for stdin/stdout.
Definition: InputFile.h:701
SamFile::WriteRecord
bool WriteRecord(SamFileHeader &header, SamRecord &record)
Writes the specified record into the file.
Definition: SamFile.cpp:619
SamRecord::getReferenceID
int32_t getReferenceID()
Get the reference sequence id of the record (BAM format rid).
Definition: SamRecord.cpp:1293
String
Definition: StringBasics.h:39
SamFile::READ
@ READ
open for reading.
Definition: SamFile.h:40
InputFile::DEFAULT
@ DEFAULT
Check the extension, if it is ".gz", treat as gzip, otherwise treat it as UNCOMPRESSED.
Definition: InputFile.h:45
BamIndex::getChunksForRegion
bool getChunksForRegion(int32_t refID, int32_t start, int32_t end, SortedChunkList &chunkList)
Get the list of chunks associated with this region.
Definition: BamIndex.cpp:218
SamFile::GetStatusMessage
const char * GetStatusMessage()
Get the Status Message of the last call that sets status.
Definition: SamFile.h:213
SamFile::myIsOpenForRead
bool myIsOpenForRead
Flag to indicate if a file is open for reading.
Definition: SamFile.h:394
StatGenStatus::NO_MORE_RECS
@ NO_MORE_RECS
NO_MORE_RECS: failed to read a record since there are no more to read either in the file or section i...
Definition: StatGenStatus.h:36
SamFile::resetFile
void resetFile()
Resets the file prepping for a new file.
Definition: SamFile.cpp:953
SamFile::getNumUnMappedReadsFromIndex
int32_t getNumUnMappedReadsFromIndex(int32_t refID)
Get the number of unmapped reads in the specified reference id.
Definition: SamFile.cpp:805
GenomeSequence
Create/Access/Modify/Load Genome Sequences stored as binary mapped files.
Definition: GenomeSequence.h:100
SamFile::IsOpen
bool IsOpen()
Returns whether or not the file has been opened successfully.
Definition: SamFile.cpp:410
ifopen
IFILE ifopen(const char *filename, const char *mode, InputFile::ifileCompression compressionMode=InputFile::DEFAULT)
Open a file with the specified name and mode, using a filename of "-" to indicate stdin/stdout.
Definition: InputFile.h:562
BamIndex::getNumMappedReads
int32_t getNumMappedReads(int32_t refID)
Get the number of mapped reads for this reference id.
Definition: BamIndex.cpp:355
SamReferenceInfo::NO_REF_ID
static const int NO_REF_ID
Constant for the value returned if a reference id does not exist for a queried reference name.
Definition: SamReferenceInfo.h:77
SamFile::ReadRecord
bool ReadRecord(SamFileHeader &header, SamRecord &record)
Reads the next record from the file & stores it in the passed in record.
Definition: SamFile.cpp:501
InputFile::UNCOMPRESSED
@ UNCOMPRESSED
uncompressed file.
Definition: InputFile.h:46
SamFile::QUERY_NAME
@ QUERY_NAME
file is sorted by queryname.
Definition: SamFile.h:50
StatGenStatus::SUCCESS
@ SUCCESS
method completed successfully.
Definition: StatGenStatus.h:32
SamFileReader::SamFileReader
SamFileReader()
Default Constructor.
Definition: SamFile.cpp:1360
SamFile::OpenType
OpenType
Enum for indicating whether to open the file for read or write.
Definition: SamFile.h:39
SamFile::GenerateStatistics
void GenerateStatistics(bool genStats)
Whether or not statistics should be generated for this file.
Definition: SamFile.cpp:878
SamFile::getNumMappedReadsFromIndex
int32_t getNumMappedReadsFromIndex(int32_t refID)
Get the number of mapped reads in the specified reference id.
Definition: SamFile.cpp:790
ErrorHandler::HandlingType
HandlingType
This specifies how this class should respond to errors.
Definition: ErrorHandler.h:29
SamFile::SortedType
SortedType
Enum for indicating the type of sort expected in the file.
Definition: SamFile.h:46
SamRecord::getFlag
uint16_t getFlag()
Get the flag (FLAG).
Definition: SamRecord.cpp:1372
InputFile::isOpen
bool isOpen() const
Returns whether or not the file was successfully opened.
Definition: InputFile.h:423
SamFile::myStatistics
SamStatistics * myStatistics
Pointer to the statistics for this file.
Definition: SamFile.h:412
SamFile::myHasHeader
bool myHasHeader
Flag to indicate if a header has been read/written - required before being able to read/write a recor...
Definition: SamFile.h:399
SamFile::SetReadFlags
void SetReadFlags(uint16_t requiredFlags, uint16_t excludedFlags)
Specify which reads should be returned by ReadRecord.
Definition: SamFile.cpp:781
BamIndex::REF_ID_ALL
static const int32_t REF_ID_ALL
The number used to indicate that all reference ids should be used.
Definition: BamIndex.h:89
StatGenStatus::FAIL_PARSE
@ FAIL_PARSE
failed to parse a record/header - invalid format.
Definition: StatGenStatus.h:42
InputFile::getFileName
const char * getFileName() const
Get the filename that is currently opened.
Definition: InputFile.h:473
SamFile::OpenForWrite
bool OpenForWrite(const char *filename, SamFileHeader *header=NULL)
Open a sam/bam file for writing with the specified filename, determining SAM/BAM from the extension (...
Definition: SamFile.cpp:223
StatGenStatus::INVALID_SORT
@ INVALID_SORT
record is invalid due to it not being sorted.
Definition: StatGenStatus.h:43
ifeof
int ifeof(IFILE file)
Check to see if we have reached the EOF (returns 0 if not EOF).
Definition: InputFile.h:654
BamIndex::getNumUnMappedReads
int32_t getNumUnMappedReads(int32_t refID)
Get the number of unmapped reads for this reference id.
Definition: BamIndex.cpp:377
SamFile::SamFile
SamFile()
Default Constructor, initializes the variables, but does not open any files.
Definition: SamFile.cpp:26
StatGenStatus::setStatus
void setStatus(Status newStatus, const char *newMessage)
Set the status with the specified status enum and message.
Definition: StatGenStatus.cpp:83
SamFile::myRecordCount
uint32_t myRecordCount
Keep a count of the number of records that have been read/written so far.
Definition: SamFile.h:409
SamRecord::getReadName
const char * getReadName()
Returns the SAM formatted Read Name (QNAME).
Definition: SamRecord.cpp:1530
StatGenStatus::Status
Status
Return value enum for StatGenFile methods.
Definition: StatGenStatus.h:32
ifrewind
void ifrewind(IFILE file)
Reset to the beginning of the file (cannot be done for stdin/stdout).
Definition: InputFile.h:642
SamFileHeader::getReferenceLabel
const String & getReferenceLabel(int id) const
Return the reference name (chromosome) for the specified reference id.
Definition: SamFileHeader.cpp:158
ifread
unsigned int ifread(IFILE file, void *buffer, unsigned int size)
Read up to size bytes from the file into the buffer.
Definition: InputFile.h:600
SamFile::myIsBamOpenForRead
bool myIsBamOpenForRead
Values for reading Sorted BAM files via the index.
Definition: SamFile.h:418
SamFile::SetReadSection
bool SetReadSection(int32_t refID)
Sets which reference id (index into the BAM list of reference information) of the BAM file should be ...
Definition: SamFile.cpp:683
SamFileHeader
This class allows a user to get/set the fields in a SAM/BAM Header.
Definition: SamFileHeader.h:35
SamFile::UNSORTED
@ UNSORTED
file is not sorted.
Definition: SamFile.h:47
SamFileHeader::getSortOrder
const char * getSortOrder()
Return the Sort Order value that is set in the Header, returning "" if this field does not exist.
Definition: SamFileHeader.cpp:797
SamFile::setSortedValidation
void setSortedValidation(SortedType sortType)
Set the flag to validate that the file is sorted as it is read/written.
Definition: SamFile.cpp:669
SamRecord
Class providing an easy to use interface to get/set/operate on the fields in a SAM/BAM record.
Definition: SamRecord.h:52
SamFile::~SamFile
virtual ~SamFile()
Destructor.
Definition: SamFile.cpp:82
SamFile::OpenForRead
bool OpenForRead(const char *filename, SamFileHeader *header=NULL)
Open a sam/bam file for reading with the specified filename, determing the type of file and SAM/BAM b...
Definition: SamFile.cpp:93
SamStatistics
Definition: SamStatistics.h:25
SamFile::WriteHeader
bool WriteHeader(SamFileHeader &header)
Writes the specified header into the file.
Definition: SamFile.cpp:467
SamFile::GetNumOverlaps
uint32_t GetNumOverlaps(SamRecord &samRecord)
Returns the number of bases in the passed in read that overlap the region that is currently set.
Definition: SamFile.cpp:864
SamInterface
Definition: SamInterface.h:24
InputFile
Class for easily reading/writing files without having to worry about file type (uncompressed,...
Definition: InputFile.h:37
StatGenStatus::FAIL_IO
@ FAIL_IO
method failed due to an I/O issue.
Definition: StatGenStatus.h:37
SamRecord::getNumOverlaps
uint32_t getNumOverlaps(int32_t start, int32_t end)
Return the number of bases in this read that overlap the passed in region.
Definition: SamRecord.cpp:1841
SamFile::GetCurrentRecordCount
uint32_t GetCurrentRecordCount()
Return the number of records that have been read/written so far.
Definition: SamFile.cpp:676
StatGenStatus::FAIL_ORDER
@ FAIL_ORDER
FAIL_ORDER: method failed because it was called out of order, like trying to read a file without open...
Definition: StatGenStatus.h:41
SamFileWriter::SamFileWriter
SamFileWriter()
Default Constructor.
Definition: SamFile.cpp:1406
Chunk
Definition: IndexBase.h:31
SamFile::ReadHeader
bool ReadHeader(SamFileHeader &header)
Reads the header section from the file and stores it in the passed in header.
Definition: SamFile.cpp:437
SamRecord::NONE
@ NONE
Leave the sequence as is.
Definition: SamRecord.h:58
SamFile::SetReadSequenceTranslation
void SetReadSequenceTranslation(SamRecord::SequenceTranslation translation)
Set the type of sequence translation to use when reading the sequence.
Definition: SamFile.cpp:387
SamFileHeader::getReferenceID
int getReferenceID(const String &referenceName, bool addID=false)
Get the reference ID for the specified reference name (chromosome).
Definition: SamFileHeader.cpp:146
InputFile::disableBuffering
void disableBuffering()
Disable read buffering.
Definition: InputFile.h:121
SamFile::GetBamIndex
const BamIndex * GetBamIndex()
Return the bam index if one has been opened.
Definition: SamFile.cpp:903
SamFile::Close
void Close()
Close the file if there is one open.
Definition: SamFile.cpp:400
SamFile::validateSortOrder
bool validateSortOrder(SamRecord &record, SamFileHeader &header)
Validate that the record is sorted compared to the previously read record if there is one,...
Definition: SamFile.cpp:1006
SamRecord::setReference
void setReference(GenomeSequence *reference)
Set the reference to the specified genome sequence object.
Definition: SamRecord.cpp:178
SamFile
Allows the user to easily read/write a SAM/BAM file.
Definition: SamFile.h:36
SamFile::SetReference
void SetReference(GenomeSequence *reference)
Sets the reference to the specified genome sequence object.
Definition: SamFile.cpp:380
BamIndex::readIndex
SamStatus::Status readIndex(const char *filename)
Definition: BamIndex.cpp:45
ifclose
int ifclose(IFILE &file)
Close the file.
Definition: InputFile.h:580
SamFile::SetWriteSequenceTranslation
void SetWriteSequenceTranslation(SamRecord::SequenceTranslation translation)
Set the type of sequence translation to use when writing the sequence.
Definition: SamFile.cpp:394
InputFile::BGZF
@ BGZF
bgzf file.
Definition: InputFile.h:48
SamFile::myStatus
SamStatus myStatus
The status of the last SamFile command.
Definition: SamFile.h:415
iftell
int64_t iftell(IFILE file)
Get current position in the file.
Definition: InputFile.h:682
BamIndex::REF_ID_UNMAPPED
static const int32_t REF_ID_UNMAPPED
The number used for the reference id of unmapped reads.
Definition: BamIndex.h:86