libStatGen Software  1
BamInterface.cpp
1 /*
2  * Copyright (C) 2010 Regents of the University of Michigan
3  *
4  * This program is free software: you can redistribute it and/or modify
5  * it under the terms of the GNU General Public License as published by
6  * the Free Software Foundation, either version 3 of the License, or
7  * (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program. If not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #include "BamInterface.h"
19 #include "CharBuffer.h"
20 
21 BamInterface::BamInterface()
22 {
23 }
24 
25 
26 BamInterface::~BamInterface()
27 {
28 }
29 
30 
31 // Read a BAM file's header.
32 bool BamInterface::readHeader(IFILE filePtr, SamFileHeader& header,
33  SamStatus& status)
34 {
35  if(filePtr == NULL)
36  {
37  // File is not open, return false.
39  "Cannot read header since the file pointer is null");
40  return(false);
41  }
42  if(filePtr->isOpen() == false)
43  {
45  "Cannot read header since the file is not open");
46  return(false);
47  }
48 
49  // Clear the passed in header.
50  header.resetHeader();
51 
52  int32_t headerLength;
53  int readSize = ifread(filePtr, &headerLength, sizeof(headerLength));
54 
55  if(readSize != sizeof(headerLength))
56  {
57  String errMsg = "Failed to read the BAM header length, read ";
58  errMsg += readSize;
59  errMsg += " bytes instead of ";
60  errMsg += (unsigned int)sizeof(headerLength);
61  status.setStatus(SamStatus::FAIL_IO, errMsg.c_str());
62  return(false);
63  }
64 
65  String headerStr;
66  if(headerLength > 0)
67  {
68  // Read the header.
69  readSize =
70  ifread(filePtr, headerStr.LockBuffer(headerLength + 1), headerLength);
71  headerStr[headerLength] = 0;
72  headerStr.UnlockBuffer();
73  if(readSize != headerLength)
74  {
75  // Failed to read the header.
76  status.setStatus(SamStatus::FAIL_IO, "Failed to read the BAM header.");
77  return(false);
78  }
79  }
80 
81  // Parse the header that was read.
82  if(!header.addHeader(headerStr))
83  {
84  // Status is set in the method on failure.
86  return(false);
87  }
88 
89  int referenceCount;
90  // Read the number of references sequences.
91  ifread(filePtr, &referenceCount, sizeof(int));
92 
93  // Get and clear the reference info so it can be set
94  // from the bam reference table.
95  SamReferenceInfo& refInfo =
96  header.getReferenceInfoForBamInterface();
97  refInfo.clear();
98 
99  CharBuffer refName;
100 
101  // Read each reference sequence
102  for (int i = 0; i < referenceCount; i++)
103  {
104  int nameLength;
105  int rc;
106  // Read the length of the reference name.
107  rc = ifread(filePtr, &nameLength, sizeof(int));
108  if(rc != sizeof(int))
109  {
111  "Failed to read the BAM reference dictionary.");
112  return(false);
113  }
114 
115  // Read the name.
116  refName.readFromFile(filePtr, nameLength);
117 
118  // Read the length of the reference sequence.
119  int32_t refLen;
120  rc = ifread(filePtr, &refLen, sizeof(int));
121 
122  if(rc != sizeof(int)) {
124  "Failed to read the BAM reference dictionary.");
125  return(false);
126  }
127 
128  refInfo.add(refName.c_str(), refLen);
129  }
130 
131  // Successfully read the file.
132  return(true);
133 }
134 
135 
136 bool BamInterface::writeHeader(IFILE filePtr, SamFileHeader& header,
137  SamStatus& status)
138 {
139  if((filePtr == NULL) || (filePtr->isOpen() == false))
140  {
141  // File is not open, return false.
143  "Cannot write header since the file pointer is null");
144  return(false);
145  }
146 
147  char magic[4];
148  magic[0] = 'B';
149  magic[1] = 'A';
150  magic[2] = 'M';
151  magic[3] = 1;
152 
153  // Write magic to the file.
154  ifwrite(filePtr, magic, 4);
155 
156  ////////////////////////////////
157  // Write the header to the file.
158  ////////////////////////////////
159  // Construct a string containing the entire header.
160  std::string headerString = "";
161  header.getHeaderString(headerString);
162 
163  int32_t headerLen = headerString.length();
164  int numWrite = 0;
165 
166  // Write the header length.
167  numWrite = ifwrite(filePtr, &headerLen, sizeof(int32_t));
168  if(numWrite != sizeof(int32_t))
169  {
171  "Failed to write the BAM header length.");
172  return(false);
173  }
174 
175  // Write the header to the file.
176  numWrite = ifwrite(filePtr, headerString.c_str(), headerLen);
177  if(numWrite != headerLen)
178  {
180  "Failed to write the BAM header.");
181  return(false);
182  }
183 
184  ////////////////////////////////////////////////////////
185  // Write the Reference Information.
186  const SamReferenceInfo& refInfo = header.getReferenceInfo();
187 
188  // Get the number of sequences.
189  int32_t numSeq = refInfo.getNumEntries();
190  ifwrite(filePtr, &numSeq, sizeof(int32_t));
191 
192  // Write each reference sequence
193  for (int i = 0; i < numSeq; i++)
194  {
195  const char* refName = refInfo.getReferenceName(i);
196  // Add one for the null value.
197  int32_t nameLength = strlen(refName) + 1;
198  // Write the length of the reference name.
199  ifwrite(filePtr, &nameLength, sizeof(int32_t));
200 
201  // Write the name.
202  ifwrite(filePtr, refName, nameLength);
203  // Write the length of the reference sequence.
204  int32_t refLen = refInfo.getReferenceLength(i);
205  ifwrite(filePtr, &refLen, sizeof(int32_t));
206  }
207 
208  return(true);
209 }
210 
211 
212 void BamInterface::readRecord(IFILE filePtr, SamFileHeader& header,
213  SamRecord& record,
214  SamStatus& samStatus)
215 {
216  // TODO - need to validate there are @SQ lines in both sam/bam - MAYBE!
217 
218  // SetBufferFromFile will reset the record prior to reading a new one.
219  if(record.setBufferFromFile(filePtr, header) != SamStatus::SUCCESS)
220  {
221  // Failed, so add the error message.
222  samStatus.addError(record.getStatus());
223  }
224 }
225 
226 SamStatus::Status BamInterface::writeRecord(IFILE filePtr,
227  SamFileHeader& header,
228  SamRecord& record,
229  SamRecord::SequenceTranslation translation)
230 {
231  // Write the file, returning the status.
232  return(record.writeRecordBuffer(filePtr, translation));
233 }
234 
235 
SamReferenceInfo::clear
void clear()
Reset this reference info.
Definition: SamReferenceInfo.cpp:123
SamReferenceInfo::getNumEntries
int32_t getNumEntries() const
Get the number of entries contained here.
Definition: SamReferenceInfo.cpp:93
SamRecord::SequenceTranslation
SequenceTranslation
Enum containing the settings on how to translate the sequence if a reference is available.
Definition: SamRecord.h:57
String
Definition: StringBasics.h:39
SamReferenceInfo::getReferenceLength
int32_t getReferenceLength(int index) const
Return the reference length at the specified index, returning 0 if the index is out of bounds.
Definition: SamReferenceInfo.cpp:112
SamReferenceInfo
Class for tracking the reference information mapping between the reference ids and the reference name...
Definition: SamReferenceInfo.h:28
SamFileHeader::resetHeader
void resetHeader()
Initialize the header.
Definition: SamFileHeader.cpp:90
StatGenStatus::SUCCESS
@ SUCCESS
method completed successfully.
Definition: StatGenStatus.h:32
InputFile::isOpen
bool isOpen() const
Returns whether or not the file was successfully opened.
Definition: InputFile.h:423
SamFileHeader::getHeaderString
bool getHeaderString(std::string &header) const
Set the passed in string to the entire header string, clearing its current contents.
Definition: SamFileHeader.cpp:131
StatGenStatus
This class is used to track the status results of some methods in the BAM classes.
Definition: StatGenStatus.h:27
StatGenStatus::FAIL_PARSE
@ FAIL_PARSE
failed to parse a record/header - invalid format.
Definition: StatGenStatus.h:42
SamFileHeader::addHeader
bool addHeader(const char *header)
Add a header that is already preformatted in a const char*.
Definition: SamFileHeader.cpp:204
SamRecord::writeRecordBuffer
SamStatus::Status writeRecordBuffer(IFILE filePtr)
Write the record as a BAM into the specified already opened file.
Definition: SamRecord.cpp:1225
SamRecord::setBufferFromFile
SamStatus::Status setBufferFromFile(IFILE filePtr, SamFileHeader &header)
Read the BAM record from a file.
Definition: SamRecord.cpp:558
StatGenStatus::setStatus
void setStatus(Status newStatus, const char *newMessage)
Set the status with the specified status enum and message.
Definition: StatGenStatus.cpp:83
StatGenStatus::Status
Status
Return value enum for StatGenFile methods.
Definition: StatGenStatus.h:32
SamRecord::getStatus
const SamStatus & getStatus()
Returns the status associated with the last method that sets the status.
Definition: SamRecord.cpp:2391
ifread
unsigned int ifread(IFILE file, void *buffer, unsigned int size)
Read up to size bytes from the file into the buffer.
Definition: InputFile.h:600
SamFileHeader::getReferenceInfo
const SamReferenceInfo & getReferenceInfo() const
Get the Reference Information.
Definition: SamFileHeader.cpp:165
StatGenStatus::addError
void addError(Status newStatus, const char *newMessage)
Add the specified error message to the status message, setting the status to newStatus if the current...
Definition: StatGenStatus.cpp:99
SamFileHeader
This class allows a user to get/set the fields in a SAM/BAM Header.
Definition: SamFileHeader.h:35
SamRecord
Class providing an easy to use interface to get/set/operate on the fields in a SAM/BAM record.
Definition: SamRecord.h:52
CharBuffer
Definition: CharBuffer.h:25
SamReferenceInfo::getReferenceName
const char * getReferenceName(int index) const
Return the reference name at the specified index, returning "" if the index is out of bounds.
Definition: SamReferenceInfo.cpp:100
InputFile
Class for easily reading/writing files without having to worry about file type (uncompressed,...
Definition: InputFile.h:37
StatGenStatus::FAIL_IO
@ FAIL_IO
method failed due to an I/O issue.
Definition: StatGenStatus.h:37
StatGenStatus::FAIL_ORDER
@ FAIL_ORDER
FAIL_ORDER: method failed because it was called out of order, like trying to read a file without open...
Definition: StatGenStatus.h:41
ifwrite
unsigned int ifwrite(IFILE file, const void *buffer, unsigned int size)
Write the specified number of bytes from the specified buffer into the file.
Definition: InputFile.h:669
SamFileHeader::getErrorMessage
const char * getErrorMessage()
Get the failure message if a method returned failure.
Definition: SamFileHeader.h:423
SamReferenceInfo::add
void add(const char *referenceSequenceName, int32_t referenceSequenceLength)
Add reference sequence name and reference sequence length.
Definition: SamReferenceInfo.cpp:35