18 #include "SamFileHeader.h"
19 #include "SamHeaderSQ.h"
20 #include "SamHeaderRG.h"
23 const std::string SamFileHeader::EMPTY_RETURN =
"";
25 SamFileHeader::SamFileHeader()
32 mySQs.setCaseSensitive(
true);
33 myRGs.setCaseSensitive(
true);
34 myPGs.setCaseSensitive(
true);
38 SamFileHeader::~SamFileHeader()
71 std::string newString;
73 String newHeaderString = newString.c_str();
75 status &= parseHeader(newHeaderString);
77 myCurrentHeaderIndex = header.myCurrentHeaderIndex;
78 myCurrentCommentIndex = header.myCurrentCommentIndex;
81 myReferenceInfo.
clear();
83 myReferenceInfo = header.myReferenceInfo;
92 myReferenceInfo.
clear();
102 for(
unsigned int headerIndex = 0; headerIndex < myHeaderRecords.size();
105 if(myHeaderRecords[headerIndex] != NULL)
107 delete myHeaderRecords[headerIndex];
108 myHeaderRecords[headerIndex] = NULL;
111 myHeaderRecords.clear();
136 unsigned int index = 0;
137 while(getHeaderLine(index, header) !=
false)
167 return(myReferenceInfo);
175 return(myReferenceInfo);
198 String headerString = headerLine;
199 return(parseHeader(headerString));
207 String headerString = header;
208 return(parseHeader(headerString));
215 if((comment != NULL) && (strcmp(comment, EMPTY_RETURN.c_str()) != 0))
218 myComments.push_back(comment);
234 myErrorMessage =
"SamFileHeader: Failed to allocate a new HD tag";
239 myHeaderRecords.push_back(myHD);
241 if(!myHD->
setTag(tag, value))
243 myErrorMessage =
"SamFileHeader: Failed to set the specified HD tag";
261 if(strcmp(tag,
"LN") != 0)
265 "SamFileHeader:Failed to add the specified SQ key, LN not specified.";
275 myErrorMessage =
"SamFileHeader: Failed to allocate a new SQ tag";
281 myHeaderRecords.push_back(sq);
283 myReferenceInfo.
add(name, atoi(value));
289 myErrorMessage =
"SamFileHeader:Failed to add the specified SQ key";
293 else if(strcmp(tag,
"LN") == 0)
296 myErrorMessage =
"SamFileHeader:Cannot modify/remove the SQ's LN tag";
300 if(!sq->
setTag(tag, value))
302 myErrorMessage =
"Failed to set the specified SQ tag";
324 myErrorMessage =
"Failed to allocate a new RG tag";
330 myHeaderRecords.push_back(rg);
336 myErrorMessage =
"Failed to add the specified RG key";
341 if(!rg->
setTag(tag, value))
343 myErrorMessage =
"Failed to set the specified RG tag";
366 myErrorMessage =
"Failed to allocate a new PG tag";
372 myHeaderRecords.push_back(pg);
378 myErrorMessage =
"Failed to add the specified PG key";
383 if(!pg->
setTag(tag, value))
385 myErrorMessage =
"Failed to set the specified PG tag";
399 myErrorMessage =
"Failed add an HD tag - there is already one";
404 myErrorMessage =
"Failed add an HD tag - no tag specified";
409 myHeaderRecords.push_back(myHD);
420 myErrorMessage =
"SAM/BAM Header line failed to allocate SQ.";
425 if(strcmp(name, EMPTY_RETURN.c_str()) == 0)
429 "SAM/BAM Header line failure: Skipping SQ line that is missing the SN field.";
432 if(strcmp(length, EMPTY_RETURN.c_str()) == 0)
436 "SAM/BAM Header line failure: Skipping SQ line that is missing the LN field.";
442 if(mySQs.Find(name) < 0)
446 myHeaderRecords.push_back(sq);
447 myReferenceInfo.
add(name, atoi(length));
452 myErrorMessage =
"SAM/BAM Header line failure: Skipping SQ line that has a repeated SN field.";
463 myErrorMessage =
"SAM/BAM Header line failed to allocate RG.";
467 if(strcmp(
id, EMPTY_RETURN.c_str()) == 0)
470 myErrorMessage =
"SAM/BAM Header line failure: Skipping RG line that is missing the ID field.";
476 if(myRGs.Find(
id) < 0)
481 myHeaderRecords.push_back(rg);
486 myErrorMessage =
"SAM/BAM Header line failure: Skipping RG line that has a repeated ID field.";
497 myErrorMessage =
"SAM/BAM Header line failed to allocate PG.";
501 if(strcmp(
id, EMPTY_RETURN.c_str()) == 0)
504 myErrorMessage =
"SAM/BAM Header line failure: Skipping PG line that is missing the ID field.";
510 if(myPGs.Find(
id) < 0)
515 myHeaderRecords.push_back(pg);
520 myErrorMessage =
"SAM/BAM Header line failure: Skipping PG line that has a repeated ID field.";
529 bool returnVal =
true;
545 myErrorMessage =
"Failed to copy a header record, unknown type.";
576 int hashIndex = mySQs.Find(name);
592 myErrorMessage =
"SAM/BAM Header line failed to get SQ object.";
603 mySQs.Delete(hashIndex);
613 int hashIndex = myRGs.Find(
id);
629 myErrorMessage =
"SAM/BAM Header line failed to get RG object.";
640 myRGs.Delete(hashIndex);
650 int hashIndex = myPGs.Find(
id);
666 myErrorMessage =
"SAM/BAM Header line failed to get PG object.";
677 myPGs.Delete(hashIndex);
688 return(EMPTY_RETURN.c_str());
704 return(EMPTY_RETURN.c_str());
722 return(EMPTY_RETURN.c_str());
738 return(EMPTY_RETURN.c_str());
749 return(mySQs.Entries());
756 return(myRGs.Entries());
763 return(myPGs.Entries());
802 return(EMPTY_RETURN.c_str());
804 return(myHD->getSortOrder());
846 myCurrentSQIndex = 0;
854 myCurrentRGIndex = 0;
862 myCurrentPGIndex = 0;
877 while((index < myHeaderRecords.size())
878 && (foundRecord == NULL))
881 foundRecord = myHeaderRecords[index];
891 else if(foundRecord->
getType() != headerType)
914 while((myCurrentHeaderIndex < myHeaderRecords.size())
915 && (foundRecord == NULL))
918 foundRecord = myHeaderRecords[myCurrentHeaderIndex];
920 ++myCurrentHeaderIndex;
943 headerLine = EMPTY_RETURN.c_str();
947 while(headerLine == EMPTY_RETURN.c_str())
949 if(getHeaderLine(myCurrentHeaderIndex, headerLine) ==
false)
957 ++myCurrentHeaderIndex;
968 myCurrentHeaderIndex = 0;
974 for(
unsigned int i = 0; i < myComments.size(); i++)
976 commentLines +=
"@CO\t";;
977 commentLines += myComments[i];
978 commentLines +=
"\n";
987 if(myCurrentCommentIndex < myComments.size())
989 return(myComments[myCurrentCommentIndex++].c_str());
992 return(EMPTY_RETURN.c_str());
1000 myCurrentCommentIndex = 0;
1005 bool SamFileHeader::parseHeader(
String& header)
1007 std::string errorMessage =
"";
1012 std::vector<String>* types = header.Split(
'\n');
1015 for(uint32_t index = 0; index < types->size(); index++)
1018 if(!parseHeaderLine(types->at(index)))
1020 errorMessage += myErrorMessage;
1021 errorMessage +=
"\n";
1035 myErrorMessage = errorMessage;
1036 if((numErrors > 0) && (numValid == 0))
1039 std::cerr << numErrors
1040 <<
" invalid SAM/BAM Header lines were skipped due to:\n"
1041 << errorMessage << std::endl;
1044 else if(numErrors > 0)
1048 std::cerr << numErrors
1049 <<
" invalid SAM/BAM Header lines were skipped due to:\n"
1050 << errorMessage << std::endl;
1058 bool SamFileHeader::parseHeaderLine(
const String& headerLine)
1061 if((headerLine.Length() >= 4) && (headerLine[0] ==
'@') &&
1062 (headerLine[1] ==
'C') && (headerLine[2] ==
'O') &&
1063 (headerLine[3] ==
'\t'))
1066 String comment = headerLine.SubStr(4);
1073 tokens.ReplaceColumns(headerLine,
'\t');
1075 if(tokens.Length() < 1)
1082 if((tokens[0].Length() != 3) || (tokens[0][0] !=
'@'))
1086 myErrorMessage =
"SAM/BAM Header line does not start with @ & at least 2 chars.";
1091 if(tokens[0] ==
"@HD")
1100 myErrorMessage =
"SAM/BAM Header line failed to allocate HD.";
1103 myHeaderRecords.push_back(myHD);
1106 myErrorMessage =
"SAM/BAM Header line failed to store HD record.";
1113 myErrorMessage =
"SAM/BAM Header line failure: multiple HD records.";
1117 else if(tokens[0] ==
"@SQ")
1127 status &=
addSQ(sq);
1131 myErrorMessage =
"SAM/BAM Header line failed to store SQ record.";
1135 else if(tokens[0] ==
"@RG")
1145 status &=
addRG(rg);
1149 myErrorMessage =
"SAM/BAM Header line failed to store RG record.";
1153 else if(tokens[0] ==
"@PG")
1163 status &=
addPG(pg);
1167 myErrorMessage =
"SAM/BAM Header line failed to store PG record.";
1175 "SAM/BAM Header line failure: Skipping unknown header type, ";
1176 myErrorMessage += (
const char*)(tokens[0]);
1187 bool SamFileHeader::getHeaderLine(
unsigned int index, std::string& header)
const
1190 if(index < myHeaderRecords.size())
1200 unsigned int commentIndex = index - myHeaderRecords.size();
1202 if(commentIndex < myComments.size())
1207 header += myComments[commentIndex];