20 #include "SamValidation.h"
21 #include "CigarRoller.h"
24 const char* SamValidationError::enumSeverityString[] = {
27 const char* SamValidationError::enumTypeString[] = {
41 return(enumTypeString[type]);
49 mySeverity = severity;
68 return(myMessage.c_str());
74 return(enumTypeString[myType]);
80 return(enumSeverityString[mySeverity]);
89 errorString +=
") : ";
104 : myValidationErrors()
106 myErrorIter = myValidationErrors.begin();
120 std::list<const SamValidationError*>::iterator errorIter;
121 for(errorIter = myValidationErrors.begin();
122 errorIter != myValidationErrors.end(); ++errorIter)
127 myValidationErrors.clear();
128 myErrorIter = myValidationErrors.end();
134 const char* newMessage)
141 if(myValidationErrors.size() == 1)
144 myErrorIter = myValidationErrors.begin();
153 return(myValidationErrors.size());
162 if(myErrorIter == myValidationErrors.end())
168 return(*myErrorIter++);
175 myErrorIter = myValidationErrors.begin();
182 for(std::list<const SamValidationError*>::
183 const_iterator validationErrorIter =
184 myValidationErrors.begin();
185 validationErrorIter != myValidationErrors.end();
186 validationErrorIter++)
188 std::string error =
"";
189 (*validationErrorIter)->getErrorString(error);
190 errorString += error;
226 status &=
isValidTags(samRecord, validationErrors);
249 int32_t qnameLenNull = strlen(qname) + 1;
253 if(qnameLenNull != readNameLen)
258 String message =
"Invalid Query Name - the string length (";
259 message += qnameLenNull;
260 message +=
") does not match the specified query name length (";
261 message += readNameLen;
273 if((qnameLenNull < 2) || (qnameLenNull > 255))
275 String message =
"Invalid Query Name (QNAME) length: ";
276 message += qnameLenNull;
277 message +=
". Length with the terminating null must be between 2 & 255.";
289 for(
int i = 0; i < qnameLenNull; ++i)
295 message =
"Invalid character in the Query Name (QNAME): ' ' at position ";
305 message =
"Invalid character in the Query Name (QNAME): '\t' at position ";
315 message =
"Invalid character in the Query Name (QNAME): '\n' at position ";
325 message =
"Invalid character in the Query Name (QNAME): '\r' at position ";
358 if((strcmp(rname,
"*") != 0) &&
360 (samHeader.
getSQ(rname) == NULL))
364 std::string message =
"RNAME, ";
366 message +=
", was not found in a SAM Header SQ record";
386 int32_t rnameLen = strlen(rname);
394 "Reference Sequence Name (RNAME) cannot have 0 length.");
402 for(
int i = 0; i < rnameLen; ++i)
408 message =
"Invalid character in the Reference Sequence Name (RNAME): ' ' at position ";
418 message =
"Invalid character in the Reference Sequence Name (RNAME): '\t' at position ";
428 message =
"Invalid character in the Reference Sequence Name (RNAME): '\n' at position ";
438 message =
"Invalid character in the Reference Sequence Name (RNAME): '\r' at position ";
448 message =
"Invalid character in the Reference Sequence Name (RNAME): '@' at position ";
458 message =
"Invalid character in the Reference Sequence Name (RNAME): '=' at position ";
488 String message =
"Invalid Reference ID, out of range (";
490 message +=
") must be between -1 and ";
512 if((pos < 0) || (pos > 536870911))
514 String message =
"POS out of range (";
516 message +=
") must be between 0 and (2^29)-1.";
552 const char* sequence,
555 return(
isValidCigar(cigar, strlen(sequence), validationErrors));
571 int32_t cigarLen = strlen(cigar);
578 "Cigar must not be blank.");
582 if(strcmp(cigar,
"*") != 0)
594 if(cigarSeqLen != seqLen)
596 message =
"CIGAR does not evaluate to the same length as SEQ, (";
597 message += cigarSeqLen;
621 const char* sequence,
625 int seqLen = strlen(sequence);
628 if(strcmp(sequence,
"*") == 0)
644 if((seqLength != 0) && (strcmp(quality,
"*") != 0))
646 int qualLen = strlen(quality);
649 if(seqLength != qualLen)
653 String message =
"QUAL is not the same length as SEQ, (";
656 message += seqLength;
676 if(reference != NULL)
689 correctMD =
"UNKNOWN";
691 String message =
"Incorrect MD Tag, ";
692 message += *recordMD;
693 message +=
", should be ";
694 message += correctMD;
Type getType() const
Return the type enum of this validation error object.
const char * getReferenceName()
Get the reference sequence name (RNAME) of the record.
int32_t getNumEntries() const
Get the number of entries contained here.
@ WARNING
Warning is used if it is just an invalid value.
@ INVALID_CIGAR
Invalid CIGAR.
@ INVALID_QNAME
Invalid read/query name.
@ INVALID_QUAL
Invalid base quality.
Severity
Severity of the error.
static bool isValidCigar(SamRecord &samRecord, SamValidationErrors &validationErrors)
Validate the cigar.
@ INVALID_POS
Invalid position.
int32_t getReferenceID()
Get the reference sequence id of the record (BAM format rid).
void getErrorString(std::string &errorString) const
Get the error string representing this object's error.
Class for tracking the reference information mapping between the reference ids and the reference name...
Create/Access/Modify/Load Genome Sequences stored as binary mapped files.
uint8_t getMapQuality()
Get the mapping quality (MAPQ) of the record.
The SamValidationErrors class is a container class that holds SamValidationError Objects,...
void resetErrorIter()
Reset the iterator to the beginning of the errors.
~SamValidationErrors()
Destructor.
void printError() const
Print a formatted output of the error to cerr.
static bool isValidFlag(uint16_t flag, SamValidationErrors &validationErrors)
Determines whether or not the flag is valid.
SamValidationErrors()
Constructor.
GenomeSequence * getReference()
Returns a pointer to the genome sequence object associated with this record if it was set (NULL if it...
const char * getMessage() const
Return the error message of this validation error object.
const SamValidationError * getNextError()
Return a pointer to the next error without removing it from the container, and returning null once al...
const char * getSeverityString() const
Return the string representing this object's severity of validation error.
uint16_t getFlag()
Get the flag (FLAG).
void addError(SamValidationError::Type newType, SamValidationError::Severity newSeverity, const char *newMessage)
Add the specified error to this container.
@ ERROR
Error is used if parsing could not succeed.
const String * getStringTag(const char *tag)
Get the string value for the specified tag.
static bool isValidRefID(int32_t refID, const SamReferenceInfo &refInfo, SamValidationErrors &validationErrors)
Validate whether or not the specified reference id is valid.
int32_t get1BasedPosition()
Get the 1-based(SAM) leftmost position (POS) of the record.
static bool isValidRname(SamFileHeader &samHeader, const char *rname, SamValidationErrors &validationErrors)
Validate the reference name including validating against the header.
@ INVALID_RNAME
Invalid reference name.
const char * getQuality()
Returns the SAM formatted quality string (QUAL).
static bool isValid(SamFileHeader &samHeader, SamRecord &samRecord, SamValidationErrors &validationErrors)
Validates whether or not the specified SamRecord is valid, calling all of the other validations.
const char * getReadName()
Returns the SAM formatted Read Name (QNAME).
Severity getSeverity() const
Return the severity enum of this validation error object.
The SamValidationError class describes a validation error that occurred, containing the error type,...
static bool isValidMapQuality(uint8_t mapQuality, SamValidationErrors &validationErrors)
Validate the mapping quality.
int32_t getReadLength()
Get the length of the read.
SamValidationError(Type type, Severity severity, std::string Message)
Constructor that sets the type, severity, and message for the validation error.
const char * getCigar()
Returns the SAM formatted CIGAR string.
Class providing an easy to use interface to get/set/operate on the fields in a SAM/BAM record.
uint8_t getReadNameLength()
Get the length of the readname (QNAME) including the null.
const char * getTypeString() const
Return the string representing this object's type of validation error.
static bool isValidQuality(SamRecord &samRecord, SamValidationErrors &validationErrors)
Validate the base quality.
The purpose of this class is to provide accessors for setting, updating, modifying the CIGAR object....
static bool isValidQname(const char *qname, uint8_t qnameLen, SamValidationErrors &validationErrors)
Determines whether or not the specified qname is valid.
void clear()
Remove all the errors from the container.
@ INVALID_REF_ID
Invalid reference id.
static bool isValidSequence(SamRecord &samRecord, SamValidationErrors &validationErrors)
Validate the sequence, but not against the cigar or quality string.
void getErrorString(std::string &errorString) const
Append the error messages contained in this container to the passed in string.
@ INVALID_TAG
Invalid tag.
static bool isValidTags(SamRecord &samRecord, SamValidationErrors &validationErrors)
Validate the tags.
int getExpectedQueryBaseCount() const
Return the length of the read that corresponds to the current CIGAR string.
static bool isValid1BasedPos(int32_t pos, SamValidationErrors &validationErrors)
Validate the reference position.
unsigned int numErrors()
Return the number of validation errors contained in this object.