20#include "SamValidation.h"
21#include "CigarRoller.h"
24const char* SamValidationError::enumSeverityString[] = {
27const char* SamValidationError::enumTypeString[] = {
41 return(enumTypeString[type]);
49 mySeverity = severity;
68 return(myMessage.c_str());
74 return(enumTypeString[myType]);
80 return(enumSeverityString[mySeverity]);
89 errorString +=
") : ";
104 : myValidationErrors()
106 myErrorIter = myValidationErrors.begin();
120 std::list<const SamValidationError*>::iterator errorIter;
121 for(errorIter = myValidationErrors.begin();
122 errorIter != myValidationErrors.end(); ++errorIter)
127 myValidationErrors.clear();
128 myErrorIter = myValidationErrors.end();
134 const char* newMessage)
141 if(myValidationErrors.size() == 1)
144 myErrorIter = myValidationErrors.begin();
153 return(myValidationErrors.size());
162 if(myErrorIter == myValidationErrors.end())
168 return(*myErrorIter++);
175 myErrorIter = myValidationErrors.begin();
182 for(std::list<const SamValidationError*>::
183 const_iterator validationErrorIter =
184 myValidationErrors.begin();
185 validationErrorIter != myValidationErrors.end();
186 validationErrorIter++)
188 std::string error =
"";
189 (*validationErrorIter)->getErrorString(error);
190 errorString += error;
226 status &=
isValidTags(samRecord, validationErrors);
249 int32_t qnameLenNull = strlen(qname) + 1;
253 if(qnameLenNull != readNameLen)
258 String message =
"Invalid Query Name - the string length (";
259 message += qnameLenNull;
260 message +=
") does not match the specified query name length (";
261 message += readNameLen;
273 if((qnameLenNull < 2) || (qnameLenNull > 255))
275 String message =
"Invalid Query Name (QNAME) length: ";
276 message += qnameLenNull;
277 message +=
". Length with the terminating null must be between 2 & 255.";
289 for(
int i = 0; i < qnameLenNull; ++i)
295 message =
"Invalid character in the Query Name (QNAME): ' ' at position ";
305 message =
"Invalid character in the Query Name (QNAME): '\t' at position ";
315 message =
"Invalid character in the Query Name (QNAME): '\n' at position ";
325 message =
"Invalid character in the Query Name (QNAME): '\r' at position ";
358 if((strcmp(rname,
"*") != 0) &&
360 (samHeader.
getSQ(rname) == NULL))
364 std::string message =
"RNAME, ";
366 message +=
", was not found in a SAM Header SQ record";
386 int32_t rnameLen = strlen(rname);
394 "Reference Sequence Name (RNAME) cannot have 0 length.");
402 for(
int i = 0; i < rnameLen; ++i)
408 message =
"Invalid character in the Reference Sequence Name (RNAME): ' ' at position ";
418 message =
"Invalid character in the Reference Sequence Name (RNAME): '\t' at position ";
428 message =
"Invalid character in the Reference Sequence Name (RNAME): '\n' at position ";
438 message =
"Invalid character in the Reference Sequence Name (RNAME): '\r' at position ";
448 message =
"Invalid character in the Reference Sequence Name (RNAME): '@' at position ";
458 message =
"Invalid character in the Reference Sequence Name (RNAME): '=' at position ";
488 String message =
"Invalid Reference ID, out of range (";
490 message +=
") must be between -1 and ";
512 if((pos < 0) || (pos > 536870911))
514 String message =
"POS out of range (";
516 message +=
") must be between 0 and (2^29)-1.";
552 const char* sequence,
555 if(strcmp(sequence,
"*") != 0)
557 return(
isValidCigar(cigar, strlen(sequence), validationErrors));
576 int32_t cigarLen = strlen(cigar);
583 "Cigar must not be blank.");
587 if(strcmp(cigar,
"*") != 0)
599 if((cigarSeqLen != seqLen) && (seqLen != 0))
601 message =
"CIGAR does not evaluate to the same length as SEQ, (";
602 message += cigarSeqLen;
626 const char* sequence,
630 int seqLen = strlen(sequence);
633 if(strcmp(sequence,
"*") == 0)
649 if((seqLength != 0) && (strcmp(quality,
"*") != 0))
651 int qualLen = strlen(quality);
654 if(seqLength != qualLen)
658 String message =
"QUAL is not the same length as SEQ, (";
661 message += seqLength;
681 if(reference != NULL)
694 correctMD =
"UNKNOWN";
696 String message =
"Incorrect MD Tag, ";
697 message += *recordMD;
698 message +=
", should be ";
699 message += correctMD;
The purpose of this class is to provide accessors for setting, updating, modifying the CIGAR object....
int getExpectedQueryBaseCount() const
Return the length of the read that corresponds to the current CIGAR string.
Create/Access/Modify/Load Genome Sequences stored as binary mapped files.
Class providing an easy to use interface to get/set/operate on the fields in a SAM/BAM record.
const char * getReferenceName()
Get the reference sequence name (RNAME) of the record.
int32_t get1BasedPosition()
Get the 1-based(SAM) leftmost position (POS) of the record.
int32_t getReferenceID()
Get the reference sequence id of the record (BAM format rid).
GenomeSequence * getReference()
Returns a pointer to the genome sequence object associated with this record if it was set (NULL if it...
uint8_t getReadNameLength()
Get the length of the readname (QNAME) including the null.
uint16_t getFlag()
Get the flag (FLAG).
int32_t getReadLength()
Get the length of the read.
const String * getStringTag(const char *tag)
Get the string value for the specified tag.
const char * getCigar()
Returns the SAM formatted CIGAR string.
uint8_t getMapQuality()
Get the mapping quality (MAPQ) of the record.
const char * getReadName()
Returns the SAM formatted Read Name (QNAME).
const char * getQuality()
Returns the SAM formatted quality string (QUAL).
Class for tracking the reference information mapping between the reference ids and the reference name...
int32_t getNumEntries() const
Get the number of entries contained here.
The SamValidationError class describes a validation error that occurred, containing the error type,...
Type getType() const
Return the type enum of this validation error object.
const char * getSeverityString() const
Return the string representing this object's severity of validation error.
void printError() const
Print a formatted output of the error to cerr.
void getErrorString(std::string &errorString) const
Get the error string representing this object's error.
Severity
Severity of the error.
@ WARNING
Warning is used if it is just an invalid value.
@ ERROR
Error is used if parsing could not succeed.
const char * getMessage() const
Return the error message of this validation error object.
SamValidationError(Type type, Severity severity, std::string Message)
Constructor that sets the type, severity, and message for the validation error.
@ INVALID_REF_ID
Invalid reference id.
@ INVALID_TAG
Invalid tag.
@ INVALID_QNAME
Invalid read/query name.
@ INVALID_CIGAR
Invalid CIGAR.
@ INVALID_POS
Invalid position.
@ INVALID_RNAME
Invalid reference name.
@ INVALID_QUAL
Invalid base quality.
Severity getSeverity() const
Return the severity enum of this validation error object.
const char * getTypeString() const
Return the string representing this object's type of validation error.
The SamValidationErrors class is a container class that holds SamValidationError Objects,...
void getErrorString(std::string &errorString) const
Append the error messages contained in this container to the passed in string.
const SamValidationError * getNextError()
Return a pointer to the next error without removing it from the container, and returning null once al...
SamValidationErrors()
Constructor.
void resetErrorIter()
Reset the iterator to the beginning of the errors.
void clear()
Remove all the errors from the container.
unsigned int numErrors()
Return the number of validation errors contained in this object.
void addError(SamValidationError::Type newType, SamValidationError::Severity newSeverity, const char *newMessage)
Add the specified error to this container.
~SamValidationErrors()
Destructor.
static bool isValidQname(const char *qname, uint8_t qnameLen, SamValidationErrors &validationErrors)
Determines whether or not the specified qname is valid.
static bool isValidTags(SamRecord &samRecord, SamValidationErrors &validationErrors)
Validate the tags.
static bool isValidFlag(uint16_t flag, SamValidationErrors &validationErrors)
Determines whether or not the flag is valid.
static bool isValidQuality(SamRecord &samRecord, SamValidationErrors &validationErrors)
Validate the base quality.
static bool isValid(SamFileHeader &samHeader, SamRecord &samRecord, SamValidationErrors &validationErrors)
Validates whether or not the specified SamRecord is valid, calling all of the other validations.
static bool isValid1BasedPos(int32_t pos, SamValidationErrors &validationErrors)
Validate the reference position.
static bool isValidRname(SamFileHeader &samHeader, const char *rname, SamValidationErrors &validationErrors)
Validate the reference name including validating against the header.
static bool isValidRefID(int32_t refID, const SamReferenceInfo &refInfo, SamValidationErrors &validationErrors)
Validate whether or not the specified reference id is valid.
static bool isValidCigar(SamRecord &samRecord, SamValidationErrors &validationErrors)
Validate the cigar.
static bool isValidMapQuality(uint8_t mapQuality, SamValidationErrors &validationErrors)
Validate the mapping quality.
static bool isValidSequence(SamRecord &samRecord, SamValidationErrors &validationErrors)
Validate the sequence, but not against the cigar or quality string.