Mercurial > repos > zzhou > spp_phantompeak
diff spp/src/BamAlignment.h @ 6:ce08b0efa3fd draft
Uploaded
author | zzhou |
---|---|
date | Tue, 27 Nov 2012 16:11:40 -0500 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/spp/src/BamAlignment.h Tue Nov 27 16:11:40 2012 -0500 @@ -0,0 +1,203 @@ +// *************************************************************************** +// BamAlignment.h (c) 2009 Derek Barnett +// Marth Lab, Department of Biology, Boston College +// All rights reserved. +// --------------------------------------------------------------------------- +// Last modified: 13 December 2010 (DB) +// --------------------------------------------------------------------------- +// Provides the BamAlignment data structure +// *************************************************************************** + +#ifndef BAMALIGNMENT_H +#define BAMALIGNMENT_H + +#include <api_global.h> +#include <BamAux.h> +#include <string> +#include <vector> + +namespace BamTools { + +// forward declare BamAlignment's friend classes +namespace Internal { + class BamReaderPrivate; + class BamWriterPrivate; +} // namespace Internal + +// BamAlignment data structure: one alignment record (name, bases, qualities, tag data, position, flag, CIGAR and mate information) +// explicitly labeled as 'struct' to indicate that (most of) its fields are public +struct API_EXPORT BamAlignment { + + // constructors & destructor + public: + BamAlignment(void); + BamAlignment(const BamAlignment& other); + ~BamAlignment(void); + + // Queries against alignment flags + public: + bool IsDuplicate(void) const; // Returns true if this read is a PCR duplicate + bool IsFailedQC(void) const; // Returns true if this read failed quality control + bool IsFirstMate(void) const; // Returns true if alignment is first mate on read + bool IsMapped(void) const; // Returns true if alignment is mapped + bool IsMateMapped(void) const; // Returns true if alignment's mate is mapped + bool IsMateReverseStrand(void) const; // Returns true if alignment's mate is mapped to reverse strand + bool IsPaired(void) const; // Returns true if alignment is part of paired-end read + bool IsPrimaryAlignment(void) const; // Returns true if reported position is primary alignment + bool IsProperPair(void) const;
// Returns true if alignment is part of read that satisfied paired-end resolution + bool IsReverseStrand(void) const; // Returns true if alignment is mapped to reverse strand + bool IsSecondMate(void) const; // Returns true if alignment is second mate on read + + // Manipulate alignment flags + public: + void SetIsDuplicate(bool ok); // Sets "PCR duplicate" flag + void SetIsFailedQC(bool ok); // Sets "failed quality control" flag + void SetIsFirstMate(bool ok); // Sets "alignment is first mate" flag + void SetIsMapped(bool ok); // Sets "alignment is mapped" flag + void SetIsMateMapped(bool ok); // Sets "alignment's mate is mapped" flag + void SetIsMateReverseStrand(bool ok); // Sets "alignment's mate mapped to reverse strand" flag + void SetIsPaired(bool ok); // Sets "alignment part of paired-end read" flag + void SetIsPrimaryAlignment(bool ok); // Sets "position is primary alignment" flag + void SetIsProperPair(bool ok); // Sets "alignment is part of read that satisfied paired-end resolution" flag + void SetIsReverseStrand(bool ok); // Sets "alignment mapped to reverse strand" flag + void SetIsSecondMate(bool ok); // Sets "alignment is second mate on read" flag + + // legacy methods (deprecated, but available) + void SetIsMateUnmapped(bool ok); // Complement of IsMateMapped() flag + void SetIsSecondaryAlignment(bool ok); // Complement of IsPrimaryAlignment() flag + void SetIsUnmapped(bool ok); // Complement of IsMapped() flag + + // Tag data access methods + public: + // ------------------------------------------------------------------------------------- + // N.B. - The following tag access methods may not be used on BamAlignments fetched + // using BamReader::GetNextAlignmentCore(). Attempting to use them will not result in an + // error message (to keep output clean) but will ALWAYS return false. Only user-created + // BamAlignments or those retrieved using BamReader::GetNextAlignment() are valid here.
+ + // add tag data (create new TAG entry with TYPE and VALUE) + // TYPE is one of {A, i, f, Z, H} depending on VALUE - see SAM/BAM spec for details + // returns true if new data added, false if error or TAG already exists + // N.B. - will NOT modify existing tag. Use EditTag() instead + // @tag - two character tag name + // @type - single character tag type (see SAM/BAM spec for details) + // @value - value to associate with tag + bool AddTag(const std::string& tag, const std::string& type, const std::string& value); // type must be Z or H + bool AddTag(const std::string& tag, const std::string& type, const uint32_t& value); // type must be A or i + bool AddTag(const std::string& tag, const std::string& type, const int32_t& value); // type must be A or i + bool AddTag(const std::string& tag, const std::string& type, const float& value); // type must be A, i, or f + + // edit tag data (sets existing TAG with TYPE to VALUE or adds new TAG if not already present) + // TYPE is one of {A, i, f, Z, H} depending on VALUE - see SAM/BAM spec for details + // returns true if edit was successful, false if error + // @tag - two character tag name + // @type - single character tag type (see SAM/BAM spec for details) + // @value - new value for tag + bool EditTag(const std::string& tag, const std::string& type, const std::string& value); // type must be Z or H + bool EditTag(const std::string& tag, const std::string& type, const uint32_t& value); // type must be A or i + bool EditTag(const std::string& tag, const std::string& type, const int32_t& value); // type must be A or i + bool EditTag(const std::string& tag, const std::string& type, const float& value); // type must be A, i, or f + + // specific tag data access methods - these only remain for legacy support + // returns whether specific tag could be retrieved + bool GetEditDistance(uint32_t& editDistance) const; // get "NM" tag data (equivalent to GetTag("NM", editDistance)) + bool GetReadGroup(std::string& readGroup)
const; // get "RG" tag data (equivalent to GetTag("RG", readGroup)) + + // generic tag data access methods + // returns whether tag is found & tag type is compatible with DESTINATION + // @tag - two character tag name + // @destination - if found, tag value is stored here + bool GetTag(const std::string& tag, std::string& destination) const; // access variable-length char or hex strings + bool GetTag(const std::string& tag, uint32_t& destination) const; // access unsigned integer data + bool GetTag(const std::string& tag, int32_t& destination) const; // access signed integer data + bool GetTag(const std::string& tag, float& destination) const; // access floating point data + + // retrieve the tag type code for TAG + // returns true if tag could be found and type determined + bool GetTagType(const std::string& tag, char& type) const; + + // remove tag data + // returns true if removal was successful, false if error + // N.B. - returns false if TAG does not exist (no removal can occur) + // @tag - two character tag name + bool RemoveTag(const std::string& tag); + + // Additional data access methods + public: + // calculates & returns alignment end position, based on starting position and CIGAR operations + // @usePadded - if true, counts inserted bases. Default is false, so that alignment end position matches the last base's position in reference + // @zeroBased - if true, returns 0-based coordinate; else returns 1-based. Setting this to false is useful when using BAM data along with other, half-open formats.
+ int GetEndPosition(bool usePadded = false, bool zeroBased = true) const; + + // 'internal' utility methods + private: + static bool FindTag(const std::string& tag, char* &pTagData, const unsigned int& tagDataLength, unsigned int& numBytesParsed); + static bool SkipToNextTag(const char storageType, char* &pTagData, unsigned int& numBytesParsed); + + // Data members + public: + std::string Name; // Read name + int32_t Length; // Query length + std::string QueryBases; // 'Original' sequence (as reported from sequencing machine) + std::string AlignedBases; // 'Aligned' sequence (includes any indels, padding, clipping) + std::string Qualities; // FASTQ qualities (ASCII characters, not numeric values) + std::string TagData; // Tag data (accessor methods will pull the requested information out) + int32_t RefID; // ID number for reference sequence + int32_t Position; // Position (0-based) where alignment starts + uint16_t Bin; // Bin in BAM file where this alignment resides + uint16_t MapQuality; // Mapping quality score + uint32_t AlignmentFlag; // Alignment bit-flag - see Is<something>() methods to query this value, SetIs<something>() methods to manipulate + std::vector<CigarOp> CigarData; // CIGAR operations for this alignment + int32_t MateRefID; // ID number for reference sequence where alignment's mate was aligned + int32_t MatePosition; // Position (0-based) where alignment's mate starts + int32_t InsertSize; // Mate-pair insert size + + // Internal data, inaccessible to client code + // but available to BamReaderPrivate & BamWriterPrivate + private: + struct BamAlignmentSupportData { + + // data members + std::string AllCharData; + uint32_t BlockLength; + uint32_t NumCigarOperations; + uint32_t QueryNameLength; + uint32_t QuerySequenceLength; + bool HasCoreOnly; + + // constructor + BamAlignmentSupportData(void) + : BlockLength(0) + , NumCigarOperations(0) + , QueryNameLength(0) + , QuerySequenceLength(0) + , HasCoreOnly(false) + { } + }; + BamAlignmentSupportData
SupportData; + friend class Internal::BamReaderPrivate; + friend class Internal::BamWriterPrivate; + + // Alignment flag query constants (distinct power-of-two values) + // Client code should use the Is<something>() / SetIs<something>() methods above instead + private: + enum { PAIRED = 1 + , PROPER_PAIR = 2 + , UNMAPPED = 4 + , MATE_UNMAPPED = 8 + , REVERSE = 16 + , MATE_REVERSE = 32 + , READ_1 = 64 + , READ_2 = 128 + , SECONDARY = 256 + , QC_FAILED = 512 + , DUPLICATE = 1024 + }; +}; + +// convenience typedef(s) +typedef std::vector<BamAlignment> BamAlignmentVector; + +} // namespace BamTools + +#endif // BAMALIGNMENT_H