6
|
// ***************************************************************************
// BamAlignment.h (c) 2009 Derek Barnett
// Marth Lab, Department of Biology, Boston College
// All rights reserved.
// ---------------------------------------------------------------------------
// Last modified: 13 December 2010 (DB)
// ---------------------------------------------------------------------------
// Provides the BamAlignment data structure
// ***************************************************************************
|
|
10
|
|
11 #ifndef BAMALIGNMENT_H
|
|
12 #define BAMALIGNMENT_H
|
|
13
|
|
14 #include <api_global.h>
|
|
15 #include <BamAux.h>
|
|
16 #include <string>
|
|
17 #include <vector>
|
|
18
|
|
19 namespace BamTools {
|
|
20
|
|
// forward declare BamAlignment's friend classes
// (only the names are needed here; BamAlignment grants them friendship below)
namespace Internal {
    class BamReaderPrivate;
    class BamWriterPrivate;
} // namespace Internal
|
|
26
|
|
27 // BamAlignment data structure
|
|
28 // explicitly labeled as 'struct' to indicate that (most of) its fields are public
|
|
29 struct API_EXPORT BamAlignment {
|
|
30
|
|
31 // constructors & destructor
|
|
32 public:
|
|
33 BamAlignment(void);
|
|
34 BamAlignment(const BamAlignment& other);
|
|
35 ~BamAlignment(void);
|
|
36
|
|
37 // Queries against alignment flags
|
|
38 public:
|
|
39 bool IsDuplicate(void) const; // Returns true if this read is a PCR duplicate
|
|
40 bool IsFailedQC(void) const; // Returns true if this read failed quality control
|
|
41 bool IsFirstMate(void) const; // Returns true if alignment is first mate on read
|
|
42 bool IsMapped(void) const; // Returns true if alignment is mapped
|
|
43 bool IsMateMapped(void) const; // Returns true if alignment's mate is mapped
|
|
44 bool IsMateReverseStrand(void) const; // Returns true if alignment's mate mapped to reverse strand
|
|
45 bool IsPaired(void) const; // Returns true if alignment part of paired-end read
|
|
46 bool IsPrimaryAlignment(void) const; // Returns true if reported position is primary alignment
|
|
47 bool IsProperPair(void) const; // Returns true if alignment is part of read that satisfied paired-end resolution
|
|
48 bool IsReverseStrand(void) const; // Returns true if alignment mapped to reverse strand
|
|
49 bool IsSecondMate(void) const; // Returns true if alignment is second mate on read
|
|
50
|
|
51 // Manipulate alignment flags
|
|
52 public:
|
|
53 void SetIsDuplicate(bool ok); // Sets "PCR duplicate" flag
|
|
54 void SetIsFailedQC(bool ok); // Sets "failed quality control" flag
|
|
55 void SetIsFirstMate(bool ok); // Sets "alignment is first mate" flag
|
|
56 void SetIsMapped(bool ok); // Sets "alignment is mapped" flag
|
|
57 void SetIsMateMapped(bool ok); // Sets "alignment's mate is mapped" flag
|
|
58 void SetIsMateReverseStrand(bool ok); // Sets "alignment's mate mapped to reverse strand" flag
|
|
59 void SetIsPaired(bool ok); // Sets "alignment part of paired-end read" flag
|
|
60 void SetIsPrimaryAlignment(bool ok); // Sets "position is primary alignment" flag
|
|
61 void SetIsProperPair(bool ok); // Sets "alignment is part of read that satisfied paired-end resolution" flag
|
|
62 void SetIsReverseStrand(bool ok); // Sets "alignment mapped to reverse strand" flag
|
|
63 void SetIsSecondMate(bool ok); // Sets "alignment is second mate on read" flag
|
|
64
|
|
65 // legacy methods (deprecated, but available)
|
|
66 void SetIsMateUnmapped(bool ok); // Complement of IsMateMapped() flag
|
|
67 void SetIsSecondaryAlignment(bool ok); // Complement of IsPrimaryAlignment() flag
|
|
68 void SetIsUnmapped(bool ok); // Complement of IsMapped() flag
|
|
69
|
|
70 // Tag data access methods
|
|
71 public:
|
|
72 // -------------------------------------------------------------------------------------
|
|
73 // N.B. - The following tag access methods may not be used on BamAlignments fetched
|
|
74 // using BamReader::GetNextAlignmentCore(). Attempting to use them will not result in
|
|
75 // error message (to keep output clean) but will ALWAYS return false. Only user-created
|
|
76 // BamAlignments or those retrieved using BamReader::GetNextAlignment() are valid here.
|
|
77
|
|
78 // add tag data (create new TAG entry with TYPE and VALUE)
|
|
79 // TYPE is one of {A, i, f, Z, H} depending on VALUE - see SAM/BAM spec for details
|
|
80 // returns true if new data added, false if error or TAG already exists
|
|
81 // N.B. - will NOT modify existing tag. Use EditTag() instead
|
|
82 // @tag - two character tag name
|
|
83 // @type - single character tag type (see SAM/BAM spec for details)
|
|
84 // @value - value to associate with tag
|
|
85 bool AddTag(const std::string& tag, const std::string& type, const std::string& value); // type must be Z or H
|
|
86 bool AddTag(const std::string& tag, const std::string& type, const uint32_t& value); // type must be A or i
|
|
87 bool AddTag(const std::string& tag, const std::string& type, const int32_t& value); // type must be A or i
|
|
88 bool AddTag(const std::string& tag, const std::string& type, const float& value); // type must be A, i, or f
|
|
89
|
|
90 // edit tag data (sets existing TAG with TYPE to VALUE or adds new TAG if not already present)
|
|
91 // TYPE is one of {A, i, f, Z, H} depending on VALUE - see SAM/BAM spec for details
|
|
92 // returns true if edit was successfaul, false if error
|
|
93 // @tag - two character tag name
|
|
94 // @type - single character tag type (see SAM/BAM spec for details)
|
|
95 // @value - new value for tag
|
|
96 bool EditTag(const std::string& tag, const std::string& type, const std::string& value); // type must be Z or H
|
|
97 bool EditTag(const std::string& tag, const std::string& type, const uint32_t& value); // type must be A or i
|
|
98 bool EditTag(const std::string& tag, const std::string& type, const int32_t& value); // type must be A or i
|
|
99 bool EditTag(const std::string& tag, const std::string& type, const float& value); // type must be A, i, or f
|
|
100
|
|
101 // specific tag data access methods - these only remain for legacy support
|
|
102 // returns whether specific tag could be retrieved
|
|
103 bool GetEditDistance(uint32_t& editDistance) const; // get "NM" tag data (equivalent to GetTag("NM", editDistance))
|
|
104 bool GetReadGroup(std::string& readGroup) const; // get "RG" tag data (equivalent to GetTag("RG", readGroup))
|
|
105
|
|
106 // generic tag data access methods
|
|
107 // returns whether tag is found & tag type is compatible with DESTINATION
|
|
108 // @tag - two character tag name
|
|
109 // @destination - if found, tag value is stored here
|
|
110 bool GetTag(const std::string& tag, std::string& destination) const; // access variable-length char or hex strings
|
|
111 bool GetTag(const std::string& tag, uint32_t& destination) const; // access unsigned integer data
|
|
112 bool GetTag(const std::string& tag, int32_t& destination) const; // access signed integer data
|
|
113 bool GetTag(const std::string& tag, float& destination) const; // access floating point data
|
|
114
|
|
115 // retrieve the tag type code for TAG
|
|
116 // returns true if tag could be found and type determined
|
|
117 bool GetTagType(const std::string& tag, char& type) const;
|
|
118
|
|
119 // remove tag data
|
|
120 // returns true if removal was successful, false if error
|
|
121 // N.B. - returns false if TAG does not exist (no removal can occur)
|
|
122 // @tag - two character tag name
|
|
123 bool RemoveTag(const std::string& tag);
|
|
124
|
|
125 // Additional data access methods
|
|
126 public:
|
|
127 // calculates & returns alignment end position, based on starting position and CIGAR operations
|
|
128 // @usePadded - if true, counts inserted bases. Default is false, so that alignment end position matches the last base's position in reference
|
|
129 // @zeroBased - if true, returns 0-based coordinate; else returns 1-based. Setting this to false is useful when using BAM data along with other, half-open formats.
|
|
130 int GetEndPosition(bool usePadded = false, bool zeroBased = true) const;
|
|
131
|
|
132 // 'internal' utility methods
|
|
133 private:
|
|
134 static bool FindTag(const std::string& tag, char* &pTagData, const unsigned int& tagDataLength, unsigned int& numBytesParsed);
|
|
135 static bool SkipToNextTag(const char storageType, char* &pTagData, unsigned int& numBytesParsed);
|
|
136
|
|
137 // Data members
|
|
138 public:
|
|
139 std::string Name; // Read name
|
|
140 int32_t Length; // Query length
|
|
141 std::string QueryBases; // 'Original' sequence (as reported from sequencing machine)
|
|
142 std::string AlignedBases; // 'Aligned' sequence (includes any indels, padding, clipping)
|
|
143 std::string Qualities; // FASTQ qualities (ASCII characters, not numeric values)
|
|
144 std::string TagData; // Tag data (accessor methods will pull the requested information out)
|
|
145 int32_t RefID; // ID number for reference sequence
|
|
146 int32_t Position; // Position (0-based) where alignment starts
|
|
147 uint16_t Bin; // Bin in BAM file where this alignment resides
|
|
148 uint16_t MapQuality; // Mapping quality score
|
|
149 uint32_t AlignmentFlag; // Alignment bit-flag - see Is<something>() methods to query this value, SetIs<something>() methods to manipulate
|
|
150 std::vector<CigarOp> CigarData; // CIGAR operations for this alignment
|
|
151 int32_t MateRefID; // ID number for reference sequence where alignment's mate was aligned
|
|
152 int32_t MatePosition; // Position (0-based) where alignment's mate starts
|
|
153 int32_t InsertSize; // Mate-pair insert size
|
|
154
|
|
155 // Internal data, inaccessible to client code
|
|
156 // but available BamReaderPrivate & BamWriterPrivate
|
|
157 private:
|
|
158 struct BamAlignmentSupportData {
|
|
159
|
|
160 // data members
|
|
161 std::string AllCharData;
|
|
162 uint32_t BlockLength;
|
|
163 uint32_t NumCigarOperations;
|
|
164 uint32_t QueryNameLength;
|
|
165 uint32_t QuerySequenceLength;
|
|
166 bool HasCoreOnly;
|
|
167
|
|
168 // constructor
|
|
169 BamAlignmentSupportData(void)
|
|
170 : BlockLength(0)
|
|
171 , NumCigarOperations(0)
|
|
172 , QueryNameLength(0)
|
|
173 , QuerySequenceLength(0)
|
|
174 , HasCoreOnly(false)
|
|
175 { }
|
|
176 };
|
|
177 BamAlignmentSupportData SupportData;
|
|
178 friend class Internal::BamReaderPrivate;
|
|
179 friend class Internal::BamWriterPrivate;
|
|
180
|
|
181 // Alignment flag query constants
|
|
182 // Use the get/set methods above instead
|
|
183 private:
|
|
184 enum { PAIRED = 1
|
|
185 , PROPER_PAIR = 2
|
|
186 , UNMAPPED = 4
|
|
187 , MATE_UNMAPPED = 8
|
|
188 , REVERSE = 16
|
|
189 , MATE_REVERSE = 32
|
|
190 , READ_1 = 64
|
|
191 , READ_2 = 128
|
|
192 , SECONDARY = 256
|
|
193 , QC_FAILED = 512
|
|
194 , DUPLICATE = 1024
|
|
195 };
|
|
196 };
|
|
197
|
|
198 // convenience typedef(s)
|
|
199 typedef std::vector<BamAlignment> BamAlignmentVector;
|
|
200
|
|
201 } // namespace BamTools
|
|
202
|
|
203 #endif // BAMALIGNMENT_H
|