6
|
1 // ***************************************************************************
|
|
2 // BamAlignment.cpp (c) 2009 Derek Barnett
|
|
3 // Marth Lab, Department of Biology, Boston College
|
|
4 // All rights reserved.
|
|
5 // ---------------------------------------------------------------------------
|
|
6 // Last modified: 13 December 2010 (DB)
|
|
7 // ---------------------------------------------------------------------------
|
|
8 // Provides the BamAlignment data structure
|
|
9 // ***************************************************************************
|
|
10
|
|
11 #include <BamAlignment.h>
|
|
12 using namespace BamTools;
|
|
13
|
|
14 #include <cctype>
|
|
15 #include <cstdio>
|
|
16 #include <cstdlib>
|
|
17 #include <cstring>
|
|
18 #include <exception>
|
|
19 #include <map>
|
|
20 #include <utility>
|
|
21 using namespace std;
|
|
22
|
|
23 // default ctor
|
|
24 BamAlignment::BamAlignment(void)
|
|
25 : RefID(-1)
|
|
26 , Position(-1)
|
|
27 , MateRefID(-1)
|
|
28 , MatePosition(-1)
|
|
29 , InsertSize(0)
|
|
30 { }
|
|
31
|
|
32 // copy ctor
|
|
33 BamAlignment::BamAlignment(const BamAlignment& other)
|
|
34 : Name(other.Name)
|
|
35 , Length(other.Length)
|
|
36 , QueryBases(other.QueryBases)
|
|
37 , AlignedBases(other.AlignedBases)
|
|
38 , Qualities(other.Qualities)
|
|
39 , TagData(other.TagData)
|
|
40 , RefID(other.RefID)
|
|
41 , Position(other.Position)
|
|
42 , Bin(other.Bin)
|
|
43 , MapQuality(other.MapQuality)
|
|
44 , AlignmentFlag(other.AlignmentFlag)
|
|
45 , CigarData(other.CigarData)
|
|
46 , MateRefID(other.MateRefID)
|
|
47 , MatePosition(other.MatePosition)
|
|
48 , InsertSize(other.InsertSize)
|
|
49 , SupportData(other.SupportData)
|
|
50 { }
|
|
51
|
|
52 // dtor
|
|
53 BamAlignment::~BamAlignment(void) { }
|
|
54
|
|
55 // Queries against alignment flags
|
|
56 bool BamAlignment::IsDuplicate(void) const { return ( (AlignmentFlag & DUPLICATE) != 0 ); }
|
|
57 bool BamAlignment::IsFailedQC(void) const { return ( (AlignmentFlag & QC_FAILED) != 0 ); }
|
|
58 bool BamAlignment::IsFirstMate(void) const { return ( (AlignmentFlag & READ_1) != 0 ); }
|
|
59 bool BamAlignment::IsMapped(void) const { return ( (AlignmentFlag & UNMAPPED) == 0 ); }
|
|
60 bool BamAlignment::IsMateMapped(void) const { return ( (AlignmentFlag & MATE_UNMAPPED) == 0 ); }
|
|
61 bool BamAlignment::IsMateReverseStrand(void) const { return ( (AlignmentFlag & MATE_REVERSE) != 0 ); }
|
|
62 bool BamAlignment::IsPaired(void) const { return ( (AlignmentFlag & PAIRED) != 0 ); }
|
|
63 bool BamAlignment::IsPrimaryAlignment(void) const { return ( (AlignmentFlag & SECONDARY) == 0 ); }
|
|
64 bool BamAlignment::IsProperPair(void) const { return ( (AlignmentFlag & PROPER_PAIR) != 0 ); }
|
|
65 bool BamAlignment::IsReverseStrand(void) const { return ( (AlignmentFlag & REVERSE) != 0 ); }
|
|
66 bool BamAlignment::IsSecondMate(void) const { return ( (AlignmentFlag & READ_2) != 0 ); }
|
|
67
|
|
68 // Manipulate alignment flags
|
|
69 void BamAlignment::SetIsDuplicate(bool ok) { if (ok) AlignmentFlag |= DUPLICATE; else AlignmentFlag &= ~DUPLICATE; }
|
|
70 void BamAlignment::SetIsFailedQC(bool ok) { if (ok) AlignmentFlag |= QC_FAILED; else AlignmentFlag &= ~QC_FAILED; }
|
|
71 void BamAlignment::SetIsFirstMate(bool ok) { if (ok) AlignmentFlag |= READ_1; else AlignmentFlag &= ~READ_1; }
|
|
72 void BamAlignment::SetIsMapped(bool ok) { SetIsUnmapped(!ok); }
|
|
73 void BamAlignment::SetIsMateMapped(bool ok) { SetIsMateUnmapped(!ok); }
|
|
74 void BamAlignment::SetIsMateUnmapped(bool ok) { if (ok) AlignmentFlag |= MATE_UNMAPPED; else AlignmentFlag &= ~MATE_UNMAPPED; }
|
|
75 void BamAlignment::SetIsMateReverseStrand(bool ok) { if (ok) AlignmentFlag |= MATE_REVERSE; else AlignmentFlag &= ~MATE_REVERSE; }
|
|
76 void BamAlignment::SetIsPaired(bool ok) { if (ok) AlignmentFlag |= PAIRED; else AlignmentFlag &= ~PAIRED; }
|
|
77 void BamAlignment::SetIsPrimaryAlignment(bool ok) { SetIsSecondaryAlignment(!ok); }
|
|
78 void BamAlignment::SetIsProperPair(bool ok) { if (ok) AlignmentFlag |= PROPER_PAIR; else AlignmentFlag &= ~PROPER_PAIR; }
|
|
79 void BamAlignment::SetIsReverseStrand(bool ok) { if (ok) AlignmentFlag |= REVERSE; else AlignmentFlag &= ~REVERSE; }
|
|
80 void BamAlignment::SetIsSecondaryAlignment(bool ok) { if (ok) AlignmentFlag |= SECONDARY; else AlignmentFlag &= ~SECONDARY; }
|
|
81 void BamAlignment::SetIsSecondMate(bool ok) { if (ok) AlignmentFlag |= READ_2; else AlignmentFlag &= ~READ_2; }
|
|
82 void BamAlignment::SetIsUnmapped(bool ok) { if (ok) AlignmentFlag |= UNMAPPED; else AlignmentFlag &= ~UNMAPPED; }
|
|
83
|
|
84 // calculates alignment end position, based on starting position and CIGAR operations
|
|
85 int BamAlignment::GetEndPosition(bool usePadded, bool zeroBased) const {
|
|
86
|
|
87 // initialize alignment end to starting position
|
|
88 int alignEnd = Position;
|
|
89
|
|
90 // iterate over cigar operations
|
|
91 vector<CigarOp>::const_iterator cigarIter = CigarData.begin();
|
|
92 vector<CigarOp>::const_iterator cigarEnd = CigarData.end();
|
|
93 for ( ; cigarIter != cigarEnd; ++cigarIter) {
|
|
94 const char cigarType = (*cigarIter).Type;
|
|
95 if ( cigarType == 'M' || cigarType == 'D' || cigarType == 'N' )
|
|
96 alignEnd += (*cigarIter).Length;
|
|
97 else if ( usePadded && cigarType == 'I' )
|
|
98 alignEnd += (*cigarIter).Length;
|
|
99 }
|
|
100
|
|
101 // adjust for zeroBased, if necessary
|
|
102 if (zeroBased)
|
|
103 return alignEnd - 1;
|
|
104 else
|
|
105 return alignEnd;
|
|
106 }
|
|
107
|
|
108 bool BamAlignment::AddTag(const string& tag, const string& type, const string& value) {
|
|
109
|
|
110 if ( SupportData.HasCoreOnly ) return false;
|
|
111 if ( tag.size() != 2 || type.size() != 1 ) return false;
|
|
112 if ( type != "Z" && type != "H" ) return false;
|
|
113
|
|
114 // localize the tag data
|
|
115 char* pTagData = (char*)TagData.data();
|
|
116 const unsigned int tagDataLength = TagData.size();
|
|
117 unsigned int numBytesParsed = 0;
|
|
118
|
|
119 // if tag already exists, return false
|
|
120 // use EditTag explicitly instead
|
|
121 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) return false;
|
|
122
|
|
123 // otherwise, copy tag data to temp buffer
|
|
124 string newTag = tag + type + value;
|
|
125 const int newTagDataLength = tagDataLength + newTag.size() + 1; // leave room for null-term
|
|
126 char originalTagData[newTagDataLength];
|
|
127 memcpy(originalTagData, TagData.c_str(), tagDataLength + 1); // '+1' for TagData null-term
|
|
128
|
|
129 // append newTag
|
|
130 strcat(originalTagData + tagDataLength, newTag.data()); // removes original null-term, appends newTag + null-term
|
|
131
|
|
132 // store temp buffer back in TagData
|
|
133 const char* newTagData = (const char*)originalTagData;
|
|
134 TagData.assign(newTagData, newTagDataLength);
|
|
135
|
|
136 // return success
|
|
137 return true;
|
|
138 }
|
|
139
|
|
140 bool BamAlignment::AddTag(const string& tag, const string& type, const uint32_t& value) {
|
|
141
|
|
142 if ( SupportData.HasCoreOnly ) return false;
|
|
143 if ( tag.size() != 2 || type.size() != 1 ) return false;
|
|
144 if ( type == "f" || type == "Z" || type == "H" ) return false;
|
|
145
|
|
146 // localize the tag data
|
|
147 char* pTagData = (char*)TagData.data();
|
|
148 const unsigned int tagDataLength = TagData.size();
|
|
149 unsigned int numBytesParsed = 0;
|
|
150
|
|
151 // if tag already exists, return false
|
|
152 // use EditTag explicitly instead
|
|
153 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) return false;
|
|
154
|
|
155 // otherwise, convert value to string
|
|
156 union { unsigned int value; char valueBuffer[sizeof(unsigned int)]; } un;
|
|
157 un.value = value;
|
|
158
|
|
159 // copy original tag data to temp buffer
|
|
160 string newTag = tag + type;
|
|
161 const int newTagDataLength = tagDataLength + newTag.size() + 4; // leave room for new integer
|
|
162 char originalTagData[newTagDataLength];
|
|
163 memcpy(originalTagData, TagData.c_str(), tagDataLength + 1); // '+1' for TagData null-term
|
|
164
|
|
165 // append newTag
|
|
166 strcat(originalTagData + tagDataLength, newTag.data());
|
|
167 memcpy(originalTagData + tagDataLength + newTag.size(), un.valueBuffer, sizeof(unsigned int));
|
|
168
|
|
169 // store temp buffer back in TagData
|
|
170 const char* newTagData = (const char*)originalTagData;
|
|
171 TagData.assign(newTagData, newTagDataLength);
|
|
172
|
|
173 // return success
|
|
174 return true;
|
|
175 }
|
|
176
|
|
177 bool BamAlignment::AddTag(const string& tag, const string& type, const int32_t& value) {
|
|
178 return AddTag(tag, type, (const uint32_t&)value);
|
|
179 }
|
|
180
|
|
181 bool BamAlignment::AddTag(const string& tag, const string& type, const float& value) {
|
|
182
|
|
183 if ( SupportData.HasCoreOnly ) return false;
|
|
184 if ( tag.size() != 2 || type.size() != 1 ) return false;
|
|
185 if ( type == "Z" || type == "H" ) return false;
|
|
186
|
|
187 // localize the tag data
|
|
188 char* pTagData = (char*)TagData.data();
|
|
189 const unsigned int tagDataLength = TagData.size();
|
|
190 unsigned int numBytesParsed = 0;
|
|
191
|
|
192 // if tag already exists, return false
|
|
193 // use EditTag explicitly instead
|
|
194 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) return false;
|
|
195
|
|
196 // otherwise, convert value to string
|
|
197 union { float value; char valueBuffer[sizeof(float)]; } un;
|
|
198 un.value = value;
|
|
199
|
|
200 // copy original tag data to temp buffer
|
|
201 string newTag = tag + type;
|
|
202 const int newTagDataLength = tagDataLength + newTag.size() + 4; // leave room for new float
|
|
203 char originalTagData[newTagDataLength];
|
|
204 memcpy(originalTagData, TagData.c_str(), tagDataLength + 1); // '+1' for TagData null-term
|
|
205
|
|
206 // append newTag
|
|
207 strcat(originalTagData + tagDataLength, newTag.data());
|
|
208 memcpy(originalTagData + tagDataLength + newTag.size(), un.valueBuffer, sizeof(float));
|
|
209
|
|
210 // store temp buffer back in TagData
|
|
211 const char* newTagData = (const char*)originalTagData;
|
|
212 TagData.assign(newTagData, newTagDataLength);
|
|
213
|
|
214 // return success
|
|
215 return true;
|
|
216 }
|
|
217
|
|
218 bool BamAlignment::EditTag(const string& tag, const string& type, const string& value) {
|
|
219
|
|
220 if ( SupportData.HasCoreOnly ) return false;
|
|
221 if ( tag.size() != 2 || type.size() != 1 ) return false;
|
|
222 if ( type != "Z" && type != "H" ) return false;
|
|
223
|
|
224 // localize the tag data
|
|
225 char* pOriginalTagData = (char*)TagData.data();
|
|
226 char* pTagData = pOriginalTagData;
|
|
227 const unsigned int originalTagDataLength = TagData.size();
|
|
228
|
|
229 unsigned int newTagDataLength = 0;
|
|
230 unsigned int numBytesParsed = 0;
|
|
231
|
|
232 // if tag found, store data in readGroup, return success
|
|
233 if ( FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) ) {
|
|
234
|
|
235 // make sure array is more than big enough
|
|
236 char newTagData[originalTagDataLength + value.size()];
|
|
237
|
|
238 // copy original tag data up til desired tag
|
|
239 const unsigned int beginningTagDataLength = numBytesParsed;
|
|
240 newTagDataLength += beginningTagDataLength;
|
|
241 memcpy(newTagData, pOriginalTagData, numBytesParsed);
|
|
242
|
|
243 // copy new VALUE in place of current tag data
|
|
244 const unsigned int dataLength = strlen(value.c_str());
|
|
245 memcpy(newTagData + beginningTagDataLength, (char*)value.c_str(), dataLength+1 );
|
|
246
|
|
247 // skip to next tag (if tag for removal is last, return true)
|
|
248 const char* pTagStorageType = pTagData - 1;
|
|
249 if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) return true;
|
|
250
|
|
251 // copy everything from current tag (the next one after tag for removal) to end
|
|
252 const unsigned int skippedDataLength = (numBytesParsed - beginningTagDataLength);
|
|
253 const unsigned int endTagOffset = beginningTagDataLength + dataLength + 1;
|
|
254 const unsigned int endTagDataLength = originalTagDataLength - beginningTagDataLength - skippedDataLength;
|
|
255 memcpy(newTagData + endTagOffset, pTagData, endTagDataLength);
|
|
256
|
|
257 // ensure null-terminator
|
|
258 newTagData[ endTagOffset + endTagDataLength + 1 ] = 0;
|
|
259
|
|
260 // save new tag data
|
|
261 TagData.assign(newTagData, endTagOffset + endTagDataLength);
|
|
262 return true;
|
|
263 }
|
|
264
|
|
265 // tag not found, attempt AddTag
|
|
266 else return AddTag(tag, type, value);
|
|
267 }
|
|
268
|
|
269 bool BamAlignment::EditTag(const string& tag, const string& type, const uint32_t& value) {
|
|
270
|
|
271 if ( SupportData.HasCoreOnly ) return false;
|
|
272 if ( tag.size() != 2 || type.size() != 1 ) return false;
|
|
273 if ( type == "f" || type == "Z" || type == "H" ) return false;
|
|
274
|
|
275 // localize the tag data
|
|
276 char* pOriginalTagData = (char*)TagData.data();
|
|
277 char* pTagData = pOriginalTagData;
|
|
278 const unsigned int originalTagDataLength = TagData.size();
|
|
279
|
|
280 unsigned int newTagDataLength = 0;
|
|
281 unsigned int numBytesParsed = 0;
|
|
282
|
|
283 // if tag found, store data in readGroup, return success
|
|
284 if ( FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) ) {
|
|
285
|
|
286 // make sure array is more than big enough
|
|
287 char newTagData[originalTagDataLength + sizeof(value)];
|
|
288
|
|
289 // copy original tag data up til desired tag
|
|
290 const unsigned int beginningTagDataLength = numBytesParsed;
|
|
291 newTagDataLength += beginningTagDataLength;
|
|
292 memcpy(newTagData, pOriginalTagData, numBytesParsed);
|
|
293
|
|
294 // copy new VALUE in place of current tag data
|
|
295 union { unsigned int value; char valueBuffer[sizeof(unsigned int)]; } un;
|
|
296 un.value = value;
|
|
297 memcpy(newTagData + beginningTagDataLength, un.valueBuffer, sizeof(unsigned int));
|
|
298
|
|
299 // skip to next tag (if tag for removal is last, return true)
|
|
300 const char* pTagStorageType = pTagData - 1;
|
|
301 if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) return true;
|
|
302
|
|
303 // copy everything from current tag (the next one after tag for removal) to end
|
|
304 const unsigned int skippedDataLength = (numBytesParsed - beginningTagDataLength);
|
|
305 const unsigned int endTagOffset = beginningTagDataLength + sizeof(unsigned int);
|
|
306 const unsigned int endTagDataLength = originalTagDataLength - beginningTagDataLength - skippedDataLength;
|
|
307 memcpy(newTagData + endTagOffset, pTagData, endTagDataLength);
|
|
308
|
|
309 // ensure null-terminator
|
|
310 newTagData[ endTagOffset + endTagDataLength + 1 ] = 0;
|
|
311
|
|
312 // save new tag data
|
|
313 TagData.assign(newTagData, endTagOffset + endTagDataLength);
|
|
314 return true;
|
|
315 }
|
|
316
|
|
317 // tag not found, attempt AddTag
|
|
318 else return AddTag(tag, type, value);
|
|
319 }
|
|
320
|
|
321 bool BamAlignment::EditTag(const string& tag, const string& type, const int32_t& value) {
|
|
322 return EditTag(tag, type, (const uint32_t&)value);
|
|
323 }
|
|
324
|
|
325 bool BamAlignment::EditTag(const string& tag, const string& type, const float& value) {
|
|
326
|
|
327 if ( SupportData.HasCoreOnly ) return false;
|
|
328 if ( tag.size() != 2 || type.size() != 1 ) return false;
|
|
329 if ( type == "Z" || type == "H" ) return false;
|
|
330
|
|
331 // localize the tag data
|
|
332 char* pOriginalTagData = (char*)TagData.data();
|
|
333 char* pTagData = pOriginalTagData;
|
|
334 const unsigned int originalTagDataLength = TagData.size();
|
|
335
|
|
336 unsigned int newTagDataLength = 0;
|
|
337 unsigned int numBytesParsed = 0;
|
|
338
|
|
339 // if tag found, store data in readGroup, return success
|
|
340 if ( FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) ) {
|
|
341
|
|
342 // make sure array is more than big enough
|
|
343 char newTagData[originalTagDataLength + sizeof(value)];
|
|
344
|
|
345 // copy original tag data up til desired tag
|
|
346 const unsigned int beginningTagDataLength = numBytesParsed;
|
|
347 newTagDataLength += beginningTagDataLength;
|
|
348 memcpy(newTagData, pOriginalTagData, numBytesParsed);
|
|
349
|
|
350 // copy new VALUE in place of current tag data
|
|
351 union { float value; char valueBuffer[sizeof(float)]; } un;
|
|
352 un.value = value;
|
|
353 memcpy(newTagData + beginningTagDataLength, un.valueBuffer, sizeof(float));
|
|
354
|
|
355 // skip to next tag (if tag for removal is last, return true)
|
|
356 const char* pTagStorageType = pTagData - 1;
|
|
357 if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) return true;
|
|
358
|
|
359 // copy everything from current tag (the next one after tag for removal) to end
|
|
360 const unsigned int skippedDataLength = (numBytesParsed - beginningTagDataLength);
|
|
361 const unsigned int endTagOffset = beginningTagDataLength + sizeof(float);
|
|
362 const unsigned int endTagDataLength = originalTagDataLength - beginningTagDataLength - skippedDataLength;
|
|
363 memcpy(newTagData + endTagOffset, pTagData, endTagDataLength);
|
|
364
|
|
365 // ensure null-terminator
|
|
366 newTagData[ endTagOffset + endTagDataLength + 1 ] = 0;
|
|
367
|
|
368 // save new tag data
|
|
369 TagData.assign(newTagData, endTagOffset + endTagDataLength);
|
|
370 return true;
|
|
371 }
|
|
372
|
|
373 // tag not found, attempt AddTag
|
|
374 else return AddTag(tag, type, value);
|
|
375 }
|
|
376
|
|
377 // get "NM" tag data - originally contributed by Aaron Quinlan
|
|
378 // stores data in 'editDistance', returns success/fail
|
|
379 bool BamAlignment::GetEditDistance(uint32_t& editDistance) const {
|
|
380 return GetTag("NM", (uint32_t&)editDistance);
|
|
381 }
|
|
382
|
|
383 // get "RG" tag data
|
|
384 // stores data in 'readGroup', returns success/fail
|
|
385 bool BamAlignment::GetReadGroup(string& readGroup) const {
|
|
386 return GetTag("RG", readGroup);
|
|
387 }
|
|
388
|
|
389 bool BamAlignment::GetTag(const string& tag, string& destination) const {
|
|
390
|
|
391 // make sure tag data exists
|
|
392 if ( SupportData.HasCoreOnly || TagData.empty() )
|
|
393 return false;
|
|
394
|
|
395 // localize the tag data
|
|
396 char* pTagData = (char*)TagData.data();
|
|
397 const unsigned int tagDataLength = TagData.size();
|
|
398 unsigned int numBytesParsed = 0;
|
|
399
|
|
400 // if tag found, store data in readGroup, return success
|
|
401 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {
|
|
402 const unsigned int dataLength = strlen(pTagData);
|
|
403 destination.clear();
|
|
404 destination.resize(dataLength);
|
|
405 memcpy( (char*)destination.data(), pTagData, dataLength );
|
|
406 return true;
|
|
407 }
|
|
408
|
|
409 // tag not found, return failure
|
|
410 return false;
|
|
411 }
|
|
412
|
|
413 bool BamAlignment::GetTag(const string& tag, uint32_t& destination) const {
|
|
414
|
|
415 // make sure tag data exists
|
|
416 if ( SupportData.HasCoreOnly || TagData.empty() )
|
|
417 return false;
|
|
418
|
|
419 // localize the tag data
|
|
420 char* pTagData = (char*)TagData.data();
|
|
421 const unsigned int tagDataLength = TagData.size();
|
|
422 unsigned int numBytesParsed = 0;
|
|
423
|
|
424 // if tag found, determine data byte-length, store data in readGroup, return success
|
|
425 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {
|
|
426
|
|
427 // determine data byte-length
|
|
428 const char type = *(pTagData - 1);
|
|
429 int destinationLength = 0;
|
|
430 switch (type) {
|
|
431
|
|
432 // 1 byte data
|
|
433 case 'A':
|
|
434 case 'c':
|
|
435 case 'C':
|
|
436 destinationLength = 1;
|
|
437 break;
|
|
438
|
|
439 // 2 byte data
|
|
440 case 's':
|
|
441 case 'S':
|
|
442 destinationLength = 2;
|
|
443 break;
|
|
444
|
|
445 // 4 byte data
|
|
446 case 'i':
|
|
447 case 'I':
|
|
448 destinationLength = 4;
|
|
449 break;
|
|
450
|
|
451 // unsupported type for integer destination (float or var-length strings)
|
|
452 case 'f':
|
|
453 case 'Z':
|
|
454 case 'H':
|
|
455 fprintf(stderr, "ERROR: Cannot store tag of type %c in integer destination\n", type);
|
|
456 return false;
|
|
457
|
|
458 // unknown tag type
|
|
459 default:
|
|
460 fprintf(stderr, "ERROR: Unknown tag storage class encountered: [%c]\n", type);
|
|
461 return false;
|
|
462 }
|
|
463
|
|
464 // store in destination
|
|
465 destination = 0;
|
|
466 memcpy(&destination, pTagData, destinationLength);
|
|
467 return true;
|
|
468 }
|
|
469
|
|
470 // tag not found, return failure
|
|
471 return false;
|
|
472 }
|
|
473
|
|
474 bool BamAlignment::GetTag(const string& tag, int32_t& destination) const {
|
|
475 return GetTag(tag, (uint32_t&)destination);
|
|
476 }
|
|
477
|
|
478 bool BamAlignment::GetTag(const string& tag, float& destination) const {
|
|
479
|
|
480 // make sure tag data exists
|
|
481 if ( SupportData.HasCoreOnly || TagData.empty() )
|
|
482 return false;
|
|
483
|
|
484 // localize the tag data
|
|
485 char* pTagData = (char*)TagData.data();
|
|
486 const unsigned int tagDataLength = TagData.size();
|
|
487 unsigned int numBytesParsed = 0;
|
|
488
|
|
489 // if tag found, determine data byte-length, store data in readGroup, return success
|
|
490 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {
|
|
491
|
|
492 // determine data byte-length
|
|
493 const char type = *(pTagData - 1);
|
|
494 int destinationLength = 0;
|
|
495 switch(type) {
|
|
496
|
|
497 // 1 byte data
|
|
498 case 'A':
|
|
499 case 'c':
|
|
500 case 'C':
|
|
501 destinationLength = 1;
|
|
502 break;
|
|
503
|
|
504 // 2 byte data
|
|
505 case 's':
|
|
506 case 'S':
|
|
507 destinationLength = 2;
|
|
508 break;
|
|
509
|
|
510 // 4 byte data
|
|
511 case 'f':
|
|
512 case 'i':
|
|
513 case 'I':
|
|
514 destinationLength = 4;
|
|
515 break;
|
|
516
|
|
517 // unsupported type (var-length strings)
|
|
518 case 'Z':
|
|
519 case 'H':
|
|
520 fprintf(stderr, "ERROR: Cannot store tag of type %c in integer destination\n", type);
|
|
521 return false;
|
|
522
|
|
523 // unknown tag type
|
|
524 default:
|
|
525 fprintf(stderr, "ERROR: Unknown tag storage class encountered: [%c]\n", type);
|
|
526 return false;
|
|
527 }
|
|
528
|
|
529 // store in destination
|
|
530 destination = 0.0;
|
|
531 memcpy(&destination, pTagData, destinationLength);
|
|
532 return true;
|
|
533 }
|
|
534
|
|
535 // tag not found, return failure
|
|
536 return false;
|
|
537 }
|
|
538
|
|
539 bool BamAlignment::GetTagType(const string& tag, char& type) const {
|
|
540
|
|
541 // make sure tag data exists
|
|
542 if ( SupportData.HasCoreOnly || TagData.empty() )
|
|
543 return false;
|
|
544
|
|
545 // localize the tag data
|
|
546 char* pTagData = (char*)TagData.data();
|
|
547 const unsigned int tagDataLength = TagData.size();
|
|
548 unsigned int numBytesParsed = 0;
|
|
549
|
|
550 // lookup tag
|
|
551 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {
|
|
552
|
|
553 // retrieve tag type code
|
|
554 type = *(pTagData - 1);
|
|
555
|
|
556 // validate that type is a proper BAM tag type
|
|
557 switch(type) {
|
|
558 case 'A':
|
|
559 case 'c':
|
|
560 case 'C':
|
|
561 case 's':
|
|
562 case 'S':
|
|
563 case 'f':
|
|
564 case 'i':
|
|
565 case 'I':
|
|
566 case 'Z':
|
|
567 case 'H':
|
|
568 return true;
|
|
569
|
|
570 // unknown tag type
|
|
571 default:
|
|
572 fprintf(stderr, "ERROR: Unknown tag storage class encountered: [%c]\n", type);
|
|
573 return false;
|
|
574 }
|
|
575 }
|
|
576
|
|
577 // tag not found, return failure
|
|
578 return false;
|
|
579 }
|
|
580
|
|
581 bool BamAlignment::RemoveTag(const string& tag) {
|
|
582
|
|
583 // BamAlignments fetched using BamReader::GetNextAlignmentCore() are not allowed
|
|
584 // also, return false if no data present to remove
|
|
585 if ( SupportData.HasCoreOnly || TagData.empty() ) return false;
|
|
586
|
|
587 // localize the tag data
|
|
588 char* pOriginalTagData = (char*)TagData.data();
|
|
589 char* pTagData = pOriginalTagData;
|
|
590 const unsigned int originalTagDataLength = TagData.size();
|
|
591 unsigned int newTagDataLength = 0;
|
|
592 unsigned int numBytesParsed = 0;
|
|
593
|
|
594 // if tag found, store data in readGroup, return success
|
|
595 if ( FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) ) {
|
|
596
|
|
597 char newTagData[originalTagDataLength];
|
|
598
|
|
599 // copy original tag data up til desired tag
|
|
600 pTagData -= 3;
|
|
601 numBytesParsed -= 3;
|
|
602 const unsigned int beginningTagDataLength = numBytesParsed;
|
|
603 newTagDataLength += beginningTagDataLength;
|
|
604 memcpy(newTagData, pOriginalTagData, numBytesParsed);
|
|
605
|
|
606 // skip to next tag (if tag for removal is last, return true)
|
|
607 const char* pTagStorageType = pTagData + 2;
|
|
608 pTagData += 3;
|
|
609 numBytesParsed += 3;
|
|
610 if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) return true;
|
|
611
|
|
612 // copy everything from current tag (the next one after tag for removal) to end
|
|
613 const unsigned int skippedDataLength = (numBytesParsed - beginningTagDataLength);
|
|
614 const unsigned int endTagDataLength = originalTagDataLength - beginningTagDataLength - skippedDataLength;
|
|
615 memcpy(newTagData + beginningTagDataLength, pTagData, endTagDataLength );
|
|
616
|
|
617 // save new tag data
|
|
618 TagData.assign(newTagData, beginningTagDataLength + endTagDataLength);
|
|
619 return true;
|
|
620 }
|
|
621
|
|
622 // tag not found, no removal - return failure
|
|
623 return false;
|
|
624 }
|
|
625
|
|
626 bool BamAlignment::FindTag(const string& tag,
|
|
627 char* &pTagData,
|
|
628 const unsigned int& tagDataLength,
|
|
629 unsigned int& numBytesParsed)
|
|
630 {
|
|
631
|
|
632 while ( numBytesParsed < tagDataLength ) {
|
|
633
|
|
634 const char* pTagType = pTagData;
|
|
635 const char* pTagStorageType = pTagData + 2;
|
|
636 pTagData += 3;
|
|
637 numBytesParsed += 3;
|
|
638
|
|
639 // check the current tag, return true on match
|
|
640 if ( strncmp(pTagType, tag.c_str(), 2) == 0 )
|
|
641 return true;
|
|
642
|
|
643 // get the storage class and find the next tag
|
|
644 if ( *pTagStorageType == '\0' ) return false;
|
|
645 if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) return false;
|
|
646 if ( *pTagData == '\0' ) return false;
|
|
647 }
|
|
648
|
|
649 // checked all tags, none match
|
|
650 return false;
|
|
651 }
|
|
652
|
|
653 bool BamAlignment::SkipToNextTag(const char storageType, char* &pTagData, unsigned int& numBytesParsed) {
|
|
654
|
|
655 switch(storageType) {
|
|
656
|
|
657 case 'A':
|
|
658 case 'c':
|
|
659 case 'C':
|
|
660 ++numBytesParsed;
|
|
661 ++pTagData;
|
|
662 break;
|
|
663
|
|
664 case 's':
|
|
665 case 'S':
|
|
666 numBytesParsed += 2;
|
|
667 pTagData += 2;
|
|
668 break;
|
|
669
|
|
670 case 'f':
|
|
671 case 'i':
|
|
672 case 'I':
|
|
673 numBytesParsed += 4;
|
|
674 pTagData += 4;
|
|
675 break;
|
|
676
|
|
677 case 'Z':
|
|
678 case 'H':
|
|
679 while(*pTagData) {
|
|
680 ++numBytesParsed;
|
|
681 ++pTagData;
|
|
682 }
|
|
683 // increment for null-terminator
|
|
684 ++numBytesParsed;
|
|
685 ++pTagData;
|
|
686 break;
|
|
687
|
|
688 default:
|
|
689 // error case
|
|
690 fprintf(stderr, "ERROR: Unknown tag storage class encountered: [%c]\n", storageType);
|
|
691 return false;
|
|
692 }
|
|
693
|
|
694 // return success
|
|
695 return true;
|
|
696 }
|