comparison spp/src/BamAlignment.cpp @ 6:ce08b0efa3fd draft

Uploaded
author zzhou
date Tue, 27 Nov 2012 16:11:40 -0500
parents
children
comparison
equal deleted inserted replaced
5:608a8e0eac56 6:ce08b0efa3fd
1 // ***************************************************************************
2 // BamAlignment.cpp (c) 2009 Derek Barnett
3 // Marth Lab, Department of Biology, Boston College
4 // All rights reserved.
5 // ---------------------------------------------------------------------------
6 // Last modified: 13 December 2010 (DB)
7 // ---------------------------------------------------------------------------
8 // Provides the BamAlignment data structure
9 // ***************************************************************************
10
11 #include <BamAlignment.h>
12 using namespace BamTools;
13
14 #include <cctype>
15 #include <cstdio>
16 #include <cstdlib>
17 #include <cstring>
18 #include <exception>
19 #include <map>
20 #include <utility>
21 using namespace std;
22
23 // default ctor
24 BamAlignment::BamAlignment(void)
25 : RefID(-1)
26 , Position(-1)
27 , MateRefID(-1)
28 , MatePosition(-1)
29 , InsertSize(0)
30 { }
31
32 // copy ctor
33 BamAlignment::BamAlignment(const BamAlignment& other)
34 : Name(other.Name)
35 , Length(other.Length)
36 , QueryBases(other.QueryBases)
37 , AlignedBases(other.AlignedBases)
38 , Qualities(other.Qualities)
39 , TagData(other.TagData)
40 , RefID(other.RefID)
41 , Position(other.Position)
42 , Bin(other.Bin)
43 , MapQuality(other.MapQuality)
44 , AlignmentFlag(other.AlignmentFlag)
45 , CigarData(other.CigarData)
46 , MateRefID(other.MateRefID)
47 , MatePosition(other.MatePosition)
48 , InsertSize(other.InsertSize)
49 , SupportData(other.SupportData)
50 { }
51
52 // dtor
53 BamAlignment::~BamAlignment(void) { }
54
55 // Queries against alignment flags
56 bool BamAlignment::IsDuplicate(void) const { return ( (AlignmentFlag & DUPLICATE) != 0 ); }
57 bool BamAlignment::IsFailedQC(void) const { return ( (AlignmentFlag & QC_FAILED) != 0 ); }
58 bool BamAlignment::IsFirstMate(void) const { return ( (AlignmentFlag & READ_1) != 0 ); }
59 bool BamAlignment::IsMapped(void) const { return ( (AlignmentFlag & UNMAPPED) == 0 ); }
60 bool BamAlignment::IsMateMapped(void) const { return ( (AlignmentFlag & MATE_UNMAPPED) == 0 ); }
61 bool BamAlignment::IsMateReverseStrand(void) const { return ( (AlignmentFlag & MATE_REVERSE) != 0 ); }
62 bool BamAlignment::IsPaired(void) const { return ( (AlignmentFlag & PAIRED) != 0 ); }
63 bool BamAlignment::IsPrimaryAlignment(void) const { return ( (AlignmentFlag & SECONDARY) == 0 ); }
64 bool BamAlignment::IsProperPair(void) const { return ( (AlignmentFlag & PROPER_PAIR) != 0 ); }
65 bool BamAlignment::IsReverseStrand(void) const { return ( (AlignmentFlag & REVERSE) != 0 ); }
66 bool BamAlignment::IsSecondMate(void) const { return ( (AlignmentFlag & READ_2) != 0 ); }
67
68 // Manipulate alignment flags
69 void BamAlignment::SetIsDuplicate(bool ok) { if (ok) AlignmentFlag |= DUPLICATE; else AlignmentFlag &= ~DUPLICATE; }
70 void BamAlignment::SetIsFailedQC(bool ok) { if (ok) AlignmentFlag |= QC_FAILED; else AlignmentFlag &= ~QC_FAILED; }
71 void BamAlignment::SetIsFirstMate(bool ok) { if (ok) AlignmentFlag |= READ_1; else AlignmentFlag &= ~READ_1; }
72 void BamAlignment::SetIsMapped(bool ok) { SetIsUnmapped(!ok); }
73 void BamAlignment::SetIsMateMapped(bool ok) { SetIsMateUnmapped(!ok); }
74 void BamAlignment::SetIsMateUnmapped(bool ok) { if (ok) AlignmentFlag |= MATE_UNMAPPED; else AlignmentFlag &= ~MATE_UNMAPPED; }
75 void BamAlignment::SetIsMateReverseStrand(bool ok) { if (ok) AlignmentFlag |= MATE_REVERSE; else AlignmentFlag &= ~MATE_REVERSE; }
76 void BamAlignment::SetIsPaired(bool ok) { if (ok) AlignmentFlag |= PAIRED; else AlignmentFlag &= ~PAIRED; }
77 void BamAlignment::SetIsPrimaryAlignment(bool ok) { SetIsSecondaryAlignment(!ok); }
78 void BamAlignment::SetIsProperPair(bool ok) { if (ok) AlignmentFlag |= PROPER_PAIR; else AlignmentFlag &= ~PROPER_PAIR; }
79 void BamAlignment::SetIsReverseStrand(bool ok) { if (ok) AlignmentFlag |= REVERSE; else AlignmentFlag &= ~REVERSE; }
80 void BamAlignment::SetIsSecondaryAlignment(bool ok) { if (ok) AlignmentFlag |= SECONDARY; else AlignmentFlag &= ~SECONDARY; }
81 void BamAlignment::SetIsSecondMate(bool ok) { if (ok) AlignmentFlag |= READ_2; else AlignmentFlag &= ~READ_2; }
82 void BamAlignment::SetIsUnmapped(bool ok) { if (ok) AlignmentFlag |= UNMAPPED; else AlignmentFlag &= ~UNMAPPED; }
83
84 // calculates alignment end position, based on starting position and CIGAR operations
85 int BamAlignment::GetEndPosition(bool usePadded, bool zeroBased) const {
86
87 // initialize alignment end to starting position
88 int alignEnd = Position;
89
90 // iterate over cigar operations
91 vector<CigarOp>::const_iterator cigarIter = CigarData.begin();
92 vector<CigarOp>::const_iterator cigarEnd = CigarData.end();
93 for ( ; cigarIter != cigarEnd; ++cigarIter) {
94 const char cigarType = (*cigarIter).Type;
95 if ( cigarType == 'M' || cigarType == 'D' || cigarType == 'N' )
96 alignEnd += (*cigarIter).Length;
97 else if ( usePadded && cigarType == 'I' )
98 alignEnd += (*cigarIter).Length;
99 }
100
101 // adjust for zeroBased, if necessary
102 if (zeroBased)
103 return alignEnd - 1;
104 else
105 return alignEnd;
106 }
107
108 bool BamAlignment::AddTag(const string& tag, const string& type, const string& value) {
109
110 if ( SupportData.HasCoreOnly ) return false;
111 if ( tag.size() != 2 || type.size() != 1 ) return false;
112 if ( type != "Z" && type != "H" ) return false;
113
114 // localize the tag data
115 char* pTagData = (char*)TagData.data();
116 const unsigned int tagDataLength = TagData.size();
117 unsigned int numBytesParsed = 0;
118
119 // if tag already exists, return false
120 // use EditTag explicitly instead
121 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) return false;
122
123 // otherwise, copy tag data to temp buffer
124 string newTag = tag + type + value;
125 const int newTagDataLength = tagDataLength + newTag.size() + 1; // leave room for null-term
126 char originalTagData[newTagDataLength];
127 memcpy(originalTagData, TagData.c_str(), tagDataLength + 1); // '+1' for TagData null-term
128
129 // append newTag
130 strcat(originalTagData + tagDataLength, newTag.data()); // removes original null-term, appends newTag + null-term
131
132 // store temp buffer back in TagData
133 const char* newTagData = (const char*)originalTagData;
134 TagData.assign(newTagData, newTagDataLength);
135
136 // return success
137 return true;
138 }
139
140 bool BamAlignment::AddTag(const string& tag, const string& type, const uint32_t& value) {
141
142 if ( SupportData.HasCoreOnly ) return false;
143 if ( tag.size() != 2 || type.size() != 1 ) return false;
144 if ( type == "f" || type == "Z" || type == "H" ) return false;
145
146 // localize the tag data
147 char* pTagData = (char*)TagData.data();
148 const unsigned int tagDataLength = TagData.size();
149 unsigned int numBytesParsed = 0;
150
151 // if tag already exists, return false
152 // use EditTag explicitly instead
153 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) return false;
154
155 // otherwise, convert value to string
156 union { unsigned int value; char valueBuffer[sizeof(unsigned int)]; } un;
157 un.value = value;
158
159 // copy original tag data to temp buffer
160 string newTag = tag + type;
161 const int newTagDataLength = tagDataLength + newTag.size() + 4; // leave room for new integer
162 char originalTagData[newTagDataLength];
163 memcpy(originalTagData, TagData.c_str(), tagDataLength + 1); // '+1' for TagData null-term
164
165 // append newTag
166 strcat(originalTagData + tagDataLength, newTag.data());
167 memcpy(originalTagData + tagDataLength + newTag.size(), un.valueBuffer, sizeof(unsigned int));
168
169 // store temp buffer back in TagData
170 const char* newTagData = (const char*)originalTagData;
171 TagData.assign(newTagData, newTagDataLength);
172
173 // return success
174 return true;
175 }
176
177 bool BamAlignment::AddTag(const string& tag, const string& type, const int32_t& value) {
178 return AddTag(tag, type, (const uint32_t&)value);
179 }
180
181 bool BamAlignment::AddTag(const string& tag, const string& type, const float& value) {
182
183 if ( SupportData.HasCoreOnly ) return false;
184 if ( tag.size() != 2 || type.size() != 1 ) return false;
185 if ( type == "Z" || type == "H" ) return false;
186
187 // localize the tag data
188 char* pTagData = (char*)TagData.data();
189 const unsigned int tagDataLength = TagData.size();
190 unsigned int numBytesParsed = 0;
191
192 // if tag already exists, return false
193 // use EditTag explicitly instead
194 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) return false;
195
196 // otherwise, convert value to string
197 union { float value; char valueBuffer[sizeof(float)]; } un;
198 un.value = value;
199
200 // copy original tag data to temp buffer
201 string newTag = tag + type;
202 const int newTagDataLength = tagDataLength + newTag.size() + 4; // leave room for new float
203 char originalTagData[newTagDataLength];
204 memcpy(originalTagData, TagData.c_str(), tagDataLength + 1); // '+1' for TagData null-term
205
206 // append newTag
207 strcat(originalTagData + tagDataLength, newTag.data());
208 memcpy(originalTagData + tagDataLength + newTag.size(), un.valueBuffer, sizeof(float));
209
210 // store temp buffer back in TagData
211 const char* newTagData = (const char*)originalTagData;
212 TagData.assign(newTagData, newTagDataLength);
213
214 // return success
215 return true;
216 }
217
218 bool BamAlignment::EditTag(const string& tag, const string& type, const string& value) {
219
220 if ( SupportData.HasCoreOnly ) return false;
221 if ( tag.size() != 2 || type.size() != 1 ) return false;
222 if ( type != "Z" && type != "H" ) return false;
223
224 // localize the tag data
225 char* pOriginalTagData = (char*)TagData.data();
226 char* pTagData = pOriginalTagData;
227 const unsigned int originalTagDataLength = TagData.size();
228
229 unsigned int newTagDataLength = 0;
230 unsigned int numBytesParsed = 0;
231
232 // if tag found, store data in readGroup, return success
233 if ( FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) ) {
234
235 // make sure array is more than big enough
236 char newTagData[originalTagDataLength + value.size()];
237
238 // copy original tag data up til desired tag
239 const unsigned int beginningTagDataLength = numBytesParsed;
240 newTagDataLength += beginningTagDataLength;
241 memcpy(newTagData, pOriginalTagData, numBytesParsed);
242
243 // copy new VALUE in place of current tag data
244 const unsigned int dataLength = strlen(value.c_str());
245 memcpy(newTagData + beginningTagDataLength, (char*)value.c_str(), dataLength+1 );
246
247 // skip to next tag (if tag for removal is last, return true)
248 const char* pTagStorageType = pTagData - 1;
249 if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) return true;
250
251 // copy everything from current tag (the next one after tag for removal) to end
252 const unsigned int skippedDataLength = (numBytesParsed - beginningTagDataLength);
253 const unsigned int endTagOffset = beginningTagDataLength + dataLength + 1;
254 const unsigned int endTagDataLength = originalTagDataLength - beginningTagDataLength - skippedDataLength;
255 memcpy(newTagData + endTagOffset, pTagData, endTagDataLength);
256
257 // ensure null-terminator
258 newTagData[ endTagOffset + endTagDataLength + 1 ] = 0;
259
260 // save new tag data
261 TagData.assign(newTagData, endTagOffset + endTagDataLength);
262 return true;
263 }
264
265 // tag not found, attempt AddTag
266 else return AddTag(tag, type, value);
267 }
268
269 bool BamAlignment::EditTag(const string& tag, const string& type, const uint32_t& value) {
270
271 if ( SupportData.HasCoreOnly ) return false;
272 if ( tag.size() != 2 || type.size() != 1 ) return false;
273 if ( type == "f" || type == "Z" || type == "H" ) return false;
274
275 // localize the tag data
276 char* pOriginalTagData = (char*)TagData.data();
277 char* pTagData = pOriginalTagData;
278 const unsigned int originalTagDataLength = TagData.size();
279
280 unsigned int newTagDataLength = 0;
281 unsigned int numBytesParsed = 0;
282
283 // if tag found, store data in readGroup, return success
284 if ( FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) ) {
285
286 // make sure array is more than big enough
287 char newTagData[originalTagDataLength + sizeof(value)];
288
289 // copy original tag data up til desired tag
290 const unsigned int beginningTagDataLength = numBytesParsed;
291 newTagDataLength += beginningTagDataLength;
292 memcpy(newTagData, pOriginalTagData, numBytesParsed);
293
294 // copy new VALUE in place of current tag data
295 union { unsigned int value; char valueBuffer[sizeof(unsigned int)]; } un;
296 un.value = value;
297 memcpy(newTagData + beginningTagDataLength, un.valueBuffer, sizeof(unsigned int));
298
299 // skip to next tag (if tag for removal is last, return true)
300 const char* pTagStorageType = pTagData - 1;
301 if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) return true;
302
303 // copy everything from current tag (the next one after tag for removal) to end
304 const unsigned int skippedDataLength = (numBytesParsed - beginningTagDataLength);
305 const unsigned int endTagOffset = beginningTagDataLength + sizeof(unsigned int);
306 const unsigned int endTagDataLength = originalTagDataLength - beginningTagDataLength - skippedDataLength;
307 memcpy(newTagData + endTagOffset, pTagData, endTagDataLength);
308
309 // ensure null-terminator
310 newTagData[ endTagOffset + endTagDataLength + 1 ] = 0;
311
312 // save new tag data
313 TagData.assign(newTagData, endTagOffset + endTagDataLength);
314 return true;
315 }
316
317 // tag not found, attempt AddTag
318 else return AddTag(tag, type, value);
319 }
320
321 bool BamAlignment::EditTag(const string& tag, const string& type, const int32_t& value) {
322 return EditTag(tag, type, (const uint32_t&)value);
323 }
324
325 bool BamAlignment::EditTag(const string& tag, const string& type, const float& value) {
326
327 if ( SupportData.HasCoreOnly ) return false;
328 if ( tag.size() != 2 || type.size() != 1 ) return false;
329 if ( type == "Z" || type == "H" ) return false;
330
331 // localize the tag data
332 char* pOriginalTagData = (char*)TagData.data();
333 char* pTagData = pOriginalTagData;
334 const unsigned int originalTagDataLength = TagData.size();
335
336 unsigned int newTagDataLength = 0;
337 unsigned int numBytesParsed = 0;
338
339 // if tag found, store data in readGroup, return success
340 if ( FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) ) {
341
342 // make sure array is more than big enough
343 char newTagData[originalTagDataLength + sizeof(value)];
344
345 // copy original tag data up til desired tag
346 const unsigned int beginningTagDataLength = numBytesParsed;
347 newTagDataLength += beginningTagDataLength;
348 memcpy(newTagData, pOriginalTagData, numBytesParsed);
349
350 // copy new VALUE in place of current tag data
351 union { float value; char valueBuffer[sizeof(float)]; } un;
352 un.value = value;
353 memcpy(newTagData + beginningTagDataLength, un.valueBuffer, sizeof(float));
354
355 // skip to next tag (if tag for removal is last, return true)
356 const char* pTagStorageType = pTagData - 1;
357 if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) return true;
358
359 // copy everything from current tag (the next one after tag for removal) to end
360 const unsigned int skippedDataLength = (numBytesParsed - beginningTagDataLength);
361 const unsigned int endTagOffset = beginningTagDataLength + sizeof(float);
362 const unsigned int endTagDataLength = originalTagDataLength - beginningTagDataLength - skippedDataLength;
363 memcpy(newTagData + endTagOffset, pTagData, endTagDataLength);
364
365 // ensure null-terminator
366 newTagData[ endTagOffset + endTagDataLength + 1 ] = 0;
367
368 // save new tag data
369 TagData.assign(newTagData, endTagOffset + endTagDataLength);
370 return true;
371 }
372
373 // tag not found, attempt AddTag
374 else return AddTag(tag, type, value);
375 }
376
377 // get "NM" tag data - originally contributed by Aaron Quinlan
378 // stores data in 'editDistance', returns success/fail
379 bool BamAlignment::GetEditDistance(uint32_t& editDistance) const {
380 return GetTag("NM", (uint32_t&)editDistance);
381 }
382
383 // get "RG" tag data
384 // stores data in 'readGroup', returns success/fail
385 bool BamAlignment::GetReadGroup(string& readGroup) const {
386 return GetTag("RG", readGroup);
387 }
388
389 bool BamAlignment::GetTag(const string& tag, string& destination) const {
390
391 // make sure tag data exists
392 if ( SupportData.HasCoreOnly || TagData.empty() )
393 return false;
394
395 // localize the tag data
396 char* pTagData = (char*)TagData.data();
397 const unsigned int tagDataLength = TagData.size();
398 unsigned int numBytesParsed = 0;
399
400 // if tag found, store data in readGroup, return success
401 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {
402 const unsigned int dataLength = strlen(pTagData);
403 destination.clear();
404 destination.resize(dataLength);
405 memcpy( (char*)destination.data(), pTagData, dataLength );
406 return true;
407 }
408
409 // tag not found, return failure
410 return false;
411 }
412
413 bool BamAlignment::GetTag(const string& tag, uint32_t& destination) const {
414
415 // make sure tag data exists
416 if ( SupportData.HasCoreOnly || TagData.empty() )
417 return false;
418
419 // localize the tag data
420 char* pTagData = (char*)TagData.data();
421 const unsigned int tagDataLength = TagData.size();
422 unsigned int numBytesParsed = 0;
423
424 // if tag found, determine data byte-length, store data in readGroup, return success
425 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {
426
427 // determine data byte-length
428 const char type = *(pTagData - 1);
429 int destinationLength = 0;
430 switch (type) {
431
432 // 1 byte data
433 case 'A':
434 case 'c':
435 case 'C':
436 destinationLength = 1;
437 break;
438
439 // 2 byte data
440 case 's':
441 case 'S':
442 destinationLength = 2;
443 break;
444
445 // 4 byte data
446 case 'i':
447 case 'I':
448 destinationLength = 4;
449 break;
450
451 // unsupported type for integer destination (float or var-length strings)
452 case 'f':
453 case 'Z':
454 case 'H':
455 fprintf(stderr, "ERROR: Cannot store tag of type %c in integer destination\n", type);
456 return false;
457
458 // unknown tag type
459 default:
460 fprintf(stderr, "ERROR: Unknown tag storage class encountered: [%c]\n", type);
461 return false;
462 }
463
464 // store in destination
465 destination = 0;
466 memcpy(&destination, pTagData, destinationLength);
467 return true;
468 }
469
470 // tag not found, return failure
471 return false;
472 }
473
474 bool BamAlignment::GetTag(const string& tag, int32_t& destination) const {
475 return GetTag(tag, (uint32_t&)destination);
476 }
477
478 bool BamAlignment::GetTag(const string& tag, float& destination) const {
479
480 // make sure tag data exists
481 if ( SupportData.HasCoreOnly || TagData.empty() )
482 return false;
483
484 // localize the tag data
485 char* pTagData = (char*)TagData.data();
486 const unsigned int tagDataLength = TagData.size();
487 unsigned int numBytesParsed = 0;
488
489 // if tag found, determine data byte-length, store data in readGroup, return success
490 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {
491
492 // determine data byte-length
493 const char type = *(pTagData - 1);
494 int destinationLength = 0;
495 switch(type) {
496
497 // 1 byte data
498 case 'A':
499 case 'c':
500 case 'C':
501 destinationLength = 1;
502 break;
503
504 // 2 byte data
505 case 's':
506 case 'S':
507 destinationLength = 2;
508 break;
509
510 // 4 byte data
511 case 'f':
512 case 'i':
513 case 'I':
514 destinationLength = 4;
515 break;
516
517 // unsupported type (var-length strings)
518 case 'Z':
519 case 'H':
520 fprintf(stderr, "ERROR: Cannot store tag of type %c in integer destination\n", type);
521 return false;
522
523 // unknown tag type
524 default:
525 fprintf(stderr, "ERROR: Unknown tag storage class encountered: [%c]\n", type);
526 return false;
527 }
528
529 // store in destination
530 destination = 0.0;
531 memcpy(&destination, pTagData, destinationLength);
532 return true;
533 }
534
535 // tag not found, return failure
536 return false;
537 }
538
539 bool BamAlignment::GetTagType(const string& tag, char& type) const {
540
541 // make sure tag data exists
542 if ( SupportData.HasCoreOnly || TagData.empty() )
543 return false;
544
545 // localize the tag data
546 char* pTagData = (char*)TagData.data();
547 const unsigned int tagDataLength = TagData.size();
548 unsigned int numBytesParsed = 0;
549
550 // lookup tag
551 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) {
552
553 // retrieve tag type code
554 type = *(pTagData - 1);
555
556 // validate that type is a proper BAM tag type
557 switch(type) {
558 case 'A':
559 case 'c':
560 case 'C':
561 case 's':
562 case 'S':
563 case 'f':
564 case 'i':
565 case 'I':
566 case 'Z':
567 case 'H':
568 return true;
569
570 // unknown tag type
571 default:
572 fprintf(stderr, "ERROR: Unknown tag storage class encountered: [%c]\n", type);
573 return false;
574 }
575 }
576
577 // tag not found, return failure
578 return false;
579 }
580
581 bool BamAlignment::RemoveTag(const string& tag) {
582
583 // BamAlignments fetched using BamReader::GetNextAlignmentCore() are not allowed
584 // also, return false if no data present to remove
585 if ( SupportData.HasCoreOnly || TagData.empty() ) return false;
586
587 // localize the tag data
588 char* pOriginalTagData = (char*)TagData.data();
589 char* pTagData = pOriginalTagData;
590 const unsigned int originalTagDataLength = TagData.size();
591 unsigned int newTagDataLength = 0;
592 unsigned int numBytesParsed = 0;
593
594 // if tag found, store data in readGroup, return success
595 if ( FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) ) {
596
597 char newTagData[originalTagDataLength];
598
599 // copy original tag data up til desired tag
600 pTagData -= 3;
601 numBytesParsed -= 3;
602 const unsigned int beginningTagDataLength = numBytesParsed;
603 newTagDataLength += beginningTagDataLength;
604 memcpy(newTagData, pOriginalTagData, numBytesParsed);
605
606 // skip to next tag (if tag for removal is last, return true)
607 const char* pTagStorageType = pTagData + 2;
608 pTagData += 3;
609 numBytesParsed += 3;
610 if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) return true;
611
612 // copy everything from current tag (the next one after tag for removal) to end
613 const unsigned int skippedDataLength = (numBytesParsed - beginningTagDataLength);
614 const unsigned int endTagDataLength = originalTagDataLength - beginningTagDataLength - skippedDataLength;
615 memcpy(newTagData + beginningTagDataLength, pTagData, endTagDataLength );
616
617 // save new tag data
618 TagData.assign(newTagData, beginningTagDataLength + endTagDataLength);
619 return true;
620 }
621
622 // tag not found, no removal - return failure
623 return false;
624 }
625
626 bool BamAlignment::FindTag(const string& tag,
627 char* &pTagData,
628 const unsigned int& tagDataLength,
629 unsigned int& numBytesParsed)
630 {
631
632 while ( numBytesParsed < tagDataLength ) {
633
634 const char* pTagType = pTagData;
635 const char* pTagStorageType = pTagData + 2;
636 pTagData += 3;
637 numBytesParsed += 3;
638
639 // check the current tag, return true on match
640 if ( strncmp(pTagType, tag.c_str(), 2) == 0 )
641 return true;
642
643 // get the storage class and find the next tag
644 if ( *pTagStorageType == '\0' ) return false;
645 if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) return false;
646 if ( *pTagData == '\0' ) return false;
647 }
648
649 // checked all tags, none match
650 return false;
651 }
652
653 bool BamAlignment::SkipToNextTag(const char storageType, char* &pTagData, unsigned int& numBytesParsed) {
654
655 switch(storageType) {
656
657 case 'A':
658 case 'c':
659 case 'C':
660 ++numBytesParsed;
661 ++pTagData;
662 break;
663
664 case 's':
665 case 'S':
666 numBytesParsed += 2;
667 pTagData += 2;
668 break;
669
670 case 'f':
671 case 'i':
672 case 'I':
673 numBytesParsed += 4;
674 pTagData += 4;
675 break;
676
677 case 'Z':
678 case 'H':
679 while(*pTagData) {
680 ++numBytesParsed;
681 ++pTagData;
682 }
683 // increment for null-terminator
684 ++numBytesParsed;
685 ++pTagData;
686 break;
687
688 default:
689 // error case
690 fprintf(stderr, "ERROR: Unknown tag storage class encountered: [%c]\n", storageType);
691 return false;
692 }
693
694 // return success
695 return true;
696 }