Mercurial > repos > zzhou > spp_phantompeak
comparison spp/src/BamAlignment.cpp @ 6:ce08b0efa3fd draft
Uploaded
author | zzhou |
---|---|
date | Tue, 27 Nov 2012 16:11:40 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
5:608a8e0eac56 | 6:ce08b0efa3fd |
---|---|
1 // *************************************************************************** | |
2 // BamAlignment.cpp (c) 2009 Derek Barnett | |
3 // Marth Lab, Department of Biology, Boston College | |
4 // All rights reserved. | |
5 // --------------------------------------------------------------------------- | |
6 // Last modified: 13 December 2010 (DB) | |
7 // --------------------------------------------------------------------------- | |
8 // Provides the BamAlignment data structure | |
9 // *************************************************************************** | |
10 | |
11 #include <BamAlignment.h> | |
12 using namespace BamTools; | |
13 | |
14 #include <cctype> | |
15 #include <cstdio> | |
16 #include <cstdlib> | |
17 #include <cstring> | |
18 #include <exception> | |
19 #include <map> | |
20 #include <utility> | |
21 using namespace std; | |
22 | |
23 // default ctor | |
24 BamAlignment::BamAlignment(void) | |
25 : RefID(-1) | |
26 , Position(-1) | |
27 , MateRefID(-1) | |
28 , MatePosition(-1) | |
29 , InsertSize(0) | |
30 { } | |
31 | |
32 // copy ctor | |
33 BamAlignment::BamAlignment(const BamAlignment& other) | |
34 : Name(other.Name) | |
35 , Length(other.Length) | |
36 , QueryBases(other.QueryBases) | |
37 , AlignedBases(other.AlignedBases) | |
38 , Qualities(other.Qualities) | |
39 , TagData(other.TagData) | |
40 , RefID(other.RefID) | |
41 , Position(other.Position) | |
42 , Bin(other.Bin) | |
43 , MapQuality(other.MapQuality) | |
44 , AlignmentFlag(other.AlignmentFlag) | |
45 , CigarData(other.CigarData) | |
46 , MateRefID(other.MateRefID) | |
47 , MatePosition(other.MatePosition) | |
48 , InsertSize(other.InsertSize) | |
49 , SupportData(other.SupportData) | |
50 { } | |
51 | |
52 // dtor | |
53 BamAlignment::~BamAlignment(void) { } | |
54 | |
55 // Queries against alignment flags | |
56 bool BamAlignment::IsDuplicate(void) const { return ( (AlignmentFlag & DUPLICATE) != 0 ); } | |
57 bool BamAlignment::IsFailedQC(void) const { return ( (AlignmentFlag & QC_FAILED) != 0 ); } | |
58 bool BamAlignment::IsFirstMate(void) const { return ( (AlignmentFlag & READ_1) != 0 ); } | |
59 bool BamAlignment::IsMapped(void) const { return ( (AlignmentFlag & UNMAPPED) == 0 ); } | |
60 bool BamAlignment::IsMateMapped(void) const { return ( (AlignmentFlag & MATE_UNMAPPED) == 0 ); } | |
61 bool BamAlignment::IsMateReverseStrand(void) const { return ( (AlignmentFlag & MATE_REVERSE) != 0 ); } | |
62 bool BamAlignment::IsPaired(void) const { return ( (AlignmentFlag & PAIRED) != 0 ); } | |
63 bool BamAlignment::IsPrimaryAlignment(void) const { return ( (AlignmentFlag & SECONDARY) == 0 ); } | |
64 bool BamAlignment::IsProperPair(void) const { return ( (AlignmentFlag & PROPER_PAIR) != 0 ); } | |
65 bool BamAlignment::IsReverseStrand(void) const { return ( (AlignmentFlag & REVERSE) != 0 ); } | |
66 bool BamAlignment::IsSecondMate(void) const { return ( (AlignmentFlag & READ_2) != 0 ); } | |
67 | |
68 // Manipulate alignment flags | |
69 void BamAlignment::SetIsDuplicate(bool ok) { if (ok) AlignmentFlag |= DUPLICATE; else AlignmentFlag &= ~DUPLICATE; } | |
70 void BamAlignment::SetIsFailedQC(bool ok) { if (ok) AlignmentFlag |= QC_FAILED; else AlignmentFlag &= ~QC_FAILED; } | |
71 void BamAlignment::SetIsFirstMate(bool ok) { if (ok) AlignmentFlag |= READ_1; else AlignmentFlag &= ~READ_1; } | |
72 void BamAlignment::SetIsMapped(bool ok) { SetIsUnmapped(!ok); } | |
73 void BamAlignment::SetIsMateMapped(bool ok) { SetIsMateUnmapped(!ok); } | |
74 void BamAlignment::SetIsMateUnmapped(bool ok) { if (ok) AlignmentFlag |= MATE_UNMAPPED; else AlignmentFlag &= ~MATE_UNMAPPED; } | |
75 void BamAlignment::SetIsMateReverseStrand(bool ok) { if (ok) AlignmentFlag |= MATE_REVERSE; else AlignmentFlag &= ~MATE_REVERSE; } | |
76 void BamAlignment::SetIsPaired(bool ok) { if (ok) AlignmentFlag |= PAIRED; else AlignmentFlag &= ~PAIRED; } | |
77 void BamAlignment::SetIsPrimaryAlignment(bool ok) { SetIsSecondaryAlignment(!ok); } | |
78 void BamAlignment::SetIsProperPair(bool ok) { if (ok) AlignmentFlag |= PROPER_PAIR; else AlignmentFlag &= ~PROPER_PAIR; } | |
79 void BamAlignment::SetIsReverseStrand(bool ok) { if (ok) AlignmentFlag |= REVERSE; else AlignmentFlag &= ~REVERSE; } | |
80 void BamAlignment::SetIsSecondaryAlignment(bool ok) { if (ok) AlignmentFlag |= SECONDARY; else AlignmentFlag &= ~SECONDARY; } | |
81 void BamAlignment::SetIsSecondMate(bool ok) { if (ok) AlignmentFlag |= READ_2; else AlignmentFlag &= ~READ_2; } | |
82 void BamAlignment::SetIsUnmapped(bool ok) { if (ok) AlignmentFlag |= UNMAPPED; else AlignmentFlag &= ~UNMAPPED; } | |
83 | |
84 // calculates alignment end position, based on starting position and CIGAR operations | |
85 int BamAlignment::GetEndPosition(bool usePadded, bool zeroBased) const { | |
86 | |
87 // initialize alignment end to starting position | |
88 int alignEnd = Position; | |
89 | |
90 // iterate over cigar operations | |
91 vector<CigarOp>::const_iterator cigarIter = CigarData.begin(); | |
92 vector<CigarOp>::const_iterator cigarEnd = CigarData.end(); | |
93 for ( ; cigarIter != cigarEnd; ++cigarIter) { | |
94 const char cigarType = (*cigarIter).Type; | |
95 if ( cigarType == 'M' || cigarType == 'D' || cigarType == 'N' ) | |
96 alignEnd += (*cigarIter).Length; | |
97 else if ( usePadded && cigarType == 'I' ) | |
98 alignEnd += (*cigarIter).Length; | |
99 } | |
100 | |
101 // adjust for zeroBased, if necessary | |
102 if (zeroBased) | |
103 return alignEnd - 1; | |
104 else | |
105 return alignEnd; | |
106 } | |
107 | |
108 bool BamAlignment::AddTag(const string& tag, const string& type, const string& value) { | |
109 | |
110 if ( SupportData.HasCoreOnly ) return false; | |
111 if ( tag.size() != 2 || type.size() != 1 ) return false; | |
112 if ( type != "Z" && type != "H" ) return false; | |
113 | |
114 // localize the tag data | |
115 char* pTagData = (char*)TagData.data(); | |
116 const unsigned int tagDataLength = TagData.size(); | |
117 unsigned int numBytesParsed = 0; | |
118 | |
119 // if tag already exists, return false | |
120 // use EditTag explicitly instead | |
121 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) return false; | |
122 | |
123 // otherwise, copy tag data to temp buffer | |
124 string newTag = tag + type + value; | |
125 const int newTagDataLength = tagDataLength + newTag.size() + 1; // leave room for null-term | |
126 char originalTagData[newTagDataLength]; | |
127 memcpy(originalTagData, TagData.c_str(), tagDataLength + 1); // '+1' for TagData null-term | |
128 | |
129 // append newTag | |
130 strcat(originalTagData + tagDataLength, newTag.data()); // removes original null-term, appends newTag + null-term | |
131 | |
132 // store temp buffer back in TagData | |
133 const char* newTagData = (const char*)originalTagData; | |
134 TagData.assign(newTagData, newTagDataLength); | |
135 | |
136 // return success | |
137 return true; | |
138 } | |
139 | |
140 bool BamAlignment::AddTag(const string& tag, const string& type, const uint32_t& value) { | |
141 | |
142 if ( SupportData.HasCoreOnly ) return false; | |
143 if ( tag.size() != 2 || type.size() != 1 ) return false; | |
144 if ( type == "f" || type == "Z" || type == "H" ) return false; | |
145 | |
146 // localize the tag data | |
147 char* pTagData = (char*)TagData.data(); | |
148 const unsigned int tagDataLength = TagData.size(); | |
149 unsigned int numBytesParsed = 0; | |
150 | |
151 // if tag already exists, return false | |
152 // use EditTag explicitly instead | |
153 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) return false; | |
154 | |
155 // otherwise, convert value to string | |
156 union { unsigned int value; char valueBuffer[sizeof(unsigned int)]; } un; | |
157 un.value = value; | |
158 | |
159 // copy original tag data to temp buffer | |
160 string newTag = tag + type; | |
161 const int newTagDataLength = tagDataLength + newTag.size() + 4; // leave room for new integer | |
162 char originalTagData[newTagDataLength]; | |
163 memcpy(originalTagData, TagData.c_str(), tagDataLength + 1); // '+1' for TagData null-term | |
164 | |
165 // append newTag | |
166 strcat(originalTagData + tagDataLength, newTag.data()); | |
167 memcpy(originalTagData + tagDataLength + newTag.size(), un.valueBuffer, sizeof(unsigned int)); | |
168 | |
169 // store temp buffer back in TagData | |
170 const char* newTagData = (const char*)originalTagData; | |
171 TagData.assign(newTagData, newTagDataLength); | |
172 | |
173 // return success | |
174 return true; | |
175 } | |
176 | |
177 bool BamAlignment::AddTag(const string& tag, const string& type, const int32_t& value) { | |
178 return AddTag(tag, type, (const uint32_t&)value); | |
179 } | |
180 | |
181 bool BamAlignment::AddTag(const string& tag, const string& type, const float& value) { | |
182 | |
183 if ( SupportData.HasCoreOnly ) return false; | |
184 if ( tag.size() != 2 || type.size() != 1 ) return false; | |
185 if ( type == "Z" || type == "H" ) return false; | |
186 | |
187 // localize the tag data | |
188 char* pTagData = (char*)TagData.data(); | |
189 const unsigned int tagDataLength = TagData.size(); | |
190 unsigned int numBytesParsed = 0; | |
191 | |
192 // if tag already exists, return false | |
193 // use EditTag explicitly instead | |
194 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) return false; | |
195 | |
196 // otherwise, convert value to string | |
197 union { float value; char valueBuffer[sizeof(float)]; } un; | |
198 un.value = value; | |
199 | |
200 // copy original tag data to temp buffer | |
201 string newTag = tag + type; | |
202 const int newTagDataLength = tagDataLength + newTag.size() + 4; // leave room for new float | |
203 char originalTagData[newTagDataLength]; | |
204 memcpy(originalTagData, TagData.c_str(), tagDataLength + 1); // '+1' for TagData null-term | |
205 | |
206 // append newTag | |
207 strcat(originalTagData + tagDataLength, newTag.data()); | |
208 memcpy(originalTagData + tagDataLength + newTag.size(), un.valueBuffer, sizeof(float)); | |
209 | |
210 // store temp buffer back in TagData | |
211 const char* newTagData = (const char*)originalTagData; | |
212 TagData.assign(newTagData, newTagDataLength); | |
213 | |
214 // return success | |
215 return true; | |
216 } | |
217 | |
218 bool BamAlignment::EditTag(const string& tag, const string& type, const string& value) { | |
219 | |
220 if ( SupportData.HasCoreOnly ) return false; | |
221 if ( tag.size() != 2 || type.size() != 1 ) return false; | |
222 if ( type != "Z" && type != "H" ) return false; | |
223 | |
224 // localize the tag data | |
225 char* pOriginalTagData = (char*)TagData.data(); | |
226 char* pTagData = pOriginalTagData; | |
227 const unsigned int originalTagDataLength = TagData.size(); | |
228 | |
229 unsigned int newTagDataLength = 0; | |
230 unsigned int numBytesParsed = 0; | |
231 | |
232 // if tag found, store data in readGroup, return success | |
233 if ( FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) ) { | |
234 | |
235 // make sure array is more than big enough | |
236 char newTagData[originalTagDataLength + value.size()]; | |
237 | |
238 // copy original tag data up til desired tag | |
239 const unsigned int beginningTagDataLength = numBytesParsed; | |
240 newTagDataLength += beginningTagDataLength; | |
241 memcpy(newTagData, pOriginalTagData, numBytesParsed); | |
242 | |
243 // copy new VALUE in place of current tag data | |
244 const unsigned int dataLength = strlen(value.c_str()); | |
245 memcpy(newTagData + beginningTagDataLength, (char*)value.c_str(), dataLength+1 ); | |
246 | |
247 // skip to next tag (if tag for removal is last, return true) | |
248 const char* pTagStorageType = pTagData - 1; | |
249 if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) return true; | |
250 | |
251 // copy everything from current tag (the next one after tag for removal) to end | |
252 const unsigned int skippedDataLength = (numBytesParsed - beginningTagDataLength); | |
253 const unsigned int endTagOffset = beginningTagDataLength + dataLength + 1; | |
254 const unsigned int endTagDataLength = originalTagDataLength - beginningTagDataLength - skippedDataLength; | |
255 memcpy(newTagData + endTagOffset, pTagData, endTagDataLength); | |
256 | |
257 // ensure null-terminator | |
258 newTagData[ endTagOffset + endTagDataLength + 1 ] = 0; | |
259 | |
260 // save new tag data | |
261 TagData.assign(newTagData, endTagOffset + endTagDataLength); | |
262 return true; | |
263 } | |
264 | |
265 // tag not found, attempt AddTag | |
266 else return AddTag(tag, type, value); | |
267 } | |
268 | |
269 bool BamAlignment::EditTag(const string& tag, const string& type, const uint32_t& value) { | |
270 | |
271 if ( SupportData.HasCoreOnly ) return false; | |
272 if ( tag.size() != 2 || type.size() != 1 ) return false; | |
273 if ( type == "f" || type == "Z" || type == "H" ) return false; | |
274 | |
275 // localize the tag data | |
276 char* pOriginalTagData = (char*)TagData.data(); | |
277 char* pTagData = pOriginalTagData; | |
278 const unsigned int originalTagDataLength = TagData.size(); | |
279 | |
280 unsigned int newTagDataLength = 0; | |
281 unsigned int numBytesParsed = 0; | |
282 | |
283 // if tag found, store data in readGroup, return success | |
284 if ( FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) ) { | |
285 | |
286 // make sure array is more than big enough | |
287 char newTagData[originalTagDataLength + sizeof(value)]; | |
288 | |
289 // copy original tag data up til desired tag | |
290 const unsigned int beginningTagDataLength = numBytesParsed; | |
291 newTagDataLength += beginningTagDataLength; | |
292 memcpy(newTagData, pOriginalTagData, numBytesParsed); | |
293 | |
294 // copy new VALUE in place of current tag data | |
295 union { unsigned int value; char valueBuffer[sizeof(unsigned int)]; } un; | |
296 un.value = value; | |
297 memcpy(newTagData + beginningTagDataLength, un.valueBuffer, sizeof(unsigned int)); | |
298 | |
299 // skip to next tag (if tag for removal is last, return true) | |
300 const char* pTagStorageType = pTagData - 1; | |
301 if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) return true; | |
302 | |
303 // copy everything from current tag (the next one after tag for removal) to end | |
304 const unsigned int skippedDataLength = (numBytesParsed - beginningTagDataLength); | |
305 const unsigned int endTagOffset = beginningTagDataLength + sizeof(unsigned int); | |
306 const unsigned int endTagDataLength = originalTagDataLength - beginningTagDataLength - skippedDataLength; | |
307 memcpy(newTagData + endTagOffset, pTagData, endTagDataLength); | |
308 | |
309 // ensure null-terminator | |
310 newTagData[ endTagOffset + endTagDataLength + 1 ] = 0; | |
311 | |
312 // save new tag data | |
313 TagData.assign(newTagData, endTagOffset + endTagDataLength); | |
314 return true; | |
315 } | |
316 | |
317 // tag not found, attempt AddTag | |
318 else return AddTag(tag, type, value); | |
319 } | |
320 | |
321 bool BamAlignment::EditTag(const string& tag, const string& type, const int32_t& value) { | |
322 return EditTag(tag, type, (const uint32_t&)value); | |
323 } | |
324 | |
325 bool BamAlignment::EditTag(const string& tag, const string& type, const float& value) { | |
326 | |
327 if ( SupportData.HasCoreOnly ) return false; | |
328 if ( tag.size() != 2 || type.size() != 1 ) return false; | |
329 if ( type == "Z" || type == "H" ) return false; | |
330 | |
331 // localize the tag data | |
332 char* pOriginalTagData = (char*)TagData.data(); | |
333 char* pTagData = pOriginalTagData; | |
334 const unsigned int originalTagDataLength = TagData.size(); | |
335 | |
336 unsigned int newTagDataLength = 0; | |
337 unsigned int numBytesParsed = 0; | |
338 | |
339 // if tag found, store data in readGroup, return success | |
340 if ( FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) ) { | |
341 | |
342 // make sure array is more than big enough | |
343 char newTagData[originalTagDataLength + sizeof(value)]; | |
344 | |
345 // copy original tag data up til desired tag | |
346 const unsigned int beginningTagDataLength = numBytesParsed; | |
347 newTagDataLength += beginningTagDataLength; | |
348 memcpy(newTagData, pOriginalTagData, numBytesParsed); | |
349 | |
350 // copy new VALUE in place of current tag data | |
351 union { float value; char valueBuffer[sizeof(float)]; } un; | |
352 un.value = value; | |
353 memcpy(newTagData + beginningTagDataLength, un.valueBuffer, sizeof(float)); | |
354 | |
355 // skip to next tag (if tag for removal is last, return true) | |
356 const char* pTagStorageType = pTagData - 1; | |
357 if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) return true; | |
358 | |
359 // copy everything from current tag (the next one after tag for removal) to end | |
360 const unsigned int skippedDataLength = (numBytesParsed - beginningTagDataLength); | |
361 const unsigned int endTagOffset = beginningTagDataLength + sizeof(float); | |
362 const unsigned int endTagDataLength = originalTagDataLength - beginningTagDataLength - skippedDataLength; | |
363 memcpy(newTagData + endTagOffset, pTagData, endTagDataLength); | |
364 | |
365 // ensure null-terminator | |
366 newTagData[ endTagOffset + endTagDataLength + 1 ] = 0; | |
367 | |
368 // save new tag data | |
369 TagData.assign(newTagData, endTagOffset + endTagDataLength); | |
370 return true; | |
371 } | |
372 | |
373 // tag not found, attempt AddTag | |
374 else return AddTag(tag, type, value); | |
375 } | |
376 | |
377 // get "NM" tag data - originally contributed by Aaron Quinlan | |
378 // stores data in 'editDistance', returns success/fail | |
379 bool BamAlignment::GetEditDistance(uint32_t& editDistance) const { | |
380 return GetTag("NM", (uint32_t&)editDistance); | |
381 } | |
382 | |
383 // get "RG" tag data | |
384 // stores data in 'readGroup', returns success/fail | |
385 bool BamAlignment::GetReadGroup(string& readGroup) const { | |
386 return GetTag("RG", readGroup); | |
387 } | |
388 | |
389 bool BamAlignment::GetTag(const string& tag, string& destination) const { | |
390 | |
391 // make sure tag data exists | |
392 if ( SupportData.HasCoreOnly || TagData.empty() ) | |
393 return false; | |
394 | |
395 // localize the tag data | |
396 char* pTagData = (char*)TagData.data(); | |
397 const unsigned int tagDataLength = TagData.size(); | |
398 unsigned int numBytesParsed = 0; | |
399 | |
400 // if tag found, store data in readGroup, return success | |
401 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) { | |
402 const unsigned int dataLength = strlen(pTagData); | |
403 destination.clear(); | |
404 destination.resize(dataLength); | |
405 memcpy( (char*)destination.data(), pTagData, dataLength ); | |
406 return true; | |
407 } | |
408 | |
409 // tag not found, return failure | |
410 return false; | |
411 } | |
412 | |
413 bool BamAlignment::GetTag(const string& tag, uint32_t& destination) const { | |
414 | |
415 // make sure tag data exists | |
416 if ( SupportData.HasCoreOnly || TagData.empty() ) | |
417 return false; | |
418 | |
419 // localize the tag data | |
420 char* pTagData = (char*)TagData.data(); | |
421 const unsigned int tagDataLength = TagData.size(); | |
422 unsigned int numBytesParsed = 0; | |
423 | |
424 // if tag found, determine data byte-length, store data in readGroup, return success | |
425 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) { | |
426 | |
427 // determine data byte-length | |
428 const char type = *(pTagData - 1); | |
429 int destinationLength = 0; | |
430 switch (type) { | |
431 | |
432 // 1 byte data | |
433 case 'A': | |
434 case 'c': | |
435 case 'C': | |
436 destinationLength = 1; | |
437 break; | |
438 | |
439 // 2 byte data | |
440 case 's': | |
441 case 'S': | |
442 destinationLength = 2; | |
443 break; | |
444 | |
445 // 4 byte data | |
446 case 'i': | |
447 case 'I': | |
448 destinationLength = 4; | |
449 break; | |
450 | |
451 // unsupported type for integer destination (float or var-length strings) | |
452 case 'f': | |
453 case 'Z': | |
454 case 'H': | |
455 fprintf(stderr, "ERROR: Cannot store tag of type %c in integer destination\n", type); | |
456 return false; | |
457 | |
458 // unknown tag type | |
459 default: | |
460 fprintf(stderr, "ERROR: Unknown tag storage class encountered: [%c]\n", type); | |
461 return false; | |
462 } | |
463 | |
464 // store in destination | |
465 destination = 0; | |
466 memcpy(&destination, pTagData, destinationLength); | |
467 return true; | |
468 } | |
469 | |
470 // tag not found, return failure | |
471 return false; | |
472 } | |
473 | |
474 bool BamAlignment::GetTag(const string& tag, int32_t& destination) const { | |
475 return GetTag(tag, (uint32_t&)destination); | |
476 } | |
477 | |
478 bool BamAlignment::GetTag(const string& tag, float& destination) const { | |
479 | |
480 // make sure tag data exists | |
481 if ( SupportData.HasCoreOnly || TagData.empty() ) | |
482 return false; | |
483 | |
484 // localize the tag data | |
485 char* pTagData = (char*)TagData.data(); | |
486 const unsigned int tagDataLength = TagData.size(); | |
487 unsigned int numBytesParsed = 0; | |
488 | |
489 // if tag found, determine data byte-length, store data in readGroup, return success | |
490 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) { | |
491 | |
492 // determine data byte-length | |
493 const char type = *(pTagData - 1); | |
494 int destinationLength = 0; | |
495 switch(type) { | |
496 | |
497 // 1 byte data | |
498 case 'A': | |
499 case 'c': | |
500 case 'C': | |
501 destinationLength = 1; | |
502 break; | |
503 | |
504 // 2 byte data | |
505 case 's': | |
506 case 'S': | |
507 destinationLength = 2; | |
508 break; | |
509 | |
510 // 4 byte data | |
511 case 'f': | |
512 case 'i': | |
513 case 'I': | |
514 destinationLength = 4; | |
515 break; | |
516 | |
517 // unsupported type (var-length strings) | |
518 case 'Z': | |
519 case 'H': | |
520 fprintf(stderr, "ERROR: Cannot store tag of type %c in integer destination\n", type); | |
521 return false; | |
522 | |
523 // unknown tag type | |
524 default: | |
525 fprintf(stderr, "ERROR: Unknown tag storage class encountered: [%c]\n", type); | |
526 return false; | |
527 } | |
528 | |
529 // store in destination | |
530 destination = 0.0; | |
531 memcpy(&destination, pTagData, destinationLength); | |
532 return true; | |
533 } | |
534 | |
535 // tag not found, return failure | |
536 return false; | |
537 } | |
538 | |
539 bool BamAlignment::GetTagType(const string& tag, char& type) const { | |
540 | |
541 // make sure tag data exists | |
542 if ( SupportData.HasCoreOnly || TagData.empty() ) | |
543 return false; | |
544 | |
545 // localize the tag data | |
546 char* pTagData = (char*)TagData.data(); | |
547 const unsigned int tagDataLength = TagData.size(); | |
548 unsigned int numBytesParsed = 0; | |
549 | |
550 // lookup tag | |
551 if ( FindTag(tag, pTagData, tagDataLength, numBytesParsed) ) { | |
552 | |
553 // retrieve tag type code | |
554 type = *(pTagData - 1); | |
555 | |
556 // validate that type is a proper BAM tag type | |
557 switch(type) { | |
558 case 'A': | |
559 case 'c': | |
560 case 'C': | |
561 case 's': | |
562 case 'S': | |
563 case 'f': | |
564 case 'i': | |
565 case 'I': | |
566 case 'Z': | |
567 case 'H': | |
568 return true; | |
569 | |
570 // unknown tag type | |
571 default: | |
572 fprintf(stderr, "ERROR: Unknown tag storage class encountered: [%c]\n", type); | |
573 return false; | |
574 } | |
575 } | |
576 | |
577 // tag not found, return failure | |
578 return false; | |
579 } | |
580 | |
581 bool BamAlignment::RemoveTag(const string& tag) { | |
582 | |
583 // BamAlignments fetched using BamReader::GetNextAlignmentCore() are not allowed | |
584 // also, return false if no data present to remove | |
585 if ( SupportData.HasCoreOnly || TagData.empty() ) return false; | |
586 | |
587 // localize the tag data | |
588 char* pOriginalTagData = (char*)TagData.data(); | |
589 char* pTagData = pOriginalTagData; | |
590 const unsigned int originalTagDataLength = TagData.size(); | |
591 unsigned int newTagDataLength = 0; | |
592 unsigned int numBytesParsed = 0; | |
593 | |
594 // if tag found, store data in readGroup, return success | |
595 if ( FindTag(tag, pTagData, originalTagDataLength, numBytesParsed) ) { | |
596 | |
597 char newTagData[originalTagDataLength]; | |
598 | |
599 // copy original tag data up til desired tag | |
600 pTagData -= 3; | |
601 numBytesParsed -= 3; | |
602 const unsigned int beginningTagDataLength = numBytesParsed; | |
603 newTagDataLength += beginningTagDataLength; | |
604 memcpy(newTagData, pOriginalTagData, numBytesParsed); | |
605 | |
606 // skip to next tag (if tag for removal is last, return true) | |
607 const char* pTagStorageType = pTagData + 2; | |
608 pTagData += 3; | |
609 numBytesParsed += 3; | |
610 if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) return true; | |
611 | |
612 // copy everything from current tag (the next one after tag for removal) to end | |
613 const unsigned int skippedDataLength = (numBytesParsed - beginningTagDataLength); | |
614 const unsigned int endTagDataLength = originalTagDataLength - beginningTagDataLength - skippedDataLength; | |
615 memcpy(newTagData + beginningTagDataLength, pTagData, endTagDataLength ); | |
616 | |
617 // save new tag data | |
618 TagData.assign(newTagData, beginningTagDataLength + endTagDataLength); | |
619 return true; | |
620 } | |
621 | |
622 // tag not found, no removal - return failure | |
623 return false; | |
624 } | |
625 | |
626 bool BamAlignment::FindTag(const string& tag, | |
627 char* &pTagData, | |
628 const unsigned int& tagDataLength, | |
629 unsigned int& numBytesParsed) | |
630 { | |
631 | |
632 while ( numBytesParsed < tagDataLength ) { | |
633 | |
634 const char* pTagType = pTagData; | |
635 const char* pTagStorageType = pTagData + 2; | |
636 pTagData += 3; | |
637 numBytesParsed += 3; | |
638 | |
639 // check the current tag, return true on match | |
640 if ( strncmp(pTagType, tag.c_str(), 2) == 0 ) | |
641 return true; | |
642 | |
643 // get the storage class and find the next tag | |
644 if ( *pTagStorageType == '\0' ) return false; | |
645 if ( !SkipToNextTag(*pTagStorageType, pTagData, numBytesParsed) ) return false; | |
646 if ( *pTagData == '\0' ) return false; | |
647 } | |
648 | |
649 // checked all tags, none match | |
650 return false; | |
651 } | |
652 | |
653 bool BamAlignment::SkipToNextTag(const char storageType, char* &pTagData, unsigned int& numBytesParsed) { | |
654 | |
655 switch(storageType) { | |
656 | |
657 case 'A': | |
658 case 'c': | |
659 case 'C': | |
660 ++numBytesParsed; | |
661 ++pTagData; | |
662 break; | |
663 | |
664 case 's': | |
665 case 'S': | |
666 numBytesParsed += 2; | |
667 pTagData += 2; | |
668 break; | |
669 | |
670 case 'f': | |
671 case 'i': | |
672 case 'I': | |
673 numBytesParsed += 4; | |
674 pTagData += 4; | |
675 break; | |
676 | |
677 case 'Z': | |
678 case 'H': | |
679 while(*pTagData) { | |
680 ++numBytesParsed; | |
681 ++pTagData; | |
682 } | |
683 // increment for null-terminator | |
684 ++numBytesParsed; | |
685 ++pTagData; | |
686 break; | |
687 | |
688 default: | |
689 // error case | |
690 fprintf(stderr, "ERROR: Unknown tag storage class encountered: [%c]\n", storageType); | |
691 return false; | |
692 } | |
693 | |
694 // return success | |
695 return true; | |
696 } |