6
|
1 // ***************************************************************************
|
|
2 // BamToolsIndex.cpp (c) 2010 Derek Barnett
|
|
3 // Marth Lab, Department of Biology, Boston College
|
|
4 // All rights reserved.
|
|
5 // ---------------------------------------------------------------------------
|
|
6 // Last modified: 22 November 2010 (DB)
|
|
7 // ---------------------------------------------------------------------------
|
|
8 // Provides index operations for the BamTools index format (".bti")
|
|
9 // ***************************************************************************
|
|
10
|
|
11 #include <BamAlignment.h>
|
|
12 #include <BamReader.h>
|
|
13 #include <BGZF.h>
|
|
14 #include <BamToolsIndex_p.h>
|
|
15 using namespace BamTools;
|
|
16 using namespace BamTools::Internal;
|
|
17
|
|
18 #include <cstdio>
|
|
19 #include <cstdlib>
|
|
20 #include <algorithm>
|
|
21 #include <iostream>
|
|
22 #include <map>
|
|
23 using namespace std;
|
|
24
|
|
25 BamToolsIndex::BamToolsIndex(BgzfData* bgzf, BamReader* reader)
|
|
26 : BamIndex(bgzf, reader)
|
|
27 , m_blockSize(1000)
|
|
28 , m_dataBeginOffset(0)
|
|
29 , m_hasFullDataCache(false)
|
|
30 , m_inputVersion(0)
|
|
31 , m_outputVersion(BTI_1_2) // latest version - used for writing new index files
|
|
32 {
|
|
33 m_isBigEndian = BamTools::SystemIsBigEndian();
|
|
34 }
|
|
35
|
|
36 // dtor
|
|
37 BamToolsIndex::~BamToolsIndex(void) {
|
|
38 ClearAllData();
|
|
39 }
|
|
40
|
|
41 // creates index data (in-memory) from current reader data
|
|
42 bool BamToolsIndex::Build(void) {
|
|
43
|
|
44 // be sure reader & BGZF file are valid & open for reading
|
|
45 if ( m_reader == 0 || m_BGZF == 0 || !m_BGZF->IsOpen )
|
|
46 return false;
|
|
47
|
|
48 // move file pointer to beginning of alignments
|
|
49 if ( !m_reader->Rewind() ) return false;
|
|
50
|
|
51 // initialize index data structure with space for all references
|
|
52 const int numReferences = (int)m_references.size();
|
|
53 m_indexData.clear();
|
|
54 m_hasFullDataCache = false;
|
|
55 SetReferenceCount(numReferences);
|
|
56
|
|
57 // set up counters and markers
|
|
58 int32_t currentBlockCount = 0;
|
|
59 int64_t currentAlignmentOffset = m_BGZF->Tell();
|
|
60 int32_t blockRefId = 0;
|
|
61 int32_t blockMaxEndPosition = 0;
|
|
62 int64_t blockStartOffset = currentAlignmentOffset;
|
|
63 int32_t blockStartPosition = -1;
|
|
64
|
|
65 // plow through alignments, storing index entries
|
|
66 BamAlignment al;
|
|
67 while ( m_reader->GetNextAlignmentCore(al) ) {
|
|
68
|
|
69 // if block contains data (not the first time through) AND alignment is on a new reference
|
|
70 if ( currentBlockCount > 0 && al.RefID != blockRefId ) {
|
|
71
|
|
72 // store previous data
|
|
73 BamToolsIndexEntry entry(blockMaxEndPosition, blockStartOffset, blockStartPosition);
|
|
74 SaveOffsetEntry(blockRefId, entry);
|
|
75
|
|
76 // intialize new block for current alignment's reference
|
|
77 currentBlockCount = 0;
|
|
78 blockMaxEndPosition = al.GetEndPosition();
|
|
79 blockStartOffset = currentAlignmentOffset;
|
|
80 }
|
|
81
|
|
82 // if beginning of block, save first alignment's refID & position
|
|
83 if ( currentBlockCount == 0 ) {
|
|
84 blockRefId = al.RefID;
|
|
85 blockStartPosition = al.Position;
|
|
86 }
|
|
87
|
|
88 // increment block counter
|
|
89 ++currentBlockCount;
|
|
90
|
|
91 // check end position
|
|
92 int32_t alignmentEndPosition = al.GetEndPosition();
|
|
93 if ( alignmentEndPosition > blockMaxEndPosition )
|
|
94 blockMaxEndPosition = alignmentEndPosition;
|
|
95
|
|
96 // if block is full, get offset for next block, reset currentBlockCount
|
|
97 if ( currentBlockCount == m_blockSize ) {
|
|
98 BamToolsIndexEntry entry(blockMaxEndPosition, blockStartOffset, blockStartPosition);
|
|
99 SaveOffsetEntry(blockRefId, entry);
|
|
100 blockStartOffset = m_BGZF->Tell();
|
|
101 currentBlockCount = 0;
|
|
102 }
|
|
103
|
|
104 // not the best name, but for the next iteration, this value will be the offset of the *current* alignment
|
|
105 // necessary because we won't know if this next alignment is on a new reference until we actually read it
|
|
106 currentAlignmentOffset = m_BGZF->Tell();
|
|
107 }
|
|
108
|
|
109 // store final block with data
|
|
110 BamToolsIndexEntry entry(blockMaxEndPosition, blockStartOffset, blockStartPosition);
|
|
111 SaveOffsetEntry(blockRefId, entry);
|
|
112
|
|
113 // set flag
|
|
114 m_hasFullDataCache = true;
|
|
115
|
|
116 // return success/failure of rewind
|
|
117 return m_reader->Rewind();
|
|
118 }
|
|
119
|
|
120 // check index file magic number, return true if OK
|
|
121 bool BamToolsIndex::CheckMagicNumber(void) {
|
|
122
|
|
123 // see if index is valid BAM index
|
|
124 char magic[4];
|
|
125 size_t elementsRead = fread(magic, 1, 4, m_indexStream);
|
|
126 if ( elementsRead != 4 ) return false;
|
|
127 if ( strncmp(magic, "BTI\1", 4) != 0 ) {
|
|
128 fprintf(stderr, "Problem with index file - invalid format.\n");
|
|
129 return false;
|
|
130 }
|
|
131
|
|
132 // otherwise ok
|
|
133 return true;
|
|
134 }
|
|
135
|
|
136 // check index file version, return true if OK
|
|
137 bool BamToolsIndex::CheckVersion(void) {
|
|
138
|
|
139 // read version from file
|
|
140 size_t elementsRead = fread(&m_inputVersion, sizeof(m_inputVersion), 1, m_indexStream);
|
|
141 if ( elementsRead != 1 ) return false;
|
|
142 if ( m_isBigEndian ) SwapEndian_32(m_inputVersion);
|
|
143
|
|
144 // if version is negative, or zero
|
|
145 if ( m_inputVersion <= 0 ) {
|
|
146 fprintf(stderr, "Problem with index file - invalid version.\n");
|
|
147 return false;
|
|
148 }
|
|
149
|
|
150 // if version is newer than can be supported by this version of bamtools
|
|
151 else if ( m_inputVersion > m_outputVersion ) {
|
|
152 fprintf(stderr, "Problem with index file - attempting to use an outdated version of BamTools with a newer index file.\n");
|
|
153 fprintf(stderr, "Please update BamTools to a more recent version to support this index file.\n");
|
|
154 return false;
|
|
155 }
|
|
156
|
|
157 // ------------------------------------------------------------------
|
|
158 // check for deprecated, unsupported versions
|
|
159 // (typically whose format did not accomodate a particular bug fix)
|
|
160
|
|
161 else if ( (Version)m_inputVersion == BTI_1_0 ) {
|
|
162 fprintf(stderr, "\nProblem with index file - this version of the index contains a bug related to accessing data near reference ends.\n");
|
|
163 fprintf(stderr, "\nPlease run \'bamtools index -bti -in yourData.bam\' to generate an up-to-date BamToolsIndex.\n\n");
|
|
164 return false;
|
|
165 }
|
|
166
|
|
167 else if ( (Version)m_inputVersion == BTI_1_1 ) {
|
|
168 fprintf(stderr, "\nProblem with index file - this version of the index contains a bug related to handling empty references.\n");
|
|
169 fprintf(stderr, "\nPlease run \'bamtools index -bti -in yourData.bam\' to generate an up-to-date BamToolsIndex.\n\n");
|
|
170 return false;
|
|
171 }
|
|
172
|
|
173 // otherwise ok
|
|
174 else return true;
|
|
175 }
|
|
176
|
|
177 // clear all current index offset data in memory
|
|
178 void BamToolsIndex::ClearAllData(void) {
|
|
179 BamToolsIndexData::const_iterator indexIter = m_indexData.begin();
|
|
180 BamToolsIndexData::const_iterator indexEnd = m_indexData.end();
|
|
181 for ( ; indexIter != indexEnd; ++indexIter ) {
|
|
182 const int& refId = (*indexIter).first;
|
|
183 ClearReferenceOffsets(refId);
|
|
184 }
|
|
185 }
|
|
186
|
|
187 // clear all index offset data for desired reference
|
|
188 void BamToolsIndex::ClearReferenceOffsets(const int& refId) {
|
|
189 if ( m_indexData.find(refId) == m_indexData.end() ) return;
|
|
190 vector<BamToolsIndexEntry>& offsets = m_indexData[refId].Offsets;
|
|
191 offsets.clear();
|
|
192 m_hasFullDataCache = false;
|
|
193 }
|
|
194
|
|
195 // return file position after header metadata
|
|
196 const off_t BamToolsIndex::DataBeginOffset(void) const {
|
|
197 return m_dataBeginOffset;
|
|
198 }
|
|
199
|
|
200 // calculate BAM file offset for desired region
|
|
201 // return true if no error (*NOT* equivalent to "has alignments or valid offset")
|
|
202 // check @hasAlignmentsInRegion to determine this status
|
|
203 // @region - target region
|
|
204 // @offset - resulting seek target
|
|
205 // @hasAlignmentsInRegion - sometimes a file just lacks data in region, this flag indicates that status
|
|
206 // N.B. - ignores isRightBoundSpecified
|
|
207 bool BamToolsIndex::GetOffset(const BamRegion& region, int64_t& offset, bool* hasAlignmentsInRegion) {
|
|
208
|
|
209 // return false if leftBound refID is not found in index data
|
|
210 BamToolsIndexData::const_iterator indexIter = m_indexData.find(region.LeftRefID);
|
|
211 if ( indexIter == m_indexData.end()) return false;
|
|
212
|
|
213 // load index data for region if not already cached
|
|
214 if ( !IsDataLoaded(region.LeftRefID) ) {
|
|
215 bool loadedOk = true;
|
|
216 loadedOk &= SkipToReference(region.LeftRefID);
|
|
217 loadedOk &= LoadReference(region.LeftRefID);
|
|
218 if ( !loadedOk ) return false;
|
|
219 }
|
|
220
|
|
221 // localize index data for this reference (& sanity check that data actually exists)
|
|
222 indexIter = m_indexData.find(region.LeftRefID);
|
|
223 if ( indexIter == m_indexData.end()) return false;
|
|
224 const vector<BamToolsIndexEntry>& referenceOffsets = (*indexIter).second.Offsets;
|
|
225 if ( referenceOffsets.empty() ) return false;
|
|
226
|
|
227 // -------------------------------------------------------
|
|
228 // calculate nearest index to jump to
|
|
229
|
|
230 // save first offset
|
|
231 offset = (*referenceOffsets.begin()).StartOffset;
|
|
232
|
|
233 // iterate over offsets entries on this reference
|
|
234 vector<BamToolsIndexEntry>::const_iterator offsetIter = referenceOffsets.begin();
|
|
235 vector<BamToolsIndexEntry>::const_iterator offsetEnd = referenceOffsets.end();
|
|
236 for ( ; offsetIter != offsetEnd; ++offsetIter ) {
|
|
237 const BamToolsIndexEntry& entry = (*offsetIter);
|
|
238 // break if alignment 'entry' overlaps region
|
|
239 if ( entry.MaxEndPosition >= region.LeftPosition ) break;
|
|
240 offset = (*offsetIter).StartOffset;
|
|
241 }
|
|
242
|
|
243 // set flag based on whether an index entry was found for this region
|
|
244 *hasAlignmentsInRegion = ( offsetIter != offsetEnd );
|
|
245
|
|
246 // if cache mode set to none, dump the data we just loaded
|
|
247 if (m_cacheMode == BamIndex::NoIndexCaching )
|
|
248 ClearReferenceOffsets(region.LeftRefID);
|
|
249
|
|
250 // return success
|
|
251 return true;
|
|
252 }
|
|
253
|
|
254 // returns whether reference has alignments or no
|
|
255 bool BamToolsIndex::HasAlignments(const int& refId) const {
|
|
256
|
|
257 BamToolsIndexData::const_iterator indexIter = m_indexData.find(refId);
|
|
258 if ( indexIter == m_indexData.end()) return false;
|
|
259 const BamToolsReferenceEntry& refEntry = (*indexIter).second;
|
|
260 return refEntry.HasAlignments;
|
|
261 }
|
|
262
|
|
263 // return true if all index data is cached
|
|
264 bool BamToolsIndex::HasFullDataCache(void) const {
|
|
265 return m_hasFullDataCache;
|
|
266 }
|
|
267
|
|
268 // returns true if index cache has data for desired reference
|
|
269 bool BamToolsIndex::IsDataLoaded(const int& refId) const {
|
|
270
|
|
271 BamToolsIndexData::const_iterator indexIter = m_indexData.find(refId);
|
|
272 if ( indexIter == m_indexData.end()) return false;
|
|
273 const BamToolsReferenceEntry& refEntry = (*indexIter).second;
|
|
274
|
|
275 if ( !refEntry.HasAlignments ) return true; // no data period
|
|
276
|
|
277 // return whether offsets list contains data
|
|
278 return !refEntry.Offsets.empty();
|
|
279 }
|
|
280
|
|
281 // attempts to use index to jump to region; returns success/fail
|
|
282 bool BamToolsIndex::Jump(const BamRegion& region, bool* hasAlignmentsInRegion) {
|
|
283
|
|
284 // clear flag
|
|
285 *hasAlignmentsInRegion = false;
|
|
286
|
|
287 // check valid BamReader state
|
|
288 if ( m_reader == 0 || m_BGZF == 0 || !m_reader->IsOpen() ) {
|
|
289 fprintf(stderr, "ERROR: Could not jump: invalid BamReader state.\n");
|
|
290 return false;
|
|
291 }
|
|
292
|
|
293 // make sure left-bound position is valid
|
|
294 if ( region.LeftPosition > m_references.at(region.LeftRefID).RefLength )
|
|
295 return false;
|
|
296
|
|
297 // calculate nearest offset to jump to
|
|
298 int64_t offset;
|
|
299 if ( !GetOffset(region, offset, hasAlignmentsInRegion) ) {
|
|
300 fprintf(stderr, "ERROR: Could not jump - unable to calculate offset for specified region.\n");
|
|
301 return false;
|
|
302 }
|
|
303
|
|
304 // return success/failure of seek
|
|
305 return m_BGZF->Seek(offset);
|
|
306 }
|
|
307
|
|
308 // clears index data from all references except the first
|
|
309 void BamToolsIndex::KeepOnlyFirstReferenceOffsets(void) {
|
|
310 BamToolsIndexData::const_iterator indexBegin = m_indexData.begin();
|
|
311 KeepOnlyReferenceOffsets( (*indexBegin).first );
|
|
312 }
|
|
313
|
|
314 // clears index data from all references except the one specified
|
|
315 void BamToolsIndex::KeepOnlyReferenceOffsets(const int& refId) {
|
|
316 BamToolsIndexData::iterator mapIter = m_indexData.begin();
|
|
317 BamToolsIndexData::iterator mapEnd = m_indexData.end();
|
|
318 for ( ; mapIter != mapEnd; ++mapIter ) {
|
|
319 const int entryRefId = (*mapIter).first;
|
|
320 if ( entryRefId != refId )
|
|
321 ClearReferenceOffsets(entryRefId);
|
|
322 }
|
|
323 }
|
|
324
|
|
325 // load index data for all references, return true if loaded OK
|
|
326 bool BamToolsIndex::LoadAllReferences(bool saveData) {
|
|
327
|
|
328 // skip if data already loaded
|
|
329 if ( m_hasFullDataCache ) return true;
|
|
330
|
|
331 // read in number of references
|
|
332 int32_t numReferences;
|
|
333 if ( !LoadReferenceCount(numReferences) ) return false;
|
|
334 //SetReferenceCount(numReferences);
|
|
335
|
|
336 // iterate over reference entries
|
|
337 bool loadedOk = true;
|
|
338 for ( int i = 0; i < numReferences; ++i )
|
|
339 loadedOk &= LoadReference(i, saveData);
|
|
340
|
|
341 // set flag
|
|
342 if ( loadedOk && saveData )
|
|
343 m_hasFullDataCache = true;
|
|
344
|
|
345 // return success/failure of load
|
|
346 return loadedOk;
|
|
347 }
|
|
348
|
|
349 // load header data from index file, return true if loaded OK
|
|
350 bool BamToolsIndex::LoadHeader(void) {
|
|
351
|
|
352 // check magic number
|
|
353 if ( !CheckMagicNumber() ) return false;
|
|
354
|
|
355 // check BTI version
|
|
356 if ( !CheckVersion() ) return false;
|
|
357
|
|
358 // read in block size
|
|
359 size_t elementsRead = fread(&m_blockSize, sizeof(m_blockSize), 1, m_indexStream);
|
|
360 if ( elementsRead != 1 ) return false;
|
|
361 if ( m_isBigEndian ) SwapEndian_32(m_blockSize);
|
|
362
|
|
363 // store offset of beginning of data
|
|
364 m_dataBeginOffset = ftell64(m_indexStream);
|
|
365
|
|
366 // return success/failure of load
|
|
367 return (elementsRead == 1);
|
|
368 }
|
|
369
|
|
370 // load a single index entry from file, return true if loaded OK
|
|
371 // @saveData - save data in memory if true, just read & discard if false
|
|
372 bool BamToolsIndex::LoadIndexEntry(const int& refId, bool saveData) {
|
|
373
|
|
374 // read in index entry data members
|
|
375 size_t elementsRead = 0;
|
|
376 BamToolsIndexEntry entry;
|
|
377 elementsRead += fread(&entry.MaxEndPosition, sizeof(entry.MaxEndPosition), 1, m_indexStream);
|
|
378 elementsRead += fread(&entry.StartOffset, sizeof(entry.StartOffset), 1, m_indexStream);
|
|
379 elementsRead += fread(&entry.StartPosition, sizeof(entry.StartPosition), 1, m_indexStream);
|
|
380 if ( elementsRead != 3 ) {
|
|
381 cerr << "Error reading index entry. Expected 3 elements, read in: " << elementsRead << endl;
|
|
382 return false;
|
|
383 }
|
|
384
|
|
385 // swap endian-ness if necessary
|
|
386 if ( m_isBigEndian ) {
|
|
387 SwapEndian_32(entry.MaxEndPosition);
|
|
388 SwapEndian_64(entry.StartOffset);
|
|
389 SwapEndian_32(entry.StartPosition);
|
|
390 }
|
|
391
|
|
392 // save data
|
|
393 if ( saveData )
|
|
394 SaveOffsetEntry(refId, entry);
|
|
395
|
|
396 // return success/failure of load
|
|
397 return true;
|
|
398 }
|
|
399
|
|
400 // load a single reference from file, return true if loaded OK
|
|
401 // @saveData - save data in memory if true, just read & discard if false
|
|
402 bool BamToolsIndex::LoadFirstReference(bool saveData) {
|
|
403 BamToolsIndexData::const_iterator indexBegin = m_indexData.begin();
|
|
404 return LoadReference( (*indexBegin).first, saveData );
|
|
405 }
|
|
406
|
|
407 // load a single reference from file, return true if loaded OK
|
|
408 // @saveData - save data in memory if true, just read & discard if false
|
|
409 bool BamToolsIndex::LoadReference(const int& refId, bool saveData) {
|
|
410
|
|
411 // read in number of offsets for this reference
|
|
412 uint32_t numOffsets;
|
|
413 size_t elementsRead = fread(&numOffsets, sizeof(numOffsets), 1, m_indexStream);
|
|
414 if ( elementsRead != 1 ) return false;
|
|
415 if ( m_isBigEndian ) SwapEndian_32(numOffsets);
|
|
416
|
|
417 // initialize offsets container for this reference
|
|
418 SetOffsetCount(refId, (int)numOffsets);
|
|
419
|
|
420 // iterate over offset entries
|
|
421 for ( unsigned int j = 0; j < numOffsets; ++j )
|
|
422 LoadIndexEntry(refId, saveData);
|
|
423
|
|
424 // return success/failure of load
|
|
425 return true;
|
|
426 }
|
|
427
|
|
428 // loads number of references, return true if loaded OK
|
|
429 bool BamToolsIndex::LoadReferenceCount(int& numReferences) {
|
|
430
|
|
431 size_t elementsRead = 0;
|
|
432
|
|
433 // read reference count
|
|
434 elementsRead += fread(&numReferences, sizeof(numReferences), 1, m_indexStream);
|
|
435 if ( m_isBigEndian ) SwapEndian_32(numReferences);
|
|
436
|
|
437 // return success/failure of load
|
|
438 return ( elementsRead == 1 );
|
|
439 }
|
|
440
|
|
441 // saves an index offset entry in memory
|
|
442 void BamToolsIndex::SaveOffsetEntry(const int& refId, const BamToolsIndexEntry& entry) {
|
|
443 BamToolsReferenceEntry& refEntry = m_indexData[refId];
|
|
444 refEntry.HasAlignments = true;
|
|
445 refEntry.Offsets.push_back(entry);
|
|
446 }
|
|
447
|
|
448 // pre-allocates size for offset vector
|
|
449 void BamToolsIndex::SetOffsetCount(const int& refId, const int& offsetCount) {
|
|
450 BamToolsReferenceEntry& refEntry = m_indexData[refId];
|
|
451 refEntry.Offsets.reserve(offsetCount);
|
|
452 refEntry.HasAlignments = ( offsetCount > 0);
|
|
453 }
|
|
454
|
|
455 // initializes index data structure to hold @count references
|
|
456 void BamToolsIndex::SetReferenceCount(const int& count) {
|
|
457 for ( int i = 0; i < count; ++i )
|
|
458 m_indexData[i].HasAlignments = false;
|
|
459 }
|
|
460
|
|
461 // position file pointer to first reference begin, return true if skipped OK
|
|
462 bool BamToolsIndex::SkipToFirstReference(void) {
|
|
463 BamToolsIndexData::const_iterator indexBegin = m_indexData.begin();
|
|
464 return SkipToReference( (*indexBegin).first );
|
|
465 }
|
|
466
|
|
467 // position file pointer to desired reference begin, return true if skipped OK
|
|
468 bool BamToolsIndex::SkipToReference(const int& refId) {
|
|
469
|
|
470 // attempt rewind
|
|
471 if ( !Rewind() ) return false;
|
|
472
|
|
473 // read in number of references
|
|
474 int32_t numReferences;
|
|
475 size_t elementsRead = fread(&numReferences, sizeof(numReferences), 1, m_indexStream);
|
|
476 if ( elementsRead != 1 ) return false;
|
|
477 if ( m_isBigEndian ) SwapEndian_32(numReferences);
|
|
478
|
|
479 // iterate over reference entries
|
|
480 bool skippedOk = true;
|
|
481 int currentRefId = 0;
|
|
482 while (currentRefId != refId) {
|
|
483 skippedOk &= LoadReference(currentRefId, false);
|
|
484 ++currentRefId;
|
|
485 }
|
|
486
|
|
487 // return success/failure of skip
|
|
488 return skippedOk;
|
|
489 }
|
|
490
|
|
491 // write header to new index file
|
|
492 bool BamToolsIndex::WriteHeader(void) {
|
|
493
|
|
494 size_t elementsWritten = 0;
|
|
495
|
|
496 // write BTI index format 'magic number'
|
|
497 elementsWritten += fwrite("BTI\1", 1, 4, m_indexStream);
|
|
498
|
|
499 // write BTI index format version
|
|
500 int32_t currentVersion = (int32_t)m_outputVersion;
|
|
501 if ( m_isBigEndian ) SwapEndian_32(currentVersion);
|
|
502 elementsWritten += fwrite(¤tVersion, sizeof(currentVersion), 1, m_indexStream);
|
|
503
|
|
504 // write block size
|
|
505 int32_t blockSize = m_blockSize;
|
|
506 if ( m_isBigEndian ) SwapEndian_32(blockSize);
|
|
507 elementsWritten += fwrite(&blockSize, sizeof(blockSize), 1, m_indexStream);
|
|
508
|
|
509 // store offset of beginning of data
|
|
510 m_dataBeginOffset = ftell64(m_indexStream);
|
|
511
|
|
512 // return success/failure of write
|
|
513 return ( elementsWritten == 6 );
|
|
514 }
|
|
515
|
|
516 // write index data for all references to new index file
|
|
517 bool BamToolsIndex::WriteAllReferences(void) {
|
|
518
|
|
519 size_t elementsWritten = 0;
|
|
520
|
|
521 // write number of references
|
|
522 int32_t numReferences = (int32_t)m_indexData.size();
|
|
523 if ( m_isBigEndian ) SwapEndian_32(numReferences);
|
|
524 elementsWritten += fwrite(&numReferences, sizeof(numReferences), 1, m_indexStream);
|
|
525
|
|
526 // iterate through references in index
|
|
527 bool refOk = true;
|
|
528 BamToolsIndexData::const_iterator refIter = m_indexData.begin();
|
|
529 BamToolsIndexData::const_iterator refEnd = m_indexData.end();
|
|
530 for ( ; refIter != refEnd; ++refIter )
|
|
531 refOk &= WriteReferenceEntry( (*refIter).second );
|
|
532
|
|
533 return ( (elementsWritten == 1) && refOk );
|
|
534 }
|
|
535
|
|
536 // write current reference index data to new index file
|
|
537 bool BamToolsIndex::WriteReferenceEntry(const BamToolsReferenceEntry& refEntry) {
|
|
538
|
|
539 size_t elementsWritten = 0;
|
|
540
|
|
541 // write number of offsets listed for this reference
|
|
542 uint32_t numOffsets = refEntry.Offsets.size();
|
|
543 if ( m_isBigEndian ) SwapEndian_32(numOffsets);
|
|
544 elementsWritten += fwrite(&numOffsets, sizeof(numOffsets), 1, m_indexStream);
|
|
545
|
|
546 // iterate over offset entries
|
|
547 bool entriesOk = true;
|
|
548 vector<BamToolsIndexEntry>::const_iterator offsetIter = refEntry.Offsets.begin();
|
|
549 vector<BamToolsIndexEntry>::const_iterator offsetEnd = refEntry.Offsets.end();
|
|
550 for ( ; offsetIter != offsetEnd; ++offsetIter )
|
|
551 entriesOk &= WriteIndexEntry( (*offsetIter) );
|
|
552
|
|
553 return ( (elementsWritten == 1) && entriesOk );
|
|
554 }
|
|
555
|
|
556 // write current index offset entry to new index file
|
|
557 bool BamToolsIndex::WriteIndexEntry(const BamToolsIndexEntry& entry) {
|
|
558
|
|
559 // copy entry data
|
|
560 int32_t maxEndPosition = entry.MaxEndPosition;
|
|
561 int64_t startOffset = entry.StartOffset;
|
|
562 int32_t startPosition = entry.StartPosition;
|
|
563
|
|
564 // swap endian-ness if necessary
|
|
565 if ( m_isBigEndian ) {
|
|
566 SwapEndian_32(maxEndPosition);
|
|
567 SwapEndian_64(startOffset);
|
|
568 SwapEndian_32(startPosition);
|
|
569 }
|
|
570
|
|
571 // write the reference index entry
|
|
572 size_t elementsWritten = 0;
|
|
573 elementsWritten += fwrite(&maxEndPosition, sizeof(maxEndPosition), 1, m_indexStream);
|
|
574 elementsWritten += fwrite(&startOffset, sizeof(startOffset), 1, m_indexStream);
|
|
575 elementsWritten += fwrite(&startPosition, sizeof(startPosition), 1, m_indexStream);
|
|
576 return ( elementsWritten == 3 );
|
|
577 }
|