annotate spp/src/BamToolsIndex_p.cpp @ 6:ce08b0efa3fd draft

Uploaded
author zzhou
date Tue, 27 Nov 2012 16:11:40 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
6
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
1 // ***************************************************************************
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
2 // BamToolsIndex.cpp (c) 2010 Derek Barnett
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
3 // Marth Lab, Department of Biology, Boston College
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
4 // All rights reserved.
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
5 // ---------------------------------------------------------------------------
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
6 // Last modified: 22 November 2010 (DB)
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
7 // ---------------------------------------------------------------------------
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
8 // Provides index operations for the BamTools index format (".bti")
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
9 // ***************************************************************************
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
10
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
11 #include <BamAlignment.h>
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
12 #include <BamReader.h>
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
13 #include <BGZF.h>
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
14 #include <BamToolsIndex_p.h>
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
15 using namespace BamTools;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
16 using namespace BamTools::Internal;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
17
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
18 #include <cstdio>
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
19 #include <cstdlib>
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
20 #include <algorithm>
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
21 #include <iostream>
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
22 #include <map>
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
23 using namespace std;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
24
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
25 BamToolsIndex::BamToolsIndex(BgzfData* bgzf, BamReader* reader)
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
26 : BamIndex(bgzf, reader)
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
27 , m_blockSize(1000)
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
28 , m_dataBeginOffset(0)
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
29 , m_hasFullDataCache(false)
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
30 , m_inputVersion(0)
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
31 , m_outputVersion(BTI_1_2) // latest version - used for writing new index files
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
32 {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
33 m_isBigEndian = BamTools::SystemIsBigEndian();
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
34 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
35
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
36 // dtor
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
37 BamToolsIndex::~BamToolsIndex(void) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
38 ClearAllData();
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
39 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
40
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
41 // creates index data (in-memory) from current reader data
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
42 bool BamToolsIndex::Build(void) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
43
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
44 // be sure reader & BGZF file are valid & open for reading
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
45 if ( m_reader == 0 || m_BGZF == 0 || !m_BGZF->IsOpen )
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
46 return false;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
47
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
48 // move file pointer to beginning of alignments
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
49 if ( !m_reader->Rewind() ) return false;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
50
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
51 // initialize index data structure with space for all references
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
52 const int numReferences = (int)m_references.size();
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
53 m_indexData.clear();
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
54 m_hasFullDataCache = false;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
55 SetReferenceCount(numReferences);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
56
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
57 // set up counters and markers
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
58 int32_t currentBlockCount = 0;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
59 int64_t currentAlignmentOffset = m_BGZF->Tell();
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
60 int32_t blockRefId = 0;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
61 int32_t blockMaxEndPosition = 0;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
62 int64_t blockStartOffset = currentAlignmentOffset;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
63 int32_t blockStartPosition = -1;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
64
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
65 // plow through alignments, storing index entries
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
66 BamAlignment al;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
67 while ( m_reader->GetNextAlignmentCore(al) ) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
68
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
69 // if block contains data (not the first time through) AND alignment is on a new reference
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
70 if ( currentBlockCount > 0 && al.RefID != blockRefId ) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
71
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
72 // store previous data
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
73 BamToolsIndexEntry entry(blockMaxEndPosition, blockStartOffset, blockStartPosition);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
74 SaveOffsetEntry(blockRefId, entry);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
75
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
76 // intialize new block for current alignment's reference
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
77 currentBlockCount = 0;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
78 blockMaxEndPosition = al.GetEndPosition();
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
79 blockStartOffset = currentAlignmentOffset;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
80 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
81
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
82 // if beginning of block, save first alignment's refID & position
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
83 if ( currentBlockCount == 0 ) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
84 blockRefId = al.RefID;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
85 blockStartPosition = al.Position;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
86 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
87
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
88 // increment block counter
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
89 ++currentBlockCount;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
90
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
91 // check end position
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
92 int32_t alignmentEndPosition = al.GetEndPosition();
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
93 if ( alignmentEndPosition > blockMaxEndPosition )
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
94 blockMaxEndPosition = alignmentEndPosition;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
95
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
96 // if block is full, get offset for next block, reset currentBlockCount
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
97 if ( currentBlockCount == m_blockSize ) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
98 BamToolsIndexEntry entry(blockMaxEndPosition, blockStartOffset, blockStartPosition);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
99 SaveOffsetEntry(blockRefId, entry);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
100 blockStartOffset = m_BGZF->Tell();
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
101 currentBlockCount = 0;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
102 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
103
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
104 // not the best name, but for the next iteration, this value will be the offset of the *current* alignment
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
105 // necessary because we won't know if this next alignment is on a new reference until we actually read it
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
106 currentAlignmentOffset = m_BGZF->Tell();
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
107 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
108
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
109 // store final block with data
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
110 BamToolsIndexEntry entry(blockMaxEndPosition, blockStartOffset, blockStartPosition);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
111 SaveOffsetEntry(blockRefId, entry);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
112
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
113 // set flag
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
114 m_hasFullDataCache = true;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
115
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
116 // return success/failure of rewind
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
117 return m_reader->Rewind();
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
118 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
119
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
120 // check index file magic number, return true if OK
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
121 bool BamToolsIndex::CheckMagicNumber(void) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
122
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
123 // see if index is valid BAM index
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
124 char magic[4];
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
125 size_t elementsRead = fread(magic, 1, 4, m_indexStream);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
126 if ( elementsRead != 4 ) return false;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
127 if ( strncmp(magic, "BTI\1", 4) != 0 ) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
128 fprintf(stderr, "Problem with index file - invalid format.\n");
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
129 return false;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
130 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
131
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
132 // otherwise ok
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
133 return true;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
134 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
135
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
136 // check index file version, return true if OK
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
137 bool BamToolsIndex::CheckVersion(void) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
138
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
139 // read version from file
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
140 size_t elementsRead = fread(&m_inputVersion, sizeof(m_inputVersion), 1, m_indexStream);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
141 if ( elementsRead != 1 ) return false;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
142 if ( m_isBigEndian ) SwapEndian_32(m_inputVersion);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
143
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
144 // if version is negative, or zero
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
145 if ( m_inputVersion <= 0 ) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
146 fprintf(stderr, "Problem with index file - invalid version.\n");
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
147 return false;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
148 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
149
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
150 // if version is newer than can be supported by this version of bamtools
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
151 else if ( m_inputVersion > m_outputVersion ) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
152 fprintf(stderr, "Problem with index file - attempting to use an outdated version of BamTools with a newer index file.\n");
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
153 fprintf(stderr, "Please update BamTools to a more recent version to support this index file.\n");
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
154 return false;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
155 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
156
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
157 // ------------------------------------------------------------------
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
158 // check for deprecated, unsupported versions
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
159 // (typically whose format did not accomodate a particular bug fix)
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
160
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
161 else if ( (Version)m_inputVersion == BTI_1_0 ) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
162 fprintf(stderr, "\nProblem with index file - this version of the index contains a bug related to accessing data near reference ends.\n");
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
163 fprintf(stderr, "\nPlease run \'bamtools index -bti -in yourData.bam\' to generate an up-to-date BamToolsIndex.\n\n");
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
164 return false;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
165 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
166
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
167 else if ( (Version)m_inputVersion == BTI_1_1 ) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
168 fprintf(stderr, "\nProblem with index file - this version of the index contains a bug related to handling empty references.\n");
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
169 fprintf(stderr, "\nPlease run \'bamtools index -bti -in yourData.bam\' to generate an up-to-date BamToolsIndex.\n\n");
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
170 return false;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
171 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
172
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
173 // otherwise ok
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
174 else return true;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
175 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
176
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
177 // clear all current index offset data in memory
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
178 void BamToolsIndex::ClearAllData(void) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
179 BamToolsIndexData::const_iterator indexIter = m_indexData.begin();
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
180 BamToolsIndexData::const_iterator indexEnd = m_indexData.end();
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
181 for ( ; indexIter != indexEnd; ++indexIter ) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
182 const int& refId = (*indexIter).first;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
183 ClearReferenceOffsets(refId);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
184 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
185 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
186
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
187 // clear all index offset data for desired reference
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
188 void BamToolsIndex::ClearReferenceOffsets(const int& refId) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
189 if ( m_indexData.find(refId) == m_indexData.end() ) return;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
190 vector<BamToolsIndexEntry>& offsets = m_indexData[refId].Offsets;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
191 offsets.clear();
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
192 m_hasFullDataCache = false;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
193 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
194
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
195 // return file position after header metadata
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
196 const off_t BamToolsIndex::DataBeginOffset(void) const {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
197 return m_dataBeginOffset;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
198 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
199
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
200 // calculate BAM file offset for desired region
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
201 // return true if no error (*NOT* equivalent to "has alignments or valid offset")
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
202 // check @hasAlignmentsInRegion to determine this status
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
203 // @region - target region
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
204 // @offset - resulting seek target
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
205 // @hasAlignmentsInRegion - sometimes a file just lacks data in region, this flag indicates that status
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
206 // N.B. - ignores isRightBoundSpecified
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
207 bool BamToolsIndex::GetOffset(const BamRegion& region, int64_t& offset, bool* hasAlignmentsInRegion) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
208
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
209 // return false if leftBound refID is not found in index data
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
210 BamToolsIndexData::const_iterator indexIter = m_indexData.find(region.LeftRefID);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
211 if ( indexIter == m_indexData.end()) return false;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
212
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
213 // load index data for region if not already cached
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
214 if ( !IsDataLoaded(region.LeftRefID) ) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
215 bool loadedOk = true;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
216 loadedOk &= SkipToReference(region.LeftRefID);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
217 loadedOk &= LoadReference(region.LeftRefID);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
218 if ( !loadedOk ) return false;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
219 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
220
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
221 // localize index data for this reference (& sanity check that data actually exists)
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
222 indexIter = m_indexData.find(region.LeftRefID);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
223 if ( indexIter == m_indexData.end()) return false;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
224 const vector<BamToolsIndexEntry>& referenceOffsets = (*indexIter).second.Offsets;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
225 if ( referenceOffsets.empty() ) return false;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
226
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
227 // -------------------------------------------------------
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
228 // calculate nearest index to jump to
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
229
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
230 // save first offset
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
231 offset = (*referenceOffsets.begin()).StartOffset;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
232
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
233 // iterate over offsets entries on this reference
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
234 vector<BamToolsIndexEntry>::const_iterator offsetIter = referenceOffsets.begin();
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
235 vector<BamToolsIndexEntry>::const_iterator offsetEnd = referenceOffsets.end();
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
236 for ( ; offsetIter != offsetEnd; ++offsetIter ) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
237 const BamToolsIndexEntry& entry = (*offsetIter);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
238 // break if alignment 'entry' overlaps region
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
239 if ( entry.MaxEndPosition >= region.LeftPosition ) break;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
240 offset = (*offsetIter).StartOffset;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
241 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
242
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
243 // set flag based on whether an index entry was found for this region
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
244 *hasAlignmentsInRegion = ( offsetIter != offsetEnd );
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
245
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
246 // if cache mode set to none, dump the data we just loaded
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
247 if (m_cacheMode == BamIndex::NoIndexCaching )
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
248 ClearReferenceOffsets(region.LeftRefID);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
249
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
250 // return success
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
251 return true;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
252 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
253
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
254 // returns whether reference has alignments or no
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
255 bool BamToolsIndex::HasAlignments(const int& refId) const {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
256
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
257 BamToolsIndexData::const_iterator indexIter = m_indexData.find(refId);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
258 if ( indexIter == m_indexData.end()) return false;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
259 const BamToolsReferenceEntry& refEntry = (*indexIter).second;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
260 return refEntry.HasAlignments;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
261 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
262
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
263 // return true if all index data is cached
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
264 bool BamToolsIndex::HasFullDataCache(void) const {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
265 return m_hasFullDataCache;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
266 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
267
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
268 // returns true if index cache has data for desired reference
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
269 bool BamToolsIndex::IsDataLoaded(const int& refId) const {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
270
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
271 BamToolsIndexData::const_iterator indexIter = m_indexData.find(refId);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
272 if ( indexIter == m_indexData.end()) return false;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
273 const BamToolsReferenceEntry& refEntry = (*indexIter).second;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
274
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
275 if ( !refEntry.HasAlignments ) return true; // no data period
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
276
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
277 // return whether offsets list contains data
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
278 return !refEntry.Offsets.empty();
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
279 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
280
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
281 // attempts to use index to jump to region; returns success/fail
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
282 bool BamToolsIndex::Jump(const BamRegion& region, bool* hasAlignmentsInRegion) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
283
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
284 // clear flag
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
285 *hasAlignmentsInRegion = false;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
286
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
287 // check valid BamReader state
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
288 if ( m_reader == 0 || m_BGZF == 0 || !m_reader->IsOpen() ) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
289 fprintf(stderr, "ERROR: Could not jump: invalid BamReader state.\n");
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
290 return false;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
291 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
292
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
293 // make sure left-bound position is valid
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
294 if ( region.LeftPosition > m_references.at(region.LeftRefID).RefLength )
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
295 return false;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
296
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
297 // calculate nearest offset to jump to
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
298 int64_t offset;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
299 if ( !GetOffset(region, offset, hasAlignmentsInRegion) ) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
300 fprintf(stderr, "ERROR: Could not jump - unable to calculate offset for specified region.\n");
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
301 return false;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
302 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
303
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
304 // return success/failure of seek
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
305 return m_BGZF->Seek(offset);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
306 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
307
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
308 // clears index data from all references except the first
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
309 void BamToolsIndex::KeepOnlyFirstReferenceOffsets(void) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
310 BamToolsIndexData::const_iterator indexBegin = m_indexData.begin();
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
311 KeepOnlyReferenceOffsets( (*indexBegin).first );
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
312 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
313
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
314 // clears index data from all references except the one specified
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
315 void BamToolsIndex::KeepOnlyReferenceOffsets(const int& refId) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
316 BamToolsIndexData::iterator mapIter = m_indexData.begin();
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
317 BamToolsIndexData::iterator mapEnd = m_indexData.end();
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
318 for ( ; mapIter != mapEnd; ++mapIter ) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
319 const int entryRefId = (*mapIter).first;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
320 if ( entryRefId != refId )
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
321 ClearReferenceOffsets(entryRefId);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
322 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
323 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
324
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
325 // load index data for all references, return true if loaded OK
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
326 bool BamToolsIndex::LoadAllReferences(bool saveData) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
327
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
328 // skip if data already loaded
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
329 if ( m_hasFullDataCache ) return true;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
330
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
331 // read in number of references
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
332 int32_t numReferences;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
333 if ( !LoadReferenceCount(numReferences) ) return false;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
334 //SetReferenceCount(numReferences);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
335
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
336 // iterate over reference entries
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
337 bool loadedOk = true;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
338 for ( int i = 0; i < numReferences; ++i )
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
339 loadedOk &= LoadReference(i, saveData);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
340
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
341 // set flag
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
342 if ( loadedOk && saveData )
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
343 m_hasFullDataCache = true;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
344
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
345 // return success/failure of load
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
346 return loadedOk;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
347 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
348
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
349 // load header data from index file, return true if loaded OK
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
350 bool BamToolsIndex::LoadHeader(void) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
351
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
352 // check magic number
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
353 if ( !CheckMagicNumber() ) return false;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
354
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
355 // check BTI version
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
356 if ( !CheckVersion() ) return false;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
357
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
358 // read in block size
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
359 size_t elementsRead = fread(&m_blockSize, sizeof(m_blockSize), 1, m_indexStream);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
360 if ( elementsRead != 1 ) return false;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
361 if ( m_isBigEndian ) SwapEndian_32(m_blockSize);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
362
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
363 // store offset of beginning of data
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
364 m_dataBeginOffset = ftell64(m_indexStream);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
365
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
366 // return success/failure of load
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
367 return (elementsRead == 1);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
368 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
369
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
370 // load a single index entry from file, return true if loaded OK
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
371 // @saveData - save data in memory if true, just read & discard if false
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
372 bool BamToolsIndex::LoadIndexEntry(const int& refId, bool saveData) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
373
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
374 // read in index entry data members
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
375 size_t elementsRead = 0;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
376 BamToolsIndexEntry entry;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
377 elementsRead += fread(&entry.MaxEndPosition, sizeof(entry.MaxEndPosition), 1, m_indexStream);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
378 elementsRead += fread(&entry.StartOffset, sizeof(entry.StartOffset), 1, m_indexStream);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
379 elementsRead += fread(&entry.StartPosition, sizeof(entry.StartPosition), 1, m_indexStream);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
380 if ( elementsRead != 3 ) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
381 cerr << "Error reading index entry. Expected 3 elements, read in: " << elementsRead << endl;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
382 return false;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
383 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
384
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
385 // swap endian-ness if necessary
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
386 if ( m_isBigEndian ) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
387 SwapEndian_32(entry.MaxEndPosition);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
388 SwapEndian_64(entry.StartOffset);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
389 SwapEndian_32(entry.StartPosition);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
390 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
391
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
392 // save data
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
393 if ( saveData )
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
394 SaveOffsetEntry(refId, entry);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
395
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
396 // return success/failure of load
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
397 return true;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
398 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
399
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
400 // load a single reference from file, return true if loaded OK
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
401 // @saveData - save data in memory if true, just read & discard if false
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
402 bool BamToolsIndex::LoadFirstReference(bool saveData) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
403 BamToolsIndexData::const_iterator indexBegin = m_indexData.begin();
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
404 return LoadReference( (*indexBegin).first, saveData );
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
405 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
406
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
407 // load a single reference from file, return true if loaded OK
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
408 // @saveData - save data in memory if true, just read & discard if false
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
409 bool BamToolsIndex::LoadReference(const int& refId, bool saveData) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
410
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
411 // read in number of offsets for this reference
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
412 uint32_t numOffsets;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
413 size_t elementsRead = fread(&numOffsets, sizeof(numOffsets), 1, m_indexStream);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
414 if ( elementsRead != 1 ) return false;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
415 if ( m_isBigEndian ) SwapEndian_32(numOffsets);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
416
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
417 // initialize offsets container for this reference
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
418 SetOffsetCount(refId, (int)numOffsets);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
419
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
420 // iterate over offset entries
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
421 for ( unsigned int j = 0; j < numOffsets; ++j )
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
422 LoadIndexEntry(refId, saveData);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
423
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
424 // return success/failure of load
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
425 return true;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
426 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
427
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
428 // loads number of references, return true if loaded OK
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
429 bool BamToolsIndex::LoadReferenceCount(int& numReferences) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
430
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
431 size_t elementsRead = 0;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
432
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
433 // read reference count
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
434 elementsRead += fread(&numReferences, sizeof(numReferences), 1, m_indexStream);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
435 if ( m_isBigEndian ) SwapEndian_32(numReferences);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
436
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
437 // return success/failure of load
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
438 return ( elementsRead == 1 );
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
439 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
440
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
441 // saves an index offset entry in memory
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
442 void BamToolsIndex::SaveOffsetEntry(const int& refId, const BamToolsIndexEntry& entry) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
443 BamToolsReferenceEntry& refEntry = m_indexData[refId];
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
444 refEntry.HasAlignments = true;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
445 refEntry.Offsets.push_back(entry);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
446 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
447
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
448 // pre-allocates size for offset vector
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
449 void BamToolsIndex::SetOffsetCount(const int& refId, const int& offsetCount) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
450 BamToolsReferenceEntry& refEntry = m_indexData[refId];
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
451 refEntry.Offsets.reserve(offsetCount);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
452 refEntry.HasAlignments = ( offsetCount > 0);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
453 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
454
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
455 // initializes index data structure to hold @count references
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
456 void BamToolsIndex::SetReferenceCount(const int& count) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
457 for ( int i = 0; i < count; ++i )
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
458 m_indexData[i].HasAlignments = false;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
459 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
460
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
461 // position file pointer to first reference begin, return true if skipped OK
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
462 bool BamToolsIndex::SkipToFirstReference(void) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
463 BamToolsIndexData::const_iterator indexBegin = m_indexData.begin();
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
464 return SkipToReference( (*indexBegin).first );
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
465 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
466
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
467 // position file pointer to desired reference begin, return true if skipped OK
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
468 bool BamToolsIndex::SkipToReference(const int& refId) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
469
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
470 // attempt rewind
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
471 if ( !Rewind() ) return false;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
472
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
473 // read in number of references
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
474 int32_t numReferences;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
475 size_t elementsRead = fread(&numReferences, sizeof(numReferences), 1, m_indexStream);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
476 if ( elementsRead != 1 ) return false;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
477 if ( m_isBigEndian ) SwapEndian_32(numReferences);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
478
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
479 // iterate over reference entries
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
480 bool skippedOk = true;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
481 int currentRefId = 0;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
482 while (currentRefId != refId) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
483 skippedOk &= LoadReference(currentRefId, false);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
484 ++currentRefId;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
485 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
486
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
487 // return success/failure of skip
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
488 return skippedOk;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
489 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
490
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
491 // write header to new index file
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
492 bool BamToolsIndex::WriteHeader(void) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
493
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
494 size_t elementsWritten = 0;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
495
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
496 // write BTI index format 'magic number'
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
497 elementsWritten += fwrite("BTI\1", 1, 4, m_indexStream);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
498
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
499 // write BTI index format version
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
500 int32_t currentVersion = (int32_t)m_outputVersion;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
501 if ( m_isBigEndian ) SwapEndian_32(currentVersion);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
502 elementsWritten += fwrite(&currentVersion, sizeof(currentVersion), 1, m_indexStream);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
503
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
504 // write block size
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
505 int32_t blockSize = m_blockSize;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
506 if ( m_isBigEndian ) SwapEndian_32(blockSize);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
507 elementsWritten += fwrite(&blockSize, sizeof(blockSize), 1, m_indexStream);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
508
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
509 // store offset of beginning of data
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
510 m_dataBeginOffset = ftell64(m_indexStream);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
511
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
512 // return success/failure of write
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
513 return ( elementsWritten == 6 );
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
514 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
515
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
516 // write index data for all references to new index file
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
517 bool BamToolsIndex::WriteAllReferences(void) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
518
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
519 size_t elementsWritten = 0;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
520
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
521 // write number of references
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
522 int32_t numReferences = (int32_t)m_indexData.size();
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
523 if ( m_isBigEndian ) SwapEndian_32(numReferences);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
524 elementsWritten += fwrite(&numReferences, sizeof(numReferences), 1, m_indexStream);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
525
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
526 // iterate through references in index
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
527 bool refOk = true;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
528 BamToolsIndexData::const_iterator refIter = m_indexData.begin();
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
529 BamToolsIndexData::const_iterator refEnd = m_indexData.end();
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
530 for ( ; refIter != refEnd; ++refIter )
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
531 refOk &= WriteReferenceEntry( (*refIter).second );
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
532
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
533 return ( (elementsWritten == 1) && refOk );
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
534 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
535
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
536 // write current reference index data to new index file
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
537 bool BamToolsIndex::WriteReferenceEntry(const BamToolsReferenceEntry& refEntry) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
538
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
539 size_t elementsWritten = 0;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
540
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
541 // write number of offsets listed for this reference
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
542 uint32_t numOffsets = refEntry.Offsets.size();
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
543 if ( m_isBigEndian ) SwapEndian_32(numOffsets);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
544 elementsWritten += fwrite(&numOffsets, sizeof(numOffsets), 1, m_indexStream);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
545
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
546 // iterate over offset entries
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
547 bool entriesOk = true;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
548 vector<BamToolsIndexEntry>::const_iterator offsetIter = refEntry.Offsets.begin();
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
549 vector<BamToolsIndexEntry>::const_iterator offsetEnd = refEntry.Offsets.end();
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
550 for ( ; offsetIter != offsetEnd; ++offsetIter )
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
551 entriesOk &= WriteIndexEntry( (*offsetIter) );
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
552
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
553 return ( (elementsWritten == 1) && entriesOk );
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
554 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
555
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
556 // write current index offset entry to new index file
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
557 bool BamToolsIndex::WriteIndexEntry(const BamToolsIndexEntry& entry) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
558
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
559 // copy entry data
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
560 int32_t maxEndPosition = entry.MaxEndPosition;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
561 int64_t startOffset = entry.StartOffset;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
562 int32_t startPosition = entry.StartPosition;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
563
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
564 // swap endian-ness if necessary
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
565 if ( m_isBigEndian ) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
566 SwapEndian_32(maxEndPosition);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
567 SwapEndian_64(startOffset);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
568 SwapEndian_32(startPosition);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
569 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
570
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
571 // write the reference index entry
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
572 size_t elementsWritten = 0;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
573 elementsWritten += fwrite(&maxEndPosition, sizeof(maxEndPosition), 1, m_indexStream);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
574 elementsWritten += fwrite(&startOffset, sizeof(startOffset), 1, m_indexStream);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
575 elementsWritten += fwrite(&startPosition, sizeof(startPosition), 1, m_indexStream);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
576 return ( elementsWritten == 3 );
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
577 }