annotate spp/src/BamStandardIndex_p.cpp @ 15:e689b83b0257 draft

Uploaded
author zzhou
date Tue, 27 Nov 2012 16:15:21 -0500
parents ce08b0efa3fd
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
6
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
1 // ***************************************************************************
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
2 // BamStandardIndex.cpp (c) 2010 Derek Barnett
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
3 // Marth Lab, Department of Biology, Boston College
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
4 // All rights reserved.
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
5 // ---------------------------------------------------------------------------
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
6 // Last modified: 22 November 2010 (DB)
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
7 // ---------------------------------------------------------------------------
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
8 // Provides index operations for the standardized BAM index format (".bai")
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
9 // ***************************************************************************
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
10
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
11 #include <BamAlignment.h>
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
12 #include <BamReader.h>
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
13 #include <BGZF.h>
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
14 #include <BamStandardIndex_p.h>
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
15 using namespace BamTools;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
16 using namespace BamTools::Internal;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
17
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
18 #include <cstdio>
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
19 #include <cstdlib>
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
20 #include <algorithm>
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
21 #include <iostream>
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
22 #include <map>
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
23 using namespace std;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
24
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
25 BamStandardIndex::BamStandardIndex(BgzfData* bgzf, BamReader* reader)
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
26 : BamIndex(bgzf, reader)
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
27 , m_dataBeginOffset(0)
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
28 , m_hasFullDataCache(false)
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
29 {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
30 m_isBigEndian = BamTools::SystemIsBigEndian();
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
31 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
32
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
33 BamStandardIndex::~BamStandardIndex(void) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
34 ClearAllData();
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
35 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
36
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
37 // calculate bins that overlap region
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
38 int BamStandardIndex::BinsFromRegion(const BamRegion& region,
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
39 const bool isRightBoundSpecified,
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
40 uint16_t bins[MAX_BIN])
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
41 {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
42 // get region boundaries
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
43 uint32_t begin = (unsigned int)region.LeftPosition;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
44 uint32_t end;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
45
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
46 // if right bound specified AND left&right bounds are on same reference
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
47 // OK to use right bound position
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
48 if ( isRightBoundSpecified && ( region.LeftRefID == region.RightRefID ) )
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
49 end = (unsigned int)region.RightPosition;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
50
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
51 // otherwise, use end of left bound reference as cutoff
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
52 else
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
53 end = (unsigned int)m_references.at(region.LeftRefID).RefLength - 1;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
54
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
55 // initialize list, bin '0' always a valid bin
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
56 int i = 0;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
57 bins[i++] = 0;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
58
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
59 // get rest of bins that contain this region
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
60 unsigned int k;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
61 for (k = 1 + (begin>>26); k <= 1 + (end>>26); ++k) { bins[i++] = k; }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
62 for (k = 9 + (begin>>23); k <= 9 + (end>>23); ++k) { bins[i++] = k; }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
63 for (k = 73 + (begin>>20); k <= 73 + (end>>20); ++k) { bins[i++] = k; }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
64 for (k = 585 + (begin>>17); k <= 585 + (end>>17); ++k) { bins[i++] = k; }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
65 for (k = 4681 + (begin>>14); k <= 4681 + (end>>14); ++k) { bins[i++] = k; }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
66
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
67 // return number of bins stored
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
68 return i;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
69 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
70
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
71 // creates index data (in-memory) from current reader data
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
72 bool BamStandardIndex::Build(void) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
73
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
74 // be sure reader & BGZF file are valid & open for reading
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
75 if ( m_reader == 0 || m_BGZF == 0 || !m_BGZF->IsOpen )
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
76 return false;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
77
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
78 // move file pointer to beginning of alignments
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
79 m_reader->Rewind();
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
80
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
81 // get reference count, reserve index space
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
82 const int numReferences = (int)m_references.size();
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
83 m_indexData.clear();
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
84 m_hasFullDataCache = false;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
85 SetReferenceCount(numReferences);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
86
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
87 // sets default constant for bin, ID, offset, coordinate variables
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
88 const uint32_t defaultValue = 0xffffffffu;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
89
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
90 // bin data
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
91 uint32_t saveBin(defaultValue);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
92 uint32_t lastBin(defaultValue);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
93
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
94 // reference ID data
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
95 int32_t saveRefID(defaultValue);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
96 int32_t lastRefID(defaultValue);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
97
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
98 // offset data
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
99 uint64_t saveOffset = m_BGZF->Tell();
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
100 uint64_t lastOffset = saveOffset;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
101
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
102 // coordinate data
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
103 int32_t lastCoordinate = defaultValue;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
104
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
105 BamAlignment bAlignment;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
106 while ( m_reader->GetNextAlignmentCore(bAlignment) ) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
107
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
108 // change of chromosome, save ID, reset bin
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
109 if ( lastRefID != bAlignment.RefID ) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
110 lastRefID = bAlignment.RefID;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
111 lastBin = defaultValue;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
112 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
113
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
114 // if lastCoordinate greater than BAM position - file not sorted properly
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
115 else if ( lastCoordinate > bAlignment.Position ) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
116 fprintf(stderr, "BAM file not properly sorted:\n");
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
117 fprintf(stderr, "Alignment %s : %d > %d on reference (id = %d)", bAlignment.Name.c_str(),
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
118 lastCoordinate, bAlignment.Position, bAlignment.RefID);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
119 exit(1);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
120 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
121
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
122 // if valid reference && BAM bin spans some minimum cutoff (smaller bin ids span larger regions)
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
123 if ( (bAlignment.RefID >= 0) && (bAlignment.Bin < 4681) ) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
124
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
125 // save linear offset entry (matched to BAM entry refID)
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
126 BamStandardIndexData::iterator indexIter = m_indexData.find(bAlignment.RefID);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
127 if ( indexIter == m_indexData.end() ) return false; // error
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
128 ReferenceIndex& refIndex = (*indexIter).second;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
129 LinearOffsetVector& offsets = refIndex.Offsets;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
130 SaveLinearOffset(offsets, bAlignment, lastOffset);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
131 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
132
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
133 // if current BamAlignment bin != lastBin, "then possibly write the binning index"
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
134 if ( bAlignment.Bin != lastBin ) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
135
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
136 // if not first time through
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
137 if ( saveBin != defaultValue ) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
138
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
139 // save Bam bin entry
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
140 BamStandardIndexData::iterator indexIter = m_indexData.find(saveRefID);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
141 if ( indexIter == m_indexData.end() ) return false; // error
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
142 ReferenceIndex& refIndex = (*indexIter).second;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
143 BamBinMap& binMap = refIndex.Bins;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
144 SaveBinEntry(binMap, saveBin, saveOffset, lastOffset);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
145 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
146
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
147 // update saveOffset
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
148 saveOffset = lastOffset;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
149
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
150 // update bin values
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
151 saveBin = bAlignment.Bin;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
152 lastBin = bAlignment.Bin;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
153
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
154 // update saveRefID
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
155 saveRefID = bAlignment.RefID;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
156
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
157 // if invalid RefID, break out
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
158 if ( saveRefID < 0 ) break;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
159 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
160
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
161 // make sure that current file pointer is beyond lastOffset
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
162 if ( m_BGZF->Tell() <= (int64_t)lastOffset ) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
163 fprintf(stderr, "Error in BGZF offsets.\n");
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
164 exit(1);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
165 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
166
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
167 // update lastOffset
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
168 lastOffset = m_BGZF->Tell();
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
169
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
170 // update lastCoordinate
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
171 lastCoordinate = bAlignment.Position;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
172 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
173
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
174 // save any leftover BAM data (as long as refID is valid)
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
175 if ( saveRefID >= 0 ) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
176 // save Bam bin entry
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
177 BamStandardIndexData::iterator indexIter = m_indexData.find(saveRefID);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
178 if ( indexIter == m_indexData.end() ) return false; // error
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
179 ReferenceIndex& refIndex = (*indexIter).second;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
180 BamBinMap& binMap = refIndex.Bins;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
181 SaveBinEntry(binMap, saveBin, saveOffset, lastOffset);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
182 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
183
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
184 // simplify index by merging chunks
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
185 MergeChunks();
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
186
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
187 // iterate through references in index
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
188 // sort offsets in linear offset vector
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
189 BamStandardIndexData::iterator indexIter = m_indexData.begin();
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
190 BamStandardIndexData::iterator indexEnd = m_indexData.end();
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
191 for ( int i = 0; indexIter != indexEnd; ++indexIter, ++i ) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
192
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
193 // get reference index data
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
194 ReferenceIndex& refIndex = (*indexIter).second;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
195 LinearOffsetVector& offsets = refIndex.Offsets;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
196
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
197 // sort linear offsets
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
198 sort(offsets.begin(), offsets.end());
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
199 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
200
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
201 // rewind file pointer to beginning of alignments, return success/fail
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
202 return m_reader->Rewind();
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
203 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
204
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
205 // check index file magic number, return true if OK
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
206 bool BamStandardIndex::CheckMagicNumber(void) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
207
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
208 // read in magic number
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
209 char magic[4];
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
210 size_t elementsRead = fread(magic, sizeof(char), 4, m_indexStream);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
211
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
212 // compare to expected value
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
213 if ( strncmp(magic, "BAI\1", 4) != 0 ) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
214 fprintf(stderr, "Problem with index file - invalid format.\n");
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
215 fclose(m_indexStream);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
216 return false;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
217 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
218
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
219 // return success/failure of load
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
220 return (elementsRead == 4);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
221 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
222
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
223 // clear all current index offset data in memory
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
224 void BamStandardIndex::ClearAllData(void) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
225 BamStandardIndexData::const_iterator indexIter = m_indexData.begin();
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
226 BamStandardIndexData::const_iterator indexEnd = m_indexData.end();
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
227 for ( ; indexIter != indexEnd; ++indexIter ) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
228 const int& refId = (*indexIter).first;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
229 ClearReferenceOffsets(refId);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
230 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
231 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
232
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
233 // clear all index offset data for desired reference
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
234 void BamStandardIndex::ClearReferenceOffsets(const int& refId) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
235
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
236 // look up refId, skip if not found
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
237 BamStandardIndexData::iterator indexIter = m_indexData.find(refId);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
238 if ( indexIter == m_indexData.end() ) return ;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
239
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
240 // clear reference data
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
241 ReferenceIndex& refEntry = (*indexIter).second;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
242 refEntry.Bins.clear();
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
243 refEntry.Offsets.clear();
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
244
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
245 // set flag
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
246 m_hasFullDataCache = false;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
247 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
248
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
249 // return file position after header metadata
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
250 const off_t BamStandardIndex::DataBeginOffset(void) const {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
251 return m_dataBeginOffset;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
252 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
253
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
254 // calculates offset(s) for a given region
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
255 bool BamStandardIndex::GetOffsets(const BamRegion& region,
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
256 const bool isRightBoundSpecified,
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
257 vector<int64_t>& offsets,
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
258 bool* hasAlignmentsInRegion)
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
259 {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
260 // return false if leftBound refID is not found in index data
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
261 if ( m_indexData.find(region.LeftRefID) == m_indexData.end() )
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
262 return false;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
263
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
264 // load index data for region if not already cached
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
265 if ( !IsDataLoaded(region.LeftRefID) ) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
266 bool loadedOk = true;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
267 loadedOk &= SkipToReference(region.LeftRefID);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
268 loadedOk &= LoadReference(region.LeftRefID);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
269 if ( !loadedOk ) return false;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
270 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
271
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
272 // calculate which bins overlap this region
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
273 uint16_t* bins = (uint16_t*)calloc(MAX_BIN, 2);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
274 int numBins = BinsFromRegion(region, isRightBoundSpecified, bins);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
275
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
276 // get bins for this reference
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
277 BamStandardIndexData::const_iterator indexIter = m_indexData.find(region.LeftRefID);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
278 if ( indexIter == m_indexData.end() ) return false; // error
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
279 const ReferenceIndex& refIndex = (*indexIter).second;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
280 const BamBinMap& binMap = refIndex.Bins;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
281
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
282 // get minimum offset to consider
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
283 const LinearOffsetVector& linearOffsets = refIndex.Offsets;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
284 const uint64_t minOffset = ( (unsigned int)(region.LeftPosition>>BAM_LIDX_SHIFT) >= linearOffsets.size() )
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
285 ? 0 : linearOffsets.at(region.LeftPosition>>BAM_LIDX_SHIFT);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
286
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
287 // store all alignment 'chunk' starts (file offsets) for bins in this region
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
288 for ( int i = 0; i < numBins; ++i ) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
289
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
290 const uint16_t binKey = bins[i];
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
291 map<uint32_t, ChunkVector>::const_iterator binIter = binMap.find(binKey);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
292 if ( (binIter != binMap.end()) && ((*binIter).first == binKey) ) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
293
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
294 // iterate over chunks
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
295 const ChunkVector& chunks = (*binIter).second;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
296 std::vector<Chunk>::const_iterator chunksIter = chunks.begin();
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
297 std::vector<Chunk>::const_iterator chunksEnd = chunks.end();
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
298 for ( ; chunksIter != chunksEnd; ++chunksIter) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
299
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
300 // if valid chunk found, store its file offset
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
301 const Chunk& chunk = (*chunksIter);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
302 if ( chunk.Stop > minOffset )
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
303 offsets.push_back( chunk.Start );
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
304 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
305 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
306 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
307
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
308 // clean up memory
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
309 free(bins);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
310
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
311 // sort the offsets before returning
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
312 sort(offsets.begin(), offsets.end());
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
313
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
314 // set flag & return success
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
315 *hasAlignmentsInRegion = (offsets.size() != 0 );
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
316
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
317 // if cache mode set to none, dump the data we just loaded
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
318 if (m_cacheMode == BamIndex::NoIndexCaching )
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
319 ClearReferenceOffsets(region.LeftRefID);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
320
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
321 // return succes
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
322 return true;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
323 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
324
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
325 // returns whether reference has alignments or no
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
326 bool BamStandardIndex::HasAlignments(const int& refId) const {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
327 BamStandardIndexData::const_iterator indexIter = m_indexData.find(refId);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
328 if ( indexIter == m_indexData.end() ) return false; // error
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
329 const ReferenceIndex& refEntry = (*indexIter).second;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
330 return refEntry.HasAlignments;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
331 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
332
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
333 // return true if all index data is cached
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
334 bool BamStandardIndex::HasFullDataCache(void) const {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
335 return m_hasFullDataCache;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
336 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
337
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
338 // returns true if index cache has data for desired reference
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
339 bool BamStandardIndex::IsDataLoaded(const int& refId) const {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
340
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
341 // look up refId, return false if not found
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
342 BamStandardIndexData::const_iterator indexIter = m_indexData.find(refId);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
343 if ( indexIter == m_indexData.end() ) return false;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
344
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
345 // see if reference has alignments
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
346 // if not, it's not a problem to have no offset data
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
347 const ReferenceIndex& refEntry = (*indexIter).second;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
348 if ( !refEntry.HasAlignments ) return true;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
349
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
350 // return whether bin map contains data
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
351 return ( !refEntry.Bins.empty() );
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
352 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
353
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
354 // attempts to use index to jump to region; returns success/fail
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
355 bool BamStandardIndex::Jump(const BamRegion& region, bool* hasAlignmentsInRegion) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
356
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
357 // be sure reader & BGZF file are valid & open for reading
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
358 if ( m_reader == 0 || m_BGZF == 0 || !m_BGZF->IsOpen )
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
359 return false;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
360
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
361 // make sure left-bound position is valid
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
362 if ( region.LeftPosition > m_references.at(region.LeftRefID).RefLength )
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
363 return false;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
364
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
365 // calculate offsets for this region
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
366 // if failed, print message, set flag, and return failure
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
367 vector<int64_t> offsets;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
368 if ( !GetOffsets(region, region.isRightBoundSpecified(), offsets, hasAlignmentsInRegion) ) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
369 fprintf(stderr, "ERROR: Could not jump: unable to calculate offset(s) for specified region.\n");
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
370 *hasAlignmentsInRegion = false;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
371 return false;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
372 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
373
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
374 // iterate through offsets
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
375 BamAlignment bAlignment;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
376 bool result = true;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
377 for ( vector<int64_t>::const_iterator o = offsets.begin(); o != offsets.end(); ++o) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
378
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
379 // attempt seek & load first available alignment
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
380 // set flag to true if data exists
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
381 result &= m_BGZF->Seek(*o);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
382 *hasAlignmentsInRegion = m_reader->GetNextAlignmentCore(bAlignment);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
383
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
384 // if this alignment corresponds to desired position
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
385 // return success of seeking back to the offset before the 'current offset' (to cover overlaps)
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
386 if ( ((bAlignment.RefID == region.LeftRefID) &&
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
387 ((bAlignment.Position + bAlignment.Length) > region.LeftPosition)) ||
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
388 (bAlignment.RefID > region.LeftRefID) )
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
389 {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
390 if ( o != offsets.begin() ) --o;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
391 return m_BGZF->Seek(*o);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
392 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
393 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
394
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
395 // if error in jumping, print message & set flag
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
396 if ( !result ) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
397 fprintf(stderr, "ERROR: Could not jump: unable to determine correct offset for specified region.\n");
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
398 *hasAlignmentsInRegion = false;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
399 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
400
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
401 // return success/failure
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
402 return result;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
403 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
404
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
405 // clears index data from all references except the first
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
406 void BamStandardIndex::KeepOnlyFirstReferenceOffsets(void) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
407 BamStandardIndexData::const_iterator indexBegin = m_indexData.begin();
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
408 KeepOnlyReferenceOffsets((*indexBegin).first);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
409 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
410
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
411 // clears index data from all references except the one specified
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
412 void BamStandardIndex::KeepOnlyReferenceOffsets(const int& refId) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
413 BamStandardIndexData::iterator mapIter = m_indexData.begin();
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
414 BamStandardIndexData::iterator mapEnd = m_indexData.end();
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
415 for ( ; mapIter != mapEnd; ++mapIter ) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
416 const int entryRefId = (*mapIter).first;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
417 if ( entryRefId != refId )
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
418 ClearReferenceOffsets(entryRefId);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
419 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
420 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
421
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
422 bool BamStandardIndex::LoadAllReferences(bool saveData) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
423
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
424 // skip if data already loaded
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
425 if ( m_hasFullDataCache ) return true;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
426
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
427 // get number of reference sequences
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
428 uint32_t numReferences;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
429 if ( !LoadReferenceCount((int&)numReferences) )
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
430 return false;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
431
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
432 // iterate over reference entries
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
433 bool loadedOk = true;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
434 for ( int i = 0; i < (int)numReferences; ++i )
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
435 loadedOk &= LoadReference(i, saveData);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
436
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
437 // set flag
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
438 if ( loadedOk && saveData )
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
439 m_hasFullDataCache = true;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
440
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
441 // return success/failure of loading references
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
442 return loadedOk;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
443 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
444
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
445 // load header data from index file, return true if loaded OK
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
446 bool BamStandardIndex::LoadHeader(void) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
447
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
448 bool loadedOk = CheckMagicNumber();
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
449
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
450 // store offset of beginning of data
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
451 m_dataBeginOffset = ftell64(m_indexStream);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
452
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
453 // return success/failure of load
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
454 return loadedOk;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
455 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
456
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
457 // load a single index bin entry from file, return true if loaded OK
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
458 // @saveData - save data in memory if true, just read & discard if false
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
459 bool BamStandardIndex::LoadBin(ReferenceIndex& refEntry, bool saveData) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
460
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
461 size_t elementsRead = 0;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
462
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
463 // get bin ID
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
464 uint32_t binId;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
465 elementsRead += fread(&binId, sizeof(binId), 1, m_indexStream);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
466 if ( m_isBigEndian ) SwapEndian_32(binId);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
467
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
468 // load alignment chunks for this bin
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
469 ChunkVector chunks;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
470 bool chunksOk = LoadChunks(chunks, saveData);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
471
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
472 // store bin entry
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
473 if ( chunksOk && saveData )
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
474 refEntry.Bins.insert(pair<uint32_t, ChunkVector>(binId, chunks));
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
475
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
476 // return success/failure of load
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
477 return ( (elementsRead == 1) && chunksOk );
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
478 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
479
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
480 bool BamStandardIndex::LoadBins(ReferenceIndex& refEntry, bool saveData) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
481
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
482 size_t elementsRead = 0;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
483
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
484 // get number of bins
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
485 int32_t numBins;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
486 elementsRead += fread(&numBins, sizeof(numBins), 1, m_indexStream);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
487 if ( m_isBigEndian ) SwapEndian_32(numBins);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
488
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
489 // set flag
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
490 refEntry.HasAlignments = ( numBins != 0 );
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
491
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
492 // iterate over bins
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
493 bool binsOk = true;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
494 for ( int i = 0; i < numBins; ++i )
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
495 binsOk &= LoadBin(refEntry, saveData);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
496
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
497 // return success/failure of load
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
498 return ( (elementsRead == 1) && binsOk );
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
499 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
500
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
501 // load a single index bin entry from file, return true if loaded OK
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
502 // @saveData - save data in memory if true, just read & discard if false
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
503 bool BamStandardIndex::LoadChunk(ChunkVector& chunks, bool saveData) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
504
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
505 size_t elementsRead = 0;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
506
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
507 // read in chunk data
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
508 uint64_t start;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
509 uint64_t stop;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
510 elementsRead += fread(&start, sizeof(start), 1, m_indexStream);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
511 elementsRead += fread(&stop, sizeof(stop), 1, m_indexStream);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
512
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
513 // swap endian-ness if necessary
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
514 if ( m_isBigEndian ) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
515 SwapEndian_64(start);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
516 SwapEndian_64(stop);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
517 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
518
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
519 // save data if requested
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
520 if ( saveData ) chunks.push_back( Chunk(start, stop) );
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
521
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
522 // return success/failure of load
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
523 return ( elementsRead == 2 );
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
524 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
525
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
526 bool BamStandardIndex::LoadChunks(ChunkVector& chunks, bool saveData) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
527
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
528 size_t elementsRead = 0;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
529
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
530 // read in number of chunks
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
531 uint32_t numChunks;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
532 elementsRead += fread(&numChunks, sizeof(numChunks), 1, m_indexStream);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
533 if ( m_isBigEndian ) SwapEndian_32(numChunks);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
534
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
535 // initialize space for chunks if we're storing this data
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
536 if ( saveData ) chunks.reserve(numChunks);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
537
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
538 // iterate over chunks
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
539 bool chunksOk = true;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
540 for ( int i = 0; i < (int)numChunks; ++i )
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
541 chunksOk &= LoadChunk(chunks, saveData);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
542
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
543 // sort chunk vector
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
544 sort( chunks.begin(), chunks.end(), ChunkLessThan );
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
545
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
546 // return success/failure of load
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
547 return ( (elementsRead == 1) && chunksOk );
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
548 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
549
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
550 // load a single index linear offset entry from file, return true if loaded OK
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
551 // @saveData - save data in memory if true, just read & discard if false
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
552 bool BamStandardIndex::LoadLinearOffsets(ReferenceIndex& refEntry, bool saveData) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
553
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
554 size_t elementsRead = 0;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
555
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
556 // read in number of linear offsets
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
557 int32_t numLinearOffsets;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
558 elementsRead += fread(&numLinearOffsets, sizeof(numLinearOffsets), 1, m_indexStream);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
559 if ( m_isBigEndian ) SwapEndian_32(numLinearOffsets);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
560
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
561 // set up destination vector (if we're saving the data)
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
562 LinearOffsetVector linearOffsets;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
563 if ( saveData ) linearOffsets.reserve(numLinearOffsets);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
564
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
565 // iterate over linear offsets
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
566 uint64_t linearOffset;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
567 for ( int i = 0; i < numLinearOffsets; ++i ) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
568 elementsRead += fread(&linearOffset, sizeof(linearOffset), 1, m_indexStream);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
569 if ( m_isBigEndian ) SwapEndian_64(linearOffset);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
570 if ( saveData ) linearOffsets.push_back(linearOffset);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
571 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
572
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
573 // sort linear offsets
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
574 sort ( linearOffsets.begin(), linearOffsets.end() );
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
575
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
576 // save in reference index entry if desired
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
577 if ( saveData ) refEntry.Offsets = linearOffsets;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
578
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
579 // return success/failure of load
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
580 return ( elementsRead == (size_t)(numLinearOffsets + 1) );
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
581 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
582
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
583 bool BamStandardIndex::LoadFirstReference(bool saveData) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
584 BamStandardIndexData::const_iterator indexBegin = m_indexData.begin();
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
585 return LoadReference((*indexBegin).first, saveData);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
586 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
587
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
588 // load a single reference from file, return true if loaded OK
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
589 // @saveData - save data in memory if true, just read & discard if false
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
590 bool BamStandardIndex::LoadReference(const int& refId, bool saveData) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
591
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
592 // look up refId
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
593 BamStandardIndexData::iterator indexIter = m_indexData.find(refId);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
594
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
595 // if reference not previously loaded, create new entry
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
596 if ( indexIter == m_indexData.end() ) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
597 ReferenceIndex newEntry;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
598 newEntry.HasAlignments = false;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
599 m_indexData.insert( pair<int32_t, ReferenceIndex>(refId, newEntry) );
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
600 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
601
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
602 // load reference data
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
603 indexIter = m_indexData.find(refId);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
604 ReferenceIndex& entry = (*indexIter).second;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
605 bool loadedOk = true;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
606 loadedOk &= LoadBins(entry, saveData);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
607 loadedOk &= LoadLinearOffsets(entry, saveData);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
608 return loadedOk;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
609 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
610
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
611 // loads number of references, return true if loaded OK
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
612 bool BamStandardIndex::LoadReferenceCount(int& numReferences) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
613
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
614 size_t elementsRead = 0;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
615
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
616 // read reference count
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
617 elementsRead += fread(&numReferences, sizeof(numReferences), 1, m_indexStream);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
618 if ( m_isBigEndian ) SwapEndian_32(numReferences);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
619
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
620 // return success/failure of load
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
621 return ( elementsRead == 1 );
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
622 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
623
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
624 // merges 'alignment chunks' in BAM bin (used for index building)
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
625 void BamStandardIndex::MergeChunks(void) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
626
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
627 // iterate over reference enties
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
628 BamStandardIndexData::iterator indexIter = m_indexData.begin();
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
629 BamStandardIndexData::iterator indexEnd = m_indexData.end();
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
630 for ( ; indexIter != indexEnd; ++indexIter ) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
631
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
632 // get BAM bin map for this reference
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
633 ReferenceIndex& refIndex = (*indexIter).second;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
634 BamBinMap& bamBinMap = refIndex.Bins;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
635
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
636 // iterate over BAM bins
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
637 BamBinMap::iterator binIter = bamBinMap.begin();
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
638 BamBinMap::iterator binEnd = bamBinMap.end();
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
639 for ( ; binIter != binEnd; ++binIter ) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
640
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
641 // get chunk vector for this bin
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
642 ChunkVector& binChunks = (*binIter).second;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
643 if ( binChunks.size() == 0 ) continue;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
644
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
645 ChunkVector mergedChunks;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
646 mergedChunks.push_back( binChunks[0] );
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
647
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
648 // iterate over chunks
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
649 int i = 0;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
650 ChunkVector::iterator chunkIter = binChunks.begin();
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
651 ChunkVector::iterator chunkEnd = binChunks.end();
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
652 for ( ++chunkIter; chunkIter != chunkEnd; ++chunkIter) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
653
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
654 // get 'currentChunk' based on numeric index
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
655 Chunk& currentChunk = mergedChunks[i];
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
656
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
657 // get iteratorChunk based on vector iterator
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
658 Chunk& iteratorChunk = (*chunkIter);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
659
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
660 // if chunk ends where (iterator) chunk starts, then merge
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
661 if ( currentChunk.Stop>>16 == iteratorChunk.Start>>16 )
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
662 currentChunk.Stop = iteratorChunk.Stop;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
663
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
664 // otherwise
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
665 else {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
666 // set currentChunk + 1 to iteratorChunk
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
667 mergedChunks.push_back(iteratorChunk);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
668 ++i;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
669 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
670 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
671
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
672 // saved merged chunk vector
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
673 (*binIter).second = mergedChunks;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
674 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
675 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
676 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
677
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
678 // saves BAM bin entry for index
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
679 void BamStandardIndex::SaveBinEntry(BamBinMap& binMap,
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
680 const uint32_t& saveBin,
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
681 const uint64_t& saveOffset,
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
682 const uint64_t& lastOffset)
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
683 {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
684 // look up saveBin
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
685 BamBinMap::iterator binIter = binMap.find(saveBin);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
686
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
687 // create new chunk
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
688 Chunk newChunk(saveOffset, lastOffset);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
689
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
690 // if entry doesn't exist
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
691 if ( binIter == binMap.end() ) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
692 ChunkVector newChunks;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
693 newChunks.push_back(newChunk);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
694 binMap.insert( pair<uint32_t, ChunkVector>(saveBin, newChunks));
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
695 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
696
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
697 // otherwise
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
698 else {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
699 ChunkVector& binChunks = (*binIter).second;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
700 binChunks.push_back( newChunk );
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
701 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
702 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
703
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
704 // saves linear offset entry for index
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
705 void BamStandardIndex::SaveLinearOffset(LinearOffsetVector& offsets,
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
706 const BamAlignment& bAlignment,
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
707 const uint64_t& lastOffset)
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
708 {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
709 // get converted offsets
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
710 int beginOffset = bAlignment.Position >> BAM_LIDX_SHIFT;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
711 int endOffset = (bAlignment.GetEndPosition() - 1) >> BAM_LIDX_SHIFT;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
712
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
713 // resize vector if necessary
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
714 int oldSize = offsets.size();
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
715 int newSize = endOffset + 1;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
716 if ( oldSize < newSize )
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
717 offsets.resize(newSize, 0);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
718
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
719 // store offset
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
720 for( int i = beginOffset + 1; i <= endOffset; ++i ) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
721 if ( offsets[i] == 0 )
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
722 offsets[i] = lastOffset;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
723 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
724 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
725
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
726 // initializes index data structure to hold @count references
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
727 void BamStandardIndex::SetReferenceCount(const int& count) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
728 for ( int i = 0; i < count; ++i )
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
729 m_indexData[i].HasAlignments = false;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
730 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
731
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
732 bool BamStandardIndex::SkipToFirstReference(void) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
733 BamStandardIndexData::const_iterator indexBegin = m_indexData.begin();
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
734 return SkipToReference( (*indexBegin).first );
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
735 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
736
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
737 // position file pointer to desired reference begin, return true if skipped OK
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
738 bool BamStandardIndex::SkipToReference(const int& refId) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
739
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
740 // attempt rewind
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
741 if ( !Rewind() ) return false;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
742
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
743 // read in number of references
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
744 uint32_t numReferences;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
745 size_t elementsRead = fread(&numReferences, sizeof(numReferences), 1, m_indexStream);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
746 if ( elementsRead != 1 ) return false;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
747 if ( m_isBigEndian ) SwapEndian_32(numReferences);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
748
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
749 // iterate over reference entries
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
750 bool skippedOk = true;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
751 int currentRefId = 0;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
752 while (currentRefId != refId) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
753 skippedOk &= LoadReference(currentRefId, false);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
754 ++currentRefId;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
755 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
756
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
757 // return success
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
758 return skippedOk;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
759 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
760
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
761 // write header to new index file
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
762 bool BamStandardIndex::WriteHeader(void) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
763
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
764 size_t elementsWritten = 0;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
765
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
766 // write magic number
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
767 elementsWritten += fwrite("BAI\1", sizeof(char), 4, m_indexStream);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
768
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
769 // store offset of beginning of data
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
770 m_dataBeginOffset = ftell64(m_indexStream);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
771
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
772 // return success/failure of write
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
773 return (elementsWritten == 4);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
774 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
775
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
776 // write index data for all references to new index file
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
777 bool BamStandardIndex::WriteAllReferences(void) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
778
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
779 size_t elementsWritten = 0;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
780
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
781 // write number of reference sequences
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
782 int32_t numReferenceSeqs = m_indexData.size();
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
783 if ( m_isBigEndian ) SwapEndian_32(numReferenceSeqs);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
784 elementsWritten += fwrite(&numReferenceSeqs, sizeof(numReferenceSeqs), 1, m_indexStream);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
785
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
786 // iterate over reference sequences
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
787 bool refsOk = true;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
788 BamStandardIndexData::const_iterator indexIter = m_indexData.begin();
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
789 BamStandardIndexData::const_iterator indexEnd = m_indexData.end();
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
790 for ( ; indexIter != indexEnd; ++ indexIter )
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
791 refsOk &= WriteReference( (*indexIter).second );
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
792
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
793 // return success/failure of write
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
794 return ( (elementsWritten == 1) && refsOk );
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
795 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
796
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
797 // write index data for bin to new index file
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
798 bool BamStandardIndex::WriteBin(const uint32_t& binId, const ChunkVector& chunks) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
799
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
800 size_t elementsWritten = 0;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
801
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
802 // write BAM bin ID
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
803 uint32_t binKey = binId;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
804 if ( m_isBigEndian ) SwapEndian_32(binKey);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
805 elementsWritten += fwrite(&binKey, sizeof(binKey), 1, m_indexStream);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
806
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
807 // write chunks
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
808 bool chunksOk = WriteChunks(chunks);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
809
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
810 // return success/failure of write
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
811 return ( (elementsWritten == 1) && chunksOk );
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
812 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
813
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
814 // write index data for bins to new index file
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
815 bool BamStandardIndex::WriteBins(const BamBinMap& bins) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
816
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
817 size_t elementsWritten = 0;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
818
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
819 // write number of bins
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
820 int32_t binCount = bins.size();
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
821 if ( m_isBigEndian ) SwapEndian_32(binCount);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
822 elementsWritten += fwrite(&binCount, sizeof(binCount), 1, m_indexStream);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
823
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
824 // iterate over bins
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
825 bool binsOk = true;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
826 BamBinMap::const_iterator binIter = bins.begin();
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
827 BamBinMap::const_iterator binEnd = bins.end();
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
828 for ( ; binIter != binEnd; ++binIter )
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
829 binsOk &= WriteBin( (*binIter).first, (*binIter).second );
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
830
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
831 // return success/failure of write
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
832 return ( (elementsWritten == 1) && binsOk );
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
833 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
834
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
835 // write index data for chunk entry to new index file
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
836 bool BamStandardIndex::WriteChunk(const Chunk& chunk) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
837
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
838 size_t elementsWritten = 0;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
839
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
840 // localize alignment chunk offsets
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
841 uint64_t start = chunk.Start;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
842 uint64_t stop = chunk.Stop;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
843
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
844 // swap endian-ness if necessary
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
845 if ( m_isBigEndian ) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
846 SwapEndian_64(start);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
847 SwapEndian_64(stop);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
848 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
849
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
850 // write to index file
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
851 elementsWritten += fwrite(&start, sizeof(start), 1, m_indexStream);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
852 elementsWritten += fwrite(&stop, sizeof(stop), 1, m_indexStream);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
853
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
854 // return success/failure of write
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
855 return ( elementsWritten == 2 );
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
856 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
857
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
858 // write index data for chunk entry to new index file
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
859 bool BamStandardIndex::WriteChunks(const ChunkVector& chunks) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
860
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
861 size_t elementsWritten = 0;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
862
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
863 // write chunks
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
864 int32_t chunkCount = chunks.size();
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
865 if ( m_isBigEndian ) SwapEndian_32(chunkCount);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
866 elementsWritten += fwrite(&chunkCount, sizeof(chunkCount), 1, m_indexStream);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
867
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
868 // iterate over chunks
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
869 bool chunksOk = true;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
870 ChunkVector::const_iterator chunkIter = chunks.begin();
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
871 ChunkVector::const_iterator chunkEnd = chunks.end();
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
872 for ( ; chunkIter != chunkEnd; ++chunkIter )
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
873 chunksOk &= WriteChunk( (*chunkIter) );
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
874
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
875 // return success/failure of write
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
876 return ( (elementsWritten == 1) && chunksOk );
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
877 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
878
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
879 // write index data for linear offsets entry to new index file
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
880 bool BamStandardIndex::WriteLinearOffsets(const LinearOffsetVector& offsets) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
881
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
882 size_t elementsWritten = 0;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
883
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
884 // write number of linear offsets
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
885 int32_t offsetCount = offsets.size();
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
886 if ( m_isBigEndian ) SwapEndian_32(offsetCount);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
887 elementsWritten += fwrite(&offsetCount, sizeof(offsetCount), 1, m_indexStream);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
888
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
889 // iterate over linear offsets
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
890 LinearOffsetVector::const_iterator offsetIter = offsets.begin();
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
891 LinearOffsetVector::const_iterator offsetEnd = offsets.end();
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
892 for ( ; offsetIter != offsetEnd; ++offsetIter ) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
893
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
894 // write linear offset
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
895 uint64_t linearOffset = (*offsetIter);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
896 if ( m_isBigEndian ) SwapEndian_64(linearOffset);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
897 elementsWritten += fwrite(&linearOffset, sizeof(linearOffset), 1, m_indexStream);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
898 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
899
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
900 // return success/failure of write
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
901 return ( elementsWritten == (size_t)(offsetCount + 1) );
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
902 }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
903
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
904 // write index data for a single reference to new index file
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
905 bool BamStandardIndex::WriteReference(const ReferenceIndex& refEntry) {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
906 bool refOk = true;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
907 refOk &= WriteBins(refEntry.Bins);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
908 refOk &= WriteLinearOffsets(refEntry.Offsets);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
909 return refOk;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
910 }