6
|
1 // ***************************************************************************
|
|
2 // BamIndex.cpp (c) 2009 Derek Barnett
|
|
3 // Marth Lab, Department of Biology, Boston College
|
|
4 // All rights reserved.
|
|
5 // ---------------------------------------------------------------------------
|
|
6 // Last modified: 22 November 2010 (DB)
|
|
7 // ---------------------------------------------------------------------------
|
|
8 // Provides index functionality - both for the default (standardized) BAM
|
|
9 // index format (.bai) as well as a BamTools-specific (nonstandard) index
|
|
10 // format (.bti).
|
|
11 // ***************************************************************************
|
|
12
|
|
13 #include <BamIndex.h>
|
|
14 #include <BamReader.h>
|
|
15 #include <BGZF.h>
|
|
16 #include <BamStandardIndex_p.h>
|
|
17 #include <BamToolsIndex_p.h>
|
|
18 using namespace BamTools;
|
|
19 using namespace BamTools::Internal;
|
|
20
|
|
21 #include <cstdio>
|
|
22 #include <cstdlib>
|
|
23 #include <algorithm>
|
|
24 #include <iostream>
|
|
25 #include <map>
|
|
26 using namespace std;
|
|
27
|
|
28 // --------------------------------------------------
|
|
29 // BamIndex factory methods
|
|
30
|
|
31 // returns index based on BAM filename 'stub'
|
|
32 // checks first for preferred type, returns that type if found
|
|
33 // (if not found, attmempts to load other type(s), returns 0 if NONE found)
|
|
34 //
|
|
35 // ** default preferred type is BamToolsIndex ** use this anytime it exists
|
|
36 BamIndex* BamIndex::FromBamFilename(const std::string& bamFilename,
|
|
37 BamTools::BgzfData* bgzf,
|
|
38 BamTools::BamReader* reader,
|
|
39 const BamIndex::PreferredIndexType& type)
|
|
40 {
|
|
41 // ---------------------------------------------------
|
|
42 // attempt to load preferred type first
|
|
43
|
|
44 const std::string bamtoolsIndexFilename = bamFilename + ".bti";
|
|
45 const bool bamtoolsIndexExists = BamTools::FileExists(bamtoolsIndexFilename);
|
|
46 if ( (type == BamIndex::BAMTOOLS) && bamtoolsIndexExists )
|
|
47 return new BamToolsIndex(bgzf, reader);
|
|
48
|
|
49 const std::string standardIndexFilename = bamFilename + ".bai";
|
|
50 const bool standardIndexExists = BamTools::FileExists(standardIndexFilename);
|
|
51 if ( (type == BamIndex::STANDARD) && standardIndexExists )
|
|
52 return new BamStandardIndex(bgzf, reader);
|
|
53
|
|
54 // ----------------------------------------------------
|
|
55 // preferred type could not be found, try other (non-preferred) types
|
|
56 // if none found, return 0
|
|
57
|
|
58 if ( bamtoolsIndexExists ) return new BamToolsIndex(bgzf, reader);
|
|
59 if ( standardIndexExists ) return new BamStandardIndex(bgzf, reader);
|
|
60 return 0;
|
|
61 }
|
|
62
|
|
63 // returns index based on explicitly named index file (or 0 if not found)
|
|
64 BamIndex* BamIndex::FromIndexFilename(const std::string& indexFilename,
|
|
65 BamTools::BgzfData* bgzf,
|
|
66 BamTools::BamReader* reader)
|
|
67 {
|
|
68 // see if specified file exists
|
|
69 const bool indexExists = BamTools::FileExists(indexFilename);
|
|
70 if ( !indexExists ) return 0;
|
|
71
|
|
72 const std::string bamtoolsIndexExtension(".bti");
|
|
73 const std::string standardIndexExtension(".bai");
|
|
74
|
|
75 // if has bamtoolsIndexExtension
|
|
76 if ( indexFilename.find(bamtoolsIndexExtension) == (indexFilename.length() - bamtoolsIndexExtension.length()) )
|
|
77 return new BamToolsIndex(bgzf, reader);
|
|
78
|
|
79 // if has standardIndexExtension
|
|
80 if ( indexFilename.find(standardIndexExtension) == (indexFilename.length() - standardIndexExtension.length()) )
|
|
81 return new BamStandardIndex(bgzf, reader);
|
|
82
|
|
83 // otherwise, unsupported file type
|
|
84 return 0;
|
|
85 }
|
|
86
|
|
87 // -------------------------------
|
|
88 // BamIndex implementation
|
|
89
|
|
90 // ctor
|
|
91 BamIndex::BamIndex(BamTools::BgzfData* bgzf, BamTools::BamReader* reader)
|
|
92 : m_BGZF(bgzf)
|
|
93 , m_reader(reader)
|
|
94 , m_cacheMode(BamIndex::LimitedIndexCaching)
|
|
95 , m_indexStream(0)
|
|
96 {
|
|
97 if ( m_reader && m_reader->IsOpen() )
|
|
98 m_references = m_reader->GetReferenceData();
|
|
99 }
|
|
100
|
|
101 // dtor
|
|
102 BamIndex::~BamIndex(void) {
|
|
103 if ( IsOpen() )
|
|
104 fclose(m_indexStream);
|
|
105 }
|
|
106
|
|
107 // return true if FILE* is open
|
|
108 bool BamIndex::IsOpen(void) const {
|
|
109 return ( m_indexStream != 0 );
|
|
110 }
|
|
111
|
|
112 // loads existing data from file into memory
|
|
113 bool BamIndex::Load(const string& filename) {
|
|
114
|
|
115 // open index file, abort on error
|
|
116 if ( !OpenIndexFile(filename, "rb") ) {
|
|
117 fprintf(stderr, "ERROR: Unable to open the BAM index file %s for reading.\n", filename.c_str());
|
|
118 return false;
|
|
119 }
|
|
120
|
|
121 // check magic number
|
|
122 if ( !LoadHeader() ) {
|
|
123 fclose(m_indexStream);
|
|
124 return false;
|
|
125 }
|
|
126
|
|
127 // load reference data (but only keep in memory if full caching requested)
|
|
128 bool saveInitialLoad = ( m_cacheMode == BamIndex::FullIndexCaching );
|
|
129 if ( !LoadAllReferences(saveInitialLoad) ) {
|
|
130 fclose(m_indexStream);
|
|
131 return false;
|
|
132 }
|
|
133
|
|
134 // update index cache based on selected mode
|
|
135 UpdateCache();
|
|
136
|
|
137 // return success
|
|
138 return true;
|
|
139 }
|
|
140
|
|
141 // opens index file for reading/writing, return true if opened OK
|
|
142 bool BamIndex::OpenIndexFile(const string& filename, const string& mode) {
|
|
143 m_indexStream = fopen(filename.c_str(), mode.c_str());
|
|
144 return ( m_indexStream != 0 );
|
|
145 }
|
|
146
|
|
147 // rewind index file to beginning of index data, return true if rewound OK
|
|
148 bool BamIndex::Rewind(void) {
|
|
149 return ( fseek64(m_indexStream, DataBeginOffset(), SEEK_SET) == 0 );
|
|
150 }
|
|
151
|
|
152 // change the index caching behavior
|
|
153 void BamIndex::SetCacheMode(const BamIndexCacheMode mode) {
|
|
154 if ( mode != m_cacheMode ) {
|
|
155 m_cacheMode = mode;
|
|
156 UpdateCache();
|
|
157 }
|
|
158 }
|
|
159
|
|
160 // updates in-memory cache of index data, depending on current cache mode
|
|
161 void BamIndex::UpdateCache(void) {
|
|
162
|
|
163 // skip if file not open
|
|
164 if ( !IsOpen() ) return;
|
|
165
|
|
166 // reflect requested cache mode behavior
|
|
167 switch ( m_cacheMode ) {
|
|
168
|
|
169 case (BamIndex::FullIndexCaching) :
|
|
170 Rewind();
|
|
171 LoadAllReferences(true);
|
|
172 break;
|
|
173
|
|
174 case (BamIndex::LimitedIndexCaching) :
|
|
175 if ( HasFullDataCache() )
|
|
176 KeepOnlyFirstReferenceOffsets();
|
|
177 else {
|
|
178 ClearAllData();
|
|
179 SkipToFirstReference();
|
|
180 LoadFirstReference(true);
|
|
181 }
|
|
182 break;
|
|
183 case(BamIndex::NoIndexCaching) :
|
|
184 ClearAllData();
|
|
185 break;
|
|
186 default :
|
|
187 // unreachable
|
|
188 ;
|
|
189 }
|
|
190 }
|
|
191
|
|
192 // writes in-memory index data out to file
|
|
193 bool BamIndex::Write(const string& bamFilename) {
|
|
194
|
|
195 // open index file for writing
|
|
196 string indexFilename = bamFilename + Extension();
|
|
197 if ( !OpenIndexFile(indexFilename, "wb") ) {
|
|
198 fprintf(stderr, "ERROR: Could not open file to save index.\n");
|
|
199 return false;
|
|
200 }
|
|
201
|
|
202 // write index header data
|
|
203 if ( !WriteHeader() ) {
|
|
204 fprintf(stderr, "ERROR: There was a problem writing index metadata to new index file.\n");
|
|
205 fflush(m_indexStream);
|
|
206 fclose(m_indexStream);
|
|
207 exit(1);
|
|
208 }
|
|
209
|
|
210 // write main index data
|
|
211 if ( !WriteAllReferences() ) {
|
|
212 fprintf(stderr, "ERROR: There was a problem writing index data to new index file.\n");
|
|
213 fflush(m_indexStream);
|
|
214 fclose(m_indexStream);
|
|
215 exit(1);
|
|
216 }
|
|
217
|
|
218 // flush any remaining output, rewind file, and return success
|
|
219 fflush(m_indexStream);
|
|
220 fclose(m_indexStream);
|
|
221
|
|
222 // re-open index file for later reading
|
|
223 if ( !OpenIndexFile(indexFilename, "rb") ) {
|
|
224 fprintf(stderr, "ERROR: Could not open newly created index file for reading.\n");
|
|
225 return false;
|
|
226 }
|
|
227
|
|
228 // return success/failure of write
|
|
229 return true;
|
|
230 }
|