Mercurial > repos > zzhou > spp_phantompeak
comparison spp/src/BamIndex.cpp @ 15:e689b83b0257 draft
Uploaded
author | zzhou |
---|---|
date | Tue, 27 Nov 2012 16:15:21 -0500 |
parents | ce08b0efa3fd |
children |
comparison
equal
deleted
inserted
replaced
14:918fecc1e7bb | 15:e689b83b0257 |
---|---|
1 // *************************************************************************** | |
2 // BamIndex.cpp (c) 2009 Derek Barnett | |
3 // Marth Lab, Department of Biology, Boston College | |
4 // All rights reserved. | |
5 // --------------------------------------------------------------------------- | |
6 // Last modified: 22 November 2010 (DB) | |
7 // --------------------------------------------------------------------------- | |
8 // Provides index functionality - both for the default (standardized) BAM | |
9 // index format (.bai) as well as a BamTools-specific (nonstandard) index | |
10 // format (.bti). | |
11 // *************************************************************************** | |
12 | |
13 #include <BamIndex.h> | |
14 #include <BamReader.h> | |
15 #include <BGZF.h> | |
16 #include <BamStandardIndex_p.h> | |
17 #include <BamToolsIndex_p.h> | |
18 using namespace BamTools; | |
19 using namespace BamTools::Internal; | |
20 | |
21 #include <cstdio> | |
22 #include <cstdlib> | |
23 #include <algorithm> | |
24 #include <iostream> | |
25 #include <map> | |
26 using namespace std; | |
27 | |
28 // -------------------------------------------------- | |
29 // BamIndex factory methods | |
30 | |
31 // returns index based on BAM filename 'stub' | |
32 // checks first for preferred type, returns that type if found | |
33 // (if not found, attmempts to load other type(s), returns 0 if NONE found) | |
34 // | |
35 // ** default preferred type is BamToolsIndex ** use this anytime it exists | |
36 BamIndex* BamIndex::FromBamFilename(const std::string& bamFilename, | |
37 BamTools::BgzfData* bgzf, | |
38 BamTools::BamReader* reader, | |
39 const BamIndex::PreferredIndexType& type) | |
40 { | |
41 // --------------------------------------------------- | |
42 // attempt to load preferred type first | |
43 | |
44 const std::string bamtoolsIndexFilename = bamFilename + ".bti"; | |
45 const bool bamtoolsIndexExists = BamTools::FileExists(bamtoolsIndexFilename); | |
46 if ( (type == BamIndex::BAMTOOLS) && bamtoolsIndexExists ) | |
47 return new BamToolsIndex(bgzf, reader); | |
48 | |
49 const std::string standardIndexFilename = bamFilename + ".bai"; | |
50 const bool standardIndexExists = BamTools::FileExists(standardIndexFilename); | |
51 if ( (type == BamIndex::STANDARD) && standardIndexExists ) | |
52 return new BamStandardIndex(bgzf, reader); | |
53 | |
54 // ---------------------------------------------------- | |
55 // preferred type could not be found, try other (non-preferred) types | |
56 // if none found, return 0 | |
57 | |
58 if ( bamtoolsIndexExists ) return new BamToolsIndex(bgzf, reader); | |
59 if ( standardIndexExists ) return new BamStandardIndex(bgzf, reader); | |
60 return 0; | |
61 } | |
62 | |
63 // returns index based on explicitly named index file (or 0 if not found) | |
64 BamIndex* BamIndex::FromIndexFilename(const std::string& indexFilename, | |
65 BamTools::BgzfData* bgzf, | |
66 BamTools::BamReader* reader) | |
67 { | |
68 // see if specified file exists | |
69 const bool indexExists = BamTools::FileExists(indexFilename); | |
70 if ( !indexExists ) return 0; | |
71 | |
72 const std::string bamtoolsIndexExtension(".bti"); | |
73 const std::string standardIndexExtension(".bai"); | |
74 | |
75 // if has bamtoolsIndexExtension | |
76 if ( indexFilename.find(bamtoolsIndexExtension) == (indexFilename.length() - bamtoolsIndexExtension.length()) ) | |
77 return new BamToolsIndex(bgzf, reader); | |
78 | |
79 // if has standardIndexExtension | |
80 if ( indexFilename.find(standardIndexExtension) == (indexFilename.length() - standardIndexExtension.length()) ) | |
81 return new BamStandardIndex(bgzf, reader); | |
82 | |
83 // otherwise, unsupported file type | |
84 return 0; | |
85 } | |
86 | |
87 // ------------------------------- | |
88 // BamIndex implementation | |
89 | |
90 // ctor | |
91 BamIndex::BamIndex(BamTools::BgzfData* bgzf, BamTools::BamReader* reader) | |
92 : m_BGZF(bgzf) | |
93 , m_reader(reader) | |
94 , m_cacheMode(BamIndex::LimitedIndexCaching) | |
95 , m_indexStream(0) | |
96 { | |
97 if ( m_reader && m_reader->IsOpen() ) | |
98 m_references = m_reader->GetReferenceData(); | |
99 } | |
100 | |
101 // dtor | |
102 BamIndex::~BamIndex(void) { | |
103 if ( IsOpen() ) | |
104 fclose(m_indexStream); | |
105 } | |
106 | |
107 // return true if FILE* is open | |
108 bool BamIndex::IsOpen(void) const { | |
109 return ( m_indexStream != 0 ); | |
110 } | |
111 | |
112 // loads existing data from file into memory | |
113 bool BamIndex::Load(const string& filename) { | |
114 | |
115 // open index file, abort on error | |
116 if ( !OpenIndexFile(filename, "rb") ) { | |
117 fprintf(stderr, "ERROR: Unable to open the BAM index file %s for reading.\n", filename.c_str()); | |
118 return false; | |
119 } | |
120 | |
121 // check magic number | |
122 if ( !LoadHeader() ) { | |
123 fclose(m_indexStream); | |
124 return false; | |
125 } | |
126 | |
127 // load reference data (but only keep in memory if full caching requested) | |
128 bool saveInitialLoad = ( m_cacheMode == BamIndex::FullIndexCaching ); | |
129 if ( !LoadAllReferences(saveInitialLoad) ) { | |
130 fclose(m_indexStream); | |
131 return false; | |
132 } | |
133 | |
134 // update index cache based on selected mode | |
135 UpdateCache(); | |
136 | |
137 // return success | |
138 return true; | |
139 } | |
140 | |
141 // opens index file for reading/writing, return true if opened OK | |
142 bool BamIndex::OpenIndexFile(const string& filename, const string& mode) { | |
143 m_indexStream = fopen(filename.c_str(), mode.c_str()); | |
144 return ( m_indexStream != 0 ); | |
145 } | |
146 | |
147 // rewind index file to beginning of index data, return true if rewound OK | |
148 bool BamIndex::Rewind(void) { | |
149 return ( fseek64(m_indexStream, DataBeginOffset(), SEEK_SET) == 0 ); | |
150 } | |
151 | |
152 // change the index caching behavior | |
153 void BamIndex::SetCacheMode(const BamIndexCacheMode mode) { | |
154 if ( mode != m_cacheMode ) { | |
155 m_cacheMode = mode; | |
156 UpdateCache(); | |
157 } | |
158 } | |
159 | |
160 // updates in-memory cache of index data, depending on current cache mode | |
161 void BamIndex::UpdateCache(void) { | |
162 | |
163 // skip if file not open | |
164 if ( !IsOpen() ) return; | |
165 | |
166 // reflect requested cache mode behavior | |
167 switch ( m_cacheMode ) { | |
168 | |
169 case (BamIndex::FullIndexCaching) : | |
170 Rewind(); | |
171 LoadAllReferences(true); | |
172 break; | |
173 | |
174 case (BamIndex::LimitedIndexCaching) : | |
175 if ( HasFullDataCache() ) | |
176 KeepOnlyFirstReferenceOffsets(); | |
177 else { | |
178 ClearAllData(); | |
179 SkipToFirstReference(); | |
180 LoadFirstReference(true); | |
181 } | |
182 break; | |
183 case(BamIndex::NoIndexCaching) : | |
184 ClearAllData(); | |
185 break; | |
186 default : | |
187 // unreachable | |
188 ; | |
189 } | |
190 } | |
191 | |
192 // writes in-memory index data out to file | |
193 bool BamIndex::Write(const string& bamFilename) { | |
194 | |
195 // open index file for writing | |
196 string indexFilename = bamFilename + Extension(); | |
197 if ( !OpenIndexFile(indexFilename, "wb") ) { | |
198 fprintf(stderr, "ERROR: Could not open file to save index.\n"); | |
199 return false; | |
200 } | |
201 | |
202 // write index header data | |
203 if ( !WriteHeader() ) { | |
204 fprintf(stderr, "ERROR: There was a problem writing index metadata to new index file.\n"); | |
205 fflush(m_indexStream); | |
206 fclose(m_indexStream); | |
207 exit(1); | |
208 } | |
209 | |
210 // write main index data | |
211 if ( !WriteAllReferences() ) { | |
212 fprintf(stderr, "ERROR: There was a problem writing index data to new index file.\n"); | |
213 fflush(m_indexStream); | |
214 fclose(m_indexStream); | |
215 exit(1); | |
216 } | |
217 | |
218 // flush any remaining output, rewind file, and return success | |
219 fflush(m_indexStream); | |
220 fclose(m_indexStream); | |
221 | |
222 // re-open index file for later reading | |
223 if ( !OpenIndexFile(indexFilename, "rb") ) { | |
224 fprintf(stderr, "ERROR: Could not open newly created index file for reading.\n"); | |
225 return false; | |
226 } | |
227 | |
228 // return success/failure of write | |
229 return true; | |
230 } |