comparison spp/src/BamIndex.cpp @ 15:e689b83b0257 draft

Uploaded
author zzhou
date Tue, 27 Nov 2012 16:15:21 -0500
parents ce08b0efa3fd
children
comparison
equal deleted inserted replaced
14:918fecc1e7bb 15:e689b83b0257
1 // ***************************************************************************
2 // BamIndex.cpp (c) 2009 Derek Barnett
3 // Marth Lab, Department of Biology, Boston College
4 // All rights reserved.
5 // ---------------------------------------------------------------------------
6 // Last modified: 22 November 2010 (DB)
7 // ---------------------------------------------------------------------------
8 // Provides index functionality - both for the default (standardized) BAM
9 // index format (.bai) as well as a BamTools-specific (nonstandard) index
10 // format (.bti).
11 // ***************************************************************************
12
13 #include <BamIndex.h>
14 #include <BamReader.h>
15 #include <BGZF.h>
16 #include <BamStandardIndex_p.h>
17 #include <BamToolsIndex_p.h>
18 using namespace BamTools;
19 using namespace BamTools::Internal;
20
21 #include <cstdio>
22 #include <cstdlib>
23 #include <algorithm>
24 #include <iostream>
25 #include <map>
26 using namespace std;
27
28 // --------------------------------------------------
29 // BamIndex factory methods
30
31 // returns index based on BAM filename 'stub'
32 // checks first for preferred type, returns that type if found
33 // (if not found, attmempts to load other type(s), returns 0 if NONE found)
34 //
35 // ** default preferred type is BamToolsIndex ** use this anytime it exists
36 BamIndex* BamIndex::FromBamFilename(const std::string& bamFilename,
37 BamTools::BgzfData* bgzf,
38 BamTools::BamReader* reader,
39 const BamIndex::PreferredIndexType& type)
40 {
41 // ---------------------------------------------------
42 // attempt to load preferred type first
43
44 const std::string bamtoolsIndexFilename = bamFilename + ".bti";
45 const bool bamtoolsIndexExists = BamTools::FileExists(bamtoolsIndexFilename);
46 if ( (type == BamIndex::BAMTOOLS) && bamtoolsIndexExists )
47 return new BamToolsIndex(bgzf, reader);
48
49 const std::string standardIndexFilename = bamFilename + ".bai";
50 const bool standardIndexExists = BamTools::FileExists(standardIndexFilename);
51 if ( (type == BamIndex::STANDARD) && standardIndexExists )
52 return new BamStandardIndex(bgzf, reader);
53
54 // ----------------------------------------------------
55 // preferred type could not be found, try other (non-preferred) types
56 // if none found, return 0
57
58 if ( bamtoolsIndexExists ) return new BamToolsIndex(bgzf, reader);
59 if ( standardIndexExists ) return new BamStandardIndex(bgzf, reader);
60 return 0;
61 }
62
63 // returns index based on explicitly named index file (or 0 if not found)
64 BamIndex* BamIndex::FromIndexFilename(const std::string& indexFilename,
65 BamTools::BgzfData* bgzf,
66 BamTools::BamReader* reader)
67 {
68 // see if specified file exists
69 const bool indexExists = BamTools::FileExists(indexFilename);
70 if ( !indexExists ) return 0;
71
72 const std::string bamtoolsIndexExtension(".bti");
73 const std::string standardIndexExtension(".bai");
74
75 // if has bamtoolsIndexExtension
76 if ( indexFilename.find(bamtoolsIndexExtension) == (indexFilename.length() - bamtoolsIndexExtension.length()) )
77 return new BamToolsIndex(bgzf, reader);
78
79 // if has standardIndexExtension
80 if ( indexFilename.find(standardIndexExtension) == (indexFilename.length() - standardIndexExtension.length()) )
81 return new BamStandardIndex(bgzf, reader);
82
83 // otherwise, unsupported file type
84 return 0;
85 }
86
87 // -------------------------------
88 // BamIndex implementation
89
90 // ctor
91 BamIndex::BamIndex(BamTools::BgzfData* bgzf, BamTools::BamReader* reader)
92 : m_BGZF(bgzf)
93 , m_reader(reader)
94 , m_cacheMode(BamIndex::LimitedIndexCaching)
95 , m_indexStream(0)
96 {
97 if ( m_reader && m_reader->IsOpen() )
98 m_references = m_reader->GetReferenceData();
99 }
100
101 // dtor
102 BamIndex::~BamIndex(void) {
103 if ( IsOpen() )
104 fclose(m_indexStream);
105 }
106
107 // return true if FILE* is open
108 bool BamIndex::IsOpen(void) const {
109 return ( m_indexStream != 0 );
110 }
111
112 // loads existing data from file into memory
113 bool BamIndex::Load(const string& filename) {
114
115 // open index file, abort on error
116 if ( !OpenIndexFile(filename, "rb") ) {
117 fprintf(stderr, "ERROR: Unable to open the BAM index file %s for reading.\n", filename.c_str());
118 return false;
119 }
120
121 // check magic number
122 if ( !LoadHeader() ) {
123 fclose(m_indexStream);
124 return false;
125 }
126
127 // load reference data (but only keep in memory if full caching requested)
128 bool saveInitialLoad = ( m_cacheMode == BamIndex::FullIndexCaching );
129 if ( !LoadAllReferences(saveInitialLoad) ) {
130 fclose(m_indexStream);
131 return false;
132 }
133
134 // update index cache based on selected mode
135 UpdateCache();
136
137 // return success
138 return true;
139 }
140
141 // opens index file for reading/writing, return true if opened OK
142 bool BamIndex::OpenIndexFile(const string& filename, const string& mode) {
143 m_indexStream = fopen(filename.c_str(), mode.c_str());
144 return ( m_indexStream != 0 );
145 }
146
147 // rewind index file to beginning of index data, return true if rewound OK
148 bool BamIndex::Rewind(void) {
149 return ( fseek64(m_indexStream, DataBeginOffset(), SEEK_SET) == 0 );
150 }
151
152 // change the index caching behavior
153 void BamIndex::SetCacheMode(const BamIndexCacheMode mode) {
154 if ( mode != m_cacheMode ) {
155 m_cacheMode = mode;
156 UpdateCache();
157 }
158 }
159
160 // updates in-memory cache of index data, depending on current cache mode
161 void BamIndex::UpdateCache(void) {
162
163 // skip if file not open
164 if ( !IsOpen() ) return;
165
166 // reflect requested cache mode behavior
167 switch ( m_cacheMode ) {
168
169 case (BamIndex::FullIndexCaching) :
170 Rewind();
171 LoadAllReferences(true);
172 break;
173
174 case (BamIndex::LimitedIndexCaching) :
175 if ( HasFullDataCache() )
176 KeepOnlyFirstReferenceOffsets();
177 else {
178 ClearAllData();
179 SkipToFirstReference();
180 LoadFirstReference(true);
181 }
182 break;
183 case(BamIndex::NoIndexCaching) :
184 ClearAllData();
185 break;
186 default :
187 // unreachable
188 ;
189 }
190 }
191
192 // writes in-memory index data out to file
193 bool BamIndex::Write(const string& bamFilename) {
194
195 // open index file for writing
196 string indexFilename = bamFilename + Extension();
197 if ( !OpenIndexFile(indexFilename, "wb") ) {
198 fprintf(stderr, "ERROR: Could not open file to save index.\n");
199 return false;
200 }
201
202 // write index header data
203 if ( !WriteHeader() ) {
204 fprintf(stderr, "ERROR: There was a problem writing index metadata to new index file.\n");
205 fflush(m_indexStream);
206 fclose(m_indexStream);
207 exit(1);
208 }
209
210 // write main index data
211 if ( !WriteAllReferences() ) {
212 fprintf(stderr, "ERROR: There was a problem writing index data to new index file.\n");
213 fflush(m_indexStream);
214 fclose(m_indexStream);
215 exit(1);
216 }
217
218 // flush any remaining output, rewind file, and return success
219 fflush(m_indexStream);
220 fclose(m_indexStream);
221
222 // re-open index file for later reading
223 if ( !OpenIndexFile(indexFilename, "rb") ) {
224 fprintf(stderr, "ERROR: Could not open newly created index file for reading.\n");
225 return false;
226 }
227
228 // return success/failure of write
229 return true;
230 }