Mercurial > repos > zzhou > spp_phantompeak
comparison spp/src/BamToolsIndex_p.h @ 6:ce08b0efa3fd draft
Uploaded
author | zzhou |
---|---|
date | Tue, 27 Nov 2012 16:11:40 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
5:608a8e0eac56 | 6:ce08b0efa3fd |
---|---|
// ***************************************************************************
// BamToolsIndex.h (c) 2010 Derek Barnett
// Marth Lab, Department of Biology, Boston College
// All rights reserved.
// ---------------------------------------------------------------------------
// Last modified: 19 November 2010 (DB)
// ---------------------------------------------------------------------------
// Provides index operations for the BamTools index format (".bti")
// ***************************************************************************
10 | |
11 #ifndef BAMTOOLS_INDEX_FORMAT_H | |
12 #define BAMTOOLS_INDEX_FORMAT_H | |
13 | |
// -------------
// W A R N I N G
// -------------
//
// This file is not part of the BamTools API.  It exists purely as an
// implementation detail.  This header file may change from version to
// version without notice, or even be removed.
//
// We mean it.
23 | |
24 #include <BamAux.h> | |
25 #include <BamIndex.h> | |
26 #include <map> | |
27 #include <string> | |
28 #include <vector> | |
29 | |
30 namespace BamTools { | |
31 | |
32 namespace Internal { | |
33 | |
// individual index offset entry
//
// Plain value record with three public fields and a constructor that
// initializes each of them (all default to 0, so the type is also
// default-constructible).
// NOTE(review): StartOffset is presumably the seek target inside the BAM
// file and the two positions the coordinate span covered by the entry --
// confirm against the implementation file.
struct BamToolsIndexEntry {

    // data members
    int32_t MaxEndPosition;
    int64_t StartOffset;
    int32_t StartPosition;

    // ctor
    // @maxEndPosition - initial value for MaxEndPosition (default 0)
    // @startOffset    - initial value for StartOffset    (default 0)
    // @startPosition  - initial value for StartPosition  (default 0)
    BamToolsIndexEntry(const int32_t& maxEndPosition = 0,
                       const int64_t& startOffset    = 0,
                       const int32_t& startPosition  = 0)
        : MaxEndPosition(maxEndPosition)
        , StartOffset(startOffset)
        , StartPosition(startPosition)
    { }
};
51 | |
52 // reference index entry | |
53 struct BamToolsReferenceEntry { | |
54 | |
55 // data members | |
56 bool HasAlignments; | |
57 std::vector<BamToolsIndexEntry> Offsets; | |
58 | |
59 // ctor | |
60 BamToolsReferenceEntry(void) | |
61 : HasAlignments(false) | |
62 { } | |
63 }; | |
64 | |
65 // the actual index data structure | |
66 typedef std::map<int, BamToolsReferenceEntry> BamToolsIndexData; | |
67 | |
68 class BamToolsIndex : public BamIndex { | |
69 | |
70 // keep a list of any supported versions here | |
71 // (might be useful later to handle any 'legacy' versions if the format changes) | |
72 // listed for example like: BTI_1_0 = 1, BTI_1_1 = 2, BTI_1_2 = 3, BTI_2_0 = 4, and so on | |
73 // | |
74 // so a change introduced in (hypothetical) BTI_1_2 would be handled from then on by: | |
75 // | |
76 // if ( indexVersion >= BTI_1_2 ) | |
77 // do something new | |
78 // else | |
79 // do the old thing | |
80 enum Version { BTI_1_0 = 1 | |
81 , BTI_1_1 | |
82 , BTI_1_2 | |
83 }; | |
84 | |
85 | |
86 // ctor & dtor | |
87 public: | |
88 BamToolsIndex(BamTools::BgzfData* bgzf, BamTools::BamReader* reader); | |
89 ~BamToolsIndex(void); | |
90 | |
91 // interface (implements BamIndex virtual methods) | |
92 public: | |
93 // creates index data (in-memory) from current reader data | |
94 bool Build(void); | |
95 // returns supported file extension | |
96 const std::string Extension(void) const { return std::string(".bti"); } | |
97 // returns whether reference has alignments or no | |
98 bool HasAlignments(const int& referenceID) const; | |
99 // attempts to use index to jump to region; returns success/fail | |
100 // a "successful" jump indicates no error, but not whether this region has data | |
101 // * thus, the method sets a flag to indicate whether there are alignments | |
102 // available after the jump position | |
103 bool Jump(const BamTools::BamRegion& region, bool* hasAlignmentsInRegion); | |
104 public: | |
105 // clear all current index offset data in memory | |
106 void ClearAllData(void); | |
107 // return file position after header metadata | |
108 const off_t DataBeginOffset(void) const; | |
109 // return true if all index data is cached | |
110 bool HasFullDataCache(void) const; | |
111 // clears index data from all references except the first | |
112 void KeepOnlyFirstReferenceOffsets(void); | |
113 // load index data for all references, return true if loaded OK | |
114 // @saveData - save data in memory if true, just read & discard if false | |
115 bool LoadAllReferences(bool saveData = true); | |
116 // load first reference from file, return true if loaded OK | |
117 // @saveData - save data in memory if true, just read & discard if false | |
118 bool LoadFirstReference(bool saveData = true); | |
119 // load header data from index file, return true if loaded OK | |
120 bool LoadHeader(void); | |
121 // position file pointer to first reference begin, return true if skipped OK | |
122 bool SkipToFirstReference(void); | |
123 // write index reference data | |
124 bool WriteAllReferences(void); | |
125 // write index header data | |
126 bool WriteHeader(void); | |
127 | |
128 // 'internal' methods | |
129 public: | |
130 | |
131 // ----------------------- | |
132 // index file operations | |
133 | |
134 // check index file magic number, return true if OK | |
135 bool CheckMagicNumber(void); | |
136 // check index file version, return true if OK | |
137 bool CheckVersion(void); | |
138 // return true if FILE* is open | |
139 bool IsOpen(void) const; | |
140 // load a single index entry from file, return true if loaded OK | |
141 // @saveData - save data in memory if true, just read & discard if false | |
142 bool LoadIndexEntry(const int& refId, bool saveData = true); | |
143 // load a single reference from file, return true if loaded OK | |
144 // @saveData - save data in memory if true, just read & discard if false | |
145 bool LoadReference(const int& refId, bool saveData = true); | |
146 // loads number of references, return true if loaded OK | |
147 bool LoadReferenceCount(int& numReferences); | |
148 // position file pointer to desired reference begin, return true if skipped OK | |
149 bool SkipToReference(const int& refId); | |
150 // write current reference index data to new index file | |
151 bool WriteReferenceEntry(const BamToolsReferenceEntry& refEntry); | |
152 // write current index offset entry to new index file | |
153 bool WriteIndexEntry(const BamToolsIndexEntry& entry); | |
154 | |
155 // ----------------------- | |
156 // index data operations | |
157 | |
158 // clear all index offset data for desired reference | |
159 void ClearReferenceOffsets(const int& refId); | |
160 // calculate BAM file offset for desired region | |
161 // return true if no error (*NOT* equivalent to "has alignments or valid offset") | |
162 // check @hasAlignmentsInRegion to determine this status | |
163 // @region - target region | |
164 // @offset - resulting seek target | |
165 // @hasAlignmentsInRegion - sometimes a file just lacks data in region, this flag indicates that status | |
166 bool GetOffset(const BamRegion& region, int64_t& offset, bool* hasAlignmentsInRegion); | |
167 // returns true if index cache has data for desired reference | |
168 bool IsDataLoaded(const int& refId) const; | |
169 // clears index data from all references except the one specified | |
170 void KeepOnlyReferenceOffsets(const int& refId); | |
171 // saves an index offset entry in memory | |
172 void SaveOffsetEntry(const int& refId, const BamToolsIndexEntry& entry); | |
173 // pre-allocates size for offset vector | |
174 void SetOffsetCount(const int& refId, const int& offsetCount); | |
175 // initializes index data structure to hold @count references | |
176 void SetReferenceCount(const int& count); | |
177 | |
178 // data members | |
179 private: | |
180 int32_t m_blockSize; | |
181 BamToolsIndexData m_indexData; | |
182 off_t m_dataBeginOffset; | |
183 bool m_hasFullDataCache; | |
184 bool m_isBigEndian; | |
185 int32_t m_inputVersion; // Version is serialized as int | |
186 Version m_outputVersion; | |
187 }; | |
188 | |
189 } // namespace Internal | |
190 } // namespace BamTools | |
191 | |
192 #endif // BAMTOOLS_INDEX_FORMAT_H |