6
|
// ***************************************************************************
// BamToolsIndex.h (c) 2010 Derek Barnett
// Marth Lab, Department of Biology, Boston College
// All rights reserved.
// ---------------------------------------------------------------------------
// Last modified: 19 November 2010 (DB)
// ---------------------------------------------------------------------------
// Provides index operations for the BamTools index format (".bti")
// ***************************************************************************
|
|
10
|
|
11 #ifndef BAMTOOLS_INDEX_FORMAT_H
|
|
12 #define BAMTOOLS_INDEX_FORMAT_H
|
|
13
|
|
// -------------
// W A R N I N G
// -------------
//
// This file is not part of the BamTools API.  It exists purely as an
// implementation detail.  This header file may change from version to
// version without notice, or even be removed.
//
// We mean it.
|
|
23
|
|
24 #include <BamAux.h>
|
|
25 #include <BamIndex.h>
|
|
26 #include <map>
|
|
27 #include <string>
|
|
28 #include <vector>
|
|
29
|
|
30 namespace BamTools {
|
|
31
|
|
32 namespace Internal {
|
|
33
|
|
// Individual index offset entry.
//
// Plain value type holding three integers; every constructor argument
// defaults to 0, so a default-constructed entry is all zeros.
//
// NOTE(review): the field meanings below are inferred from their names only;
// confirm against the code that builds and reads the index.
struct BamToolsIndexEntry {

    // data members
    int32_t MaxEndPosition;  // presumably the largest alignment end position covered by this entry
    int64_t StartOffset;     // presumably the BAM file offset where this entry's alignments begin
    int32_t StartPosition;   // presumably the first alignment start position covered by this entry

    // ctor
    // Copies each argument into the member of the same name.
    BamToolsIndexEntry(const int32_t& maxEndPosition = 0,
                       const int64_t& startOffset    = 0,
                       const int32_t& startPosition  = 0)
        : MaxEndPosition(maxEndPosition)
        , StartOffset(startOffset)
        , StartPosition(startPosition)
    { }
};
|
|
51
|
|
52 // reference index entry
|
|
53 struct BamToolsReferenceEntry {
|
|
54
|
|
55 // data members
|
|
56 bool HasAlignments;
|
|
57 std::vector<BamToolsIndexEntry> Offsets;
|
|
58
|
|
59 // ctor
|
|
60 BamToolsReferenceEntry(void)
|
|
61 : HasAlignments(false)
|
|
62 { }
|
|
63 };
|
|
64
|
|
65 // the actual index data structure
|
|
66 typedef std::map<int, BamToolsReferenceEntry> BamToolsIndexData;
|
|
67
|
|
68 class BamToolsIndex : public BamIndex {
|
|
69
|
|
70 // keep a list of any supported versions here
|
|
71 // (might be useful later to handle any 'legacy' versions if the format changes)
|
|
72 // listed for example like: BTI_1_0 = 1, BTI_1_1 = 2, BTI_1_2 = 3, BTI_2_0 = 4, and so on
|
|
73 //
|
|
74 // so a change introduced in (hypothetical) BTI_1_2 would be handled from then on by:
|
|
75 //
|
|
76 // if ( indexVersion >= BTI_1_2 )
|
|
77 // do something new
|
|
78 // else
|
|
79 // do the old thing
|
|
80 enum Version { BTI_1_0 = 1
|
|
81 , BTI_1_1
|
|
82 , BTI_1_2
|
|
83 };
|
|
84
|
|
85
|
|
86 // ctor & dtor
|
|
87 public:
|
|
88 BamToolsIndex(BamTools::BgzfData* bgzf, BamTools::BamReader* reader);
|
|
89 ~BamToolsIndex(void);
|
|
90
|
|
91 // interface (implements BamIndex virtual methods)
|
|
92 public:
|
|
93 // creates index data (in-memory) from current reader data
|
|
94 bool Build(void);
|
|
95 // returns supported file extension
|
|
96 const std::string Extension(void) const { return std::string(".bti"); }
|
|
97 // returns whether reference has alignments or no
|
|
98 bool HasAlignments(const int& referenceID) const;
|
|
99 // attempts to use index to jump to region; returns success/fail
|
|
100 // a "successful" jump indicates no error, but not whether this region has data
|
|
101 // * thus, the method sets a flag to indicate whether there are alignments
|
|
102 // available after the jump position
|
|
103 bool Jump(const BamTools::BamRegion& region, bool* hasAlignmentsInRegion);
|
|
104 public:
|
|
105 // clear all current index offset data in memory
|
|
106 void ClearAllData(void);
|
|
107 // return file position after header metadata
|
|
108 const off_t DataBeginOffset(void) const;
|
|
109 // return true if all index data is cached
|
|
110 bool HasFullDataCache(void) const;
|
|
111 // clears index data from all references except the first
|
|
112 void KeepOnlyFirstReferenceOffsets(void);
|
|
113 // load index data for all references, return true if loaded OK
|
|
114 // @saveData - save data in memory if true, just read & discard if false
|
|
115 bool LoadAllReferences(bool saveData = true);
|
|
116 // load first reference from file, return true if loaded OK
|
|
117 // @saveData - save data in memory if true, just read & discard if false
|
|
118 bool LoadFirstReference(bool saveData = true);
|
|
119 // load header data from index file, return true if loaded OK
|
|
120 bool LoadHeader(void);
|
|
121 // position file pointer to first reference begin, return true if skipped OK
|
|
122 bool SkipToFirstReference(void);
|
|
123 // write index reference data
|
|
124 bool WriteAllReferences(void);
|
|
125 // write index header data
|
|
126 bool WriteHeader(void);
|
|
127
|
|
128 // 'internal' methods
|
|
129 public:
|
|
130
|
|
131 // -----------------------
|
|
132 // index file operations
|
|
133
|
|
134 // check index file magic number, return true if OK
|
|
135 bool CheckMagicNumber(void);
|
|
136 // check index file version, return true if OK
|
|
137 bool CheckVersion(void);
|
|
138 // return true if FILE* is open
|
|
139 bool IsOpen(void) const;
|
|
140 // load a single index entry from file, return true if loaded OK
|
|
141 // @saveData - save data in memory if true, just read & discard if false
|
|
142 bool LoadIndexEntry(const int& refId, bool saveData = true);
|
|
143 // load a single reference from file, return true if loaded OK
|
|
144 // @saveData - save data in memory if true, just read & discard if false
|
|
145 bool LoadReference(const int& refId, bool saveData = true);
|
|
146 // loads number of references, return true if loaded OK
|
|
147 bool LoadReferenceCount(int& numReferences);
|
|
148 // position file pointer to desired reference begin, return true if skipped OK
|
|
149 bool SkipToReference(const int& refId);
|
|
150 // write current reference index data to new index file
|
|
151 bool WriteReferenceEntry(const BamToolsReferenceEntry& refEntry);
|
|
152 // write current index offset entry to new index file
|
|
153 bool WriteIndexEntry(const BamToolsIndexEntry& entry);
|
|
154
|
|
155 // -----------------------
|
|
156 // index data operations
|
|
157
|
|
158 // clear all index offset data for desired reference
|
|
159 void ClearReferenceOffsets(const int& refId);
|
|
160 // calculate BAM file offset for desired region
|
|
161 // return true if no error (*NOT* equivalent to "has alignments or valid offset")
|
|
162 // check @hasAlignmentsInRegion to determine this status
|
|
163 // @region - target region
|
|
164 // @offset - resulting seek target
|
|
165 // @hasAlignmentsInRegion - sometimes a file just lacks data in region, this flag indicates that status
|
|
166 bool GetOffset(const BamRegion& region, int64_t& offset, bool* hasAlignmentsInRegion);
|
|
167 // returns true if index cache has data for desired reference
|
|
168 bool IsDataLoaded(const int& refId) const;
|
|
169 // clears index data from all references except the one specified
|
|
170 void KeepOnlyReferenceOffsets(const int& refId);
|
|
171 // saves an index offset entry in memory
|
|
172 void SaveOffsetEntry(const int& refId, const BamToolsIndexEntry& entry);
|
|
173 // pre-allocates size for offset vector
|
|
174 void SetOffsetCount(const int& refId, const int& offsetCount);
|
|
175 // initializes index data structure to hold @count references
|
|
176 void SetReferenceCount(const int& count);
|
|
177
|
|
178 // data members
|
|
179 private:
|
|
180 int32_t m_blockSize;
|
|
181 BamToolsIndexData m_indexData;
|
|
182 off_t m_dataBeginOffset;
|
|
183 bool m_hasFullDataCache;
|
|
184 bool m_isBigEndian;
|
|
185 int32_t m_inputVersion; // Version is serialized as int
|
|
186 Version m_outputVersion;
|
|
187 };
|
|
188
|
|
189 } // namespace Internal
|
|
190 } // namespace BamTools
|
|
191
|
|
192 #endif // BAMTOOLS_INDEX_FORMAT_H
|