annotate spp/src/BamToolsIndex_p.h @ 15:e689b83b0257 draft

Uploaded
author zzhou
date Tue, 27 Nov 2012 16:15:21 -0500
parents ce08b0efa3fd
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
6
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
1 // ***************************************************************************
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
2 // BamToolsIndex_p.h (c) 2010 Derek Barnett
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
3 // Marth Lab, Department of Biology, Boston College
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
4 // All rights reserved.
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
5 // ---------------------------------------------------------------------------
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
6 // Last modified: 19 November 2010 (DB)
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
7 // ---------------------------------------------------------------------------
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
8 // Provides index operations for the BamTools index format (".bti")
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
9 // ***************************************************************************
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
10
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
11 #ifndef BAMTOOLS_INDEX_FORMAT_H
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
12 #define BAMTOOLS_INDEX_FORMAT_H
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
13
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
14 // -------------
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
15 // W A R N I N G
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
16 // -------------
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
17 //
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
18 // This file is not part of the BamTools API. It exists purely as an
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
19 // implementation detail. This header file may change from version to
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
20 // version without notice, or even be removed.
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
21 //
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
22 // We mean it.
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
23
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
24 #include <BamAux.h>
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
25 #include <BamIndex.h>
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
26 #include <map>
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
27 #include <string>
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
28 #include <vector>
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
29
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
30 namespace BamTools {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
31
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
32 namespace Internal {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
33
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
// individual index offset entry
// NOTE(review): the per-field descriptions below are inferred from the member
// names only; confirm them against the index reader/writer implementation.
struct BamToolsIndexEntry {

    // data members
    int32_t MaxEndPosition;  // presumably the largest end position covered by this entry
    int64_t StartOffset;     // presumably the file offset at which this entry's data starts
    int32_t StartPosition;   // presumably the first start position covered by this entry

    // ctor
    // Every argument defaults to 0, so this also acts as the default
    // constructor. The scalar arguments are taken by value rather than by
    // const reference; call sites are unaffected.
    BamToolsIndexEntry(int32_t maxEndPosition = 0,
                       int64_t startOffset    = 0,
                       int32_t startPosition  = 0)
        : MaxEndPosition(maxEndPosition)
        , StartOffset(startOffset)
        , StartPosition(startPosition)
    { }
};
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
51
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
52 // reference index entry
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
53 struct BamToolsReferenceEntry {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
54
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
55 // data members
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
56 bool HasAlignments;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
57 std::vector<BamToolsIndexEntry> Offsets;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
58
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
59 // ctor
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
60 BamToolsReferenceEntry(void)
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
61 : HasAlignments(false)
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
62 { }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
63 };
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
64
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
65 // the actual index data structure
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
66 typedef std::map<int, BamToolsReferenceEntry> BamToolsIndexData;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
67
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
68 class BamToolsIndex : public BamIndex {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
69
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
70 // keep a list of any supported versions here
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
71 // (might be useful later to handle any 'legacy' versions if the format changes)
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
72 // listed for example like: BTI_1_0 = 1, BTI_1_1 = 2, BTI_1_2 = 3, BTI_2_0 = 4, and so on
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
73 //
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
74 // so a change introduced in (hypothetical) BTI_1_2 would be handled from then on by:
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
75 //
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
76 // if ( indexVersion >= BTI_1_2 )
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
77 // do something new
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
78 // else
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
79 // do the old thing
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
80 enum Version { BTI_1_0 = 1
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
81 , BTI_1_1
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
82 , BTI_1_2
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
83 };
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
84
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
85
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
86 // ctor & dtor
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
87 public:
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
88 BamToolsIndex(BamTools::BgzfData* bgzf, BamTools::BamReader* reader);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
89 ~BamToolsIndex(void);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
90
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
91 // interface (implements BamIndex virtual methods)
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
92 public:
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
93 // creates index data (in-memory) from current reader data
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
94 bool Build(void);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
95 // returns supported file extension
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
96 const std::string Extension(void) const { return std::string(".bti"); }
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
97 // returns whether reference has alignments or no
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
98 bool HasAlignments(const int& referenceID) const;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
99 // attempts to use index to jump to region; returns success/fail
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
100 // a "successful" jump indicates no error, but not whether this region has data
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
101 // * thus, the method sets a flag to indicate whether there are alignments
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
102 // available after the jump position
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
103 bool Jump(const BamTools::BamRegion& region, bool* hasAlignmentsInRegion);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
104 public:
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
105 // clear all current index offset data in memory
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
106 void ClearAllData(void);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
107 // return file position after header metadata
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
108 const off_t DataBeginOffset(void) const;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
109 // return true if all index data is cached
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
110 bool HasFullDataCache(void) const;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
111 // clears index data from all references except the first
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
112 void KeepOnlyFirstReferenceOffsets(void);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
113 // load index data for all references, return true if loaded OK
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
114 // @saveData - save data in memory if true, just read & discard if false
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
115 bool LoadAllReferences(bool saveData = true);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
116 // load first reference from file, return true if loaded OK
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
117 // @saveData - save data in memory if true, just read & discard if false
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
118 bool LoadFirstReference(bool saveData = true);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
119 // load header data from index file, return true if loaded OK
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
120 bool LoadHeader(void);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
121 // position file pointer to first reference begin, return true if skipped OK
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
122 bool SkipToFirstReference(void);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
123 // write index reference data
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
124 bool WriteAllReferences(void);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
125 // write index header data
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
126 bool WriteHeader(void);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
127
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
128 // 'internal' methods
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
129 public:
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
130
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
131 // -----------------------
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
132 // index file operations
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
133
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
134 // check index file magic number, return true if OK
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
135 bool CheckMagicNumber(void);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
136 // check index file version, return true if OK
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
137 bool CheckVersion(void);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
138 // return true if FILE* is open
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
139 bool IsOpen(void) const;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
140 // load a single index entry from file, return true if loaded OK
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
141 // @saveData - save data in memory if true, just read & discard if false
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
142 bool LoadIndexEntry(const int& refId, bool saveData = true);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
143 // load a single reference from file, return true if loaded OK
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
144 // @saveData - save data in memory if true, just read & discard if false
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
145 bool LoadReference(const int& refId, bool saveData = true);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
146 // loads number of references, return true if loaded OK
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
147 bool LoadReferenceCount(int& numReferences);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
148 // position file pointer to desired reference begin, return true if skipped OK
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
149 bool SkipToReference(const int& refId);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
150 // write current reference index data to new index file
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
151 bool WriteReferenceEntry(const BamToolsReferenceEntry& refEntry);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
152 // write current index offset entry to new index file
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
153 bool WriteIndexEntry(const BamToolsIndexEntry& entry);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
154
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
155 // -----------------------
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
156 // index data operations
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
157
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
158 // clear all index offset data for desired reference
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
159 void ClearReferenceOffsets(const int& refId);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
160 // calculate BAM file offset for desired region
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
161 // return true if no error (*NOT* equivalent to "has alignments or valid offset")
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
162 // check @hasAlignmentsInRegion to determine this status
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
163 // @region - target region
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
164 // @offset - resulting seek target
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
165 // @hasAlignmentsInRegion - sometimes a file just lacks data in region, this flag indicates that status
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
166 bool GetOffset(const BamRegion& region, int64_t& offset, bool* hasAlignmentsInRegion);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
167 // returns true if index cache has data for desired reference
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
168 bool IsDataLoaded(const int& refId) const;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
169 // clears index data from all references except the one specified
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
170 void KeepOnlyReferenceOffsets(const int& refId);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
171 // saves an index offset entry in memory
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
172 void SaveOffsetEntry(const int& refId, const BamToolsIndexEntry& entry);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
173 // pre-allocates size for offset vector
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
174 void SetOffsetCount(const int& refId, const int& offsetCount);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
175 // initializes index data structure to hold @count references
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
176 void SetReferenceCount(const int& count);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
177
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
178 // data members
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
179 private:
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
180 int32_t m_blockSize;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
181 BamToolsIndexData m_indexData;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
182 off_t m_dataBeginOffset;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
183 bool m_hasFullDataCache;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
184 bool m_isBigEndian;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
185 int32_t m_inputVersion; // Version is serialized as int
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
186 Version m_outputVersion;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
187 };
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
188
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
189 } // namespace Internal
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
190 } // namespace BamTools
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
191
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
192 #endif // BAMTOOLS_INDEX_FORMAT_H