// ***************************************************************************
// BamIndex.h (c) 2009 Derek Barnett
// Marth Lab, Department of Biology, Boston College
// All rights reserved.
// ---------------------------------------------------------------------------
// Last modified: 19 November 2010 (DB)
// ---------------------------------------------------------------------------
// Provides basic BAM index interface
// ***************************************************************************
|
|
10
|
|
11 #ifndef BAM_INDEX_H
|
|
12 #define BAM_INDEX_H
|
|
13
|
|
#include <api_global.h>
#include <BamAux.h>

#include <sys/types.h>  // off_t

#include <cstdio>       // FILE
#include <iostream>
#include <string>
#include <vector>
|
|
19
|
|
20 namespace BamTools {
|
|
21
|
|
22 class BamReader;
|
|
23 class BgzfData;
|
|
24
|
|
25 namespace Internal {
|
|
26 class BamStandardIndex;
|
|
27 class BamToolsIndex;
|
|
28 } // namespace Internal
|
|
29
|
|
30 // --------------------------------------------------
|
|
31 // BamIndex base class
|
|
32 class API_EXPORT BamIndex {
|
|
33
|
|
34 // specify index-caching behavior
|
|
35 //
|
|
36 // @FullIndexCaching - store entire index file contents in memory
|
|
37 // @LimitedIndexCaching - store only index data for current reference
|
|
38 // being processed
|
|
39 // @NoIndexCaching - do not store any index data. Load as needed to
|
|
40 // calculate jump offset
|
|
41 public: enum BamIndexCacheMode { FullIndexCaching = 0
|
|
42 , LimitedIndexCaching
|
|
43 , NoIndexCaching
|
|
44 };
|
|
45
|
|
46 // ctor & dtor
|
|
47 public:
|
|
48 BamIndex(BamTools::BgzfData* bgzf, BamTools::BamReader* reader);
|
|
49 virtual ~BamIndex(void);
|
|
50
|
|
51 // index interface
|
|
52 public:
|
|
53 // creates index data (in-memory) from current reader data
|
|
54 virtual bool Build(void) =0;
|
|
55 // returns supported file extension
|
|
56 virtual const std::string Extension(void) const =0;
|
|
57 // returns whether reference has alignments or no
|
|
58 virtual bool HasAlignments(const int& referenceID) const =0;
|
|
59 // attempts to use index to jump to region; returns success/fail
|
|
60 // a "successful" jump indicates no error, but not whether this region has data
|
|
61 // * thus, the method sets a flag to indicate whether there are alignments
|
|
62 // available after the jump position
|
|
63 virtual bool Jump(const BamTools::BamRegion& region, bool* hasAlignmentsInRegion) =0;
|
|
64 // loads existing data from file into memory
|
|
65 virtual bool Load(const std::string& filename);
|
|
66 // change the index caching behavior
|
|
67 virtual void SetCacheMode(const BamIndexCacheMode mode);
|
|
68 // writes in-memory index data out to file
|
|
69 // N.B. - (this is the original BAM filename, method will modify it to use applicable extension)
|
|
70 virtual bool Write(const std::string& bamFilename);
|
|
71
|
|
72 // derived-classes MUST provide implementation
|
|
73 protected:
|
|
74 // clear all current index offset data in memory
|
|
75 virtual void ClearAllData(void) =0;
|
|
76 // return file position after header metadata
|
|
77 virtual const off_t DataBeginOffset(void) const =0;
|
|
78 // return true if all index data is cached
|
|
79 virtual bool HasFullDataCache(void) const =0;
|
|
80 // clears index data from all references except the first
|
|
81 virtual void KeepOnlyFirstReferenceOffsets(void) =0;
|
|
82 // load index data for all references, return true if loaded OK
|
|
83 // @saveData - save data in memory if true, just read & discard if false
|
|
84 virtual bool LoadAllReferences(bool saveData = true) =0;
|
|
85 // load first reference from file, return true if loaded OK
|
|
86 // @saveData - save data in memory if true, just read & discard if false
|
|
87 virtual bool LoadFirstReference(bool saveData = true) =0;
|
|
88 // load header data from index file, return true if loaded OK
|
|
89 virtual bool LoadHeader(void) =0;
|
|
90 // position file pointer to first reference begin, return true if skipped OK
|
|
91 virtual bool SkipToFirstReference(void) =0;
|
|
92 // write index reference data
|
|
93 virtual bool WriteAllReferences(void) =0;
|
|
94 // write index header data
|
|
95 virtual bool WriteHeader(void) =0;
|
|
96
|
|
97 // internal methods
|
|
98 protected:
|
|
99 // rewind index file to beginning of index data, return true if rewound OK
|
|
100 bool Rewind(void);
|
|
101
|
|
102 private:
|
|
103 // return true if FILE* is open
|
|
104 bool IsOpen(void) const;
|
|
105 // opens index file according to requested mode, return true if opened OK
|
|
106 bool OpenIndexFile(const std::string& filename, const std::string& mode);
|
|
107 // updates in-memory cache of index data, depending on current cache mode
|
|
108 void UpdateCache(void);
|
|
109
|
|
110 // factory methods for returning proper BamIndex-derived type based on available index files
|
|
111 public:
|
|
112
|
|
113 // returns index based on BAM filename 'stub'
|
|
114 // checks first for preferred type, returns that type if found
|
|
115 // (if not found, attmempts to load other type(s), returns 0 if NONE found)
|
|
116 //
|
|
117 // ** default preferred type is BamToolsIndex ** use this anytime it exists
|
|
118 enum PreferredIndexType { BAMTOOLS = 0, STANDARD };
|
|
119 static BamIndex* FromBamFilename(const std::string& bamFilename,
|
|
120 BamTools::BgzfData* bgzf,
|
|
121 BamTools::BamReader* reader,
|
|
122 const BamIndex::PreferredIndexType& type = BamIndex::BAMTOOLS);
|
|
123
|
|
124 // returns index based on explicitly named index file (or 0 if not found)
|
|
125 static BamIndex* FromIndexFilename(const std::string& indexFilename,
|
|
126 BamTools::BgzfData* bgzf,
|
|
127 BamTools::BamReader* reader);
|
|
128
|
|
129 // data members
|
|
130 protected:
|
|
131 BamTools::BgzfData* m_BGZF;
|
|
132 BamTools::BamReader* m_reader;
|
|
133 BamTools::RefVector m_references;
|
|
134 BamIndex::BamIndexCacheMode m_cacheMode;
|
|
135 FILE* m_indexStream;
|
|
136
|
|
137
|
|
138 // friends
|
|
139 friend class Internal::BamStandardIndex;
|
|
140 friend class Internal::BamToolsIndex;
|
|
141 };
|
|
142
|
|
143 } // namespace BamTools
|
|
144
|
|
145 #endif // BAM_INDEX_H
|