annotate spp/src/BamIndex.h @ 15:e689b83b0257 draft

Uploaded
author zzhou
date Tue, 27 Nov 2012 16:15:21 -0500
parents ce08b0efa3fd
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
6
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
1 // ***************************************************************************
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
2 // BamIndex.h (c) 2009 Derek Barnett
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
3 // Marth Lab, Department of Biology, Boston College
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
4 // All rights reserved.
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
5 // ---------------------------------------------------------------------------
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
6 // Last modified: 19 November 2010 (DB)
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
7 // ---------------------------------------------------------------------------
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
8 // Provides basic BAM index interface
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
9 // ***************************************************************************
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
10
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
11 #ifndef BAM_INDEX_H
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
12 #define BAM_INDEX_H
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
13
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
14 #include <api_global.h>
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
15 #include <BamAux.h>
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
16 #include <iostream>
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
17 #include <string>
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
18 #include <vector>
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
19
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
20 namespace BamTools {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
21
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
22 class BamReader;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
23 class BgzfData;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
24
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
25 namespace Internal {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
26 class BamStandardIndex;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
27 class BamToolsIndex;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
28 } // namespace Internal
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
29
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
30 // --------------------------------------------------
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
31 // BamIndex base class
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
32 class API_EXPORT BamIndex {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
33
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
34 // specify index-caching behavior
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
35 //
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
36 // @FullIndexCaching - store entire index file contents in memory
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
37 // @LimitedIndexCaching - store only index data for current reference
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
38 // being processed
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
39 // @NoIndexCaching - do not store any index data. Load as needed to
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
40 // calculate jump offset
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
41 public: enum BamIndexCacheMode { FullIndexCaching = 0
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
42 , LimitedIndexCaching
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
43 , NoIndexCaching
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
44 };
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
45
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
46 // ctor & dtor
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
47 public:
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
48 BamIndex(BamTools::BgzfData* bgzf, BamTools::BamReader* reader);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
49 virtual ~BamIndex(void);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
50
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
51 // index interface
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
52 public:
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
53 // creates index data (in-memory) from current reader data
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
54 virtual bool Build(void) =0;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
55 // returns supported file extension
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
56 virtual const std::string Extension(void) const =0;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
57 // returns whether reference has alignments or no
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
58 virtual bool HasAlignments(const int& referenceID) const =0;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
59 // attempts to use index to jump to region; returns success/fail
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
60 // a "successful" jump indicates no error, but not whether this region has data
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
61 // * thus, the method sets a flag to indicate whether there are alignments
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
62 // available after the jump position
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
63 virtual bool Jump(const BamTools::BamRegion& region, bool* hasAlignmentsInRegion) =0;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
64 // loads existing data from file into memory
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
65 virtual bool Load(const std::string& filename);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
66 // change the index caching behavior
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
67 virtual void SetCacheMode(const BamIndexCacheMode mode);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
68 // writes in-memory index data out to file
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
69 // N.B. - (this is the original BAM filename, method will modify it to use applicable extension)
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
70 virtual bool Write(const std::string& bamFilename);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
71
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
72 // derived-classes MUST provide implementation
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
73 protected:
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
74 // clear all current index offset data in memory
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
75 virtual void ClearAllData(void) =0;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
76 // return file position after header metadata
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
77 virtual const off_t DataBeginOffset(void) const =0;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
78 // return true if all index data is cached
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
79 virtual bool HasFullDataCache(void) const =0;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
80 // clears index data from all references except the first
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
81 virtual void KeepOnlyFirstReferenceOffsets(void) =0;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
82 // load index data for all references, return true if loaded OK
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
83 // @saveData - save data in memory if true, just read & discard if false
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
84 virtual bool LoadAllReferences(bool saveData = true) =0;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
85 // load first reference from file, return true if loaded OK
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
86 // @saveData - save data in memory if true, just read & discard if false
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
87 virtual bool LoadFirstReference(bool saveData = true) =0;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
88 // load header data from index file, return true if loaded OK
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
89 virtual bool LoadHeader(void) =0;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
90 // position file pointer to first reference begin, return true if skipped OK
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
91 virtual bool SkipToFirstReference(void) =0;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
92 // write index reference data
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
93 virtual bool WriteAllReferences(void) =0;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
94 // write index header data
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
95 virtual bool WriteHeader(void) =0;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
96
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
97 // internal methods
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
98 protected:
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
99 // rewind index file to beginning of index data, return true if rewound OK
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
100 bool Rewind(void);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
101
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
102 private:
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
103 // return true if FILE* is open
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
104 bool IsOpen(void) const;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
105 // opens index file according to requested mode, return true if opened OK
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
106 bool OpenIndexFile(const std::string& filename, const std::string& mode);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
107 // updates in-memory cache of index data, depending on current cache mode
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
108 void UpdateCache(void);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
109
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
110 // factory methods for returning proper BamIndex-derived type based on available index files
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
111 public:
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
112
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
113 // returns index based on BAM filename 'stub'
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
114 // checks first for preferred type, returns that type if found
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
115 // (if not found, attmempts to load other type(s), returns 0 if NONE found)
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
116 //
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
117 // ** default preferred type is BamToolsIndex ** use this anytime it exists
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
118 enum PreferredIndexType { BAMTOOLS = 0, STANDARD };
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
119 static BamIndex* FromBamFilename(const std::string& bamFilename,
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
120 BamTools::BgzfData* bgzf,
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
121 BamTools::BamReader* reader,
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
122 const BamIndex::PreferredIndexType& type = BamIndex::BAMTOOLS);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
123
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
124 // returns index based on explicitly named index file (or 0 if not found)
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
125 static BamIndex* FromIndexFilename(const std::string& indexFilename,
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
126 BamTools::BgzfData* bgzf,
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
127 BamTools::BamReader* reader);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
128
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
129 // data members
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
130 protected:
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
131 BamTools::BgzfData* m_BGZF;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
132 BamTools::BamReader* m_reader;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
133 BamTools::RefVector m_references;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
134 BamIndex::BamIndexCacheMode m_cacheMode;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
135 FILE* m_indexStream;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
136
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
137
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
138 // friends
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
139 friend class Internal::BamStandardIndex;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
140 friend class Internal::BamToolsIndex;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
141 };
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
142
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
143 } // namespace BamTools
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
144
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
145 #endif // BAM_INDEX_H