6
|
// ***************************************************************************
// BamMultiReader.h (c) 2010 Erik Garrison, Derek Barnett
// Marth Lab, Department of Biology, Boston College
// All rights reserved.
// ---------------------------------------------------------------------------
// Last modified: 19 November 2010 (DB)
// ---------------------------------------------------------------------------
// Functionality for simultaneously reading multiple BAM files
// ***************************************************************************
|
|
10
|
|
11 #ifndef BAMMULTIREADER_H
|
|
12 #define BAMMULTIREADER_H
|
|
13
|
|
14 #include <api_global.h>
|
|
15 #include <BamReader.h>
|
|
16 #include <map>
|
|
17 #include <sstream>
|
|
18 #include <string>
|
|
19 #include <utility>
|
|
20
|
|
21 namespace BamTools {
|
|
22
|
|
23 // index mapping reference/position pairings to bamreaders and their alignments
|
|
24 typedef std::multimap<std::pair<int, int>, std::pair<BamReader*, BamAlignment*> > AlignmentIndex;
|
|
25
|
|
26 class API_EXPORT BamMultiReader {
|
|
27
|
|
28 // constructor / destructor
|
|
29 public:
|
|
30 BamMultiReader(void);
|
|
31 ~BamMultiReader(void);
|
|
32
|
|
33 // public interface
|
|
34 public:
|
|
35
|
|
36 // positioning
|
|
37 int CurrentRefID;
|
|
38 int CurrentLeft;
|
|
39
|
|
40 // region under analysis, specified using SetRegion
|
|
41 BamRegion Region;
|
|
42
|
|
43 // ----------------------
|
|
44 // BAM file operations
|
|
45 // ----------------------
|
|
46
|
|
47 // close BAM files
|
|
48 void Close(void);
|
|
49
|
|
50 // opens BAM files (and optional BAM index files, if provided)
|
|
51 // @openIndexes - triggers index opening, useful for suppressing
|
|
52 // error messages during merging of files in which we may not have
|
|
53 // indexes.
|
|
54 // @coreMode - setup our first alignments using GetNextAlignmentCore();
|
|
55 // also useful for merging
|
|
56 // @preferStandardIndex - look for standard BAM index ".bai" first. If false,
|
|
57 // will look for BamTools index ".bti".
|
|
58 bool Open(const std::vector<std::string>& filenames, bool openIndexes = true, bool coreMode = false, bool preferStandardIndex = false);
|
|
59
|
|
60 // returns whether underlying BAM readers ALL have an index loaded
|
|
61 // this is useful to indicate whether Jump() or SetRegion() are possible
|
|
62 bool IsIndexLoaded(void) const;
|
|
63
|
|
64 // performs random-access jump to reference, position
|
|
65 bool Jump(int refID, int position = 0);
|
|
66
|
|
67 // sets the target region
|
|
68 bool SetRegion(const BamRegion& region);
|
|
69 bool SetRegion(const int&, const int&, const int&, const int&); // convenience function to above
|
|
70
|
|
71 // returns file pointers to beginning of alignments
|
|
72 bool Rewind(void);
|
|
73
|
|
74 // ----------------------
|
|
75 // access alignment data
|
|
76 // ----------------------
|
|
77 // updates the reference id marker to match the lower limit of our readers
|
|
78 void UpdateReferenceID(void);
|
|
79
|
|
80 // retrieves next available alignment (returns success/fail) from all files
|
|
81 bool GetNextAlignment(BamAlignment&);
|
|
82 // retrieves next available alignment (returns success/fail) from all files
|
|
83 // and populates the support data with information about the alignment
|
|
84 // *** BUT DOES NOT PARSE CHARACTER DATA FROM THE ALIGNMENT
|
|
85 bool GetNextAlignmentCore(BamAlignment&);
|
|
86 // ... should this be private?
|
|
87 bool HasOpenReaders(void);
|
|
88
|
|
89 // ----------------------
|
|
90 // access auxiliary data
|
|
91 // ----------------------
|
|
92
|
|
93 // returns unified SAM header text for all files
|
|
94 const std::string GetHeaderText(void) const;
|
|
95 // returns number of reference sequences
|
|
96 const int GetReferenceCount(void) const;
|
|
97 // returns vector of reference objects
|
|
98 const BamTools::RefVector GetReferenceData(void) const;
|
|
99 // returns reference id (used for BamMultiReader::Jump()) for the given reference name
|
|
100 const int GetReferenceID(const std::string& refName) const;
|
|
101 // validates that we have a congruent set of BAM files that are aligned against the same reference sequences
|
|
102 void ValidateReaders() const;
|
|
103
|
|
104 // ----------------------
|
|
105 // BAM index operations
|
|
106 // ----------------------
|
|
107
|
|
108 // creates index for BAM files which lack them, saves to files (default = bamFilename + ".bai")
|
|
109 bool CreateIndexes(bool useStandardIndex = true);
|
|
110
|
|
111 // sets the index caching mode for the readers
|
|
112 void SetIndexCacheMode(const BamIndex::BamIndexCacheMode mode);
|
|
113
|
|
114 //const int GetReferenceID(const string& refName) const;
|
|
115
|
|
116 // utility
|
|
117 void PrintFilenames(void);
|
|
118 void DumpAlignmentIndex(void);
|
|
119 void UpdateAlignments(void); // updates our alignment cache
|
|
120
|
|
121 // private implementation
|
|
122 private:
|
|
123
|
|
124 // the set of readers and alignments which we operate on, maintained throughout the life of this class
|
|
125 std::vector<std::pair<BamReader*, BamAlignment*> > readers;
|
|
126
|
|
127 // readers and alignments sorted by reference id and position, to keep track of the lowest (next) alignment
|
|
128 // when a reader reaches EOF, its entry is removed from this index
|
|
129 AlignmentIndex alignments;
|
|
130
|
|
131 std::vector<std::string> fileNames;
|
|
132 };
|
|
133
|
|
134 } // namespace BamTools
|
|
135
|
|
136 #endif // BAMMULTIREADER_H
|