annotate spp/src/BamMultiReader.h @ 15:e689b83b0257 draft

Uploaded
author zzhou
date Tue, 27 Nov 2012 16:15:21 -0500
parents ce08b0efa3fd
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
6
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
1 // ***************************************************************************
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
2 // BamMultiReader.h (c) 2010 Erik Garrison, Derek Barnett
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
3 // Marth Lab, Department of Biology, Boston College
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
4 // All rights reserved.
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
5 // ---------------------------------------------------------------------------
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
6 // Last modified: 19 November 2010 (DB)
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
7 // ---------------------------------------------------------------------------
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
8 // Functionality for simultaneously reading multiple BAM files
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
9 // ***************************************************************************
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
10
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
11 #ifndef BAMMULTIREADER_H
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
12 #define BAMMULTIREADER_H
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
13
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
14 #include <api_global.h>
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
15 #include <BamReader.h>
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
16 #include <map>
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
17 #include <sstream>
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
18 #include <string>
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
19 #include <utility>
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
20
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
21 namespace BamTools {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
22
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
23 // index mapping reference/position pairings to bamreaders and their alignments
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
24 typedef std::multimap<std::pair<int, int>, std::pair<BamReader*, BamAlignment*> > AlignmentIndex;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
25
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
26 class API_EXPORT BamMultiReader {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
27
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
28 // constructor / destructor
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
29 public:
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
30 BamMultiReader(void);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
31 ~BamMultiReader(void);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
32
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
33 // public interface
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
34 public:
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
35
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
36 // positioning
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
37 int CurrentRefID;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
38 int CurrentLeft;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
39
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
40 // region under analysis, specified using SetRegion
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
41 BamRegion Region;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
42
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
43 // ----------------------
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
44 // BAM file operations
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
45 // ----------------------
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
46
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
47 // close BAM files
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
48 void Close(void);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
49
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
50 // opens BAM files (and optional BAM index files, if provided)
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
51 // @openIndexes - triggers index opening, useful for suppressing
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
52 // error messages during merging of files in which we may not have
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
53 // indexes.
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
54 // @coreMode - setup our first alignments using GetNextAlignmentCore();
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
55 // also useful for merging
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
56 // @preferStandardIndex - look for standard BAM index ".bai" first. If false,
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
57 // will look for BamTools index ".bti".
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
58 bool Open(const std::vector<std::string>& filenames, bool openIndexes = true, bool coreMode = false, bool preferStandardIndex = false);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
59
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
60 // returns whether underlying BAM readers ALL have an index loaded
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
61 // this is useful to indicate whether Jump() or SetRegion() are possible
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
62 bool IsIndexLoaded(void) const;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
63
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
64 // performs random-access jump to reference, position
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
65 bool Jump(int refID, int position = 0);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
66
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
67 // sets the target region
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
68 bool SetRegion(const BamRegion& region);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
69 bool SetRegion(const int&, const int&, const int&, const int&); // convenience function to above
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
70
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
71 // returns file pointers to beginning of alignments
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
72 bool Rewind(void);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
73
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
74 // ----------------------
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
75 // access alignment data
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
76 // ----------------------
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
77 // updates the reference id marker to match the lower limit of our readers
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
78 void UpdateReferenceID(void);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
79
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
80 // retrieves next available alignment (returns success/fail) from all files
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
81 bool GetNextAlignment(BamAlignment&);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
82 // retrieves next available alignment (returns success/fail) from all files
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
83 // and populates the support data with information about the alignment
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
84 // *** BUT DOES NOT PARSE CHARACTER DATA FROM THE ALIGNMENT
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
85 bool GetNextAlignmentCore(BamAlignment&);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
86 // ... should this be private?
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
87 bool HasOpenReaders(void);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
88
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
89 // ----------------------
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
90 // access auxiliary data
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
91 // ----------------------
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
92
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
93 // returns unified SAM header text for all files
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
94 const std::string GetHeaderText(void) const;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
95 // returns number of reference sequences
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
96 const int GetReferenceCount(void) const;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
97 // returns vector of reference objects
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
98 const BamTools::RefVector GetReferenceData(void) const;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
99 // returns reference id (used for BamMultiReader::Jump()) for the given reference name
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
100 const int GetReferenceID(const std::string& refName) const;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
101 // validates that we have a congruent set of BAM files that are aligned against the same reference sequences
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
102 void ValidateReaders() const;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
103
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
104 // ----------------------
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
105 // BAM index operations
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
106 // ----------------------
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
107
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
108 // creates index for BAM files which lack them, saves to files (default = bamFilename + ".bai")
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
109 bool CreateIndexes(bool useStandardIndex = true);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
110
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
111 // sets the index caching mode for the readers
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
112 void SetIndexCacheMode(const BamIndex::BamIndexCacheMode mode);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
113
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
114 //const int GetReferenceID(const string& refName) const;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
115
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
116 // utility
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
117 void PrintFilenames(void);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
118 void DumpAlignmentIndex(void);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
119 void UpdateAlignments(void); // updates our alignment cache
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
120
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
121 // private implementation
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
122 private:
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
123
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
124 // the set of readers and alignments which we operate on, maintained throughout the life of this class
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
125 std::vector<std::pair<BamReader*, BamAlignment*> > readers;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
126
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
127 // readers and alignments sorted by reference id and position, to keep track of the lowest (next) alignment
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
128 // when a reader reaches EOF, its entry is removed from this index
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
129 AlignmentIndex alignments;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
130
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
131 std::vector<std::string> fileNames;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
132 };
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
133
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
134 } // namespace BamTools
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
135
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
136 #endif // BAMMULTIREADER_H