annotate spp/src/BamReader_p.h @ 15:e689b83b0257 draft

Uploaded
author zzhou
date Tue, 27 Nov 2012 16:15:21 -0500
parents ce08b0efa3fd
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
6
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
1 // ***************************************************************************
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
2 // BamReader_p.h (c) 2010 Derek Barnett
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
3 // Marth Lab, Department of Biology, Boston College
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
4 // All rights reserved.
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
5 // ---------------------------------------------------------------------------
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
6 // Last modified: 19 November 2010 (DB)
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
7 // ---------------------------------------------------------------------------
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
8 // Provides the basic functionality for reading BAM files
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
9 // ***************************************************************************
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
10
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
11 #ifndef BAMREADER_P_H
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
12 #define BAMREADER_P_H
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
13
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
14 // -------------
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
15 // W A R N I N G
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
16 // -------------
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
17 //
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
18 // This file is not part of the BamTools API. It exists purely as an
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
19 // implementation detail. This header file may change from version to version
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
20 // without notice, or even be removed.
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
21 //
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
22 // We mean it.
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
23
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
24 #include <BamAlignment.h>
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
25 #include <BamIndex.h>
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
26 #include <BGZF.h>
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
27 #include <string>
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
28
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
29 namespace BamTools {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
30
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
31 class BamReader;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
32
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
33 namespace Internal {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
34
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
35 class BamReaderPrivate {
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
36
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
37 // enums
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
38 public: enum RegionState { BEFORE_REGION = 0
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
39 , WITHIN_REGION
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
40 , AFTER_REGION
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
41 };
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
42
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
43 // ctor & dtor
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
44 public:
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
45 BamReaderPrivate(BamReader* parent);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
46 ~BamReaderPrivate(void);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
47
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
48 // 'public' interface to BamReader
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
49 public:
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
50
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
51 // file operations
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
52 void Close(void);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
53 bool Open(const std::string& filename,
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
54 const std::string& indexFilename,
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
55 const bool lookForIndex,
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
56 const bool preferStandardIndex);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
57 bool Rewind(void);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
58 bool SetRegion(const BamRegion& region);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
59
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
60 // access alignment data
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
61 bool GetNextAlignment(BamAlignment& bAlignment);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
62 bool GetNextAlignmentCore(BamAlignment& bAlignment);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
63
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
64 // access auxiliary data
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
65 const std::string GetHeaderText(void) const;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
66 int GetReferenceID(const std::string& refName) const;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
67
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
68 // index operations
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
69 bool CreateIndex(bool useStandardIndex);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
70 void SetIndexCacheMode(const BamIndex::BamIndexCacheMode mode);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
71
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
72 // 'internal' methods
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
73 public:
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
74
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
75 // ---------------------------------------
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
76 // reading alignments and auxiliary data
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
77
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
78 // adjusts requested region if necessary (depending on where data actually begins)
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
79 void AdjustRegion(BamRegion& region);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
80 // fills out character data for BamAlignment data
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
81 bool BuildCharData(BamAlignment& bAlignment);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
82 // checks to see if alignment overlaps current region
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
83 RegionState IsOverlap(BamAlignment& bAlignment);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
84 // retrieves header text from BAM file
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
85 void LoadHeaderData(void);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
86 // retrieves BAM alignment under file pointer
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
87 bool LoadNextAlignment(BamAlignment& bAlignment);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
88 // builds reference data structure from BAM file
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
89 void LoadReferenceData(void);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
90 // mark references with 'HasAlignments' status
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
91 void MarkReferences(void);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
92
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
93 // ---------------------------------
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
94 // index file handling
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
95
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
96 // clear out internal index data structure
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
97 void ClearIndex(void);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
98 // loads index from BAM index file
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
99 bool LoadIndex(const bool lookForIndex, const bool preferStandardIndex);
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
100
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
101 // data members
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
102 public:
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
103
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
104 // general file data
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
105 BgzfData mBGZF;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
106 std::string HeaderText;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
107 BamIndex* Index;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
108 RefVector References;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
109 bool HasIndex;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
110 int64_t AlignmentsBeginOffset;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
111 std::string Filename;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
112 std::string IndexFilename;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
113
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
114 // Internal::BamHeader* m_header;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
115
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
116 // index caching mode
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
117 BamIndex::BamIndexCacheMode IndexCacheMode;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
118
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
119 // system data
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
120 bool IsBigEndian;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
121
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
122 // user-specified region values
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
123 BamRegion Region;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
124 bool HasAlignmentsInRegion;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
125
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
126 // parent BamReader
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
127 BamReader* Parent;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
128
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
129 // BAM character constants
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
130 const char* DNA_LOOKUP;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
131 const char* CIGAR_LOOKUP;
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
132 };
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
133
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
134 } // namespace Internal
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
135 } // namespace BamTools
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
136
ce08b0efa3fd Uploaded
zzhou
parents:
diff changeset
137 #endif // BAMREADER_P_H