6
|
1 // ***************************************************************************
|
|
2 // BamAux.h (c) 2009 Derek Barnett, Michael Strömberg
|
|
3 // Marth Lab, Department of Biology, Boston College
|
|
4 // All rights reserved.
|
|
5 // ---------------------------------------------------------------------------
|
|
6 // Last modified: 19 November 2010 (DB)
|
|
7 // ---------------------------------------------------------------------------
|
|
8 // Provides the basic constants, data structures, utilities etc.
|
|
9 // used throughout the API for handling BAM files
|
|
10 // ***************************************************************************
|
|
11
|
|
12 #ifndef BAMAUX_H
|
|
13 #define BAMAUX_H
|
|
14
|
|
15 #include <api_global.h>
|
|
16
|
|
17 #include <fstream>
|
|
18 #include <iostream>
|
|
19 #include <string>
|
|
20 #include <vector>
|
|
21
|
|
// Platform-specific large-file support
//
// ftell64(stream) / fseek64(stream, offset, origin) expand to the stdio calls
// that use 64-bit file offsets on the current platform.
//
// The Windows branch is selected on either WIN32 or _WIN32: _WIN32 is the
// macro Windows compilers predefine themselves, whereas WIN32 is only present
// when a project setting or header defines it.
#ifndef BAMTOOLS_LFS
#define BAMTOOLS_LFS
    #if defined(WIN32) || defined(_WIN32)
        #define ftell64(a)     _ftelli64(a)
        #define fseek64(a,b,c) _fseeki64(a,b,c)
    #else
        #define ftell64(a)     ftello(a)
        #define fseek64(a,b,c) fseeko(a,b,c)
    #endif
#endif // BAMTOOLS_LFS
|
|
33
|
|
// Platform-specific type definitions
//
// Fixed-width integer types come from <stdint.h> wherever that header exists.
// MSVC only ships it from Visual Studio 2010 (_MSC_VER 1600) onwards, so the
// hand-written typedefs are limited to older MSVC releases; defining them on
// newer ones can clash with the definitions in the compiler's own header.
#ifndef BAMTOOLS_TYPES
#define BAMTOOLS_TYPES
    #if defined(_MSC_VER) && (_MSC_VER < 1600)
        // 'signed' is spelled out: plain char may be unsigned (e.g. with /J)
        typedef signed char        int8_t;
        typedef unsigned char      uint8_t;
        typedef short              int16_t;
        typedef unsigned short     uint16_t;
        typedef int                int32_t;
        typedef unsigned int       uint32_t;
        typedef long long          int64_t;
        typedef unsigned long long uint64_t;
    #else
        #include <stdint.h>
    #endif
#endif // BAMTOOLS_TYPES
|
|
50
|
|
51 namespace BamTools {
|
|
52
|
|
53 // ----------------------------------------------------------------
|
|
54 // ----------------------------------------------------------------
|
|
// BAM constants

// CIGAR operation codes
const int BAM_CMATCH     = 0;
const int BAM_CINS       = 1;
const int BAM_CDEL       = 2;
const int BAM_CREF_SKIP  = 3;
const int BAM_CSOFT_CLIP = 4;
const int BAM_CHARD_CLIP = 5;
const int BAM_CPAD       = 6;

// BAM_CIGAR_MASK has the lowest BAM_CIGAR_SHIFT bits set (0x0F)
const int BAM_CIGAR_SHIFT = 4;
const int BAM_CIGAR_MASK  = (1 << BAM_CIGAR_SHIFT) - 1;

// fixed sizes, in bytes as the names suggest (units not shown here - confirm at call sites)
const int BAM_CORE_SIZE = 32;
const int BT_SIZEOF_INT = 4;
|
|
68
|
|
69 // ----------------------------------------------------------------
|
|
70 // ----------------------------------------------------------------
|
|
71 // Data structs & typedefs
|
|
72
|
|
73 // CIGAR operation data structure
|
|
74 struct API_EXPORT CigarOp {
|
|
75
|
|
76 // data members
|
|
77 char Type; // Operation type (MIDNSHP)
|
|
78 uint32_t Length; // Operation length (number of bases)
|
|
79
|
|
80 // constructor
|
|
81 CigarOp(const char type = '\0',
|
|
82 const uint32_t length = 0)
|
|
83 : Type(type)
|
|
84 , Length(length)
|
|
85 { }
|
|
86 };
|
|
87
|
|
88 // Reference data entry
|
|
89 struct API_EXPORT RefData {
|
|
90
|
|
91 // data members
|
|
92 std::string RefName; // Name of reference sequence
|
|
93 int32_t RefLength; // Length of reference sequence
|
|
94 bool RefHasAlignments; // True if BAM file contains alignments mapped to reference sequence
|
|
95
|
|
96 // constructor
|
|
97 RefData(const int32_t& length = 0,
|
|
98 bool ok = false)
|
|
99 : RefLength(length)
|
|
100 , RefHasAlignments(ok)
|
|
101 { }
|
|
102 };
|
|
103 typedef std::vector<RefData> RefVector;
|
|
104
|
|
105 // General (sequential) genome region
|
|
106 struct API_EXPORT BamRegion {
|
|
107
|
|
108 // data members
|
|
109 int LeftRefID;
|
|
110 int LeftPosition;
|
|
111 int RightRefID;
|
|
112 int RightPosition;
|
|
113
|
|
114 // constructor
|
|
115 BamRegion(const int& leftID = -1,
|
|
116 const int& leftPos = -1,
|
|
117 const int& rightID = -1,
|
|
118 const int& rightPos = -1)
|
|
119 : LeftRefID(leftID)
|
|
120 , LeftPosition(leftPos)
|
|
121 , RightRefID(rightID)
|
|
122 , RightPosition(rightPos)
|
|
123 { }
|
|
124
|
|
125 // copy constructor
|
|
126 BamRegion(const BamRegion& other)
|
|
127 : LeftRefID(other.LeftRefID)
|
|
128 , LeftPosition(other.LeftPosition)
|
|
129 , RightRefID(other.RightRefID)
|
|
130 , RightPosition(other.RightPosition)
|
|
131 { }
|
|
132
|
|
133 // member functions
|
|
134 void clear(void) { LeftRefID = -1; LeftPosition = -1; RightRefID = -1; RightPosition = -1; }
|
|
135 bool isLeftBoundSpecified(void) const { return ( LeftRefID >= 0 && LeftPosition >= 0 ); }
|
|
136 bool isNull(void) const { return ( !isLeftBoundSpecified() && !isRightBoundSpecified() ); }
|
|
137 bool isRightBoundSpecified(void) const { return ( RightRefID >= 0 && RightPosition >= 0 ); }
|
|
138 };
|
|
139
|
|
140 // ----------------------------------------------------------------
|
|
141 // ----------------------------------------------------------------
|
|
142 // General utilities
|
|
143
|
|
// returns true if system is big endian
//
// Stores the 16-bit value 1 and inspects the byte at its lowest address:
// that byte is zero exactly when the most significant byte is stored first.
inline bool SystemIsBigEndian(void) {
    const uint16_t probe = 0x0001;
    const char* firstByte = reinterpret_cast<const char*>(&probe);
    return ( firstByte[0] == 0 );
}
|
|
149
|
|
// swaps endianness of 16-bit value 'in place'
//
// The byte exchange is done on an unsigned copy of the bits. Shifting the
// signed value directly is wrong for negative inputs: the right shift
// typically sign-extends, filling the result's high byte with 1-bits
// (0x8001 became 0xFF80 instead of 0x0180), and left-shifting a negative
// value is undefined before C++20.
inline void SwapEndian_16(int16_t& x) {
    const uint16_t bits = static_cast<uint16_t>(x);
    const uint16_t swapped = static_cast<uint16_t>( (bits >> 8) | (bits << 8) );
    x = static_cast<int16_t>(swapped);
}
|
|
154
|
|
// swaps endianness of unsigned 16-bit value 'in place'
inline void SwapEndian_16(uint16_t& x) {
    const uint16_t highToLow = static_cast<uint16_t>(x >> 8); // old high byte, now in low position
    const uint16_t lowToHigh = static_cast<uint16_t>(x << 8); // old low byte, now in high position
    x = static_cast<uint16_t>(lowToHigh | highToLow);
}
|
|
158
|
|
// swaps endianness of 32-bit value 'in-place'
//
// The byte exchange is done on an unsigned copy of the bits. With the signed
// value, the unmasked (x >> 24) typically sign-extends for negative inputs
// and sets the three upper bytes of the result (0x80000001 became 0xFFFFFF80
// instead of 0x01000080); left-shifting a negative value is also undefined
// before C++20.
inline void SwapEndian_32(int32_t& x) {
    const uint32_t bits = static_cast<uint32_t>(x);
    const uint32_t swapped = ( (bits >> 24) |
                              ((bits <<  8) & 0x00FF0000) |
                              ((bits >>  8) & 0x0000FF00) |
                               (bits << 24)
                             );
    x = static_cast<int32_t>(swapped);
}
|
|
167
|
|
// swaps endianness of unsigned 32-bit value 'in-place'
//
// Done in two passes: exchange the 16-bit halves, then exchange the two
// bytes inside each half.
inline void SwapEndian_32(uint32_t& x) {
    const uint32_t halvesSwapped = (x >> 16) | (x << 16);
    x = ((halvesSwapped & 0xFF00FF00u) >> 8) |
        ((halvesSwapped & 0x00FF00FFu) << 8);
}
|
|
175
|
|
// swaps endianness of 64-bit value 'in-place'
//
// The byte exchange is done on an unsigned copy of the bits. With the signed
// value, the unmasked (x >> 56) typically sign-extends for negative inputs
// and sets the seven upper bytes of the result; left-shifting a negative
// value is also undefined before C++20.
inline void SwapEndian_64(int64_t& x) {
    const uint64_t bits = static_cast<uint64_t>(x);
    const uint64_t swapped = ( (bits >> 56) |
                              ((bits << 40) & 0x00FF000000000000ull) |
                              ((bits << 24) & 0x0000FF0000000000ull) |
                              ((bits <<  8) & 0x000000FF00000000ull) |
                              ((bits >>  8) & 0x00000000FF000000ull) |
                              ((bits >> 24) & 0x0000000000FF0000ull) |
                              ((bits >> 40) & 0x000000000000FF00ull) |
                               (bits << 56)
                             );
    x = static_cast<int64_t>(swapped);
}
|
|
188
|
|
// swaps endianness of unsigned 64-bit value 'in-place'
//
// Peels bytes off the low end of x one at a time and pushes each onto the
// low end of the accumulator, so the first byte removed ends up highest.
inline void SwapEndian_64(uint64_t& x) {
    uint64_t remaining = x;
    uint64_t reversed  = 0;
    for ( int byteIndex = 0; byteIndex < 8; ++byteIndex ) {
        reversed = (reversed << 8) | (remaining & 0xFF);
        remaining >>= 8;
    }
    x = reversed;
}
|
|
200
|
|
// swaps endianness of 'next 2 bytes' in a char buffer (in-place)
//
// 'data' must point to at least 2 writable bytes. The bytes are exchanged
// directly instead of viewing the buffer through a uint16_t reference: the
// buffer is not guaranteed to be suitably aligned for that type, and
// accessing char storage through it breaks the aliasing rules.
inline void SwapEndian_16p(char* data) {
    const char first = data[0];
    data[0] = data[1];
    data[1] = first;
}
|
|
206
|
|
// swaps endianness of 'next 4 bytes' in a char buffer (in-place)
//
// 'data' must point to at least 4 writable bytes. The bytes are reversed
// directly instead of viewing the buffer through a uint32_t reference: the
// buffer is not guaranteed to be suitably aligned for that type, and
// accessing char storage through it breaks the aliasing rules.
inline void SwapEndian_32p(char* data) {
    // walk inwards from both ends, exchanging the outer pair each step
    for ( int lo = 0, hi = 3; lo < hi; ++lo, --hi ) {
        const char tmp = data[lo];
        data[lo] = data[hi];
        data[hi] = tmp;
    }
}
|
|
212
|
|
// swaps endianness of 'next 8 bytes' in a char buffer (in-place)
//
// 'data' must point to at least 8 writable bytes. The bytes are reversed
// directly instead of viewing the buffer through a uint64_t reference: the
// buffer is not guaranteed to be suitably aligned for that type, and
// accessing char storage through it breaks the aliasing rules.
inline void SwapEndian_64p(char* data) {
    // walk inwards from both ends, exchanging the outer pair each step
    for ( int lo = 0, hi = 7; lo < hi; ++lo, --hi ) {
        const char tmp = data[lo];
        data[lo] = data[hi];
        data[hi] = tmp;
    }
}
|
|
218
|
|
// returns whether file exists (can be opened OK)
//
// "Exists" here means an input stream could be opened on 'filename'; the
// stream is closed again when the function returns.
inline bool FileExists(const std::string& filename) {
    std::ifstream probe;
    probe.open(filename.c_str(), std::ifstream::in);
    const bool openFailed = probe.fail();
    return !openFailed;
}
|
|
224
|
|
225 } // namespace BamTools
|
|
226
|
|
227 #endif // BAMAUX_H
|