diff spp/src/BamAux.h @ 6:ce08b0efa3fd draft

Uploaded
author zzhou
date Tue, 27 Nov 2012 16:11:40 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/spp/src/BamAux.h	Tue Nov 27 16:11:40 2012 -0500
@@ -0,0 +1,227 @@
+// ***************************************************************************
+// BamAux.h (c) 2009 Derek Barnett, Michael Str�mberg
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 19 November 2010 (DB)
+// ---------------------------------------------------------------------------
+// Provides the basic constants, data structures, utilities etc. 
+// used throughout the API for handling BAM files
+// ***************************************************************************
+
+#ifndef BAMAUX_H
+#define BAMAUX_H
+
+#include <api_global.h>
+
+#include <fstream> 
+#include <iostream>
+#include <string>
+#include <vector>
+
+// Platform-specific large-file support
+#ifndef BAMTOOLS_LFS
+#define BAMTOOLS_LFS
+    #ifdef WIN32
+        #define ftell64(a)     _ftelli64(a)
+        #define fseek64(a,b,c) _fseeki64(a,b,c)
+    #else
+        #define ftell64(a)     ftello(a)
+        #define fseek64(a,b,c) fseeko(a,b,c)
+    #endif
+#endif // BAMTOOLS_LFS
+
+// Platform-specific type definitions
+#ifndef BAMTOOLS_TYPES
+#define BAMTOOLS_TYPES
+    #ifdef _MSC_VER
+        typedef char                 int8_t;
+        typedef unsigned char       uint8_t;
+        typedef short               int16_t;
+        typedef unsigned short     uint16_t;
+        typedef int                 int32_t;
+        typedef unsigned int       uint32_t;
+        typedef long long           int64_t;
+        typedef unsigned long long uint64_t;
+    #else
+        #include <stdint.h>
+    #endif
+#endif // BAMTOOLS_TYPES
+
+namespace BamTools {
+
+// ----------------------------------------------------------------
+// ----------------------------------------------------------------
+// BAM constants
+
+const int BAM_CMATCH      = 0;
+const int BAM_CINS        = 1;
+const int BAM_CDEL        = 2;
+const int BAM_CREF_SKIP   = 3;
+const int BAM_CSOFT_CLIP  = 4;
+const int BAM_CHARD_CLIP  = 5;
+const int BAM_CPAD        = 6;
+const int BAM_CIGAR_SHIFT = 4;
+const int BAM_CIGAR_MASK  = ((1 << BAM_CIGAR_SHIFT) - 1);
+const int BAM_CORE_SIZE   = 32;
+const int BT_SIZEOF_INT   = 4;
+
+// ----------------------------------------------------------------
+// ----------------------------------------------------------------
+// Data structs & typedefs
+
+// CIGAR operation data structure
+struct API_EXPORT CigarOp {
+  
+    // data members
+    char     Type;   // Operation type (MIDNSHP)
+    uint32_t Length; // Operation length (number of bases)
+    
+    // constructor
+    CigarOp(const char type = '\0', 
+            const uint32_t length = 0) 
+        : Type(type)
+        , Length(length) 
+    { }
+};
+
+// Reference data entry
+struct API_EXPORT RefData {
+   
+    // data members
+    std::string RefName;          // Name of reference sequence
+    int32_t     RefLength;        // Length of reference sequence
+    bool        RefHasAlignments; // True if BAM file contains alignments mapped to reference sequence
+    
+    // constructor
+    RefData(const int32_t& length = 0, 
+            bool ok = false)
+        : RefLength(length)
+        , RefHasAlignments(ok)
+    { }
+};
+typedef std::vector<RefData> RefVector;
+
+// General (sequential) genome region
+struct API_EXPORT BamRegion {
+  
+    // data members
+    int LeftRefID;
+    int LeftPosition;
+    int RightRefID;
+    int RightPosition;
+    
+    // constructor
+    BamRegion(const int& leftID   = -1, 
+              const int& leftPos  = -1,
+              const int& rightID  = -1,
+              const int& rightPos = -1)
+        : LeftRefID(leftID)
+        , LeftPosition(leftPos)
+        , RightRefID(rightID)
+        , RightPosition(rightPos)
+    { }
+    
+    // copy constructor
+    BamRegion(const BamRegion& other)
+	: LeftRefID(other.LeftRefID)
+	, LeftPosition(other.LeftPosition)
+	, RightRefID(other.RightRefID)
+	, RightPosition(other.RightPosition)
+    { }
+    
+    // member functions
+    void clear(void) { LeftRefID = -1; LeftPosition = -1; RightRefID = -1; RightPosition = -1; }
+    bool isLeftBoundSpecified(void) const { return ( LeftRefID >= 0 && LeftPosition >= 0 ); }
+    bool isNull(void) const { return ( !isLeftBoundSpecified() && !isRightBoundSpecified() ); }
+    bool isRightBoundSpecified(void) const { return ( RightRefID >= 0 && RightPosition >= 0 ); }
+};
+
+// ----------------------------------------------------------------
+// ----------------------------------------------------------------
+// General utilities 
+
+// returns true if system is big endian
+inline bool SystemIsBigEndian(void) {
+   const uint16_t one = 0x0001;
+   return ((*(char*) &one) == 0 );
+}
+
+// swaps endianness of 16-bit value 'in place'
+inline void SwapEndian_16(int16_t& x) {
+    x = ((x >> 8) | (x << 8));
+}
+
+inline void SwapEndian_16(uint16_t& x) {
+    x = ((x >> 8) | (x << 8));
+}
+
+// swaps endianness of 32-bit value 'in-place'
+inline void SwapEndian_32(int32_t& x) {
+    x = ( (x >> 24) | 
+         ((x << 8) & 0x00FF0000) | 
+         ((x >> 8) & 0x0000FF00) | 
+          (x << 24)
+        );
+}
+
+inline void SwapEndian_32(uint32_t& x) {
+    x = ( (x >> 24) | 
+         ((x << 8) & 0x00FF0000) | 
+         ((x >> 8) & 0x0000FF00) | 
+          (x << 24)
+        );
+}
+
+// swaps endianness of 64-bit value 'in-place'
+inline void SwapEndian_64(int64_t& x) {
+    x = ( (x >> 56) | 
+         ((x << 40) & 0x00FF000000000000ll) |
+         ((x << 24) & 0x0000FF0000000000ll) |
+         ((x << 8)  & 0x000000FF00000000ll) |
+         ((x >> 8)  & 0x00000000FF000000ll) |
+         ((x >> 24) & 0x0000000000FF0000ll) |
+         ((x >> 40) & 0x000000000000FF00ll) |
+          (x << 56)
+        );
+}
+
+inline void SwapEndian_64(uint64_t& x) {
+    x = ( (x >> 56) | 
+         ((x << 40) & 0x00FF000000000000ll) |
+         ((x << 24) & 0x0000FF0000000000ll) |
+         ((x << 8)  & 0x000000FF00000000ll) |
+         ((x >> 8)  & 0x00000000FF000000ll) |
+         ((x >> 24) & 0x0000000000FF0000ll) |
+         ((x >> 40) & 0x000000000000FF00ll) |
+          (x << 56)
+        );
+}
+
+// swaps endianness of 'next 2 bytes' in a char buffer (in-place)
+inline void SwapEndian_16p(char* data) {
+    uint16_t& value = (uint16_t&)*data; 
+    SwapEndian_16(value);
+}
+
+// swaps endianness of 'next 4 bytes' in a char buffer (in-place)
+inline void SwapEndian_32p(char* data) {
+    uint32_t& value = (uint32_t&)*data; 
+    SwapEndian_32(value);
+}
+
+// swaps endianness of 'next 8 bytes' in a char buffer (in-place)
+inline void SwapEndian_64p(char* data) {
+    uint64_t& value = (uint64_t&)*data; 
+    SwapEndian_64(value);
+}
+
+// returns whether file exists (can be opened OK)
+inline bool FileExists(const std::string& filename) {
+    std::ifstream f(filename.c_str(), std::ifstream::in);
+    return !f.fail();
+}
+
+} // namespace BamTools
+
+#endif // BAMAUX_H