Mercurial > repos > zzhou > spp_phantompeak
diff spp/src/BGZF.h @ 6:ce08b0efa3fd draft
Uploaded
author | zzhou |
---|---|
date | Tue, 27 Nov 2012 16:11:40 -0500 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/spp/src/BGZF.h Tue Nov 27 16:11:40 2012 -0500 @@ -0,0 +1,322 @@ +// *************************************************************************** +// BGZF.h (c) 2009 Derek Barnett, Michael Str�mberg +// Marth Lab, Department of Biology, Boston College +// All rights reserved. +// --------------------------------------------------------------------------- +// Last modified: 19 November 2010 (DB) +// --------------------------------------------------------------------------- +// BGZF routines were adapted from the bgzf.c code developed at the Broad +// Institute. +// --------------------------------------------------------------------------- +// Provides the basic functionality for reading & writing BGZF files +// *************************************************************************** + +#ifndef BGZF_H +#define BGZF_H + +#include <api_global.h> +#include <zlib.h> + +#include <cstdio> +#include <cstdlib> +#include <cstring> +#include <string> + +// Platform-specific large-file support +#ifndef BAMTOOLS_LFS +#define BAMTOOLS_LFS + #ifdef WIN32 + #define ftell64(a) _ftelli64(a) + #define fseek64(a,b,c) _fseeki64(a,b,c) + #else + #define ftell64(a) ftello(a) + #define fseek64(a,b,c) fseeko(a,b,c) + #endif +#endif // BAMTOOLS_LFS + +// Platform-specific type definitions +#ifndef BAMTOOLS_TYPES +#define BAMTOOLS_TYPES + #ifdef _MSC_VER + typedef char int8_t; + typedef unsigned char uint8_t; + typedef short int16_t; + typedef unsigned short uint16_t; + typedef int int32_t; + typedef unsigned int uint32_t; + typedef long long int64_t; + typedef unsigned long long uint64_t; + #else + #include <stdint.h> + #endif +#endif // BAMTOOLS_TYPES + +namespace BamTools { + +// zlib constants +const int GZIP_ID1 = 31; +const int GZIP_ID2 = 139; +const int CM_DEFLATE = 8; +const int FLG_FEXTRA = 4; +const int OS_UNKNOWN = 255; +const int BGZF_XLEN = 6; +const int BGZF_ID1 = 66; +const int BGZF_ID2 = 67; +const int BGZF_LEN = 2; +const int GZIP_WINDOW_BITS = -15; +const int Z_DEFAULT_MEM_LEVEL = 8; + +// BZGF constants +const int BLOCK_HEADER_LENGTH = 18; +const int BLOCK_FOOTER_LENGTH = 8; +const int MAX_BLOCK_SIZE = 65536; +const int DEFAULT_BLOCK_SIZE = 65536; + +struct API_EXPORT BgzfData { + + // data members + public: + unsigned int UncompressedBlockSize; + unsigned int CompressedBlockSize; + unsigned int BlockLength; + unsigned int BlockOffset; + uint64_t BlockAddress; + bool IsOpen; + bool IsWriteOnly; + bool IsWriteUncompressed; + FILE* Stream; + char* UncompressedBlock; + char* CompressedBlock; + + // constructor & destructor + public: + BgzfData(void); + ~BgzfData(void); + + // main interface methods + public: + // closes BGZF file + void Close(void); + // opens the BGZF file (mode is either "rb" for reading, or "wb" for writing) + bool Open(const std::string& filename, const char* mode, bool isWriteUncompressed = false); + // reads BGZF data into a byte buffer + int Read(char* data, const unsigned int dataLength); + // seek to position in BGZF file + bool Seek(int64_t position); + // get file position in BGZF file + int64_t Tell(void); + // writes the supplied data into the BGZF buffer + unsigned int Write(const char* data, const unsigned int dataLen); + + // internal methods + private: + // compresses the current block + int DeflateBlock(void); + // flushes the data in the BGZF block + void FlushBlock(void); + // de-compresses the current block + int InflateBlock(const int& blockLength); + // reads a BGZF block + bool ReadBlock(void); + + // static 'utility' methods + public: + // checks BGZF block header + static inline bool CheckBlockHeader(char* header); + // packs an unsigned integer into the specified buffer + static inline void PackUnsignedInt(char* buffer, unsigned int value); + // packs an unsigned short into the specified buffer + static inline void PackUnsignedShort(char* buffer, unsigned short value); + // unpacks a buffer into a double + static inline double UnpackDouble(char* buffer); + static inline double UnpackDouble(const char* buffer); + // unpacks a buffer into a float + static inline float UnpackFloat(char* buffer); + static inline float UnpackFloat(const char* buffer); + // unpacks a buffer into a signed int + static inline signed int UnpackSignedInt(char* buffer); + static inline signed int UnpackSignedInt(const char* buffer); + // unpacks a buffer into a signed short + static inline signed short UnpackSignedShort(char* buffer); + static inline signed short UnpackSignedShort(const char* buffer); + // unpacks a buffer into an unsigned int + static inline unsigned int UnpackUnsignedInt(char* buffer); + static inline unsigned int UnpackUnsignedInt(const char* buffer); + // unpacks a buffer into an unsigned short + static inline unsigned short UnpackUnsignedShort(char* buffer); + static inline unsigned short UnpackUnsignedShort(const char* buffer); +}; + +// ------------------------------------------------------------- +// static 'utility' method implementations + +// checks BGZF block header +inline +bool BgzfData::CheckBlockHeader(char* header) { + return (header[0] == GZIP_ID1 && + header[1] == (char)GZIP_ID2 && + header[2] == Z_DEFLATED && + (header[3] & FLG_FEXTRA) != 0 && + BgzfData::UnpackUnsignedShort(&header[10]) == BGZF_XLEN && + header[12] == BGZF_ID1 && + header[13] == BGZF_ID2 && + BgzfData::UnpackUnsignedShort(&header[14]) == BGZF_LEN ); +} + +// 'packs' an unsigned integer into the specified buffer +inline +void BgzfData::PackUnsignedInt(char* buffer, unsigned int value) { + buffer[0] = (char)value; + buffer[1] = (char)(value >> 8); + buffer[2] = (char)(value >> 16); + buffer[3] = (char)(value >> 24); +} + +// 'packs' an unsigned short into the specified buffer +inline +void BgzfData::PackUnsignedShort(char* buffer, unsigned short value) { + buffer[0] = (char)value; + buffer[1] = (char)(value >> 8); +} + +// 'unpacks' a buffer into a double (includes both non-const & const char* flavors) +inline +double BgzfData::UnpackDouble(char* buffer) { + union { double value; unsigned char valueBuffer[sizeof(double)]; } un; + un.value = 0; + un.valueBuffer[0] = buffer[0]; + un.valueBuffer[1] = buffer[1]; + un.valueBuffer[2] = buffer[2]; + un.valueBuffer[3] = buffer[3]; + un.valueBuffer[4] = buffer[4]; + un.valueBuffer[5] = buffer[5]; + un.valueBuffer[6] = buffer[6]; + un.valueBuffer[7] = buffer[7]; + return un.value; +} + +inline +double BgzfData::UnpackDouble(const char* buffer) { + union { double value; unsigned char valueBuffer[sizeof(double)]; } un; + un.value = 0; + un.valueBuffer[0] = buffer[0]; + un.valueBuffer[1] = buffer[1]; + un.valueBuffer[2] = buffer[2]; + un.valueBuffer[3] = buffer[3]; + un.valueBuffer[4] = buffer[4]; + un.valueBuffer[5] = buffer[5]; + un.valueBuffer[6] = buffer[6]; + un.valueBuffer[7] = buffer[7]; + return un.value; +} + +// 'unpacks' a buffer into a float (includes both non-const & const char* flavors) +inline +float BgzfData::UnpackFloat(char* buffer) { + union { float value; unsigned char valueBuffer[sizeof(float)]; } un; + un.value = 0; + un.valueBuffer[0] = buffer[0]; + un.valueBuffer[1] = buffer[1]; + un.valueBuffer[2] = buffer[2]; + un.valueBuffer[3] = buffer[3]; + return un.value; +} + +inline +float BgzfData::UnpackFloat(const char* buffer) { + union { float value; unsigned char valueBuffer[sizeof(float)]; } un; + un.value = 0; + un.valueBuffer[0] = buffer[0]; + un.valueBuffer[1] = buffer[1]; + un.valueBuffer[2] = buffer[2]; + un.valueBuffer[3] = buffer[3]; + return un.value; +} + +// 'unpacks' a buffer into a signed int (includes both non-const & const char* flavors) +inline +signed int BgzfData::UnpackSignedInt(char* buffer) { + union { signed int value; unsigned char valueBuffer[sizeof(signed int)]; } un; + un.value = 0; + un.valueBuffer[0] = buffer[0]; + un.valueBuffer[1] = buffer[1]; + un.valueBuffer[2] = buffer[2]; + un.valueBuffer[3] = buffer[3]; + return un.value; +} + +inline +signed int BgzfData::UnpackSignedInt(const char* buffer) { + union { signed int value; unsigned char valueBuffer[sizeof(signed int)]; } un; + un.value = 0; + un.valueBuffer[0] = buffer[0]; + un.valueBuffer[1] = buffer[1]; + un.valueBuffer[2] = buffer[2]; + un.valueBuffer[3] = buffer[3]; + return un.value; +} + +// 'unpacks' a buffer into a signed short (includes both non-const & const char* flavors) +inline +signed short BgzfData::UnpackSignedShort(char* buffer) { + union { signed short value; unsigned char valueBuffer[sizeof(signed short)]; } un; + un.value = 0; + un.valueBuffer[0] = buffer[0]; + un.valueBuffer[1] = buffer[1]; + return un.value; +} + +inline +signed short BgzfData::UnpackSignedShort(const char* buffer) { + union { signed short value; unsigned char valueBuffer[sizeof(signed short)]; } un; + un.value = 0; + un.valueBuffer[0] = buffer[0]; + un.valueBuffer[1] = buffer[1]; + return un.value; +} + +// 'unpacks' a buffer into an unsigned int (includes both non-const & const char* flavors) +inline +unsigned int BgzfData::UnpackUnsignedInt(char* buffer) { + union { unsigned int value; unsigned char valueBuffer[sizeof(unsigned int)]; } un; + un.value = 0; + un.valueBuffer[0] = buffer[0]; + un.valueBuffer[1] = buffer[1]; + un.valueBuffer[2] = buffer[2]; + un.valueBuffer[3] = buffer[3]; + return un.value; +} + +inline +unsigned int BgzfData::UnpackUnsignedInt(const char* buffer) { + union { unsigned int value; unsigned char valueBuffer[sizeof(unsigned int)]; } un; + un.value = 0; + un.valueBuffer[0] = buffer[0]; + un.valueBuffer[1] = buffer[1]; + un.valueBuffer[2] = buffer[2]; + un.valueBuffer[3] = buffer[3]; + return un.value; +} + +// 'unpacks' a buffer into an unsigned short (includes both non-const & const char* flavors) +inline +unsigned short BgzfData::UnpackUnsignedShort(char* buffer) { + union { unsigned short value; unsigned char valueBuffer[sizeof(unsigned short)]; } un; + un.value = 0; + un.valueBuffer[0] = buffer[0]; + un.valueBuffer[1] = buffer[1]; + return un.value; +} + +inline +unsigned short BgzfData::UnpackUnsignedShort(const char* buffer) { + union { unsigned short value; unsigned char valueBuffer[sizeof(unsigned short)]; } un; + un.value = 0; + un.valueBuffer[0] = buffer[0]; + un.valueBuffer[1] = buffer[1]; + return un.value; +} + +} // namespace BamTools + +#endif // BGZF_H