Mercurial > repos > zzhou > spp_phantompeak
diff spp/src/BamStandardIndex_p.h @ 6:ce08b0efa3fd draft
Uploaded
author | zzhou |
---|---|
date | Tue, 27 Nov 2012 16:11:40 -0500 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/spp/src/BamStandardIndex_p.h Tue Nov 27 16:11:40 2012 -0500 @@ -0,0 +1,213 @@ +// *************************************************************************** +// BamStandardIndex.h (c) 2010 Derek Barnett +// Marth Lab, Department of Biology, Boston College +// All rights reserved. +// --------------------------------------------------------------------------- +// Last modified: 19 November 2010 (DB) +// --------------------------------------------------------------------------- +// Provides index operations for the standardized BAM index format (".bai") +// *************************************************************************** + +#ifndef BAM_STANDARD_INDEX_FORMAT_H +#define BAM_STANDARD_INDEX_FORMAT_H + +// ------------- +// W A R N I N G +// ------------- +// +// This file is not part of the BamTools API. It exists purely as an +// implementation detail. This header file may change from version to +// version without notice, or even be removed. +// +// We mean it. + +#include <BamAux.h> +#include <BamIndex.h> +#include <map> +#include <string> +#include <vector> + +namespace BamTools { + +class BamAlignment; + +namespace Internal { + +// BAM index constants +const int MAX_BIN = 37450; // =(8^6-1)/7+1 +const int BAM_LIDX_SHIFT = 14; + +// -------------------------------------------------- +// BamStandardIndex data structures & typedefs +struct Chunk { + + // data members + uint64_t Start; + uint64_t Stop; + + // constructor + Chunk(const uint64_t& start = 0, + const uint64_t& stop = 0) + : Start(start) + , Stop(stop) + { } +}; + +inline +bool ChunkLessThan(const Chunk& lhs, const Chunk& rhs) { + return lhs.Start < rhs.Start; +} + +typedef std::vector<Chunk> ChunkVector; +typedef std::map<uint32_t, ChunkVector> BamBinMap; +typedef std::vector<uint64_t> LinearOffsetVector; + +struct ReferenceIndex { + + // data members + BamBinMap Bins; + LinearOffsetVector Offsets; + bool HasAlignments; + + // constructor + ReferenceIndex(const BamBinMap& binMap = BamBinMap(), + const LinearOffsetVector& offsets = LinearOffsetVector(), + const bool hasAlignments = false) + : Bins(binMap) + , Offsets(offsets) + , HasAlignments(hasAlignments) + { } +}; + +typedef std::map<int32_t, ReferenceIndex> BamStandardIndexData; + +class BamStandardIndex : public BamIndex { + + // ctor & dtor + public: + BamStandardIndex(BamTools::BgzfData* bgzf, BamTools::BamReader* reader); + ~BamStandardIndex(void); + + // interface (implements BamIndex virtual methods) + public: + // creates index data (in-memory) from current reader data + bool Build(void); + // returns supported file extension + const std::string Extension(void) const { return std::string(".bai"); } + // returns whether reference has alignments or no + bool HasAlignments(const int& referenceID) const; + // attempts to use index to jump to region; returns success/fail + // a "successful" jump indicates no error, but not whether this region has data + // * thus, the method sets a flag to indicate whether there are alignments + // available after the jump position + bool Jump(const BamTools::BamRegion& region, bool* hasAlignmentsInRegion); + public: + // clear all current index offset data in memory + void ClearAllData(void); + // return file position after header metadata + const off_t DataBeginOffset(void) const; + // return true if all index data is cached + bool HasFullDataCache(void) const; + // clears index data from all references except the first + void KeepOnlyFirstReferenceOffsets(void); + // load index data for all references, return true if loaded OK + // @saveData - save data in memory if true, just read & discard if false + bool LoadAllReferences(bool saveData = true); + // load first reference from file, return true if loaded OK + // @saveData - save data in memory if true, just read & discard if false + bool LoadFirstReference(bool saveData = true); + // load header data from index file, return true if loaded OK + bool LoadHeader(void); + // position file pointer to first reference begin, return true if skipped OK + bool SkipToFirstReference(void); + // write index reference data + bool WriteAllReferences(void); + // write index header data + bool WriteHeader(void); + + // 'internal' methods + public: + + // ----------------------- + // index file operations + + // check index file magic number, return true if OK + bool CheckMagicNumber(void); + // check index file version, return true if OK + bool CheckVersion(void); + // load a single index bin entry from file, return true if loaded OK + // @saveData - save data in memory if true, just read & discard if false + bool LoadBin(ReferenceIndex& refEntry, bool saveData = true); + bool LoadBins(ReferenceIndex& refEntry, bool saveData = true); + // load a single index bin entry from file, return true if loaded OK + // @saveData - save data in memory if true, just read & discard if false + bool LoadChunk(ChunkVector& chunks, bool saveData = true); + bool LoadChunks(ChunkVector& chunks, bool saveData = true); + // load a single index linear offset entry from file, return true if loaded OK + // @saveData - save data in memory if true, just read & discard if false + bool LoadLinearOffsets(ReferenceIndex& refEntry, bool saveData = true); + // load a single reference from file, return true if loaded OK + // @saveData - save data in memory if true, just read & discard if false + bool LoadReference(const int& refId, bool saveData = true); + // loads number of references, return true if loaded OK + bool LoadReferenceCount(int& numReferences); + // position file pointer to desired reference begin, return true if skipped OK + bool SkipToReference(const int& refId); + // write index data for bin to new index file + bool WriteBin(const uint32_t& binId, const ChunkVector& chunks); + // write index data for bins to new index file + bool WriteBins(const BamBinMap& bins); + // write index data for chunk entry to new index file + bool WriteChunk(const Chunk& chunk); + // write index data for chunk entry to new index file + bool WriteChunks(const ChunkVector& chunks); + // write index data for linear offsets entry to new index file + bool WriteLinearOffsets(const LinearOffsetVector& offsets); + // write index data single reference to new index file + bool WriteReference(const ReferenceIndex& refEntry); + + // ----------------------- + // index data operations + + // calculate bins that overlap region + int BinsFromRegion(const BamRegion& region, + const bool isRightBoundSpecified, + uint16_t bins[MAX_BIN]); + // clear all index offset data for desired reference + void ClearReferenceOffsets(const int& refId); + // calculates offset(s) for a given region + bool GetOffsets(const BamRegion& region, + const bool isRightBoundSpecified, + std::vector<int64_t>& offsets, + bool* hasAlignmentsInRegion); + // returns true if index cache has data for desired reference + bool IsDataLoaded(const int& refId) const; + // clears index data from all references except the one specified + void KeepOnlyReferenceOffsets(const int& refId); + // simplifies index by merging 'chunks' + void MergeChunks(void); + // saves BAM bin entry for index + void SaveBinEntry(BamBinMap& binMap, + const uint32_t& saveBin, + const uint64_t& saveOffset, + const uint64_t& lastOffset); + // saves linear offset entry for index + void SaveLinearOffset(LinearOffsetVector& offsets, + const BamAlignment& bAlignment, + const uint64_t& lastOffset); + // initializes index data structure to hold @count references + void SetReferenceCount(const int& count); + + // data members + private: + + BamStandardIndexData m_indexData; + off_t m_dataBeginOffset; + bool m_hasFullDataCache; + bool m_isBigEndian; +}; + +} // namespace Internal +} // namespace BamTools + +#endif // BAM_STANDARD_INDEX_FORMAT_H