Mercurial > repos > zzhou > spp_phantompeak
view spp/src/BamStandardIndex_p.h @ 6:ce08b0efa3fd draft
Uploaded
author | zzhou |
---|---|
date | Tue, 27 Nov 2012 16:11:40 -0500 |
parents | |
children |
line wrap: on
line source
// *************************************************************************** // BamStandardIndex.h (c) 2010 Derek Barnett // Marth Lab, Department of Biology, Boston College // All rights reserved. // --------------------------------------------------------------------------- // Last modified: 19 November 2010 (DB) // --------------------------------------------------------------------------- // Provides index operations for the standardized BAM index format (".bai") // *************************************************************************** #ifndef BAM_STANDARD_INDEX_FORMAT_H #define BAM_STANDARD_INDEX_FORMAT_H // ------------- // W A R N I N G // ------------- // // This file is not part of the BamTools API. It exists purely as an // implementation detail. This header file may change from version to // version without notice, or even be removed. // // We mean it. #include <BamAux.h> #include <BamIndex.h> #include <map> #include <string> #include <vector> namespace BamTools { class BamAlignment; namespace Internal { // BAM index constants const int MAX_BIN = 37450; // =(8^6-1)/7+1 const int BAM_LIDX_SHIFT = 14; // -------------------------------------------------- // BamStandardIndex data structures & typedefs struct Chunk { // data members uint64_t Start; uint64_t Stop; // constructor Chunk(const uint64_t& start = 0, const uint64_t& stop = 0) : Start(start) , Stop(stop) { } }; inline bool ChunkLessThan(const Chunk& lhs, const Chunk& rhs) { return lhs.Start < rhs.Start; } typedef std::vector<Chunk> ChunkVector; typedef std::map<uint32_t, ChunkVector> BamBinMap; typedef std::vector<uint64_t> LinearOffsetVector; struct ReferenceIndex { // data members BamBinMap Bins; LinearOffsetVector Offsets; bool HasAlignments; // constructor ReferenceIndex(const BamBinMap& binMap = BamBinMap(), const LinearOffsetVector& offsets = LinearOffsetVector(), const bool hasAlignments = false) : Bins(binMap) , Offsets(offsets) , HasAlignments(hasAlignments) { } }; typedef std::map<int32_t, ReferenceIndex> BamStandardIndexData; class BamStandardIndex : public BamIndex { // ctor & dtor public: BamStandardIndex(BamTools::BgzfData* bgzf, BamTools::BamReader* reader); ~BamStandardIndex(void); // interface (implements BamIndex virtual methods) public: // creates index data (in-memory) from current reader data bool Build(void); // returns supported file extension const std::string Extension(void) const { return std::string(".bai"); } // returns whether reference has alignments or no bool HasAlignments(const int& referenceID) const; // attempts to use index to jump to region; returns success/fail // a "successful" jump indicates no error, but not whether this region has data // * thus, the method sets a flag to indicate whether there are alignments // available after the jump position bool Jump(const BamTools::BamRegion& region, bool* hasAlignmentsInRegion); public: // clear all current index offset data in memory void ClearAllData(void); // return file position after header metadata const off_t DataBeginOffset(void) const; // return true if all index data is cached bool HasFullDataCache(void) const; // clears index data from all references except the first void KeepOnlyFirstReferenceOffsets(void); // load index data for all references, return true if loaded OK // @saveData - save data in memory if true, just read & discard if false bool LoadAllReferences(bool saveData = true); // load first reference from file, return true if loaded OK // @saveData - save data in memory if true, just read & discard if false bool LoadFirstReference(bool saveData = true); // load header data from index file, return true if loaded OK bool LoadHeader(void); // position file pointer to first reference begin, return true if skipped OK bool SkipToFirstReference(void); // write index reference data bool WriteAllReferences(void); // write index header data bool WriteHeader(void); // 'internal' methods public: // ----------------------- // index file operations // check index file magic number, return true if OK bool CheckMagicNumber(void); // check index file version, return true if OK bool CheckVersion(void); // load a single index bin entry from file, return true if loaded OK // @saveData - save data in memory if true, just read & discard if false bool LoadBin(ReferenceIndex& refEntry, bool saveData = true); bool LoadBins(ReferenceIndex& refEntry, bool saveData = true); // load a single index bin entry from file, return true if loaded OK // @saveData - save data in memory if true, just read & discard if false bool LoadChunk(ChunkVector& chunks, bool saveData = true); bool LoadChunks(ChunkVector& chunks, bool saveData = true); // load a single index linear offset entry from file, return true if loaded OK // @saveData - save data in memory if true, just read & discard if false bool LoadLinearOffsets(ReferenceIndex& refEntry, bool saveData = true); // load a single reference from file, return true if loaded OK // @saveData - save data in memory if true, just read & discard if false bool LoadReference(const int& refId, bool saveData = true); // loads number of references, return true if loaded OK bool LoadReferenceCount(int& numReferences); // position file pointer to desired reference begin, return true if skipped OK bool SkipToReference(const int& refId); // write index data for bin to new index file bool WriteBin(const uint32_t& binId, const ChunkVector& chunks); // write index data for bins to new index file bool WriteBins(const BamBinMap& bins); // write index data for chunk entry to new index file bool WriteChunk(const Chunk& chunk); // write index data for chunk entry to new index file bool WriteChunks(const ChunkVector& chunks); // write index data for linear offsets entry to new index file bool WriteLinearOffsets(const LinearOffsetVector& offsets); // write index data single reference to new index file bool WriteReference(const ReferenceIndex& refEntry); // ----------------------- // index data operations // calculate bins that overlap region int BinsFromRegion(const BamRegion& region, const bool isRightBoundSpecified, uint16_t bins[MAX_BIN]); // clear all index offset data for desired reference void ClearReferenceOffsets(const int& refId); // calculates offset(s) for a given region bool GetOffsets(const BamRegion& region, const bool isRightBoundSpecified, std::vector<int64_t>& offsets, bool* hasAlignmentsInRegion); // returns true if index cache has data for desired reference bool IsDataLoaded(const int& refId) const; // clears index data from all references except the one specified void KeepOnlyReferenceOffsets(const int& refId); // simplifies index by merging 'chunks' void MergeChunks(void); // saves BAM bin entry for index void SaveBinEntry(BamBinMap& binMap, const uint32_t& saveBin, const uint64_t& saveOffset, const uint64_t& lastOffset); // saves linear offset entry for index void SaveLinearOffset(LinearOffsetVector& offsets, const BamAlignment& bAlignment, const uint64_t& lastOffset); // initializes index data structure to hold @count references void SetReferenceCount(const int& count); // data members private: BamStandardIndexData m_indexData; off_t m_dataBeginOffset; bool m_hasFullDataCache; bool m_isBigEndian; }; } // namespace Internal } // namespace BamTools #endif // BAM_STANDARD_INDEX_FORMAT_H