Mercurial > repos > zzhou > spp_phantompeak
diff spp/src/BamMultiReader.h @ 6:ce08b0efa3fd draft
Uploaded
author | zzhou |
---|---|
date | Tue, 27 Nov 2012 16:11:40 -0500 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/spp/src/BamMultiReader.h Tue Nov 27 16:11:40 2012 -0500 @@ -0,0 +1,136 @@ +// *************************************************************************** +// BamMultiReader.h (c) 2010 Erik Garrison, Derek Barnett +// Marth Lab, Department of Biology, Boston College +// All rights reserved. +// --------------------------------------------------------------------------- +// Last modified: 19 November 2010 (DB) +// --------------------------------------------------------------------------- +// Functionality for simultaneously reading multiple BAM files +// *************************************************************************** + +#ifndef BAMMULTIREADER_H +#define BAMMULTIREADER_H + +#include <api_global.h> +#include <BamReader.h> +#include <map> +#include <sstream> +#include <string> +#include <utility> + +namespace BamTools { + +// index mapping reference/position pairings to bamreaders and their alignments +typedef std::multimap<std::pair<int, int>, std::pair<BamReader*, BamAlignment*> > AlignmentIndex; + +class API_EXPORT BamMultiReader { + + // constructor / destructor + public: + BamMultiReader(void); + ~BamMultiReader(void); + + // public interface + public: + + // positioning + int CurrentRefID; + int CurrentLeft; + + // region under analysis, specified using SetRegion + BamRegion Region; + + // ---------------------- + // BAM file operations + // ---------------------- + + // close BAM files + void Close(void); + + // opens BAM files (and optional BAM index files, if provided) + // @openIndexes - triggers index opening, useful for suppressing + // error messages during merging of files in which we may not have + // indexes. + // @coreMode - setup our first alignments using GetNextAlignmentCore(); + // also useful for merging + // @preferStandardIndex - look for standard BAM index ".bai" first. If false, + // will look for BamTools index ".bti". + bool Open(const std::vector<std::string>& filenames, bool openIndexes = true, bool coreMode = false, bool preferStandardIndex = false); + + // returns whether underlying BAM readers ALL have an index loaded + // this is useful to indicate whether Jump() or SetRegion() are possible + bool IsIndexLoaded(void) const; + + // performs random-access jump to reference, position + bool Jump(int refID, int position = 0); + + // sets the target region + bool SetRegion(const BamRegion& region); + bool SetRegion(const int&, const int&, const int&, const int&); // convenience function to above + + // returns file pointers to beginning of alignments + bool Rewind(void); + + // ---------------------- + // access alignment data + // ---------------------- + // updates the reference id marker to match the lower limit of our readers + void UpdateReferenceID(void); + + // retrieves next available alignment (returns success/fail) from all files + bool GetNextAlignment(BamAlignment&); + // retrieves next available alignment (returns success/fail) from all files + // and populates the support data with information about the alignment + // *** BUT DOES NOT PARSE CHARACTER DATA FROM THE ALIGNMENT + bool GetNextAlignmentCore(BamAlignment&); + // ... should this be private? + bool HasOpenReaders(void); + + // ---------------------- + // access auxiliary data + // ---------------------- + + // returns unified SAM header text for all files + const std::string GetHeaderText(void) const; + // returns number of reference sequences + const int GetReferenceCount(void) const; + // returns vector of reference objects + const BamTools::RefVector GetReferenceData(void) const; + // returns reference id (used for BamMultiReader::Jump()) for the given reference name + const int GetReferenceID(const std::string& refName) const; + // validates that we have a congruent set of BAM files that are aligned against the same reference sequences + void ValidateReaders() const; + + // ---------------------- + // BAM index operations + // ---------------------- + + // creates index for BAM files which lack them, saves to files (default = bamFilename + ".bai") + bool CreateIndexes(bool useStandardIndex = true); + + // sets the index caching mode for the readers + void SetIndexCacheMode(const BamIndex::BamIndexCacheMode mode); + + //const int GetReferenceID(const string& refName) const; + + // utility + void PrintFilenames(void); + void DumpAlignmentIndex(void); + void UpdateAlignments(void); // updates our alignment cache + + // private implementation + private: + + // the set of readers and alignments which we operate on, maintained throughout the life of this class + std::vector<std::pair<BamReader*, BamAlignment*> > readers; + + // readers and alignments sorted by reference id and position, to keep track of the lowest (next) alignment + // when a reader reaches EOF, its entry is removed from this index + AlignmentIndex alignments; + + std::vector<std::string> fileNames; +}; + +} // namespace BamTools + +#endif // BAMMULTIREADER_H