diff spp/src/BamIndex.h @ 15:e689b83b0257 draft

Uploaded
author zzhou
date Tue, 27 Nov 2012 16:15:21 -0500
parents ce08b0efa3fd
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/spp/src/BamIndex.h	Tue Nov 27 16:15:21 2012 -0500
@@ -0,0 +1,145 @@
+// ***************************************************************************
+// BamIndex.h (c) 2009 Derek Barnett
+// Marth Lab, Department of Biology, Boston College
+// All rights reserved.
+// ---------------------------------------------------------------------------
+// Last modified: 19 November 2010 (DB)
+// ---------------------------------------------------------------------------
+// Provides basic BAM index interface
+// ***************************************************************************
+
+#ifndef BAM_INDEX_H
+#define BAM_INDEX_H
+
+#include <api_global.h>
+#include <BamAux.h>
+#include <iostream>
+#include <string>
+#include <vector>
+
+namespace BamTools {
+
+class BamReader;  // forward declaration; this header only uses BamReader through pointers
+class BgzfData;   // forward declaration; this header only uses BgzfData through pointers
+
+namespace Internal {
+  class BamStandardIndex;  // forward declaration; named as a friend of BamIndex below
+  class BamToolsIndex;     // forward declaration; named as a friend of BamIndex below
+} // namespace Internal
+
+// --------------------------------------------------
+// BamIndex: abstract base class for BAM index types (it declares pure virtual methods, so only derived classes can be instantiated)
+class API_EXPORT BamIndex {
+
+    // specifies index-caching behavior
+    //
+    // @FullIndexCaching - store entire index file contents in memory
+    // @LimitedIndexCaching - store only index data for current reference
+    //   being processed
+    // @NoIndexCaching - do not store any index data.  Load as needed to
+    //   calculate jump offset
+    public: enum BamIndexCacheMode { FullIndexCaching = 0
+                                   , LimitedIndexCaching
+                                   , NoIndexCaching
+                                   };
+  
+    // constructor & destructor (the destructor is virtual, so deleting through a BamIndex* is supported)
+    public:
+        BamIndex(BamTools::BgzfData* bgzf, BamTools::BamReader* reader);
+        virtual ~BamIndex(void);
+        
+    // index interface
+    public:
+        // creates index data (in-memory) from current reader data
+        virtual bool Build(void) =0;
+        // returns supported file extension
+        virtual const std::string Extension(void) const =0;
+        // returns whether the reference identified by referenceID has alignments or not
+        virtual bool HasAlignments(const int& referenceID) const =0;
+        // attempts to use index to jump to region; returns success/fail
+        // a "successful" jump indicates no error, but not whether this region has data
+        //   * thus, the method sets the flag *hasAlignmentsInRegion to indicate whether
+        //     there are alignments available after the jump position
+        virtual bool Jump(const BamTools::BamRegion& region, bool* hasAlignmentsInRegion) =0;
+        // loads existing index data from the named file into memory
+        virtual bool Load(const std::string& filename);
+        // changes the index caching behavior (see BamIndexCacheMode)
+        virtual void SetCacheMode(const BamIndexCacheMode mode);
+        // writes in-memory index data out to file
+        // N.B. - bamFilename is the original BAM filename; the method will modify it to use the applicable extension
+        virtual bool Write(const std::string& bamFilename);
+        
+    // derived classes MUST provide implementations of these (all are pure virtual)
+    protected:
+        // clear all current index offset data in memory
+        virtual void ClearAllData(void) =0;
+        // return file position after header metadata
+        // NOTE(review): off_t is not declared by a header included directly here; presumably it arrives via BamAux.h or api_global.h - confirm
+        virtual const off_t DataBeginOffset(void) const =0;
+        // return true if all index data is cached
+        virtual bool HasFullDataCache(void) const =0;
+        // clears index data from all references except the first
+        virtual void KeepOnlyFirstReferenceOffsets(void) =0;
+        // load index data for all references, return true if loaded OK
+        // @saveData - save data in memory if true, just read & discard if false
+        virtual bool LoadAllReferences(bool saveData = true) =0;
+        // load first reference from file, return true if loaded OK
+        // @saveData - save data in memory if true, just read & discard if false
+        virtual bool LoadFirstReference(bool saveData = true) =0;
+        // load header data from index file, return true if loaded OK
+        virtual bool LoadHeader(void) =0;
+        // position file pointer to first reference begin, return true if skipped OK
+        virtual bool SkipToFirstReference(void) =0;
+        // write index reference data
+        virtual bool WriteAllReferences(void) =0;
+        // write index header data
+        virtual bool WriteHeader(void) =0;
+
+    // internal methods (non-virtual; implemented by the base class)
+    protected:
+        // rewind index file to beginning of index data, return true if rewound OK
+        bool Rewind(void);
+
+    private:
+        // return true if FILE* is open (presumably refers to m_indexStream, the only FILE* member - confirm in the .cpp)
+        bool IsOpen(void) const;
+        // opens index file according to requested mode, return true if opened OK
+        bool OpenIndexFile(const std::string& filename, const std::string& mode);
+        // updates in-memory cache of index data, depending on current cache mode
+        void UpdateCache(void);
+
+    // factory methods for returning proper BamIndex-derived type based on available index files
+    public:
+      
+        // returns index based on BAM filename 'stub'
+        // checks first for preferred type, returns that type if found
+        // (if not found, attempts to load other type(s), returns 0 if NONE found)
+        //
+        // ** default preferred type is BamToolsIndex ** use this anytime it exists
+        enum PreferredIndexType { BAMTOOLS = 0, STANDARD };
+        static BamIndex* FromBamFilename(const std::string&   bamFilename,
+                                         BamTools::BgzfData*  bgzf,
+                                         BamTools::BamReader* reader, 
+                                         const BamIndex::PreferredIndexType& type = BamIndex::BAMTOOLS);
+        
+        // returns index based on explicitly named index file (or 0 if not found)
+        static BamIndex* FromIndexFilename(const std::string&   indexFilename,
+                                           BamTools::BgzfData*  bgzf,
+                                           BamTools::BamReader* reader);
+
+    // data members
+    // NOTE(review): FILE is not declared by a header included directly here; presumably it arrives via BamAux.h or api_global.h - confirm
+    protected:
+        BamTools::BgzfData*  m_BGZF;
+        BamTools::BamReader* m_reader;
+        BamTools::RefVector  m_references;
+        BamIndex::BamIndexCacheMode m_cacheMode;
+        FILE* m_indexStream;
+
+
+    // friends: the two Internal index classes are granted access to non-public members
+    friend class Internal::BamStandardIndex;
+    friend class Internal::BamToolsIndex;
+};
+
+} // namespace BamTools
+
+#endif // BAM_INDEX_H