Mercurial > repos > zzhou > spp_phantompeak
diff spp/src/BamIndex.cpp @ 15:e689b83b0257 draft
Uploaded
author | zzhou |
---|---|
date | Tue, 27 Nov 2012 16:15:21 -0500 |
parents | ce08b0efa3fd |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/spp/src/BamIndex.cpp Tue Nov 27 16:15:21 2012 -0500 @@ -0,0 +1,230 @@ +// *************************************************************************** +// BamIndex.cpp (c) 2009 Derek Barnett +// Marth Lab, Department of Biology, Boston College +// All rights reserved. +// --------------------------------------------------------------------------- +// Last modified: 22 November 2010 (DB) +// --------------------------------------------------------------------------- +// Provides index functionality - both for the default (standardized) BAM +// index format (.bai) as well as a BamTools-specific (nonstandard) index +// format (.bti). +// *************************************************************************** + +#include <BamIndex.h> +#include <BamReader.h> +#include <BGZF.h> +#include <BamStandardIndex_p.h> +#include <BamToolsIndex_p.h> +using namespace BamTools; +using namespace BamTools::Internal; + +#include <cstdio> +#include <cstdlib> +#include <algorithm> +#include <iostream> +#include <map> +using namespace std; + +// -------------------------------------------------- +// BamIndex factory methods + +// returns index based on BAM filename 'stub' +// checks first for preferred type, returns that type if found +// (if not found, attmempts to load other type(s), returns 0 if NONE found) +// +// ** default preferred type is BamToolsIndex ** use this anytime it exists +BamIndex* BamIndex::FromBamFilename(const std::string& bamFilename, + BamTools::BgzfData* bgzf, + BamTools::BamReader* reader, + const BamIndex::PreferredIndexType& type) +{ + // --------------------------------------------------- + // attempt to load preferred type first + + const std::string bamtoolsIndexFilename = bamFilename + ".bti"; + const bool bamtoolsIndexExists = BamTools::FileExists(bamtoolsIndexFilename); + if ( (type == BamIndex::BAMTOOLS) && bamtoolsIndexExists ) + return new BamToolsIndex(bgzf, reader); + + const std::string standardIndexFilename = bamFilename + ".bai"; + const bool standardIndexExists = BamTools::FileExists(standardIndexFilename); + if ( (type == BamIndex::STANDARD) && standardIndexExists ) + return new BamStandardIndex(bgzf, reader); + + // ---------------------------------------------------- + // preferred type could not be found, try other (non-preferred) types + // if none found, return 0 + + if ( bamtoolsIndexExists ) return new BamToolsIndex(bgzf, reader); + if ( standardIndexExists ) return new BamStandardIndex(bgzf, reader); + return 0; +} + +// returns index based on explicitly named index file (or 0 if not found) +BamIndex* BamIndex::FromIndexFilename(const std::string& indexFilename, + BamTools::BgzfData* bgzf, + BamTools::BamReader* reader) +{ + // see if specified file exists + const bool indexExists = BamTools::FileExists(indexFilename); + if ( !indexExists ) return 0; + + const std::string bamtoolsIndexExtension(".bti"); + const std::string standardIndexExtension(".bai"); + + // if has bamtoolsIndexExtension + if ( indexFilename.find(bamtoolsIndexExtension) == (indexFilename.length() - bamtoolsIndexExtension.length()) ) + return new BamToolsIndex(bgzf, reader); + + // if has standardIndexExtension + if ( indexFilename.find(standardIndexExtension) == (indexFilename.length() - standardIndexExtension.length()) ) + return new BamStandardIndex(bgzf, reader); + + // otherwise, unsupported file type + return 0; +} + +// ------------------------------- +// BamIndex implementation + +// ctor +BamIndex::BamIndex(BamTools::BgzfData* bgzf, BamTools::BamReader* reader) + : m_BGZF(bgzf) + , m_reader(reader) + , m_cacheMode(BamIndex::LimitedIndexCaching) + , m_indexStream(0) +{ + if ( m_reader && m_reader->IsOpen() ) + m_references = m_reader->GetReferenceData(); +} + +// dtor +BamIndex::~BamIndex(void) { + if ( IsOpen() ) + fclose(m_indexStream); +} + +// return true if FILE* is open +bool BamIndex::IsOpen(void) const { + return ( m_indexStream != 0 ); +} + +// loads existing data from file into memory +bool BamIndex::Load(const string& filename) { + + // open index file, abort on error + if ( !OpenIndexFile(filename, "rb") ) { + fprintf(stderr, "ERROR: Unable to open the BAM index file %s for reading.\n", filename.c_str()); + return false; + } + + // check magic number + if ( !LoadHeader() ) { + fclose(m_indexStream); + return false; + } + + // load reference data (but only keep in memory if full caching requested) + bool saveInitialLoad = ( m_cacheMode == BamIndex::FullIndexCaching ); + if ( !LoadAllReferences(saveInitialLoad) ) { + fclose(m_indexStream); + return false; + } + + // update index cache based on selected mode + UpdateCache(); + + // return success + return true; +} + +// opens index file for reading/writing, return true if opened OK +bool BamIndex::OpenIndexFile(const string& filename, const string& mode) { + m_indexStream = fopen(filename.c_str(), mode.c_str()); + return ( m_indexStream != 0 ); +} + +// rewind index file to beginning of index data, return true if rewound OK +bool BamIndex::Rewind(void) { + return ( fseek64(m_indexStream, DataBeginOffset(), SEEK_SET) == 0 ); +} + +// change the index caching behavior +void BamIndex::SetCacheMode(const BamIndexCacheMode mode) { + if ( mode != m_cacheMode ) { + m_cacheMode = mode; + UpdateCache(); + } +} + +// updates in-memory cache of index data, depending on current cache mode +void BamIndex::UpdateCache(void) { + + // skip if file not open + if ( !IsOpen() ) return; + + // reflect requested cache mode behavior + switch ( m_cacheMode ) { + + case (BamIndex::FullIndexCaching) : + Rewind(); + LoadAllReferences(true); + break; + + case (BamIndex::LimitedIndexCaching) : + if ( HasFullDataCache() ) + KeepOnlyFirstReferenceOffsets(); + else { + ClearAllData(); + SkipToFirstReference(); + LoadFirstReference(true); + } + break; + case(BamIndex::NoIndexCaching) : + ClearAllData(); + break; + default : + // unreachable + ; + } +} + +// writes in-memory index data out to file +bool BamIndex::Write(const string& bamFilename) { + + // open index file for writing + string indexFilename = bamFilename + Extension(); + if ( !OpenIndexFile(indexFilename, "wb") ) { + fprintf(stderr, "ERROR: Could not open file to save index.\n"); + return false; + } + + // write index header data + if ( !WriteHeader() ) { + fprintf(stderr, "ERROR: There was a problem writing index metadata to new index file.\n"); + fflush(m_indexStream); + fclose(m_indexStream); + exit(1); + } + + // write main index data + if ( !WriteAllReferences() ) { + fprintf(stderr, "ERROR: There was a problem writing index data to new index file.\n"); + fflush(m_indexStream); + fclose(m_indexStream); + exit(1); + } + + // flush any remaining output, rewind file, and return success + fflush(m_indexStream); + fclose(m_indexStream); + + // re-open index file for later reading + if ( !OpenIndexFile(indexFilename, "rb") ) { + fprintf(stderr, "ERROR: Could not open newly created index file for reading.\n"); + return false; + } + + // return success/failure of write + return true; +}