Mercurial > repos > aaronquinlan > multi_intersect
comparison BEDTools-Version-2.14.3/src/utils/Fasta/Fasta.h @ 0:dfcd8b6c1bda
Uploaded
| author | aaronquinlan |
|---|---|
| date | Thu, 03 Nov 2011 10:25:04 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:dfcd8b6c1bda |
|---|---|
| 1 // *************************************************************************** | |
| 2 // Fasta.h (c) 2010 Erik Garrison <erik.garrison@bc.edu> | |
| 3 // Marth Lab, Department of Biology, Boston College | |
| 4 // All rights reserved. | |
| 5 // --------------------------------------------------------------------------- | |
| 6 // Last modified: 5 February 2010 (EG) | |
| 7 // --------------------------------------------------------------------------- | |
| 8 | |
| 9 #ifndef _FASTA_H | |
| 10 #define _FASTA_H | |
| 11 | |
| 12 #include <map> | |
| 13 #include <iostream> | |
| 14 #include <fstream> | |
| 15 #include <vector> | |
| 16 #include <stdint.h> | |
| 17 #include <stdio.h> | |
| 18 #include <algorithm> | |
| 19 #include "LargeFileSupport.h" | |
| 20 #include <sys/stat.h> | |
| 21 #include <sys/mman.h> | |
| 22 #include "split.h" | |
| 23 #include <stdlib.h> | |
| 24 #include <ctype.h> | |
| 25 #include <unistd.h> | |
| 26 | |
| 27 using namespace std; | |
| 28 | |
| 29 class FastaIndexEntry { /* Index record for a single sequence: its name, length, byte offset in the file and per-line layout. */ | |
| 30 friend ostream& operator<<(ostream& output, const FastaIndexEntry& e); /* writes an entry to a stream; the output format is defined with the implementation, not in this header */ | |
| 31 public: | |
| 32 FastaIndexEntry(string name, int length, long long offset, int line_blen, int line_len); /* parameter names match the data members below; presumably stored as given (definition not shown here) */ | |
| 33 FastaIndexEntry(void); /* default constructor; initial member values are set in the implementation, not visible here */ | |
| 34 ~FastaIndexEntry(void); | |
| 35 string name; // name of the sequence | |
| 36 int length; // length of the sequence (int, so limited to INT_MAX) | |
| 37 long long offset; // byte offset of the sequence from the start of the file | |
| 38 int line_blen; // bytes of sequence characters per line (newline not counted) | |
| 39 int line_len; // bytes per line including the newline | |
| 40 void clear(void); /* presumably resets the fields to an empty state -- confirm against the implementation */ | |
| 41 }; | |
| 42 | |
| 43 class FastaIndex : public map<string, FastaIndexEntry> { /* A string-keyed map of FastaIndexEntry records plus helpers for index files. NOTE(review): std::map has no virtual destructor, so a FastaIndex must not be deleted through a pointer to the map base. */ | |
| 44 friend ostream& operator<<(ostream& output, FastaIndex& i); /* writes the index to a stream; takes a non-const reference; format is defined with the implementation */ | |
| 45 public: | |
| 46 FastaIndex(void); | |
| 47 ~FastaIndex(void); | |
| 48 vector<string> sequenceNames; /* presumably the sequence names in the order they were indexed -- TODO confirm */ | |
| 49 void indexReference(string refName); /* presumably builds the index by scanning the FASTA file named refName -- confirm against the implementation */ | |
| 50 void readIndexFile(string fname); /* presumably loads entries from the index file fname -- confirm */ | |
| 51 void writeIndexFile(string fname); /* presumably writes the current entries to the index file fname -- confirm */ | |
| 52 ifstream indexFile; /* input file stream member; how it is opened and used is not visible in this header */ | |
| 53 FastaIndexEntry entry(string key); /* returns the entry for key by value (a copy); behaviour for a missing key is not visible here */ | |
| 54 void flushEntryToIndex(FastaIndexEntry& entry); /* takes the entry by non-const reference; presumably stores it in the map and may modify it -- confirm */ | |
| 55 string indexFileExtension(void); /* returns a string; presumably the suffix appended to a FASTA file name to form its index file name -- confirm */ | |
| 56 }; | |
| 57 | |
| 58 class FastaReference { /* Handle on a FASTA file (stdio FILE or a mapped region) together with a pointer to its FastaIndex. */ | |
| 59 public: | |
| 60 void open(string reffilename, bool usemmap = false); /* opens reffilename; usemmap defaults to false. Presumably selects memory-mapped access when true -- confirm against the implementation */ | |
| 61 bool usingmmap; /* set to false by the default constructor */ | |
| 62 string filename; | |
| 63 FastaReference(void) : usingmmap(false), file(NULL), filemm(NULL), filesize(0), index(NULL) { } /* every scalar and pointer member gets a defined value (listed in declaration order) so an object that was never open()ed holds no indeterminate pointers */ | |
| 64 ~FastaReference(void); | |
| 65 FILE* file; /* NULL until assigned */ | |
| 66 void* filemm; /* NULL until assigned; presumably the base address of the mapping when usingmmap is true -- confirm */ | |
| 67 size_t filesize; /* 0 until assigned */ | |
| 68 FastaIndex* index; /* NULL until assigned; ownership is decided by the implementation, not visible in this header */ | |
| 69 vector<FastaIndexEntry> findSequencesStartingWith(string seqnameStart); /* returns entries by value; presumably those whose name begins with seqnameStart -- confirm */ | |
| 70 string getSequence(string seqname); /* returns the sequence for seqname by value */ | |
| 71 // potentially useful for performance, investigate | |
| 72 // void getSequence(string seqname, string& sequence); | |
| 73 string getSubSequence(string seqname, int start, int length); /* NOTE(review): whether start is 0- or 1-based and how out-of-range requests behave is not visible here -- confirm */ | |
| 74 string sequenceNameStartingWith(string seqnameStart); /* returns a single name; behaviour for zero or multiple matches is not visible here */ | |
| 75 long unsigned int sequenceLength(string seqname); /* returns an unsigned long, although FastaIndexEntry::length is declared int */ | |
| 76 }; | |
| 77 | |
| 78 #endif |
