Mercurial > repos > aaronquinlan > multi_intersect
comparison BEDTools-Version-2.14.3/src/windowBed/windowBed.cpp @ 1:bec36315bd12 default tip
Deleted selected files
| author | aaronquinlan |
|---|---|
| date | Sat, 19 Nov 2011 14:17:03 -0500 |
| parents | dfcd8b6c1bda |
| children |
comparison
equal
deleted
inserted
replaced
| 0:dfcd8b6c1bda | 1:bec36315bd12 |
|---|---|
| 1 /***************************************************************************** | |
| 2 windowBed.cpp | |
| 3 | |
| 4 (c) 2009 - Aaron Quinlan | |
| 5 Hall Laboratory | |
| 6 Department of Biochemistry and Molecular Genetics | |
| 7 University of Virginia | |
| 8 aaronquinlan@gmail.com | |
| 9 | |
| 10 Licenced under the GNU General Public License 2.0 license. | |
| 11 ******************************************************************************/ | |
| 12 #include "lineFileUtilities.h" | |
| 13 #include "windowBed.h" | |
| 14 | |
| 15 | |
| 16 /* | |
| 17 Constructor | |
| 18 */ | |
| 19 BedWindow::BedWindow(string bedAFile, string bedBFile, int leftSlop, int rightSlop, | |
| 20 bool anyHit, bool noHit, bool writeCount, bool strandWindows, | |
| 21 bool matchOnSameStrand, bool matchOnDiffStrand, bool bamInput, bool bamOutput, bool isUncompressedBam) { | |
| 22 | |
| 23 _bedAFile = bedAFile; | |
| 24 _bedBFile = bedBFile; | |
| 25 | |
| 26 _leftSlop = leftSlop; | |
| 27 _rightSlop = rightSlop; | |
| 28 | |
| 29 _anyHit = anyHit; | |
| 30 _noHit = noHit; | |
| 31 _writeCount = writeCount; | |
| 32 _strandWindows = strandWindows; | |
| 33 _matchOnSameStrand = matchOnSameStrand; | |
| 34 _matchOnDiffStrand = matchOnDiffStrand; | |
| 35 _bamInput = bamInput; | |
| 36 _bamOutput = bamOutput; | |
| 37 _isUncompressedBam = isUncompressedBam; | |
| 38 | |
| 39 _bedA = new BedFile(bedAFile); | |
| 40 _bedB = new BedFile(bedBFile); | |
| 41 | |
| 42 if (_bamInput == false) | |
| 43 WindowIntersectBed(); | |
| 44 else | |
| 45 WindowIntersectBam(_bedAFile); | |
| 46 } | |
| 47 | |
| 48 | |
| 49 | |
| 50 /* | |
| 51 Destructor | |
| 52 */ | |
| 53 BedWindow::~BedWindow(void) { | |
| 54 } | |
| 55 | |
| 56 | |
| 57 | |
| 58 void BedWindow::FindWindowOverlaps(const BED &a, vector<BED> &hits) { | |
| 59 | |
| 60 /* | |
| 61 Adjust the start and end of a based on the requested window | |
| 62 */ | |
| 63 | |
| 64 // update the current feature's start and end | |
| 65 // according to the slop requested (slop = 0 by default) | |
| 66 CHRPOS aFudgeStart = 0; | |
| 67 CHRPOS aFudgeEnd; | |
| 68 AddWindow(a, aFudgeStart, aFudgeEnd); | |
| 69 | |
| 70 /* | |
| 71 Now report the hits (if any) based on the window around a. | |
| 72 */ | |
| 73 // get the hits in B for the A feature | |
| 74 _bedB->FindOverlapsPerBin(a.chrom, aFudgeStart, aFudgeEnd, a.strand, hits, _matchOnSameStrand, _matchOnDiffStrand); | |
| 75 | |
| 76 int numOverlaps = 0; | |
| 77 | |
| 78 // loop through the hits and report those that meet the user's criteria | |
| 79 vector<BED>::const_iterator h = hits.begin(); | |
| 80 vector<BED>::const_iterator hitsEnd = hits.end(); | |
| 81 for (; h != hitsEnd; ++h) { | |
| 82 | |
| 83 int s = max(aFudgeStart, h->start); | |
| 84 int e = min(aFudgeEnd, h->end); | |
| 85 int overlapBases = (e - s); // the number of overlapping bases b/w a and b | |
| 86 int aLength = (a.end - a.start); // the length of a in b.p. | |
| 87 | |
| 88 if (s < e) { | |
| 89 // is there enough overlap (default ~ 1bp) | |
| 90 if ( ((float) overlapBases / (float) aLength) > 0 ) { | |
| 91 numOverlaps++; | |
| 92 if (_anyHit == false && _noHit == false && _writeCount == false) { | |
| 93 _bedA->reportBedTab(a); | |
| 94 _bedB->reportBedNewLine(*h); | |
| 95 } | |
| 96 } | |
| 97 } | |
| 98 } | |
| 99 if (_anyHit == true && (numOverlaps >= 1)) { | |
| 100 _bedA->reportBedNewLine(a); } | |
| 101 else if (_writeCount == true) { | |
| 102 _bedA->reportBedTab(a); printf("\t%d\n", numOverlaps); | |
| 103 } | |
| 104 else if (_noHit == true && (numOverlaps == 0)) { | |
| 105 _bedA->reportBedNewLine(a); | |
| 106 } | |
| 107 } | |
| 108 | |
| 109 | |
| 110 bool BedWindow::FindOneOrMoreWindowOverlaps(const BED &a) { | |
| 111 | |
| 112 // update the current feature's start and end | |
| 113 // according to the slop requested (slop = 0 by default) | |
| 114 CHRPOS aFudgeStart = 0; | |
| 115 CHRPOS aFudgeEnd; | |
| 116 AddWindow(a, aFudgeStart, aFudgeEnd); | |
| 117 | |
| 118 bool overlapsFound = _bedB->FindOneOrMoreOverlapsPerBin(a.chrom, a.start, a.end, a.strand, _matchOnSameStrand, _matchOnDiffStrand); | |
| 119 return overlapsFound; | |
| 120 } | |
| 121 | |
| 122 | |
| 123 void BedWindow::WindowIntersectBed() { | |
| 124 | |
| 125 // load the "B" bed file into a map so | |
| 126 // that we can easily compare "A" to it for overlaps | |
| 127 _bedB->loadBedFileIntoMap(); | |
| 128 | |
| 129 BED a, nullBed; | |
| 130 int lineNum = 0; // current input line number | |
| 131 BedLineStatus bedStatus; | |
| 132 vector<BED> hits; // vector of potential hits | |
| 133 hits.reserve(100); | |
| 134 | |
| 135 _bedA->Open(); | |
| 136 while ((bedStatus = _bedA->GetNextBed(a, lineNum)) != BED_INVALID) { | |
| 137 if (bedStatus == BED_VALID) { | |
| 138 FindWindowOverlaps(a, hits); | |
| 139 hits.clear(); | |
| 140 a = nullBed; | |
| 141 } | |
| 142 } | |
| 143 _bedA->Close(); | |
| 144 } | |
| 145 | |
| 146 | |
| 147 void BedWindow::WindowIntersectBam(string bamFile) { | |
| 148 | |
| 149 // load the "B" bed file into a map so | |
| 150 // that we can easily compare "A" to it for overlaps | |
| 151 _bedB->loadBedFileIntoMap(); | |
| 152 | |
| 153 // open the BAM file | |
| 154 BamReader reader; | |
| 155 BamWriter writer; | |
| 156 reader.Open(bamFile); | |
| 157 | |
| 158 // get header & reference information | |
| 159 string bamHeader = reader.GetHeaderText(); | |
| 160 RefVector refs = reader.GetReferenceData(); | |
| 161 | |
| 162 // open a BAM output to stdout if we are writing BAM | |
| 163 if (_bamOutput == true) { | |
| 164 // set compression mode | |
| 165 BamWriter::CompressionMode compressionMode = BamWriter::Compressed; | |
| 166 if ( _isUncompressedBam ) compressionMode = BamWriter::Uncompressed; | |
| 167 writer.SetCompressionMode(compressionMode); | |
| 168 // open our BAM writer | |
| 169 writer.Open("stdout", bamHeader, refs); | |
| 170 } | |
| 171 | |
| 172 vector<BED> hits; // vector of potential hits | |
| 173 // reserve some space | |
| 174 hits.reserve(100); | |
| 175 | |
| 176 _bedA->bedType = 6; | |
| 177 BamAlignment bam; | |
| 178 bool overlapsFound; | |
| 179 // get each set of alignments for each pair. | |
| 180 while (reader.GetNextAlignment(bam)) { | |
| 181 | |
| 182 if (bam.IsMapped()) { | |
| 183 BED a; | |
| 184 a.chrom = refs.at(bam.RefID).RefName; | |
| 185 a.start = bam.Position; | |
| 186 a.end = bam.GetEndPosition(false, false); | |
| 187 | |
| 188 // build the name field from the BAM alignment. | |
| 189 a.name = bam.Name; | |
| 190 if (bam.IsFirstMate()) a.name += "/1"; | |
| 191 if (bam.IsSecondMate()) a.name += "/2"; | |
| 192 | |
| 193 a.score = ToString(bam.MapQuality); | |
| 194 a.strand = "+"; if (bam.IsReverseStrand()) a.strand = "-"; | |
| 195 | |
| 196 if (_bamOutput == true) { | |
| 197 overlapsFound = FindOneOrMoreWindowOverlaps(a); | |
| 198 if (overlapsFound == true) { | |
| 199 if (_noHit == false) | |
| 200 writer.SaveAlignment(bam); | |
| 201 } | |
| 202 else { | |
| 203 if (_noHit == true) | |
| 204 writer.SaveAlignment(bam); | |
| 205 } | |
| 206 } | |
| 207 else { | |
| 208 FindWindowOverlaps(a, hits); | |
| 209 hits.clear(); | |
| 210 } | |
| 211 } | |
| 212 // BAM IsMapped() is false | |
| 213 else if (_noHit == true) { | |
| 214 writer.SaveAlignment(bam); | |
| 215 } | |
| 216 } | |
| 217 | |
| 218 // close the relevant BAM files. | |
| 219 reader.Close(); | |
| 220 if (_bamOutput == true) { | |
| 221 writer.Close(); | |
| 222 } | |
| 223 } | |
| 224 | |
| 225 | |
| 226 void BedWindow::AddWindow(const BED &a, CHRPOS &fudgeStart, CHRPOS &fudgeEnd) { | |
| 227 // Does the user want to treat the windows based on strand? | |
| 228 // If so, | |
| 229 // if "+", then left is left and right is right | |
| 230 // if "-", the left is right and right is left. | |
| 231 if (_strandWindows) { | |
| 232 if (a.strand == "+") { | |
| 233 if ((int) (a.start - _leftSlop) > 0) | |
| 234 fudgeStart = a.start - _leftSlop; | |
| 235 else fudgeStart = 0; | |
| 236 fudgeEnd = a.end + _rightSlop; | |
| 237 } | |
| 238 else { | |
| 239 if ((int) (a.start - _rightSlop) > 0) | |
| 240 fudgeStart = a.start - _rightSlop; | |
| 241 else fudgeStart = 0; | |
| 242 fudgeEnd = a.end + _leftSlop; | |
| 243 } | |
| 244 } | |
| 245 // If not, add the windows irrespective of strand | |
| 246 else { | |
| 247 if ((int) (a.start - _leftSlop) > 0) | |
| 248 fudgeStart = a.start - _leftSlop; | |
| 249 else fudgeStart = 0; | |
| 250 fudgeEnd = a.end + _rightSlop; | |
| 251 } | |
| 252 } | |
| 253 |
