Mercurial > repos > messersc > jamm
diff signalmaker.r @ 1:243f75d0ed6e draft default tip
Uploaded.
Includes new release 1.0.7 with fixed optional controls.
author | messersc |
---|---|
date | Thu, 19 Feb 2015 05:39:45 -0500 |
parents | d42f4d78c85e |
children |
line wrap: on
line diff
--- a/signalmaker.r Wed Dec 17 10:40:23 2014 -0500 +++ b/signalmaker.r Thu Feb 19 05:39:45 2015 -0500 @@ -1,9 +1,22 @@ -############################### -#### Signal Maker for NGS Data -#### R script -############################### - - +######################################################################## +# JAMMv1.0.7rev1 is a peak finder for joint analysis of NGS replicates. +# Copyright (C) 2014-2015 Mahmoud Ibrahim +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# +# Contact: mahmoud.ibrahim@mdc-berlin.de +######################################################################## # ======================= @@ -76,10 +89,15 @@ return(reads) } - +#Read in bedpe(st2) +parseinpe = function(bedfile) { + l = read.table(bedfile, header = FALSE) + l = cbind((l[[1]] + 1), l[[2]]) + return(l) +} #Produces normalized extended read counts (takes output of parsein(), return a vector of floats) -countreads = function(bedfile, reads, frag, chromsize, filelist) { +countreads = function(bedfile, reads, frag, chromsize, filelist, chrcount) { o = which(filelist == bedfile) @@ -112,6 +130,36 @@ +#Produces normalized extended read counts (takes output of parsein(), return a vector of floats / PAIRED END) +countreadspe = function(bedfile, reads, chromsize, filelist, chrcount) { + + o = which(filelist == bedfile) + + counts = vector(mode = "numeric", length = chromsize) + + for (j in 1:length(reads[[o]][,1])) { + counts[(reads[[o]][j,1]):(reads[[o]][j,2])] = counts[(reads[[o]][j,1]):(reads[[o]][j,2])] + 1 + } + + mCount = mean(counts) + + if (chrcount == 1) { + counts = counts/mCount + write(paste(mCount), file = paste0(out, "/norma.", o, ".info")) + } else { + meanCounts = mean(as.numeric(read.table(paste0(out, "/norma.", o, ".info"))[[1]])) + if ((mCount > (5*meanCounts)) || (mCount < (0.2*meanCounts))) { + mCount = meanCounts + } else { + write(paste(mCount), file = paste0(out, "/norma.", o, ".info"), append = TRUE) + } + counts = counts/mCount + } + + return(counts) +} + + #generate signal getsig = function(winStart, winEnds, startList, stepSize, chromName) { @@ -187,10 +235,8 @@ if (grepl('^-frag=',each.arg)) { arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] if (! is.na(arg.split[2]) ) { - frag <- arg.split[2] - } else { - stop('No fragment length given') - } + frag <- arg.split[2] + } } #bakcground files directory if (grepl('^-bkgd=',each.arg)) { @@ -252,6 +298,13 @@ chrcount <- as.numeric(arg.split[2]) } } + #alignment type + if (grepl('^-t=',each.arg)) { + arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] + if (! is.na(arg.split[2]) ) { + type <- arg.split[2] + } + } } ##Parse in variables @@ -260,7 +313,9 @@ if (bkgd != "None") { readsFiles[[numdup+1]] = bkgd } -frags = as.numeric(strsplit(frag, ",", fixed = TRUE)[[1]]) +if (type == "single") { + frags = as.numeric(strsplit(frag, ",", fixed = TRUE)[[1]]) +} rm(bednames) options(stringsAsFactors = FALSE) @@ -278,10 +333,20 @@ regions = cbind(regions[[2]] + 1, regions[[3]]) #import read data -if (cornum > 1) { - datain = mclapply(readsFiles, parsein, mc.cores = cornum, mc.preschedule = TRUE) #read in all bed files (samples and control) -} else { - datain = lapply(readsFiles, parsein) #read in all bed files (samples and control) +if (type == "single") { + if (cornum > 1) { + datain = mclapply(readsFiles, parsein, mc.cores = cornum, mc.preschedule = TRUE) #read in all bed files (samples and control) + } else { + datain = lapply(readsFiles, parsein) #read in all bed files (samples and control) + } +} +#import read data +if (type == "paired") { + if (cornum > 1) { + datain = mclapply(readsFiles, parseinpe, mc.cores = cornum, mc.preschedule = TRUE) #read in all bed files (samples and control) + } else { + datain = lapply(readsFiles, parseinpe) #read in all bed files (samples and control) + } } #=======================> DONE! @@ -290,10 +355,20 @@ # =============== # Counting Reads # =============== -if (cornum > 1) { - counts = mclapply(readsFiles, countreads, reads = datain, frag = frags, chromsize = chromSize, filelist = readsFiles, mc.cores = cornum, mc.preschedule = TRUE) -} else { - counts = lapply(readsFiles, countreads, reads = datain, frag = frags, chromsize = chromSize, filelist = readsFiles) +if (type == "single") { + if (cornum > 1) { + counts = mclapply(readsFiles, countreads, reads = datain, frag = frags, chromsize = chromSize, filelist = readsFiles, chrcount = chrcount, mc.cores = cornum, mc.preschedule = TRUE) + } else { + counts = lapply(readsFiles, countreads, reads = datain, frag = frags, chromsize = chromSize, filelist = readsFiles, chrcount = chrcount) + } +} + +if (type == "paired") { + if (cornum > 1) { + counts = mclapply(readsFiles, countreadspe, reads = datain, chromsize = chromSize, filelist = readsFiles, chrcount = chrcount, mc.cores = cornum, mc.preschedule = TRUE) + } else { + counts = lapply(readsFiles, countreadspe, reads = datain, chromsize = chromSize, filelist = readsFiles, chrcount = chrcount) + } } rm(datain) #=======================> DONE!