view peakhelper.r @ 1:243f75d0ed6e draft default tip

Uploaded. Includes new release 1.0.7 with fixed optional controls.
author messersc
date Thu, 19 Feb 2015 05:39:45 -0500
parents d42f4d78c85e
children
line wrap: on
line source

########################################################################
# JAMMv1.0.7rev1 is a peak finder for joint analysis of NGS replicates.
# Copyright (C) 2014-2015  Mahmoud Ibrahim
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
# Contact: mahmoud.ibrahim@mdc-berlin.de
########################################################################



# ======================= 
# User-defined variables
# ======================= 
options(warn = -1, scipen = 1000) #R will not report any warnings (warn = -1), R will not use scientific notation (scipen = 1000) 
#=======================> DONE! 




# ============================================== 
# Parsing Arguments (source: phantom SPP script)
# ============================================== 
args = commandArgs(trailingOnly = TRUE) # Read Arguments from command line


#Parsing arguments and storing values
for (each.arg in args) {
	#bed file names
	if (grepl('^-filelist=',each.arg)) {
		arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] # split on =
		if (! is.na(arg.split[2]) ) {
				bednames <- arg.split[2]
		} else {
			stop('No bed file names')
		} 
	}
}
options(stringsAsFactors = FALSE)
#=======================> DONE! 



# ================= 
# Custom Functions
# =================
score <- function(x){
	x  = ((x-min(x))/(max(x)-min(x)))*10000
	return(x)
}

fixnpf = function(bedfile) {

	writethis = read.table(bedfile, header=FALSE)

	
	message(paste0("Minimum Peak Width Used to Produce Filtered List: ", writethis$V11[1]))
	writethis$V9 = p.adjust(writethis$V8, method = "BH")
	writethis$V8[writethis$V8 == 0] = 10
	writethis$V8[writethis$V8 == 10] = min(writethis$V8)
	writethis$V8 = -(log10(writethis$V8))
	writethis$V9[writethis$V9 == 0] = 10
	writethis$V9[writethis$V9 == 10] = min(writethis$V9)
	writethis$V9 = -(log10(writethis$V9))	
	writethis$V7 = as.numeric(writethis$V7) * writethis$V9
	writethis$V5 = score(as.numeric(writethis$V7))
	
	
	#find peak score cutoff (pareto distribution)
	sizes = writethis$V3 - writethis$V2
	scores = writethis$V7
	ss = cbind(sizes, scores)
	ss = ss[ss[,1] > writethis$V11[1],,drop=FALSE]
	scores = ss[,2]
	scores = scores[scores > 0]
	num = length(scores)
	if (length(scores) > 300000) {
		scores = scores[1:300000]
		num = 300000
	}
	den = log(min(scores))
	den = sum(log(scores) - den)
	alpha = num / den	
	geom = (exp(1/alpha)) * (min(scores)) 
	message(paste0("Minimum Peak Score Used to Produce Filtered List: ", geom))
	
	#done now write something
	write(paste(writethis$V1, writethis$V2, writethis$V3, writethis$V4, writethis$V5, ".", writethis$V7, writethis$V8, writethis$V9, writethis$V10, writethis$V11, geom, sep = "\t"), file = bedfile, ncolumns = 1)
	return(bedfile)
}
#=======================> DONE! 



# ==================
# Fixing narrowpeak
# ==================
bedfiles = fixnpf(bednames) #read in all bed files (samples and control)
#=======================> DONE!