view xcorr.r @ 1:243f75d0ed6e draft default tip

Uploaded. Includes new release 1.0.7 with fixed optional controls.
author messersc
date Thu, 19 Feb 2015 05:39:45 -0500
parents d42f4d78c85e
children
line wrap: on
line source

########################################################################
# JAMMv1.0.7rev1 is a peak finder for joint analysis of NGS replicates.
# Copyright (C) 2014-2015  Mahmoud Ibrahim
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
# Contact: mahmoud.ibrahim@mdc-berlin.de
########################################################################


# ========================= 
# Required Libraries check 
# ========================= 
if ((is.element('parallel', installed.packages()[,1]) == FALSE)) {
	stop("R package'parallel' is required. Please install it!")
}
suppressPackageStartupMessages(library("parallel"))
#=======================> DONE! 




# ============================================== 
# Parsing Arguments (source: phantom SPP script)
# ============================================== 
args = commandArgs(trailingOnly = TRUE) # Read Arguments from command line


#Set arguments to default values
ibed = NA # input bed file
sFile = NA # chromosome size
storeFile = NA # file to store result

#Parsing arguments and storing values
for (each.arg in args) {
	#input bed file
	if (grepl('^-ibed=',each.arg)) {
		arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] 
		if (! is.na(arg.split[2]) ) {
				ibed <- arg.split[2]
		} else {
			stop('No input bed file')
		} 
	}
	if (grepl('^-s=',each.arg)) {			
		arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] 
		if (! is.na(arg.split[2]) ) {
			sFile <- arg.split[2] 
		} else {
			stop('No chromosome size file')
		}
	}
	if (grepl('^-rl=',each.arg)) {			
		arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] 
		if (! is.na(arg.split[2]) ) {
			rl <- arg.split[2] 
		} else {
			stop('Read length missing')
		}
	}
	if (grepl('^-d=',each.arg)) {			
		arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] 
		if (! is.na(arg.split[2]) ) {
			storeFile <- arg.split[2] 
		} else {
			stop('No file to store result')
		}
	}
	if (grepl('^-p=',each.arg)) {			
		arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] 
		if (! is.na(arg.split[2]) ) {
			cornum <- as.numeric(arg.split[2]) 
		} else {
			stop('No number of cores given')
		}
	}
}

#Read in variables
chromosomes = read.table(sFile, header=FALSE)
chromName = chromosomes$V1 #which chromosome
chromSize = as.numeric(chromosomes$V2) #chromosome size
rm(chromosomes)

ibed = strsplit(ibed, ",", fixed = TRUE)[[1]]
rl = as.numeric(strsplit(rl, ",", fixed = TRUE)[[1]])
#=======================> DONE! 


counting = function(readst, br){
	return(hist(readst, breaks = br, plot = FALSE)$counts)
}


countingreads = function(bedfile, readlen, filelist) {
	reads = read.table(bedfile, header = FALSE)
	reads = cbind(as.character(reads[[1]]), as.character(reads[[2]]))
	o = which(filelist == bedfile)
	tri = list(as.numeric(reads[reads[,2] == "+",,drop = FALSE][,1]), (as.numeric(reads[reads[,2] == "-",,drop = FALSE][,1])) + readlen[o] - 1)
	readcounts = mclapply(tri, counting, br = genomevec, mc.cores = cornum)
	return(readcounts)
}

xc = function(countlist) {
	crossCorrelation = ccf(countlist[[2]], countlist[[1]], plot = FALSE); #xcorr
	crossCorrelation$lag = crossCorrelation$lag * 20; #correct lag for counts window
	maxCorr = which.max(crossCorrelation$acf);
	maxCorr = abs(crossCorrelation$lag[maxCorr]);
	return(maxCorr)
}


# ================== 
# Read Start Count
# ==================
#make the window vector (a vector with start positions of chromosome windows)
genomevec = seq(0, chromSize, by = 20);
if (max(genomevec) < chromSize) {
	genomevec = append(genomevec, chromSize);
}
datain = mclapply(ibed, countingreads, readlen = rl, filelist = ibed, mc.cores = cornum)
#=======================> DONE! 


# ================== 
# Cross Correlation
# ==================
xcorr = unlist(mclapply(datain, xc, mc.cores = cornum))
#=======================> DONE! 


# ===================== 
# Write result to File
# =====================
for (i in 1:length(xcorr)) {
	filename = strsplit(ibed[i], "/", fixed = TRUE)[[1]]
	filename = filename[length(filename)]
	filename = strsplit(filename, ".", fixed = TRUE)[[1]]
	filename = filename[3]
	if ((xcorr[i] <= 500) && (xcorr[i] >= 50)) { #only write this to file if xcorr value was plausible
		message(paste0(chromName, ", ", filename, ": Ok!"))
		write(paste(chromName, xcorr[i], sep = "\t"), file = paste0(storeFile, "/xc.", filename, ".tab"), append = TRUE)
	} else {
		message(paste0(chromName, ", ", filename, ": Value Not Used!"))
	}
}
#=======================> DONE!