Mercurial > repos > messersc > jamm
diff xcorr.r @ 0:d42f4d78c85e draft
Uploaded
author | messersc |
---|---|
date | Wed, 17 Dec 2014 10:40:23 -0500 |
parents | |
children | 243f75d0ed6e |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/xcorr.r Wed Dec 17 10:40:23 2014 -0500 @@ -0,0 +1,143 @@ +############################### +#### Fragment Length Calculator +#### R script +############################### + + +# ========================= +# Required Libraries check +# ========================= +if ((is.element('parallel', installed.packages()[,1]) == FALSE)) { + stop("R package'parallel' is required. Please install it!") +} +suppressPackageStartupMessages(library("parallel")) +#=======================> DONE! + + + + +# ============================================== +# Parsing Arguments (source: phantom SPP script) +# ============================================== +args = commandArgs(trailingOnly = TRUE) # Read Arguments from command line + + +#Set arguments to default values +ibed = NA # input bed file +sFile = NA # chromosome size +storeFile = NA # file to store result + +#Parsing arguments and storing values +for (each.arg in args) { + #input bed file + if (grepl('^-ibed=',each.arg)) { + arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] # split on = + if (! is.na(arg.split[2]) ) { + ibed <- arg.split[2] + } else { + stop('No input bed file') + } + } + if (grepl('^-s=',each.arg)) { + arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] # split on = + if (! is.na(arg.split[2]) ) { + sFile <- arg.split[2] + } else { + stop('No chromosome size file') + } + } + if (grepl('^-rl=',each.arg)) { + arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] # split on = + if (! is.na(arg.split[2]) ) { + rl <- arg.split[2] + } else { + stop('Read length missing') + } + } + if (grepl('^-d=',each.arg)) { + arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] # split on = + if (! is.na(arg.split[2]) ) { + storeFile <- arg.split[2] + } else { + stop('No file to store result') + } + } + if (grepl('^-p=',each.arg)) { + arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] # split on = + if (! is.na(arg.split[2]) ) { + cornum <- as.numeric(arg.split[2]) + } else { + stop('No number of cores given') + } + } +} + +#Read in variables +chromosomes = read.table(sFile, header=FALSE) +chromName = chromosomes$V1 #which chromosome +chromSize = as.numeric(chromosomes$V2) #chromosome size +rm(chromosomes) + +ibed = strsplit(ibed, ",", fixed = TRUE)[[1]] +rl = as.numeric(strsplit(rl, ",", fixed = TRUE)[[1]]) +#=======================> DONE! + + +counting = function(readst, br){ + return(hist(readst, breaks = br, plot = FALSE)$counts) +} + + +countingreads = function(bedfile, readlen, filelist) { + reads = read.table(bedfile, header = FALSE) + reads = cbind(as.character(reads[[1]]), as.character(reads[[2]])) + o = which(filelist == bedfile) + tri = list(as.numeric(reads[reads[,2] == "+",,drop = FALSE][,1]), (as.numeric(reads[reads[,2] == "-",,drop = FALSE][,1])) + readlen[o] - 1) + readcounts = mclapply(tri, counting, br = genomevec, mc.cores = cornum) + return(readcounts) +} + +xc = function(countlist) { + crossCorrelation = ccf(countlist[[2]], countlist[[1]], plot = FALSE); #xcorr + crossCorrelation$lag = crossCorrelation$lag * 20; #correct lag for counts window + maxCorr = which.max(crossCorrelation$acf); + maxCorr = abs(crossCorrelation$lag[maxCorr]); + return(maxCorr) +} + + +# ================== +# Read Start Count +# ================== +#make the window vector (a vector with start positions of chromosome windows) +genomevec = seq(0, chromSize, by = 20); +if (max(genomevec) < chromSize) { + genomevec = append(genomevec, chromSize); +} +datain = mclapply(ibed, countingreads, readlen = rl, filelist = ibed, mc.cores = cornum) +#=======================> DONE! + + +# ================== +# Cross Correlation +# ================== +xcorr = unlist(mclapply(datain, xc, mc.cores = cornum)) +#=======================> DONE! + + +# ===================== +# Write result to File +# ===================== +for (i in 1:length(xcorr)) { + filename = strsplit(ibed[i], "/", fixed = TRUE)[[1]] + filename = filename[length(filename)] + filename = strsplit(filename, ".", fixed = TRUE)[[1]] + filename = filename[3] + if ((xcorr[i] <= 500) && (xcorr[i] >= 50)) { #only write this to file if xcorr value was plausible + message(paste0(chromName, ", ", filename, ": Ok!")) + write(paste(chromName, xcorr[i], sep = "\t"), file = paste0(storeFile, "/xc.", filename, ".tab"), append = TRUE) + } else { + message(paste0(chromName, ", ", filename, ": Value Not Used!")) + } +} +#=======================> DONE!