annotate xcorr.r @ 1:243f75d0ed6e draft default tip

Uploaded. Includes new release 1.0.7 with fixed optional controls.
author messersc
date Thu, 19 Feb 2015 05:39:45 -0500
parents d42f4d78c85e
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
1 ########################################################################
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
2 # JAMMv1.0.7rev1 is a peak finder for joint analysis of NGS replicates.
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
3 # Copyright (C) 2014-2015 Mahmoud Ibrahim
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
4 #
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
5 # This program is free software: you can redistribute it and/or modify
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
6 # it under the terms of the GNU General Public License as published by
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
7 # the Free Software Foundation, either version 3 of the License, or
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
8 # (at your option) any later version.
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
9 #
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
10 # This program is distributed in the hope that it will be useful,
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
13 # GNU General Public License for more details.
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
14 #
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
15 # You should have received a copy of the GNU General Public License
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
16 # along with this program. If not, see <http://www.gnu.org/licenses/>.
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
17 #
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
18 # Contact: mahmoud.ibrahim@mdc-berlin.de
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
19 ########################################################################
0
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
20
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
21
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
22 # =========================
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
23 # Required Libraries check
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
24 # =========================
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
25 if ((is.element('parallel', installed.packages()[,1]) == FALSE)) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
26 stop("R package'parallel' is required. Please install it!")
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
27 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
28 suppressPackageStartupMessages(library("parallel"))
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
29 #=======================> DONE!
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
30
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
31
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
32
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
33
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
34 # ==============================================
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
35 # Parsing Arguments (source: phantom SPP script)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
36 # ==============================================
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
37 args = commandArgs(trailingOnly = TRUE) # Read Arguments from command line
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
38
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
39
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
40 #Set arguments to default values
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
41 ibed = NA # input bed file
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
42 sFile = NA # chromosome size
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
43 storeFile = NA # file to store result
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
44
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
45 #Parsing arguments and storing values
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
46 for (each.arg in args) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
47 #input bed file
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
48 if (grepl('^-ibed=',each.arg)) {
1
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
49 arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]]
0
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
50 if (! is.na(arg.split[2]) ) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
51 ibed <- arg.split[2]
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
52 } else {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
53 stop('No input bed file')
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
54 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
55 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
56 if (grepl('^-s=',each.arg)) {
1
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
57 arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]]
0
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
58 if (! is.na(arg.split[2]) ) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
59 sFile <- arg.split[2]
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
60 } else {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
61 stop('No chromosome size file')
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
62 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
63 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
64 if (grepl('^-rl=',each.arg)) {
1
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
65 arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]]
0
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
66 if (! is.na(arg.split[2]) ) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
67 rl <- arg.split[2]
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
68 } else {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
69 stop('Read length missing')
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
70 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
71 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
72 if (grepl('^-d=',each.arg)) {
1
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
73 arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]]
0
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
74 if (! is.na(arg.split[2]) ) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
75 storeFile <- arg.split[2]
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
76 } else {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
77 stop('No file to store result')
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
78 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
79 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
80 if (grepl('^-p=',each.arg)) {
1
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
81 arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]]
0
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
82 if (! is.na(arg.split[2]) ) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
83 cornum <- as.numeric(arg.split[2])
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
84 } else {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
85 stop('No number of cores given')
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
86 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
87 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
88 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
89
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
90 #Read in variables
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
91 chromosomes = read.table(sFile, header=FALSE)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
92 chromName = chromosomes$V1 #which chromosome
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
93 chromSize = as.numeric(chromosomes$V2) #chromosome size
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
94 rm(chromosomes)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
95
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
96 ibed = strsplit(ibed, ",", fixed = TRUE)[[1]]
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
97 rl = as.numeric(strsplit(rl, ",", fixed = TRUE)[[1]])
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
98 #=======================> DONE!
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
99
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
100
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
101 counting = function(readst, br){
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
102 return(hist(readst, breaks = br, plot = FALSE)$counts)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
103 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
104
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
105
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
106 countingreads = function(bedfile, readlen, filelist) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
107 reads = read.table(bedfile, header = FALSE)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
108 reads = cbind(as.character(reads[[1]]), as.character(reads[[2]]))
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
109 o = which(filelist == bedfile)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
110 tri = list(as.numeric(reads[reads[,2] == "+",,drop = FALSE][,1]), (as.numeric(reads[reads[,2] == "-",,drop = FALSE][,1])) + readlen[o] - 1)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
111 readcounts = mclapply(tri, counting, br = genomevec, mc.cores = cornum)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
112 return(readcounts)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
113 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
114
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
115 xc = function(countlist) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
116 crossCorrelation = ccf(countlist[[2]], countlist[[1]], plot = FALSE); #xcorr
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
117 crossCorrelation$lag = crossCorrelation$lag * 20; #correct lag for counts window
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
118 maxCorr = which.max(crossCorrelation$acf);
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
119 maxCorr = abs(crossCorrelation$lag[maxCorr]);
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
120 return(maxCorr)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
121 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
122
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
123
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
124 # ==================
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
125 # Read Start Count
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
126 # ==================
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
127 #make the window vector (a vector with start positions of chromosome windows)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
128 genomevec = seq(0, chromSize, by = 20);
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
129 if (max(genomevec) < chromSize) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
130 genomevec = append(genomevec, chromSize);
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
131 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
132 datain = mclapply(ibed, countingreads, readlen = rl, filelist = ibed, mc.cores = cornum)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
133 #=======================> DONE!
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
134
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
135
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
136 # ==================
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
137 # Cross Correlation
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
138 # ==================
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
139 xcorr = unlist(mclapply(datain, xc, mc.cores = cornum))
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
140 #=======================> DONE!
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
141
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
142
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
143 # =====================
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
144 # Write result to File
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
145 # =====================
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
146 for (i in 1:length(xcorr)) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
147 filename = strsplit(ibed[i], "/", fixed = TRUE)[[1]]
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
148 filename = filename[length(filename)]
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
149 filename = strsplit(filename, ".", fixed = TRUE)[[1]]
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
150 filename = filename[3]
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
151 if ((xcorr[i] <= 500) && (xcorr[i] >= 50)) { #only write this to file if xcorr value was plausible
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
152 message(paste0(chromName, ", ", filename, ": Ok!"))
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
153 write(paste(chromName, xcorr[i], sep = "\t"), file = paste0(storeFile, "/xc.", filename, ".tab"), append = TRUE)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
154 } else {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
155 message(paste0(chromName, ", ", filename, ": Value Not Used!"))
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
156 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
157 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
158 #=======================> DONE!