1
|
1 ########################################################################
|
|
2 # JAMMv1.0.7rev1 is a peak finder for joint analysis of NGS replicates.
|
|
3 # Copyright (C) 2014-2015 Mahmoud Ibrahim
|
|
4 #
|
|
5 # This program is free software: you can redistribute it and/or modify
|
|
6 # it under the terms of the GNU General Public License as published by
|
|
7 # the Free Software Foundation, either version 3 of the License, or
|
|
8 # (at your option) any later version.
|
|
9 #
|
|
10 # This program is distributed in the hope that it will be useful,
|
|
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
13 # GNU General Public License for more details.
|
|
14 #
|
|
15 # You should have received a copy of the GNU General Public License
|
|
16 # along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
17 #
|
|
18 # Contact: mahmoud.ibrahim@mdc-berlin.de
|
|
19 ########################################################################
|
0
|
20
|
|
21
|
|
22 # =========================
|
|
23 # Required Libraries check
|
|
24 # =========================
|
|
25 if ((is.element('parallel', installed.packages()[,1]) == FALSE)) {
|
|
26 stop("R package'parallel' is required. Please install it!")
|
|
27 }
|
|
28 suppressPackageStartupMessages(library("parallel"))
|
|
29 #=======================> DONE!
|
|
30
|
|
31
|
|
32
|
|
33
|
|
34 # ==============================================
|
|
35 # Parsing Arguments (source: phantom SPP script)
|
|
36 # ==============================================
|
|
37 args = commandArgs(trailingOnly = TRUE) # Read Arguments from command line
|
|
38
|
|
39
|
|
40 #Set arguments to default values
|
|
41 ibed = NA # input bed file
|
|
42 sFile = NA # chromosome size
|
|
43 storeFile = NA # file to store result
|
|
44
|
|
45 #Parsing arguments and storing values
|
|
46 for (each.arg in args) {
|
|
47 #input bed file
|
|
48 if (grepl('^-ibed=',each.arg)) {
|
1
|
49 arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]]
|
0
|
50 if (! is.na(arg.split[2]) ) {
|
|
51 ibed <- arg.split[2]
|
|
52 } else {
|
|
53 stop('No input bed file')
|
|
54 }
|
|
55 }
|
|
56 if (grepl('^-s=',each.arg)) {
|
1
|
57 arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]]
|
0
|
58 if (! is.na(arg.split[2]) ) {
|
|
59 sFile <- arg.split[2]
|
|
60 } else {
|
|
61 stop('No chromosome size file')
|
|
62 }
|
|
63 }
|
|
64 if (grepl('^-rl=',each.arg)) {
|
1
|
65 arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]]
|
0
|
66 if (! is.na(arg.split[2]) ) {
|
|
67 rl <- arg.split[2]
|
|
68 } else {
|
|
69 stop('Read length missing')
|
|
70 }
|
|
71 }
|
|
72 if (grepl('^-d=',each.arg)) {
|
1
|
73 arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]]
|
0
|
74 if (! is.na(arg.split[2]) ) {
|
|
75 storeFile <- arg.split[2]
|
|
76 } else {
|
|
77 stop('No file to store result')
|
|
78 }
|
|
79 }
|
|
80 if (grepl('^-p=',each.arg)) {
|
1
|
81 arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]]
|
0
|
82 if (! is.na(arg.split[2]) ) {
|
|
83 cornum <- as.numeric(arg.split[2])
|
|
84 } else {
|
|
85 stop('No number of cores given')
|
|
86 }
|
|
87 }
|
|
88 }
|
|
89
|
|
90 #Read in variables
|
|
91 chromosomes = read.table(sFile, header=FALSE)
|
|
92 chromName = chromosomes$V1 #which chromosome
|
|
93 chromSize = as.numeric(chromosomes$V2) #chromosome size
|
|
94 rm(chromosomes)
|
|
95
|
|
96 ibed = strsplit(ibed, ",", fixed = TRUE)[[1]]
|
|
97 rl = as.numeric(strsplit(rl, ",", fixed = TRUE)[[1]])
|
|
98 #=======================> DONE!
|
|
99
|
|
100
|
|
101 counting = function(readst, br){
|
|
102 return(hist(readst, breaks = br, plot = FALSE)$counts)
|
|
103 }
|
|
104
|
|
105
|
|
106 countingreads = function(bedfile, readlen, filelist) {
|
|
107 reads = read.table(bedfile, header = FALSE)
|
|
108 reads = cbind(as.character(reads[[1]]), as.character(reads[[2]]))
|
|
109 o = which(filelist == bedfile)
|
|
110 tri = list(as.numeric(reads[reads[,2] == "+",,drop = FALSE][,1]), (as.numeric(reads[reads[,2] == "-",,drop = FALSE][,1])) + readlen[o] - 1)
|
|
111 readcounts = mclapply(tri, counting, br = genomevec, mc.cores = cornum)
|
|
112 return(readcounts)
|
|
113 }
|
|
114
|
|
115 xc = function(countlist) {
|
|
116 crossCorrelation = ccf(countlist[[2]], countlist[[1]], plot = FALSE); #xcorr
|
|
117 crossCorrelation$lag = crossCorrelation$lag * 20; #correct lag for counts window
|
|
118 maxCorr = which.max(crossCorrelation$acf);
|
|
119 maxCorr = abs(crossCorrelation$lag[maxCorr]);
|
|
120 return(maxCorr)
|
|
121 }
|
|
122
|
|
123
|
|
124 # ==================
|
|
125 # Read Start Count
|
|
126 # ==================
|
|
127 #make the window vector (a vector with start positions of chromosome windows)
|
|
128 genomevec = seq(0, chromSize, by = 20);
|
|
129 if (max(genomevec) < chromSize) {
|
|
130 genomevec = append(genomevec, chromSize);
|
|
131 }
|
|
132 datain = mclapply(ibed, countingreads, readlen = rl, filelist = ibed, mc.cores = cornum)
|
|
133 #=======================> DONE!
|
|
134
|
|
135
|
|
136 # ==================
|
|
137 # Cross Correlation
|
|
138 # ==================
|
|
139 xcorr = unlist(mclapply(datain, xc, mc.cores = cornum))
|
|
140 #=======================> DONE!
|
|
141
|
|
142
|
|
143 # =====================
|
|
144 # Write result to File
|
|
145 # =====================
|
|
146 for (i in 1:length(xcorr)) {
|
|
147 filename = strsplit(ibed[i], "/", fixed = TRUE)[[1]]
|
|
148 filename = filename[length(filename)]
|
|
149 filename = strsplit(filename, ".", fixed = TRUE)[[1]]
|
|
150 filename = filename[3]
|
|
151 if ((xcorr[i] <= 500) && (xcorr[i] >= 50)) { #only write this to file if xcorr value was plausible
|
|
152 message(paste0(chromName, ", ", filename, ": Ok!"))
|
|
153 write(paste(chromName, xcorr[i], sep = "\t"), file = paste0(storeFile, "/xc.", filename, ".tab"), append = TRUE)
|
|
154 } else {
|
|
155 message(paste0(chromName, ", ", filename, ": Value Not Used!"))
|
|
156 }
|
|
157 }
|
|
158 #=======================> DONE!
|