1
|
1 ########################################################################
|
|
2 # JAMMv1.0.7rev1 is a peak finder for joint analysis of NGS replicates.
|
|
3 # Copyright (C) 2014-2015 Mahmoud Ibrahim
|
|
4 #
|
|
5 # This program is free software: you can redistribute it and/or modify
|
|
6 # it under the terms of the GNU General Public License as published by
|
|
7 # the Free Software Foundation, either version 3 of the License, or
|
|
8 # (at your option) any later version.
|
|
9 #
|
|
10 # This program is distributed in the hope that it will be useful,
|
|
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
13 # GNU General Public License for more details.
|
|
14 #
|
|
15 # You should have received a copy of the GNU General Public License
|
|
16 # along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
17 #
|
|
18 # Contact: mahmoud.ibrahim@mdc-berlin.de
|
|
19 ########################################################################
|
0
|
20
|
|
21
|
|
22
|
|
23 # =======================
|
|
24 # User-defined variables
|
|
25 # =======================
|
|
26 options(warn = -1, scipen = 1000) #R will not report any warnings (warn = -1), R will not use scientific notation (scipen = 1000)
|
|
27 #=======================> DONE!
|
|
28
|
|
29
|
|
30
|
|
31
|
|
32 # ==============================================
|
|
33 # Parsing Arguments (source: phantom SPP script)
|
|
34 # ==============================================
|
|
35 args = commandArgs(trailingOnly = TRUE) # Read Arguments from command line
|
|
36
|
|
37
|
|
38 #Parsing arguments and storing values
|
|
39 for (each.arg in args) {
|
|
40 #bed file names
|
|
41 if (grepl('^-filelist=',each.arg)) {
|
|
42 arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] # split on =
|
|
43 if (! is.na(arg.split[2]) ) {
|
|
44 bednames <- arg.split[2]
|
|
45 } else {
|
|
46 stop('No bed file names')
|
|
47 }
|
|
48 }
|
|
49 }
|
|
50 options(stringsAsFactors = FALSE)
|
|
51 #=======================> DONE!
|
|
52
|
|
53
|
|
54
|
|
55 # =================
|
|
56 # Custom Functions
|
|
57 # =================
|
|
58 score <- function(x){
|
|
59 x = ((x-min(x))/(max(x)-min(x)))*10000
|
|
60 return(x)
|
|
61 }
|
|
62
|
|
63 fixnpf = function(bedfile) {
|
|
64
|
|
65 writethis = read.table(bedfile, header=FALSE)
|
|
66
|
|
67
|
|
68 message(paste0("Minimum Peak Width Used to Produce Filtered List: ", writethis$V11[1]))
|
|
69 writethis$V9 = p.adjust(writethis$V8, method = "BH")
|
|
70 writethis$V8[writethis$V8 == 0] = 10
|
|
71 writethis$V8[writethis$V8 == 10] = min(writethis$V8)
|
|
72 writethis$V8 = -(log10(writethis$V8))
|
|
73 writethis$V9[writethis$V9 == 0] = 10
|
|
74 writethis$V9[writethis$V9 == 10] = min(writethis$V9)
|
|
75 writethis$V9 = -(log10(writethis$V9))
|
|
76 writethis$V7 = as.numeric(writethis$V7) * writethis$V9
|
|
77 writethis$V5 = score(as.numeric(writethis$V7))
|
|
78
|
|
79
|
|
80 #find peak score cutoff (pareto distribution)
|
|
81 sizes = writethis$V3 - writethis$V2
|
|
82 scores = writethis$V7
|
|
83 ss = cbind(sizes, scores)
|
|
84 ss = ss[ss[,1] > writethis$V11[1],,drop=FALSE]
|
|
85 scores = ss[,2]
|
|
86 scores = scores[scores > 0]
|
|
87 num = length(scores)
|
|
88 if (length(scores) > 300000) {
|
|
89 scores = scores[1:300000]
|
|
90 num = 300000
|
|
91 }
|
|
92 den = log(min(scores))
|
|
93 den = sum(log(scores) - den)
|
|
94 alpha = num / den
|
|
95 geom = (exp(1/alpha)) * (min(scores))
|
|
96 message(paste0("Minimum Peak Score Used to Produce Filtered List: ", geom))
|
|
97
|
|
98 #done now write something
|
|
99 write(paste(writethis$V1, writethis$V2, writethis$V3, writethis$V4, writethis$V5, ".", writethis$V7, writethis$V8, writethis$V9, writethis$V10, writethis$V11, geom, sep = "\t"), file = bedfile, ncolumns = 1)
|
|
100 return(bedfile)
|
|
101 }
|
|
102 #=======================> DONE!
|
|
103
|
|
104
|
|
105
|
|
106 # ==================
|
|
107 # Fixing narrowpeak
|
|
108 # ==================
|
|
109 bedfiles = fixnpf(bednames) #read in all bed files (samples and control)
|
|
110 #=======================> DONE!
|