annotate peakhelper.r @ 1:243f75d0ed6e draft default tip

Uploaded. Includes new release 1.0.7 with fixed optional controls.
author messersc
date Thu, 19 Feb 2015 05:39:45 -0500
parents d42f4d78c85e
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
1 ########################################################################
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
2 # JAMMv1.0.7rev1 is a peak finder for joint analysis of NGS replicates.
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
3 # Copyright (C) 2014-2015 Mahmoud Ibrahim
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
4 #
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
5 # This program is free software: you can redistribute it and/or modify
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
6 # it under the terms of the GNU General Public License as published by
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
7 # the Free Software Foundation, either version 3 of the License, or
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
8 # (at your option) any later version.
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
9 #
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
10 # This program is distributed in the hope that it will be useful,
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
13 # GNU General Public License for more details.
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
14 #
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
15 # You should have received a copy of the GNU General Public License
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
16 # along with this program. If not, see <http://www.gnu.org/licenses/>.
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
17 #
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
18 # Contact: mahmoud.ibrahim@mdc-berlin.de
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
19 ########################################################################
0
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
20
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
21
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
22
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
23 # =======================
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
24 # User-defined variables
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
25 # =======================
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
26 options(warn = -1, scipen = 1000) #R will not report any warnings (warn = -1), R will not use scientific notation (scipen = 1000)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
27 #=======================> DONE!
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
28
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
29
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
30
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
31
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
32 # ==============================================
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
33 # Parsing Arguments (source: phantom SPP script)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
34 # ==============================================
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
35 args = commandArgs(trailingOnly = TRUE) # Read Arguments from command line
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
36
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
37
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
38 #Parsing arguments and storing values
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
39 for (each.arg in args) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
40 #bed file names
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
41 if (grepl('^-filelist=',each.arg)) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
42 arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] # split on =
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
43 if (! is.na(arg.split[2]) ) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
44 bednames <- arg.split[2]
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
45 } else {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
46 stop('No bed file names')
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
47 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
48 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
49 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
50 options(stringsAsFactors = FALSE)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
51 #=======================> DONE!
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
52
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
53
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
54
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
55 # =================
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
56 # Custom Functions
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
57 # =================
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
58 score <- function(x){
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
59 x = ((x-min(x))/(max(x)-min(x)))*10000
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
60 return(x)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
61 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
62
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
63 fixnpf = function(bedfile) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
64
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
65 writethis = read.table(bedfile, header=FALSE)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
66
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
67
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
68 message(paste0("Minimum Peak Width Used to Produce Filtered List: ", writethis$V11[1]))
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
69 writethis$V9 = p.adjust(writethis$V8, method = "BH")
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
70 writethis$V8[writethis$V8 == 0] = 10
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
71 writethis$V8[writethis$V8 == 10] = min(writethis$V8)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
72 writethis$V8 = -(log10(writethis$V8))
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
73 writethis$V9[writethis$V9 == 0] = 10
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
74 writethis$V9[writethis$V9 == 10] = min(writethis$V9)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
75 writethis$V9 = -(log10(writethis$V9))
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
76 writethis$V7 = as.numeric(writethis$V7) * writethis$V9
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
77 writethis$V5 = score(as.numeric(writethis$V7))
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
78
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
79
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
80 #find peak score cutoff (pareto distribution)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
81 sizes = writethis$V3 - writethis$V2
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
82 scores = writethis$V7
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
83 ss = cbind(sizes, scores)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
84 ss = ss[ss[,1] > writethis$V11[1],,drop=FALSE]
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
85 scores = ss[,2]
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
86 scores = scores[scores > 0]
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
87 num = length(scores)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
88 if (length(scores) > 300000) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
89 scores = scores[1:300000]
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
90 num = 300000
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
91 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
92 den = log(min(scores))
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
93 den = sum(log(scores) - den)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
94 alpha = num / den
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
95 geom = (exp(1/alpha)) * (min(scores))
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
96 message(paste0("Minimum Peak Score Used to Produce Filtered List: ", geom))
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
97
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
98 #done now write something
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
99 write(paste(writethis$V1, writethis$V2, writethis$V3, writethis$V4, writethis$V5, ".", writethis$V7, writethis$V8, writethis$V9, writethis$V10, writethis$V11, geom, sep = "\t"), file = bedfile, ncolumns = 1)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
100 return(bedfile)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
101 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
102 #=======================> DONE!
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
103
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
104
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
105
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
106 # ==================
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
107 # Fixing narrowpeak
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
108 # ==================
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
109 bedfiles = fixnpf(bednames) #read in all bed files (samples and control)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
110 #=======================> DONE!