annotate peakfinder.r @ 1:243f75d0ed6e draft default tip

Uploaded. Includes new release 1.0.7 with fixed optional controls.
author messersc
date Thu, 19 Feb 2015 05:39:45 -0500
parents d42f4d78c85e
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
1 ########################################################################
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
2 # JAMMv1.0.7rev1 is a peak finder for joint analysis of NGS replicates.
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
3 # Copyright (C) 2014-2015 Mahmoud Ibrahim
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
4 #
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
5 # This program is free software: you can redistribute it and/or modify
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
6 # it under the terms of the GNU General Public License as published by
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
7 # the Free Software Foundation, either version 3 of the License, or
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
8 # (at your option) any later version.
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
9 #
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
10 # This program is distributed in the hope that it will be useful,
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
13 # GNU General Public License for more details.
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
14 #
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
15 # You should have received a copy of the GNU General Public License
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
16 # along with this program. If not, see <http://www.gnu.org/licenses/>.
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
17 #
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
18 # Contact: mahmoud.ibrahim@mdc-berlin.de
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
19 ########################################################################
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
20
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
21
0
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
22
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
23
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
24 # =======================
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
25 # User-defined variables
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
26 # =======================
1
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
27 samplingSeed = 1011414 #the seed makes the random sampling that JAMM does "deterministic". You can change this to your integer seed of choice to reproduce exact same results or randomize it this way: ceiling(rnorm(1,50,300)).
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
28 reportNoClust = "n" #report windows for which clustering failed? Reported windows will be marked by "NoClust" in the peak name (4th column).
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
29 cutoff = NA #To enforce an SNR cutoff ratio for bin enrichment calling, delete NA and enter the number you want.
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
30 strict = 1 #To make bin enrichment calling more / less strict, increase / decrease this number.
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
31 meanAdjust = "n" #Adjust the initialization mean vector for each window before clustering? If you want to do so, change this to "y".
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
32 options(warn = -1, scipen = 1000) #R will not report any warnings (warn = -1), R will not use scientific notation (scipen = 1000).
0
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
33 #=======================> DONE!
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
34
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
35
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
36
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
37
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
38 # ================================
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
39 # Required Libraries check & load
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
40 # ================================
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
41 if ((is.element('mclust', installed.packages()[,1]) == FALSE) || (is.element('signal', installed.packages()[,1]) == FALSE) || (is.element('parallel', installed.packages()[,1]) == FALSE)) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
42 stop("R package 'mclust', 'signal' and 'parallel' are required. Please install them!")
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
43 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
44 suppressPackageStartupMessages(library("mclust"))
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
45 suppressPackageStartupMessages(library("signal"))
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
46 suppressPackageStartupMessages(library("parallel"))
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
47 #=======================> DONE!
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
48
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
49
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
50
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
51
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
52
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
53 # =================
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
54 # Custom Functions
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
55 # =================
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
56 #Get per-row Geometric mean (takes list, returns vectors, not lists!)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
57 geomeanL <- function(mat){
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
58 n = length(mat)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
59 if (n > 1) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
60 mult = (mat[[1]])*(mat[[2]])
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
61 if (n > 2) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
62 for (i in 3:n) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
63 mult = mult*(mat[[i]])
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
64 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
65 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
66 mult = mult^(1/n)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
67 mat = mult
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
68 return(mat)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
69 } else {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
70 return(mat[[1]])
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
71 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
72 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
73
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
74
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
75
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
76 #Get per-row Geometric mean (takes matrix, returns vectors, not lists!)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
77 geomean <- function(mat){
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
78 n = NCOL(mat)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
79 if (n > 1) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
80 mult = (mat[,1])*(mat[,2])
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
81 if (n > 2) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
82 for (i in 3:n) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
83 mult = mult*(mat[,i])
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
84 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
85 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
86 mult = mult^(1/n)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
87 mat = mult
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
88 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
89 return(mat)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
90 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
91
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
92
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
93
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
94 #Read in bed(st2) file
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
95 parsein = function(bedfile) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
96 l = read.table(bedfile, header = FALSE)[[1]]
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
97 l = l + 1
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
98 return(l)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
99 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
100
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
101
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
102 #Read in bedpe(st2) file
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
103 parseinpe = function(bedfile) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
104 l = read.table(bedfile, header = FALSE)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
105 l = cbind((l[[1]] + 1), l[[2]])
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
106 return(l)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
107 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
108
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
109
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
110 #Produces normalized extended read counts (takes output of parsein(), return a vector of floats)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
111 countreads = function(bedfile, reads, frag, chromsize, filelist, chrcount) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
112
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
113 o = which(filelist == bedfile)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
114
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
115 counts = vector(mode = "numeric", length = chromsize)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
116
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
117 for (j in 1:length(reads[[o]])) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
118 if ((reads[[o]][j]+frag[o]-1) <= chromsize) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
119 counts[(reads[[o]][j]):(reads[[o]][j]+frag[o]-1)] = counts[(reads[[o]][j]):(reads[[o]][j]+frag[o]-1)] + 1
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
120 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
121 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
122
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
123 mCount = mean(counts)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
124
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
125 if (chrcount == 1) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
126 counts = counts/mCount
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
127 write(paste(mCount), file = paste0(out, "/norma.", o, ".info"))
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
128 } else {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
129 meanCounts = mean(as.numeric(read.table(paste0(out, "/norma.", o, ".info"))[[1]]))
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
130 if ((mCount > (5*meanCounts)) || (mCount < (0.2*meanCounts))) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
131 mCount = meanCounts
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
132 } else {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
133 write(paste(mCount), file = paste0(out, "/norma.", o, ".info"), append = TRUE)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
134 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
135 counts = counts/mCount
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
136 }
1
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
137
0
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
138 return(counts)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
139 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
140
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
141
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
142
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
143 #Produces normalized extended read counts (takes output of parsein(), return a vector of floats)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
144 countreadspe = function(bedfile, reads, chromsize, filelist, chrcount) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
145
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
146 o = which(filelist == bedfile)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
147
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
148 counts = vector(mode = "numeric", length = chromsize)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
149
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
150 for (j in 1:length(reads[[o]][,1])) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
151 counts[(reads[[o]][j,1]):(reads[[o]][j,2])] = counts[(reads[[o]][j,1]):(reads[[o]][j,2])] + 1
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
152 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
153
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
154 mCount = mean(counts)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
155
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
156 if (chrcount == 1) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
157 counts = counts/mCount
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
158 write(paste(mCount), file = paste0(out, "/norma.", o, ".info"))
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
159 } else {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
160 meanCounts = mean(as.numeric(read.table(paste0(out, "/norma.", o, ".info"))[[1]]))
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
161 if ((mCount > (5*meanCounts)) || (mCount < (0.2*meanCounts))) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
162 mCount = meanCounts
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
163 } else {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
164 write(paste(mCount), file = paste0(out, "/norma.", o, ".info"), append = TRUE)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
165 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
166 counts = counts/mCount
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
167 }
1
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
168
0
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
169 return(counts)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
170 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
171
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
172
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
173
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
174 #find enriched bins
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
175 pickbins = function(winStart, counts, binSize, chromSize, numdup, C, cutoff, strict, mCs, dCs, bkgd) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
176
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
177 if ((winStart + binSize) <= chromSize) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
178 winEnd = winStart + binSize
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
179 } else {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
180 winEnd = chromSize
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
181 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
182 binSizeTemp = winEnd - winStart
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
183 tempend = winEnd - 1
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
184
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
185 #extract subset of the background
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
186 if (bkgd != "None") {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
187 Cs = counts[[numdup+1]][winStart:tempend]
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
188 mCs = mean(Cs)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
189 dCs = sd(Cs)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
190 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
191
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
192 go = rep(0, numdup)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
193 for (g in 1:numdup) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
194 mS = (mean(counts[[g]][winStart:tempend]))
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
195 ratio = mS/dCs
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
196 if ((mS > (mCs * strict)) && (ratio > cutoff)) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
197 go[g] = 1
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
198 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
199 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
200 veep = sum(go)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
201 return(veep)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
202 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
203
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
204
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
205
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
206
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
207 #find enriched wins
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
208 pickwins = function(winStart, coffeeshopSud, counts, numdup, startlist, winSize) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
209
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
210 plz = which(startlist == winStart)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
211 winEnd = coffeeshopSud[plz]
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
212 rWinSize = winEnd - winStart + 1
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
213
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
214
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
215 if(rWinSize >= winSize) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
216 mS = rep(0, numdup)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
217 for (g in 1:numdup) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
218 mS[g] = (mean(counts[[g]][winStart:winEnd]))
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
219 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
220 veep = mean(mS)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
221 } else {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
222 veep = FALSE
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
223 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
224
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
225 return(veep)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
226 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
227
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
228
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
229
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
230 #score windows for fast analysis
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
231 scorewindow = function(winStart, coffeeshopSud, numdup, C, bkgd, counts, startlist) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
232
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
233 plz = which(startlist == winStart)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
234 winEnd = coffeeshopSud[plz]
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
235
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
236 #will store peak information
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
237 writethis = list()
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
238
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
239 rWinSizeTemp = winEnd - winStart + 1
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
240
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
241 #extract subset of the IP
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
242 Rs = matrix(nrow = rWinSizeTemp, ncol = numdup)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
243 Rsr = Rs
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
244 for (j in 1:numdup) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
245 Rsr[,j] = counts[[j]][winStart:winEnd]
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
246 Rs[,j] = filtfilt(rep(1,80)/80,1,Rsr[,j])
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
247 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
248 #extract subset of the background
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
249 if (bkgd != "None") {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
250 Cs = counts[[numdup+1]]
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
251 Cmin = min(Cs[Cs > 0])
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
252 Cs = Cs[winStart:winEnd]
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
253 Cs = filtfilt(rep(1,80)/80,1,Cs) + Cmin #gets rid of Inf in the fold change
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
254 } else {
1
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
255 set.seed(samplingSeed)
0
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
256 Cs = sample(C, rWinSizeTemp, replace = TRUE)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
257 Cs = filtfilt(rep(1,80)/80,1,Cs)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
258 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
259
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
260 #start scoring
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
261 signal = (geomean(Rs))
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
262 cairo = (mean(signal)) / (mean(Cs))
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
263 return(cairo)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
264 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
265
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
266
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
267 #Initialize MClust clustering parameters
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
268 smoothcounts = function(winStart, coffeeshopSud, numdup, counts, startlist) { #helper function1
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
269
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
270 plz = which(startlist == winStart)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
271 winEnd = coffeeshopSud[plz]
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
272
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
273 #extract subset of the IP
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
274 Rs = matrix(0, nrow = (winEnd - winStart + 1), ncol = numdup)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
275 for (j in 1:numdup) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
276 Rs[,j] = counts[[j]][winStart:winEnd]
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
277 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
278 #smooth extended read counts
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
279 for (j in 1:numdup) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
280 Rs[,j] = filtfilt(rep(1,80)/80,1,Rs[,j])
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
281 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
282 return(Rs)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
283 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
284 cluster = function(model, sig, init, clustnummer, noise) { #helper function2
1
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
285 set.seed(samplingSeed)
0
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
286 noisy = sample(noise, length(sig[,1]), replace = TRUE)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
287 clust = me(model, sig+noisy, init)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
288 bicc = bic(model, clust$loglik, length(sig[,1]), length(sig[1,]), clustnummer)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
289 out = list(bicc = bicc, param = clust$parameters)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
290 return(out)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
291 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
292 initparam = function(coffeeshopNord, coffeeshopSud, numdup, counts, cornum, clustnummer, modelnames, noise) { #main function
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
293
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
294 n = length(coffeeshopNord)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
295 #smooth extended read counts
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
296 if (cornum > 1) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
297 sig = mclapply(coffeeshopNord, smoothcounts, coffeeshopSud, numdup, counts, startlist = coffeeshopNord, mc.cores = cornum, mc.preschedule = TRUE)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
298 } else {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
299 sig = lapply(coffeeshopNord, smoothcounts, coffeeshopSud, numdup, counts, startlist = coffeeshopNord)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
300 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
301 sig = do.call(rbind, sig)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
302
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
303 #kmeans initialization
1
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
304 set.seed(samplingSeed)
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
305 init = kmeans(sig, clustnummer, nstart = 20)
0
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
306 init = unmap(init$cluster)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
307
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
308
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
309 if (cornum > 1) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
310 param = mclapply(modelnames, cluster, sig, init, clustnummer, noise, mc.cores = cornum, mc.preschedule = TRUE)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
311 } else {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
312 param = lapply(modelnames, cluster, sig, init, clustnummer, noise)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
313 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
314
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
315 bicc = vector(mode = "numeric", length = length(modelnames))
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
316 for (i in 1:length(modelnames)) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
317 bicc[i] = as.numeric(param[[i]]$bicc)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
318 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
319 bicc = which.max(bicc)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
320
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
321 out = list(initparam = param[[bicc]]$param, modelname = modelnames[bicc])
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
322 return(out)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
323 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
324
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
325
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
326
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
327
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
328 #find peaks
1
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
329 findpeak = function(winStart, coffeeshopSud, numdup, C, param, bkgd, resol, counts, noise, startlist, meanAdjust, clustnummer) {
0
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
330
1
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
331
0
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
332 plz = which(startlist == winStart)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
333 winEnd = coffeeshopSud[plz]
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
334
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
335
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
336 #will store peak information
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
337 writethis = list()
1
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
338 ccx = 1 #default is clustering didNOT work
0
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
339
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
340
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
341 rWinSizeTemp = winEnd - winStart + 1
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
342
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
343 #extract subset of the IP
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
344 Rs = matrix(nrow = rWinSizeTemp, ncol = numdup)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
345 Rsr = Rs
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
346 if (meanAdjust == "y") {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
347 for (j in 1:numdup) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
348 Rsr[,j] = counts[[j]][winStart:winEnd]
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
349 Rs[,j] = filtfilt(rep(1,80)/80,1,Rsr[,j])
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
350 kabel = which.max(param$init$mean[j,])
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
351 param$initparam$mean[j,kabel] = mean(Rs[,j])
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
352 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
353 } else {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
354 for (j in 1:numdup) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
355 Rsr[,j] = counts[[j]][winStart:winEnd]
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
356 Rs[,j] = filtfilt(rep(1,80)/80,1,Rsr[,j])
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
357 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
358 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
359
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
360 if (resol != "window") {
1
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
361
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
362 #clustering (take 1)
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
363 take = 1
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
364 set.seed(samplingSeed)
0
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
365 noisy = sample(noise, rWinSizeTemp, replace = TRUE)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
366 clust = em(param$modelname, Rs+noisy, param$initparam)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
367 clust$classification = map(clust$z)
1
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
368 if (!((any(diff(clust$classification)) != 0) && (!(any(is.na(clust$classification)))))) { #clustering didn't work, take1
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
369
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
370 #repeat clustering from scratch, take 2!
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
371 set.seed(samplingSeed)
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
372 init = kmeans(Rs, clustnummer, nstart = 20)
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
373 init = unmap(init$cluster)
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
374 set.seed(samplingSeed)
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
375 noisy = sample(noise, rWinSizeTemp, replace = TRUE)
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
376 clust = me(param$modelname, Rs+noisy, init)
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
377 clust$classification = map(clust$z)
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
378 if ((any(diff(clust$classification)) != 0) && (!(any(is.na(clust$classification))))) {
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
379 ccx = 0 #clustering worked, take2
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
380 take = 2
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
381 }
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
382 } else {ccx = 0} #clustering worked, take1
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
383
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
384 if (ccx != 1) { #clustering worked either in take1 or take2
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
385
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
386 if (numdup > 1) { #check whether all components replicates agreed on clustering assignments
0
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
387 cc = vector(mode = "numeric", length = numdup)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
388 for (g in 1:numdup) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
389 cc[g] = which.max(clust$parameters$mean[g,]) #which cluster has the largest mean (this is the peak cluster, hopefully!)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
390 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
391 ccx = sum(diff(cc))
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
392 cluster = cc[1]
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
393 rm(cc)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
394
1
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
395 if ((ccx != 0) && (take == 1)) { #not all replicates agreed? Repeat the clustering with from scratch if not already done!
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
396 set.seed(samplingSeed)
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
397 init = kmeans(Rs, clustnummer, nstart = 20)
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
398 init = unmap(init$cluster)
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
399 set.seed(samplingSeed)
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
400 noisy = sample(noise, rWinSizeTemp, replace = TRUE)
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
401 clust = me(param$modelname, Rs+noisy, init)
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
402 clust$classification = map(clust$z)
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
403 if ((any(diff(clust$classification)) != 0) && (!(any(is.na(clust$classification))))) { #clustering worked? check whether replicates agreed take 3
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
404 cc = vector(mode = "numeric", length = numdup)
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
405 for (g in 1:numdup) {
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
406 cc[g] = which.max(clust$parameters$mean[g,]) #which cluster has the largest mean (this is the peak cluster, hopefully!)
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
407 }
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
408 ccx = sum(diff(cc))
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
409 cluster = cc[1]
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
410 rm(cc)
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
411 take = 3
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
412 }
0
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
413 }
1
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
414 } else { #no replicates!
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
415 cluster = which.max(clust$parameters$mean) #which cluster has the largest mean (this is the peak cluster, hopefully!)
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
416 }
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
417 }
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
418
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
419 if ((ccx != 0) && (reportNoClust=="y")) { resol = "window" } #clustering did not work and windows should be reported
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
420
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
421
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
422 if (ccx == 0) { #clustering worked and all replicates agree on the cluster assignments
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
423
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
424 #extract subset of the background
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
425 if (bkgd != "None") {
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
426 Cs = counts[[numdup+1]][winStart:winEnd]
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
427 Cs = filtfilt(rep(1,80)/80,1,Cs)
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
428 } else {
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
429 set.seed(samplingSeed)
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
430 Cs = sample(C, rWinSizeTemp, replace = TRUE)
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
431 Cs = filtfilt(rep(1,80)/80,1,Cs)
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
432 }
0
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
433
1
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
434 #find region boundaries
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
435 loc = 1:length(clust$classification)
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
436 gmclass = cbind(loc, clust$classification)
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
437 locPeak = gmclass[gmclass[,2] == cluster,,drop=FALSE]
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
438 rStart = locPeak[1] #start position of the region
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
439 rEnd = locPeak[length(locPeak[,1]),1] #end position of the region
0
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
440
1
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
441 #peak resolution check
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
442 if (resol == "region") {
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
443 pSize = rEnd - rStart
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
444 signal = (geomean(Rs[rStart:rEnd,]))
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
445 signal2 = (signal) - (Cs[rStart:rEnd])
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
446 gm = mean(signal2)
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
447 summit = which.max(geomean(Rsr[rStart:rEnd,])) - 1
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
448 will2k = wilcox.test(signal, Cs[rStart:rEnd])
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
449
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
450 #Is there signal in the region above background
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
451 if (gm > 0) {
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
452 writethis[[1]] = rStart + winStart - 1
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
453 writethis[[2]] = rEnd + winStart
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
454 writethis[[3]] = paste0(chromName, ".", rStart+winStart -1)
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
455 writethis[[4]] = "1000"
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
456 writethis[[5]] = "."
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
457 writethis[[6]] = gm
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
458 writethis[[7]] = will2k$p.value
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
459 writethis[[8]] = "-1"
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
460 writethis[[9]] = summit
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
461 }
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
462 } else if (resol == "peak") {
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
463 #find out where separate peaks are
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
464 d = diff(locPeak[,1])
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
465 d[length(d)+1] = 0
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
466 locPeak = cbind(locPeak, d)
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
467 bound1 = which(locPeak[,3] > 1, arr.in=TRUE)
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
468 bound2 = bound1 + 1
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
469 bound = locPeak[sort(c(bound1,bound2))]
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
470 bound = c(rStart, bound, rEnd)
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
471 w = 1
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
472 warum = 0
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
473 while (w < length(bound)) {
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
474 pStart = bound[w] + winStart - 1
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
475 pEnd = bound[w+1] + winStart
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
476 pSize = pEnd - pStart
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
477 signal = (geomean(Rs[(bound[w]):(bound[w+1]),]))
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
478 signal2 = (signal) - (Cs[bound[w]:bound[w+1]])
0
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
479 gm = mean(signal2)
1
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
480 summit = which.max(geomean(Rsr[(bound[w]):(bound[w+1]),])) - 1
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
481 will2k = wilcox.test(signal, Cs[(bound[w]):(bound[w+1])])
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
482
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
483 weil = warum * 9
0
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
484 #Is there signal in the region above background
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
485 if (gm > 0) {
1
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
486 writethis[[1+weil]] = pStart
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
487 writethis[[2+weil]] = pEnd
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
488 writethis[[3+weil]] = paste0(chromName, ".", pStart)
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
489 writethis[[4+weil]] = "1000"
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
490 writethis[[5+weil]] = "."
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
491 writethis[[6+weil]] = gm
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
492 writethis[[7+weil]] = will2k$p.value
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
493 writethis[[8+weil]] = "-1"
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
494 writethis[[9+weil]] = summit
0
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
495 }
1
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
496 w = w + 2
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
497 warum = warum + 1
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
498 }
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
499 } #peak resolution check
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
500 } #clustering worked and all replicates agree on clustering assignments?
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
501 } #window resolution check
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
502
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
503 if (resol == "window") {
0
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
504
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
505 #extract subset of the background
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
506 if (bkgd != "None") {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
507 Cs = counts[[numdup+1]][winStart:winEnd]
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
508 Cs = filtfilt(rep(1,80)/80,1,Cs)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
509 } else {
1
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
510 set.seed(samplingSeed)
0
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
511 Cs = sample(C, rWinSizeTemp, replace = TRUE)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
512 Cs = filtfilt(rep(1,80)/80,1,Cs)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
513 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
514
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
515 #calculate scores
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
516 pSize = rWinSizeTemp
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
517 signal = geomean(Rs)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
518 signal2 = (signal) - (Cs)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
519 gm = mean(signal2)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
520 summit = which.max(geomean(Rsr)) - 1
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
521 will2k = wilcox.test(signal, Cs)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
522
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
523 #Is there signal in the region above background
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
524 if (gm > 0) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
525 writethis[[1]] = winStart - 1
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
526 writethis[[2]] = winEnd
1
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
527 writethis[[3]] = paste0(chromName, ".", winStart -1, ".NoClust")
0
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
528 writethis[[4]] = "1000"
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
529 writethis[[5]] = "."
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
530 writethis[[6]] = gm
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
531 writethis[[7]] = will2k$p.value
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
532 writethis[[8]] = "-1"
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
533 writethis[[9]] = summit
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
534 }
1
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
535 } #window reporting
0
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
536
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
537 return(writethis)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
538 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
539
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
540
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
541 #filter return value of findpeak()
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
542 processPeaks = function(peaks) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
543 peaks = matrix(unlist(peaks), ncol=9, byrow=TRUE)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
544 peaks = peaks[peaks[,1] != FALSE,,drop=FALSE]
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
545 peaks = data.frame(peaks)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
546 return(peaks)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
547 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
548 #=======================> DONE!
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
549
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
550
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
551
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
552
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
553
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
554
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
555
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
556
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
557
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
558 # ==========================
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
559 # Parse-in System Variables
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
560 # ==========================
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
561 args = commandArgs(trailingOnly = TRUE) # Read Arguments from command line
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
562
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
563 #Parsing arguments and storing values
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
564 for (each.arg in args) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
565 #chormosome size file
1
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
566 if (grepl('-sfile=',each.arg)) {
0
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
567 arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]]
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
568 if (! is.na(arg.split[2]) ) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
569 size.file <- arg.split[2]
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
570 } else {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
571 stop('No genome size file')
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
572 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
573 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
574 #bed file names
1
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
575 if (grepl('-bednames=',each.arg)) {
0
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
576 arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]]
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
577 if (! is.na(arg.split[2]) ) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
578 bednames <- arg.split[2]
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
579 } else {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
580 stop('No bed file names')
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
581 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
582 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
583 #bed files directory
1
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
584 if (grepl('-frag=',each.arg)) {
0
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
585 arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]]
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
586 if (! is.na(arg.split[2]) ) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
587 frag <- arg.split[2]
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
588 } else {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
589 stop('No fragment length given')
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
590 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
591 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
592 #bakcground files directory
1
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
593 if (grepl('-bkgd=',each.arg)) {
0
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
594 arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]]
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
595 if (! is.na(arg.split[2]) ) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
596 bkgd <- arg.split[2]
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
597 } else {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
598 stop('No background file')
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
599 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
600 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
601 #bakcground files directory
1
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
602 if (grepl('-out=',each.arg)) {
0
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
603 arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]]
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
604 if (! is.na(arg.split[2]) ) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
605 out <- arg.split[2]
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
606 } else {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
607 stop('No output directory given')
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
608 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
609 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
610 #Cluster number
1
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
611 if (grepl('-clustnummer=',each.arg)) {
0
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
612 arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]]
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
613 if (! is.na(arg.split[2]) ) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
614 clustnummer <- as.numeric(arg.split[2])
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
615 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
616 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
617 #resolution
1
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
618 if (grepl('-resolution=',each.arg)) {
0
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
619 arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]]
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
620 if (! is.na(arg.split[2]) ) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
621 resol <- arg.split[2]
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
622 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
623 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
624 #processor cores
1
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
625 if (grepl('-p=',each.arg)) {
0
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
626 arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]]
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
627 if (! is.na(arg.split[2]) ) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
628 cornum <- as.numeric(arg.split[2])
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
629 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
630 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
631 #minimum window size
1
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
632 if (grepl('-window=',each.arg)) {
0
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
633 arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]]
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
634 if (! is.na(arg.split[2]) ) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
635 winSize <- arg.split[2]
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
636 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
637 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
638 #window size
1
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
639 if (grepl('-bin=',each.arg)) {
0
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
640 arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]]
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
641 if (! is.na(arg.split[2]) ) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
642 binsize <- arg.split[2]
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
643 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
644 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
645 #type (paired / single)
1
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
646 if (grepl('-type=',each.arg)) {
0
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
647 arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]]
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
648 if (! is.na(arg.split[2]) ) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
649 type <- arg.split[2]
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
650 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
651 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
652 #chromosome number
1
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
653 if (grepl('-chrcount=',each.arg)) {
0
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
654 arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]]
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
655 if (! is.na(arg.split[2]) ) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
656 chrcount <- as.numeric(arg.split[2])
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
657 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
658 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
659 #window enrichment cutoff
1
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
660 if (grepl('-windowe=',each.arg)) {
0
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
661 arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]]
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
662 if (! is.na(arg.split[2]) ) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
663 windowe <- arg.split[2]
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
664 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
665 }
1
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
666 #initialize
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
667 if (grepl('-initModel=',each.arg)) {
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
668 arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]]
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
669 if (! is.na(arg.split[2]) ) {
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
670 initialize <- arg.split[2]
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
671 }
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
672 }
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
673
0
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
674 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
675
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
676 ##Parse in variables
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
677 chromosomes = read.table(size.file, header=FALSE)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
678 chromName = chromosomes$V1; #which chromosome
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
679 chromSize = chromosomes$V2; #chromosomes size
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
680 rm(chromosomes)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
681
1
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
682 if (chrcount == 1) {
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
683 write(paste(samplingSeed), file = paste0(out, "/seed.info"))
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
684 } else {
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
685 samplingSeed = as.numeric(read.table(paste0(out, "/seed.info"), header = FALSE)[[1]])
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
686 }
0
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
687 readsFiles = as.list(strsplit(bednames, ",", fixed = TRUE)[[1]])
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
688 numdup = length(readsFiles) #number of replicates
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
689 if (bkgd != "None") {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
690 readsFiles[[numdup+1]] = bkgd
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
691 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
692 winSize = as.numeric(winSize)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
693 binSize = as.numeric(binsize)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
694 winSize = binSize * winSize
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
695
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
696 if (type == "single") {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
697 frags = as.numeric(strsplit(frag, ",", fixed = TRUE)[[1]])
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
698 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
699 rm(bednames)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
700
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
701 if (numdup > 1) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
702 modelnames = c("VVV","VEV")
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
703 } else {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
704 modelnames = "V"
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
705 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
706
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
707 if (windowe != "auto") {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
708 windowe = as.numeric(windowe)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
709 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
710
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
711 nothing = FALSE #default is we found some peaks
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
712 options(stringsAsFactors = FALSE)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
713 #=======================> DONE!
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
714
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
715
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
716
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
717
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
718
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
719 # =======================
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
720 # Some preliminary stuff
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
721 # =======================
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
722 if (type == "single") {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
723 if (cornum > 1) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
724 datain = mclapply(readsFiles, parsein, mc.cores = cornum, mc.preschedule = TRUE) #read in all bed files (samples and control)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
725 } else {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
726 datain = lapply(readsFiles, parsein) #read in all bed files (samples and control)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
727 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
728 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
729
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
730 if (type == "paired") {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
731 if (cornum > 1) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
732 datain = mclapply(readsFiles, parseinpe, mc.cores = cornum, mc.preschedule = TRUE) #read in all bed files (samples and control)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
733 } else {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
734 datain = lapply(readsFiles, parseinpe) #read in all bed files (samples and control)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
735 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
736 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
737
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
738 #minimum peak size (only a recommendation)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
739 minpeak = floor(binSize / 4)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
740
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
741 #make bins vector
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
742 bins = seq(from = 1, to = (chromSize - 1), by = binSize)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
743 #=======================> DONE!
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
744
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
745
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
746
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
747
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
748 # ===============
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
749 # Counting Reads
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
750 # ===============
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
751 if (type == "single") {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
752 if (cornum > 1) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
753 counts = mclapply(readsFiles, countreads, reads = datain, frag = frags, chromsize = chromSize, filelist = readsFiles, chrcount = chrcount, mc.cores = cornum, mc.preschedule = TRUE)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
754 } else {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
755 counts = lapply(readsFiles, countreads, reads = datain, frag = frags, chromsize = chromSize, filelist = readsFiles, chrcount = chrcount)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
756 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
757 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
758
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
759 if (type == "paired") {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
760 if (cornum > 1) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
761 counts = mclapply(readsFiles, countreadspe, reads = datain, chromsize = chromSize, filelist = readsFiles, chrcount = chrcount, mc.cores = cornum, mc.preschedule = TRUE)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
762 } else {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
763 counts = lapply(readsFiles, countreadspe, reads = datain, chromsize = chromSize, filelist = readsFiles, chrcount = chrcount)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
764 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
765 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
766
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
767 rm(datain)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
768 #=======================> DONE!
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
769
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
770
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
771
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
772
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
773 # ============================
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
774 # Estimating Background Model
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
775 # ============================
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
776 if (chrcount == 1){ #first chromosome, estimate bkgd (includes SNR cutoff)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
777 if (is.na(cutoff)) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
778 if (bkgd != "None") {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
779 cutoff = vector(length = numdup)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
780 sdC = sd(counts[[numdup+1]])
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
781 for (x in 1:numdup) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
782 cutoff[x] = (mean(counts[[x]]))/(sdC)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
783 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
784 cutoff = max(cutoff)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
785 C = NULL
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
786 mCs = NULL
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
787 write(paste(c(cutoff,NA,NA)), file = paste0(out, "/bkgd.info"), append = TRUE)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
788 } else {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
789 cutoff = vector(length = numdup)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
790 mmV = var(geomeanL(counts))
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
791 mmM = mean(geomeanL(counts))
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
792 sigma = log(1+((mmV) / ((mmM)^2)))
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
793 mu = (log(mmM)) - (0.5 * (sigma))
1
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
794 set.seed(samplingSeed)
0
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
795 C = rlnorm(100000, mu, sqrt(sigma))
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
796 for (x in 1:numdup) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
797 cutoff[x] = (mean(counts[[x]]))/(sd(C))
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
798 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
799 cutoff = max(cutoff)
1
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
800 set.seed(samplingSeed)
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
801 snow = sample(C, binSize*5, replace = TRUE)
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
802 mCs = mean(snow)
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
803 dCs = sd(snow)
0
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
804 write(paste(c(cutoff,sigma,mu)), file = paste0(out, "/bkgd.info"), append = TRUE)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
805 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
806 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
807 } else { #bkgd estiamted from before
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
808 bkgdInfo = read.table(paste0(out, "/bkgd.info"), header = FALSE)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
809 if (is.na(cutoff)) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
810 cutoff = as.numeric(bkgdInfo[[1]][1])
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
811 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
812
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
813 if (bkgd != "None") {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
814 C = NULL
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
815 mCs = NULL
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
816 } else {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
817 sigma = as.numeric(bkgdInfo[[1]][2])
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
818 mu = as.numeric(bkgdInfo[[1]][3])
1
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
819 set.seed(samplingSeed)
0
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
820 C = rlnorm(100000, mu, sqrt(sigma))
1
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
821 set.seed(samplingSeed)
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
822 snow = sample(C, binSize*5, replace = TRUE)
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
823 mCs = mean(snow)
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
824 dCs = sd(snow)
0
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
825 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
826 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
827 #=======================> DONE!
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
828
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
829
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
830
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
831
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
832 # ========================
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
833 # Picking Enriched Windows
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
834 # ========================
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
835 if (cornum > 1) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
836 coffeeshop = mclapply(bins, pickbins, counts, binSize, chromSize, numdup, C, cutoff, strict, mCs, dCs, bkgd, mc.cores = cornum, mc.preschedule = TRUE)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
837 } else {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
838 coffeeshop = lapply(bins, pickbins, counts, binSize, chromSize, numdup, C, cutoff, strict, mCs, dCs, bkgd)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
839 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
840 coffeeshop = as.numeric(unlist(coffeeshop))
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
841 coffeeshop[coffeeshop != numdup] = 0
1
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
842
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
843 if (sum(coffeeshop) != 0) { #Any enriched bins?
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
844
0
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
845 coffeeshop = c(0, diff(coffeeshop))
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
846 coffeeshop = cbind(coffeeshop, bins)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
847 coffeeshopNord = coffeeshop[coffeeshop[,1] == numdup,,drop=FALSE]
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
848 coffeeshopSud = coffeeshop[coffeeshop[,1] == -numdup,,drop=FALSE]
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
849 coffeeshopNord = coffeeshopNord[,2]
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
850 coffeeshopSud = coffeeshopSud[,2] - 1
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
851 if (length(coffeeshopSud) < length(coffeeshopNord)) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
852 coffeeshopSud = c(coffeeshopSud, chromSize)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
853 } else if (length(coffeeshopSud) > length(coffeeshopNord)) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
854 coffeeshopNord = c(1, coffeeshopNord)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
855 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
856 if (coffeeshopSud[length(coffeeshopSud)] > chromSize) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
857 coffeeshopSud[length(coffeeshopSud)] = chromSize
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
858 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
859
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
860
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
861 if (cornum > 1) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
862 coffeeshop = mclapply(coffeeshopNord, pickwins, coffeeshopSud, counts, numdup, startlist = coffeeshopNord, winSize, mc.cores = cornum, mc.preschedule = TRUE)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
863 } else {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
864 coffeeshop = lapply(coffeeshopNord, pickwins, coffeeshopSud, counts, numdup, startlist = coffeeshopNord, winSize)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
865 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
866 coffeeshop = as.numeric(unlist(coffeeshop))
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
867 coffeeshop = cbind(coffeeshopNord, coffeeshopSud, coffeeshop)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
868 coffeeshop = coffeeshop[coffeeshop[,3] != FALSE,,drop=FALSE]
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
869 coffeeshop = coffeeshop[order(coffeeshop[,3], decreasing = TRUE),]
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
870 rm(bins)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
871 #=======================> DONE!
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
872
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
873
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
874
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
875
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
876 # ===================================
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
877 # Initializing Clustering Parameters
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
878 # ===================================
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
879 if (length(coffeeshop[,1]) > 0) { #any enriched windows detected?
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
880
1
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
881 if (initialize == "deterministic") {
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
882 yummy = ceiling(length(coffeeshop[,1]) / 1000)
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
883 if (yummy == 0) {
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
884 yummy = 1
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
885 }
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
886 }
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
887 if (initialize == "stochastic") {
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
888 yummy = ceiling(length(coffeeshop[,1]) / 4)
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
889 if (yummy > 20) {
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
890 set.seed(samplingSeed)
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
891 yummy = sample(1:yummy, 20)
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
892 } else if (yummy > 0) {
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
893 yummy = 1:yummy
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
894 } else {
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
895 yummy = 1
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
896 }
0
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
897 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
898 coffeeshopNord = coffeeshop[yummy,1]
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
899 coffeeshopSud = coffeeshop[yummy,2]
1
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
900 set.seed(samplingSeed)
0
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
901 noise = rnorm(100000, mean=0, sd=0.1)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
902 param = initparam(coffeeshopNord, coffeeshopSud, numdup, counts, cornum, clustnummer, modelnames, noise)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
903 #=======================> DONE!
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
904
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
905
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
906
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
907
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
908
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
909 # ==========================
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
910 # Enriched Window Filtering
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
911 # ==========================
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
912 if (windowe != 1) { #do it only if window fold enrichment filtering is required
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
913 if (cornum > 1) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
914 scores = mclapply(coffeeshop[,1], scorewindow, coffeeshop[,2], numdup, C, bkgd, counts, startlist = coffeeshop[,1], mc.cores = cornum, mc.preschedule = TRUE)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
915 } else {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
916 scores = lapply(coffeeshop[,1], scorewindow, coffeeshop[,2], numdup, C, bkgd, counts, startlist = coffeeshop[,1])
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
917 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
918 scores = unlist(scores)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
919
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
920 if (windowe == "auto") {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
921 lscores = log(scores)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
922 if (length(scores) > 0) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
923 if (chrcount == 1) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
924 cutthisTEMP = ((mean(lscores)) + (sd(lscores)*1))
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
925 write(paste(cutthisTEMP), file = paste0(out, "/bkgd.info"), append = TRUE)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
926 } else {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
927 cutthisTEMP = as.numeric(bkgdInfo[[1]][4])
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
928 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
929 finalwins = which(lscores > cutthisTEMP)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
930 cutthisW = min(scores[finalwins])
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
931 coffeeshop = coffeeshop[finalwins,]
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
932 } else {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
933 if (chrcount == 1) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
934 cutthisTEMP = 0
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
935 cutthisW = "Not Applicable, All Windows Analyzed!"
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
936 write(paste(cutthisTEMP), file = paste0(out, "/bkgd.info"), append = TRUE)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
937 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
938 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
939 } else {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
940 cutthisW = windowe
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
941 if (length(scores) > 0) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
942 finalwins = which(scores >= windowe)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
943 coffeeshop = coffeeshop[finalwins,]
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
944 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
945 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
946 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
947 else { cutthisW = 1 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
948 #=======================> DONE!
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
949
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
950
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
951
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
952
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
953
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
954 # ==============
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
955 # Finding Peaks
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
956 # ==============
1
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
957 if (length(coffeeshop[,1]) > 0) { #any enriched windows left after filtering?
0
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
958 coffeeshop = coffeeshop[,-3]
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
959 if (cornum > 1) {
1
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
960 peaks = mclapply(coffeeshop[,1], findpeak, coffeeshop[,2], numdup, C, param, bkgd, resol, counts, noise, startlist = coffeeshop[,1], meanAdjust, clustnummer, mc.cores = cornum, mc.preschedule = TRUE)
0
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
961 } else {
1
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
962 peaks = lapply(coffeeshop[,1], findpeak, coffeeshop[,2], numdup, C, param, bkgd, resol, counts, noise, startlist = coffeeshop[,1], meanAdjust, clustnummer)
0
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
963 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
964 if (!(is.null(peaks))) { #any peaks discovered?
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
965 writethis = processPeaks(peaks)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
966 #=======================> DONE!
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
967
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
968
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
969
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
970
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
971
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
972 # =========================
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
973 # Writing Peak Information
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
974 # =========================
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
975 } else { nothing = TRUE } #no peaks
1
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
976 } else { nothing = TRUE } #no enriched windows left after filtering
0
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
977 } else { nothing = TRUE } #no enriched widnows discovered
1
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
978 } else { nothing = TRUE; cutthisW = windowe } #no enriched bins discovered
0
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
979
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
980 if (isTRUE(nothing)) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
981 file.create(paste0(out, "/", chromName, ".peaks.bed"))
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
982 write(paste(chromName, minpeak, sep = " "), file = paste0(out, "/min.peaksize"), append=TRUE)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
983
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
984 if (chrcount == 1) {
1
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
985 message(paste0("No peaks found! - Window Fold Enrichment: ", cutthisW, " - Seed: ", samplingSeed))
0
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
986 } else {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
987 message("No peaks found!")
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
988 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
989 } else {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
990 write(paste(chromName, writethis$X1, writethis$X2, writethis$X3, writethis$X4, writethis$X5, writethis$X6, writethis$X7, writethis$X8, writethis$X9, minpeak, sep = " "), file = paste0(out, "/", chromName, ".peaks.bed"), ncolumns = 1)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
991 write(paste(chromName, minpeak, sep = " "), file = paste0(out, "/min.peaksize"), append=TRUE)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
992
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
993
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
994 if (chrcount == 1) {
1
243f75d0ed6e Uploaded.
messersc
parents: 0
diff changeset
995 message(paste0("Done! - Window Fold Enrichment: ", cutthisW, " - Seed: ", samplingSeed))
0
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
996 } else {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
997 message("Done!")
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
998 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
999 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
1000 #=======================> DONE!
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
1001
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
1002