Mercurial > repos > messersc > jamm
comparison signalmaker.r @ 1:243f75d0ed6e draft default tip
Uploaded.
Includes new release 1.0.7 with fixed optional controls.
author | messersc |
---|---|
date | Thu, 19 Feb 2015 05:39:45 -0500 |
parents | d42f4d78c85e |
children |
comparison
equal
deleted
inserted
replaced
0:d42f4d78c85e | 1:243f75d0ed6e |
---|---|
1 ############################### | 1 ######################################################################## |
2 #### Signal Maker for NGS Data | 2 # JAMMv1.0.7rev1 is a peak finder for joint analysis of NGS replicates. |
3 #### R script | 3 # Copyright (C) 2014-2015 Mahmoud Ibrahim |
4 ############################### | 4 # |
5 | 5 # This program is free software: you can redistribute it and/or modify |
6 | 6 # it under the terms of the GNU General Public License as published by |
7 # the Free Software Foundation, either version 3 of the License, or | |
8 # (at your option) any later version. | |
9 # | |
10 # This program is distributed in the hope that it will be useful, | |
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
13 # GNU General Public License for more details. | |
14 # | |
15 # You should have received a copy of the GNU General Public License | |
16 # along with this program. If not, see <http://www.gnu.org/licenses/>. | |
17 # | |
18 # Contact: mahmoud.ibrahim@mdc-berlin.de | |
19 ######################################################################## | |
7 | 20 |
8 | 21 |
9 # ======================= | 22 # ======================= |
10 # User-defined variables | 23 # User-defined variables |
11 # ======================= | 24 # ======================= |
74 parsein = function(bedfile) { | 87 parsein = function(bedfile) { |
75 reads = read.table(bedfile, header = FALSE)[[1]] | 88 reads = read.table(bedfile, header = FALSE)[[1]] |
76 return(reads) | 89 return(reads) |
77 } | 90 } |
78 | 91 |
79 | 92 #Read in bedpe(st2) |
93 parseinpe = function(bedfile) { | |
94 l = read.table(bedfile, header = FALSE) | |
95 l = cbind((l[[1]] + 1), l[[2]]) | |
96 return(l) | |
97 } | |
80 | 98 |
81 #Produces normalized extended read counts (takes output of parsein(), return a vector of floats) | 99 #Produces normalized extended read counts (takes output of parsein(), return a vector of floats) |
82 countreads = function(bedfile, reads, frag, chromsize, filelist) { | 100 countreads = function(bedfile, reads, frag, chromsize, filelist, chrcount) { |
83 | 101 |
84 o = which(filelist == bedfile) | 102 o = which(filelist == bedfile) |
85 | 103 |
86 counts = vector(mode = "numeric", length = chromsize) | 104 counts = vector(mode = "numeric", length = chromsize) |
87 | 105 |
108 | 126 |
109 return(counts) | 127 return(counts) |
110 } | 128 } |
111 | 129 |
112 | 130 |
131 | |
132 | |
133 #Produces normalized extended read counts (takes output of parsein(), return a vector of floats / PAIRED END) | |
134 countreadspe = function(bedfile, reads, chromsize, filelist, chrcount) { | |
135 | |
136 o = which(filelist == bedfile) | |
137 | |
138 counts = vector(mode = "numeric", length = chromsize) | |
139 | |
140 for (j in 1:length(reads[[o]][,1])) { | |
141 counts[(reads[[o]][j,1]):(reads[[o]][j,2])] = counts[(reads[[o]][j,1]):(reads[[o]][j,2])] + 1 | |
142 } | |
143 | |
144 mCount = mean(counts) | |
145 | |
146 if (chrcount == 1) { | |
147 counts = counts/mCount | |
148 write(paste(mCount), file = paste0(out, "/norma.", o, ".info")) | |
149 } else { | |
150 meanCounts = mean(as.numeric(read.table(paste0(out, "/norma.", o, ".info"))[[1]])) | |
151 if ((mCount > (5*meanCounts)) || (mCount < (0.2*meanCounts))) { | |
152 mCount = meanCounts | |
153 } else { | |
154 write(paste(mCount), file = paste0(out, "/norma.", o, ".info"), append = TRUE) | |
155 } | |
156 counts = counts/mCount | |
157 } | |
158 | |
159 return(counts) | |
160 } | |
113 | 161 |
114 | 162 |
115 #generate signal | 163 #generate signal |
116 getsig = function(winStart, winEnds, startList, stepSize, chromName) { | 164 getsig = function(winStart, winEnds, startList, stepSize, chromName) { |
117 | 165 |
185 } | 233 } |
186 #bed files directory | 234 #bed files directory |
187 if (grepl('^-frag=',each.arg)) { | 235 if (grepl('^-frag=',each.arg)) { |
188 arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] | 236 arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] |
189 if (! is.na(arg.split[2]) ) { | 237 if (! is.na(arg.split[2]) ) { |
190 frag <- arg.split[2] | 238 frag <- arg.split[2] |
191 } else { | 239 } |
192 stop('No fragment length given') | |
193 } | |
194 } | 240 } |
195 #bakcground files directory | 241 #bakcground files directory |
196 if (grepl('^-bkgd=',each.arg)) { | 242 if (grepl('^-bkgd=',each.arg)) { |
197 arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] | 243 arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] |
198 if (! is.na(arg.split[2]) ) { | 244 if (! is.na(arg.split[2]) ) { |
248 #chrcount | 294 #chrcount |
249 if (grepl('^-chrcount=',each.arg)) { | 295 if (grepl('^-chrcount=',each.arg)) { |
250 arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] | 296 arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] |
251 if (! is.na(arg.split[2]) ) { | 297 if (! is.na(arg.split[2]) ) { |
252 chrcount <- as.numeric(arg.split[2]) | 298 chrcount <- as.numeric(arg.split[2]) |
299 } | |
300 } | |
301 #alignment type | |
302 if (grepl('^-t=',each.arg)) { | |
303 arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] | |
304 if (! is.na(arg.split[2]) ) { | |
305 type <- arg.split[2] | |
253 } | 306 } |
254 } | 307 } |
255 } | 308 } |
256 | 309 |
257 ##Parse in variables | 310 ##Parse in variables |
258 readsFiles = as.list(strsplit(bednames, ",", fixed = TRUE)[[1]]) | 311 readsFiles = as.list(strsplit(bednames, ",", fixed = TRUE)[[1]]) |
259 numdup = length(readsFiles) #number of replicates | 312 numdup = length(readsFiles) #number of replicates |
260 if (bkgd != "None") { | 313 if (bkgd != "None") { |
261 readsFiles[[numdup+1]] = bkgd | 314 readsFiles[[numdup+1]] = bkgd |
262 } | 315 } |
263 frags = as.numeric(strsplit(frag, ",", fixed = TRUE)[[1]]) | 316 if (type == "single") { |
317 frags = as.numeric(strsplit(frag, ",", fixed = TRUE)[[1]]) | |
318 } | |
264 rm(bednames) | 319 rm(bednames) |
265 | 320 |
266 options(stringsAsFactors = FALSE) | 321 options(stringsAsFactors = FALSE) |
267 #=======================> DONE! | 322 #=======================> DONE! |
268 | 323 |
276 regions = read.table(regions, header = FALSE) | 331 regions = read.table(regions, header = FALSE) |
277 regions = regions[regions[[1]] == chromName, , drop = FALSE] | 332 regions = regions[regions[[1]] == chromName, , drop = FALSE] |
278 regions = cbind(regions[[2]] + 1, regions[[3]]) | 333 regions = cbind(regions[[2]] + 1, regions[[3]]) |
279 | 334 |
280 #import read data | 335 #import read data |
281 if (cornum > 1) { | 336 if (type == "single") { |
282 datain = mclapply(readsFiles, parsein, mc.cores = cornum, mc.preschedule = TRUE) #read in all bed files (samples and control) | 337 if (cornum > 1) { |
283 } else { | 338 datain = mclapply(readsFiles, parsein, mc.cores = cornum, mc.preschedule = TRUE) #read in all bed files (samples and control) |
284 datain = lapply(readsFiles, parsein) #read in all bed files (samples and control) | 339 } else { |
340 datain = lapply(readsFiles, parsein) #read in all bed files (samples and control) | |
341 } | |
342 } | |
343 #import read data | |
344 if (type == "paired") { | |
345 if (cornum > 1) { | |
346 datain = mclapply(readsFiles, parseinpe, mc.cores = cornum, mc.preschedule = TRUE) #read in all bed files (samples and control) | |
347 } else { | |
348 datain = lapply(readsFiles, parseinpe) #read in all bed files (samples and control) | |
349 } | |
285 } | 350 } |
286 #=======================> DONE! | 351 #=======================> DONE! |
287 | 352 |
288 | 353 |
289 | 354 |
290 # =============== | 355 # =============== |
291 # Counting Reads | 356 # Counting Reads |
292 # =============== | 357 # =============== |
293 if (cornum > 1) { | 358 if (type == "single") { |
294 counts = mclapply(readsFiles, countreads, reads = datain, frag = frags, chromsize = chromSize, filelist = readsFiles, mc.cores = cornum, mc.preschedule = TRUE) | 359 if (cornum > 1) { |
295 } else { | 360 counts = mclapply(readsFiles, countreads, reads = datain, frag = frags, chromsize = chromSize, filelist = readsFiles, chrcount = chrcount, mc.cores = cornum, mc.preschedule = TRUE) |
296 counts = lapply(readsFiles, countreads, reads = datain, frag = frags, chromsize = chromSize, filelist = readsFiles) | 361 } else { |
362 counts = lapply(readsFiles, countreads, reads = datain, frag = frags, chromsize = chromSize, filelist = readsFiles, chrcount = chrcount) | |
363 } | |
364 } | |
365 | |
366 if (type == "paired") { | |
367 if (cornum > 1) { | |
368 counts = mclapply(readsFiles, countreadspe, reads = datain, chromsize = chromSize, filelist = readsFiles, chrcount = chrcount, mc.cores = cornum, mc.preschedule = TRUE) | |
369 } else { | |
370 counts = lapply(readsFiles, countreadspe, reads = datain, chromsize = chromSize, filelist = readsFiles, chrcount = chrcount) | |
371 } | |
297 } | 372 } |
298 rm(datain) | 373 rm(datain) |
299 #=======================> DONE! | 374 #=======================> DONE! |
300 | 375 |
301 | 376 |