comparison signalmaker.r @ 1:243f75d0ed6e draft default tip

Uploaded. Includes new release 1.0.7 with fixed optional controls.
author messersc
date Thu, 19 Feb 2015 05:39:45 -0500
parents d42f4d78c85e
children
comparison
equal deleted inserted replaced
0:d42f4d78c85e 1:243f75d0ed6e
1 ############################### 1 ########################################################################
2 #### Signal Maker for NGS Data 2 # JAMMv1.0.7rev1 is a peak finder for joint analysis of NGS replicates.
3 #### R script 3 # Copyright (C) 2014-2015 Mahmoud Ibrahim
4 ############################### 4 #
5 5 # This program is free software: you can redistribute it and/or modify
6 6 # it under the terms of the GNU General Public License as published by
7 # the Free Software Foundation, either version 3 of the License, or
8 # (at your option) any later version.
9 #
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
14 #
15 # You should have received a copy of the GNU General Public License
16 # along with this program. If not, see <http://www.gnu.org/licenses/>.
17 #
18 # Contact: mahmoud.ibrahim@mdc-berlin.de
19 ########################################################################
7 20
8 21
9 # ======================= 22 # =======================
10 # User-defined variables 23 # User-defined variables
11 # ======================= 24 # =======================
74 parsein = function(bedfile) { 87 parsein = function(bedfile) {
75 reads = read.table(bedfile, header = FALSE)[[1]] 88 reads = read.table(bedfile, header = FALSE)[[1]]
76 return(reads) 89 return(reads)
77 } 90 }
78 91
79 92 #Read in bedpe(st2)
93 parseinpe = function(bedfile) {
94 l = read.table(bedfile, header = FALSE)
95 l = cbind((l[[1]] + 1), l[[2]])
96 return(l)
97 }
80 98
81 #Produces normalized extended read counts (takes output of parsein(), return a vector of floats) 99 #Produces normalized extended read counts (takes output of parsein(), return a vector of floats)
82 countreads = function(bedfile, reads, frag, chromsize, filelist) { 100 countreads = function(bedfile, reads, frag, chromsize, filelist, chrcount) {
83 101
84 o = which(filelist == bedfile) 102 o = which(filelist == bedfile)
85 103
86 counts = vector(mode = "numeric", length = chromsize) 104 counts = vector(mode = "numeric", length = chromsize)
87 105
108 126
109 return(counts) 127 return(counts)
110 } 128 }
111 129
112 130
131
132
133 #Produces normalized extended read counts (takes output of parsein(), return a vector of floats / PAIRED END)
134 countreadspe = function(bedfile, reads, chromsize, filelist, chrcount) {
135
136 o = which(filelist == bedfile)
137
138 counts = vector(mode = "numeric", length = chromsize)
139
140 for (j in 1:length(reads[[o]][,1])) {
141 counts[(reads[[o]][j,1]):(reads[[o]][j,2])] = counts[(reads[[o]][j,1]):(reads[[o]][j,2])] + 1
142 }
143
144 mCount = mean(counts)
145
146 if (chrcount == 1) {
147 counts = counts/mCount
148 write(paste(mCount), file = paste0(out, "/norma.", o, ".info"))
149 } else {
150 meanCounts = mean(as.numeric(read.table(paste0(out, "/norma.", o, ".info"))[[1]]))
151 if ((mCount > (5*meanCounts)) || (mCount < (0.2*meanCounts))) {
152 mCount = meanCounts
153 } else {
154 write(paste(mCount), file = paste0(out, "/norma.", o, ".info"), append = TRUE)
155 }
156 counts = counts/mCount
157 }
158
159 return(counts)
160 }
113 161
114 162
115 #generate signal 163 #generate signal
116 getsig = function(winStart, winEnds, startList, stepSize, chromName) { 164 getsig = function(winStart, winEnds, startList, stepSize, chromName) {
117 165
185 } 233 }
186 #bed files directory 234 #bed files directory
187 if (grepl('^-frag=',each.arg)) { 235 if (grepl('^-frag=',each.arg)) {
188 arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] 236 arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]]
189 if (! is.na(arg.split[2]) ) { 237 if (! is.na(arg.split[2]) ) {
190 frag <- arg.split[2] 238 frag <- arg.split[2]
191 } else { 239 }
192 stop('No fragment length given')
193 }
194 } 240 }
195 #bakcground files directory 241 #bakcground files directory
196 if (grepl('^-bkgd=',each.arg)) { 242 if (grepl('^-bkgd=',each.arg)) {
197 arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] 243 arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]]
198 if (! is.na(arg.split[2]) ) { 244 if (! is.na(arg.split[2]) ) {
248 #chrcount 294 #chrcount
249 if (grepl('^-chrcount=',each.arg)) { 295 if (grepl('^-chrcount=',each.arg)) {
250 arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]] 296 arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]]
251 if (! is.na(arg.split[2]) ) { 297 if (! is.na(arg.split[2]) ) {
252 chrcount <- as.numeric(arg.split[2]) 298 chrcount <- as.numeric(arg.split[2])
299 }
300 }
301 #alignment type
302 if (grepl('^-t=',each.arg)) {
303 arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]]
304 if (! is.na(arg.split[2]) ) {
305 type <- arg.split[2]
253 } 306 }
254 } 307 }
255 } 308 }
256 309
257 ##Parse in variables 310 ##Parse in variables
258 readsFiles = as.list(strsplit(bednames, ",", fixed = TRUE)[[1]]) 311 readsFiles = as.list(strsplit(bednames, ",", fixed = TRUE)[[1]])
259 numdup = length(readsFiles) #number of replicates 312 numdup = length(readsFiles) #number of replicates
260 if (bkgd != "None") { 313 if (bkgd != "None") {
261 readsFiles[[numdup+1]] = bkgd 314 readsFiles[[numdup+1]] = bkgd
262 } 315 }
263 frags = as.numeric(strsplit(frag, ",", fixed = TRUE)[[1]]) 316 if (type == "single") {
317 frags = as.numeric(strsplit(frag, ",", fixed = TRUE)[[1]])
318 }
264 rm(bednames) 319 rm(bednames)
265 320
266 options(stringsAsFactors = FALSE) 321 options(stringsAsFactors = FALSE)
267 #=======================> DONE! 322 #=======================> DONE!
268 323
276 regions = read.table(regions, header = FALSE) 331 regions = read.table(regions, header = FALSE)
277 regions = regions[regions[[1]] == chromName, , drop = FALSE] 332 regions = regions[regions[[1]] == chromName, , drop = FALSE]
278 regions = cbind(regions[[2]] + 1, regions[[3]]) 333 regions = cbind(regions[[2]] + 1, regions[[3]])
279 334
280 #import read data 335 #import read data
281 if (cornum > 1) { 336 if (type == "single") {
282 datain = mclapply(readsFiles, parsein, mc.cores = cornum, mc.preschedule = TRUE) #read in all bed files (samples and control) 337 if (cornum > 1) {
283 } else { 338 datain = mclapply(readsFiles, parsein, mc.cores = cornum, mc.preschedule = TRUE) #read in all bed files (samples and control)
284 datain = lapply(readsFiles, parsein) #read in all bed files (samples and control) 339 } else {
340 datain = lapply(readsFiles, parsein) #read in all bed files (samples and control)
341 }
342 }
343 #import read data
344 if (type == "paired") {
345 if (cornum > 1) {
346 datain = mclapply(readsFiles, parseinpe, mc.cores = cornum, mc.preschedule = TRUE) #read in all bed files (samples and control)
347 } else {
348 datain = lapply(readsFiles, parseinpe) #read in all bed files (samples and control)
349 }
285 } 350 }
286 #=======================> DONE! 351 #=======================> DONE!
287 352
288 353
289 354
290 # =============== 355 # ===============
291 # Counting Reads 356 # Counting Reads
292 # =============== 357 # ===============
293 if (cornum > 1) { 358 if (type == "single") {
294 counts = mclapply(readsFiles, countreads, reads = datain, frag = frags, chromsize = chromSize, filelist = readsFiles, mc.cores = cornum, mc.preschedule = TRUE) 359 if (cornum > 1) {
295 } else { 360 counts = mclapply(readsFiles, countreads, reads = datain, frag = frags, chromsize = chromSize, filelist = readsFiles, chrcount = chrcount, mc.cores = cornum, mc.preschedule = TRUE)
296 counts = lapply(readsFiles, countreads, reads = datain, frag = frags, chromsize = chromSize, filelist = readsFiles) 361 } else {
362 counts = lapply(readsFiles, countreads, reads = datain, frag = frags, chromsize = chromSize, filelist = readsFiles, chrcount = chrcount)
363 }
364 }
365
366 if (type == "paired") {
367 if (cornum > 1) {
368 counts = mclapply(readsFiles, countreadspe, reads = datain, chromsize = chromSize, filelist = readsFiles, chrcount = chrcount, mc.cores = cornum, mc.preschedule = TRUE)
369 } else {
370 counts = lapply(readsFiles, countreadspe, reads = datain, chromsize = chromSize, filelist = readsFiles, chrcount = chrcount)
371 }
297 } 372 }
298 rm(datain) 373 rm(datain)
299 #=======================> DONE! 374 #=======================> DONE!
300 375
301 376