annotate signalmaker.r @ 0:d42f4d78c85e draft

Uploaded
author messersc
date Wed, 17 Dec 2014 10:40:23 -0500
parents
children 243f75d0ed6e
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
1 ###############################
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
2 #### Signal Maker for NGS Data
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
3 #### R script
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
4 ###############################
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
5
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
6
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
7
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
8
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
9 # =======================
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
10 # User-defined variables
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
11 # =======================
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
12 options(warn = -1, scipen = 1000) #R will not report any warnings (warn = -1), R will not use scientific notation (scipen = 1000)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
13 #=======================> DONE!
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
14
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
15
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
16
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
17
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
18 # ================================
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
19 # Required Libraries check & load
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
20 # ================================
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
21 if ((is.element('signal', installed.packages()[,1]) == FALSE) || (is.element('parallel', installed.packages()[,1]) == FALSE)) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
22 stop("R packages 'signal'and 'parallel' are required. Please install them!")
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
23 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
24 suppressPackageStartupMessages(library("signal"))
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
25 suppressPackageStartupMessages(library("parallel"))
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
26 #=======================> DONE!
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
27
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
28
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
29
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
30
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
31
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
32 # =================
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
33 # Custom Functions
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
34 # =================
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
35 #Get per-row Geometric mean (takes list, returns vectors, not lists!)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
36 geomeanL <- function(mat){
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
37 n = length(mat)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
38 if (n > 1) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
39 mult = (mat[[1]])*(mat[[2]])
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
40 if (n > 2) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
41 for (i in 3:n) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
42 mult = mult*(mat[[i]])
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
43 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
44 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
45 mult = mult^(1/n)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
46 mat = mult
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
47 return(mat)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
48 } else {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
49 return(mat[[1]])
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
50 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
51 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
52
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
53
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
54
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
55 #Get per-row Geometric mean (takes matrix, returns vectors, not lists!)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
56 geomean <- function(mat){
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
57 n = NCOL(mat)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
58 if (n > 1) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
59 mult = (mat[,1])*(mat[,2])
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
60 if (n > 2) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
61 for (i in 3:n) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
62 mult = mult*(mat[,i])
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
63 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
64 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
65 mult = mult^(1/n)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
66 mat = mult
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
67 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
68 return(mat)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
69 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
70
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
71
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
72
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
73 #Read in bed(st2) file
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
74 parsein = function(bedfile) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
75 reads = read.table(bedfile, header = FALSE)[[1]]
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
76 return(reads)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
77 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
78
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
79
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
80
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
81 #Produces normalized extended read counts (takes output of parsein(), return a vector of floats)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
82 countreads = function(bedfile, reads, frag, chromsize, filelist) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
83
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
84 o = which(filelist == bedfile)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
85
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
86 counts = vector(mode = "numeric", length = chromsize)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
87
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
88 for (j in 1:length(reads[[o]])) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
89 if ((reads[[o]][j]+frag[o]-1) <= chromsize) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
90 counts[(reads[[o]][j]):(reads[[o]][j]+frag[o]-1)] = counts[(reads[[o]][j]):(reads[[o]][j]+frag[o]-1)] + 1
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
91 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
92 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
93
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
94 mCount = mean(counts)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
95
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
96 if (chrcount == 1) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
97 counts = counts/mCount
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
98 write(paste(mCount), file = paste0(out, "/norma.", o, ".info"))
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
99 } else {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
100 meanCounts = mean(as.numeric(read.table(paste0(out, "/norma.", o, ".info"))[[1]]))
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
101 if ((mCount > (5*meanCounts)) || (mCount < (0.2*meanCounts))) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
102 mCount = meanCounts
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
103 } else {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
104 write(paste(mCount), file = paste0(out, "/norma.", o, ".info"), append = TRUE)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
105 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
106 counts = counts/mCount
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
107 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
108
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
109 return(counts)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
110 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
111
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
112
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
113
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
114
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
115 #generate signal
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
116 getsig = function(winStart, winEnds, startList, stepSize, chromName) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
117
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
118
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
119 plz = which(startList == winStart)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
120 winEnd = winEnds[plz]
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
121 rWinSizeTemp = winEnd - winStart + 1
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
122
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
123 #extract subset of the IP
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
124 Rs = matrix(nrow = rWinSizeTemp, ncol = numdup)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
125 for (j in 1:numdup) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
126 Rs[,j] = counts[[j]][winStart:winEnd]
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
127 Rs[,j] = filtfilt(rep(1,80)/80,1,Rs[,j])
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
128 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
129
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
130 #extract subset of the background and initial signal
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
131 if (bkgd != "None") {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
132 Cs = counts[[numdup+1]][winStart:winEnd]
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
133 Cs = filtfilt(rep(1,80)/80,1,Cs)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
134 signal = (geomean(Rs)) - (Cs)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
135 } else {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
136 signal = geomean(Rs)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
137 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
138
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
139
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
140 #get binned signal
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
141 signal = round(colMeans(matrix(c(signal, rep(0, stepSize - length(signal) %% stepSize)), stepSize)), digits = 2)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
142 steps = seq(winStart, winEnd, by = stepSize)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
143 starts = steps - 1
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
144 ends = steps + stepSize - 1
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
145 if (ends[length(ends)] > winEnd) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
146 ends[length(ends)] = winEnd
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
147 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
148
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
149 #write to file
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
150 write(paste(chromName, starts, ends, signal, sep = " "), file = paste0(out, "/", chromName, plz, ".bedSignal"), ncolumns = 1)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
151
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
152
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
153 return(NULL)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
154 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
155 #=======================> DONE!
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
156
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
157
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
158
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
159
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
160
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
161 # ==========================
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
162 # Parse-in System Variables
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
163 # ==========================
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
164 args = commandArgs(trailingOnly = TRUE) # Read Arguments from command line
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
165
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
166 #Parsing arguments and storing values
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
167 for (each.arg in args) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
168 #bed file names
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
169 if (grepl('^-bednames=',each.arg)) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
170 arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]]
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
171 if (! is.na(arg.split[2]) ) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
172 bednames <- arg.split[2]
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
173 } else {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
174 stop('No bed file names')
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
175 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
176 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
177 #bed file names
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
178 if (grepl('^-chromo=',each.arg)) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
179 arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]]
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
180 if (! is.na(arg.split[2]) ) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
181 chromName <- arg.split[2]
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
182 } else {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
183 stop('No bed file names')
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
184 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
185 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
186 #bed files directory
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
187 if (grepl('^-frag=',each.arg)) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
188 arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]]
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
189 if (! is.na(arg.split[2]) ) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
190 frag <- arg.split[2]
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
191 } else {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
192 stop('No fragment length given')
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
193 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
194 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
195 #bakcground files directory
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
196 if (grepl('^-bkgd=',each.arg)) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
197 arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]]
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
198 if (! is.na(arg.split[2]) ) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
199 bkgd <- arg.split[2]
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
200 } else {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
201 stop('No background file')
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
202 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
203 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
204 #bakcground files directory
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
205 if (grepl('^-out=',each.arg)) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
206 arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]]
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
207 if (! is.na(arg.split[2]) ) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
208 out <- arg.split[2]
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
209 } else {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
210 stop('No output directory given')
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
211 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
212 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
213 #Cluster number
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
214 if (grepl('^-clustnummer=',each.arg)) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
215 arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]]
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
216 if (! is.na(arg.split[2]) ) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
217 clustnummer <- as.numeric(arg.split[2])
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
218 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
219 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
220 #regions
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
221 if (grepl('^-regions=',each.arg)) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
222 arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]]
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
223 if (! is.na(arg.split[2]) ) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
224 regions <- arg.split[2]
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
225 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
226 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
227 #processor cores
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
228 if (grepl('^-p=',each.arg)) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
229 arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]]
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
230 if (! is.na(arg.split[2]) ) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
231 cornum <- as.numeric(arg.split[2])
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
232 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
233 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
234 #chromSize
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
235 if (grepl('^-chromoS=',each.arg)) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
236 arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]]
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
237 if (! is.na(arg.split[2]) ) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
238 chromSize <- as.numeric(arg.split[2])
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
239 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
240 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
241 #stepsize
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
242 if (grepl('^-sig=',each.arg)) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
243 arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]]
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
244 if (! is.na(arg.split[2]) ) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
245 sigstep <- as.numeric(arg.split[2])
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
246 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
247 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
248 #chrcount
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
249 if (grepl('^-chrcount=',each.arg)) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
250 arg.split <- strsplit(each.arg,'=',fixed=TRUE)[[1]]
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
251 if (! is.na(arg.split[2]) ) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
252 chrcount <- as.numeric(arg.split[2])
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
253 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
254 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
255 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
256
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
257 ##Parse in variables
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
258 readsFiles = as.list(strsplit(bednames, ",", fixed = TRUE)[[1]])
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
259 numdup = length(readsFiles) #number of replicates
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
260 if (bkgd != "None") {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
261 readsFiles[[numdup+1]] = bkgd
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
262 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
263 frags = as.numeric(strsplit(frag, ",", fixed = TRUE)[[1]])
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
264 rm(bednames)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
265
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
266 options(stringsAsFactors = FALSE)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
267 #=======================> DONE!
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
268
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
269
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
270
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
271
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
272 # =======================
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
273 # Some preliminary stuff
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
274 # =======================
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
275 #import regions
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
276 regions = read.table(regions, header = FALSE)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
277 regions = regions[regions[[1]] == chromName, , drop = FALSE]
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
278 regions = cbind(regions[[2]] + 1, regions[[3]])
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
279
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
280 #import read data
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
281 if (cornum > 1) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
282 datain = mclapply(readsFiles, parsein, mc.cores = cornum, mc.preschedule = TRUE) #read in all bed files (samples and control)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
283 } else {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
284 datain = lapply(readsFiles, parsein) #read in all bed files (samples and control)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
285 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
286 #=======================> DONE!
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
287
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
288
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
289
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
290 # ===============
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
291 # Counting Reads
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
292 # ===============
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
293 if (cornum > 1) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
294 counts = mclapply(readsFiles, countreads, reads = datain, frag = frags, chromsize = chromSize, filelist = readsFiles, mc.cores = cornum, mc.preschedule = TRUE)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
295 } else {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
296 counts = lapply(readsFiles, countreads, reads = datain, frag = frags, chromsize = chromSize, filelist = readsFiles)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
297 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
298 rm(datain)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
299 #=======================> DONE!
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
300
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
301
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
302
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
303 # ==============
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
304 # Getting signal
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
305 # ==============
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
306 if (cornum > 1) {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
307 sig = mclapply(regions[,1], getsig, winEnds = regions[,2], startList = regions[,1], stepSize = sigstep, chromName, mc.cores = cornum, mc.preschedule = TRUE)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
308 } else {
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
309 sig = lapply(regions[,1], getsig, winEnds = regions[,2], startList = regions[,1], stepSize = sigstep, chromName)
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
310 }
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
311 message("Done!")
d42f4d78c85e Uploaded
messersc
parents:
diff changeset
312 #=======================> DONE!