Mercurial > repos > rlegendre > ving
comparison ving.R @ 1:5feff08a06ea draft
Uploaded
author | rlegendre |
---|---|
date | Tue, 29 Jul 2014 03:15:34 -0400 |
parents | |
children | 5ed41b948030 |
comparison
equal
deleted
inserted
replaced
0:d7006c780d35 | 1:5feff08a06ea |
---|---|
1 #! /usr/bin/Rscript | |
2 | |
3 # Il est interdit de mettre ARGUMENT comme argument long | |
4 | |
5 | |
6 | |
7 # spec contains at least 4 columns, as many as 5 columns. | |
8 # | |
9 # column 1: long name of flag | |
10 # | |
11 # column 2: short name of flag | |
12 # | |
13 # column 3: argument flag. 0=without argument, 1=with argument | |
14 # | |
15 # column 4: mode of argument. one of "logical", "integer", "double", | |
16 # "complex", "character", "" for arguments without value (return logical) | |
17 # | |
18 # column 5 (optional): description of ARGUMENT. | |
19 # | |
20 | |
21 | |
22 RetreiveAndDestroy=function(opt,root,stem,regexp,SearchNames,Out,isValue,NbFound,StockNames){ | |
23 Bool=lapply(paste(root,SearchNames,stem,sep=""),grepl,opt) | |
24 names(Bool)=StockNames | |
25 Pos=lapply(Bool,which) | |
26 names(Pos)=StockNames | |
27 disable=c() | |
28 for (i in StockNames){ | |
29 nbmatch=length(Pos[[i]]) | |
30 if(nbmatch>0){ | |
31 NbFound[[i]]=NbFound[[i]]+nbmatch | |
32 disable=c(disable,-1*Pos[[i]]) | |
33 if(is.null(Out[[i]])){ | |
34 if(isValue[[i]]!=0){ | |
35 if(regexp=="next"){ | |
36 Out[[i]]=opt[Pos[[i]]+1] | |
37 disable=c(disable,-1*(Pos[[i]]+1)) | |
38 }else{ | |
39 Out[[i]]=sub(regexp,"\\1",opt[Pos[[i]]]) | |
40 } | |
41 }else{ | |
42 Out[[i]]=TRUE | |
43 } | |
44 }else{ | |
45 if(isValue[[i]]!=0){ | |
46 if(regexp=="next"){ | |
47 Out[[i]]=c(Out[[i]],opt[Pos[[i]]+1]) | |
48 disable=c(disable,-1*(Pos[[i]]+1)) | |
49 }else{ | |
50 Out[[i]]=c(Out[[i]],sub(regexp,"\\1",opt[Pos[[i]]])) | |
51 } | |
52 }else{ | |
53 Out[[i]]=c(Out[[i]],TRUE) | |
54 } | |
55 } | |
56 } | |
57 } | |
58 if(length(disable)>0){ | |
59 opt=opt[disable] | |
60 } | |
61 Out[["ARGUMENT"]]=list() | |
62 Out[["ARGUMENT"]][["opt"]]=opt | |
63 Out[["ARGUMENT"]][["NbFound"]]=NbFound | |
64 return(Out) | |
65 } | |
66 | |
67 getopt = function (spec=NULL,opt=commandArgs()) { | |
68 | |
69 FindArgs=which(opt=="--args") | |
70 if(length(FindArgs)!=1){ | |
71 stop(length(FindArgs)," --args found where 1 expected.",call.=F) | |
72 } | |
73 ExecName=sub("--file=","",opt[FindArgs-1]) | |
74 | |
75 if(FindArgs<length(opt)){ | |
76 opt=opt[(FindArgs+1):length(opt)] | |
77 }else{ | |
78 opt="" | |
79 } | |
80 | |
81 | |
82 min.columns=5 | |
83 colNames=c("LongName","ShortName","Flag","Mod","Default") | |
84 max.columns=6 | |
85 DimSpec=dim(spec) | |
86 if(DimSpec[2]>min.columns){ | |
87 colNames=c(colNames,"Description") | |
88 } | |
89 | |
90 if(is.null(spec) | !is.matrix(spec) | (DimSpec[2]<min.columns | DimSpec[2]>max.columns)){ | |
91 stop('argument "spec" is required and must be a matrix with 4|5 columns.',call.=F) | |
92 } | |
93 colnames(spec)=colNames | |
94 | |
95 spec=as.data.frame(spec,stringsAsFactors=F) | |
96 #spec validation | |
97 if(length(unique(c(spec$ShortName,"ARGUMENT","args")))!=DimSpec[1]+2 | length(unique(spec$LongName))!=DimSpec[1]){ | |
98 stop('Long|Short names for flags must be unique (Long name : "ARGUMENT" and "args" forbidden).', | |
99 "\n","List of duplicated :", | |
100 "\n","Short: ",paste(spec$ShortName[duplicated(c(spec$ShortName,"ARGUMENT","args"))],collapse=" "), | |
101 "\n","Long: ",paste(spec$ShortName[duplicated(spec$LongName)],collapse=" "),call.=F) | |
102 } | |
103 if(length(which(nchar(spec$ShortName)>1))!=0){ | |
104 stop('Short names flags can\'t be longer than 1 character.') | |
105 } | |
106 | |
107 | |
108 #initialize | |
109 Out=list() | |
110 Short2Long=list() | |
111 NbFound=list() | |
112 isValue=list() | |
113 for (i in 1:DimSpec[1]){ | |
114 Short2Long[[spec$ShortName[i]]]=spec$LongName[i] | |
115 NbFound[[spec$LongName[i]]]=0 | |
116 isValue[[spec$LongName[i]]]=spec$Flag[i] | |
117 } | |
118 | |
119 #Map, retreive and suppress ARGUMENTs and arguments | |
120 #Value ARGUMENT --example=value | |
121 Out=RetreiveAndDestroy(opt,"^--","=.+$",".+=(.+)$",spec$LongName,Out,isValue,NbFound,spec$LongName) | |
122 opt=Out[["ARGUMENT"]][["opt"]] | |
123 NbFound=Out[["ARGUMENT"]][["NbFound"]] | |
124 Out[["ARGUMENT"]]=NULL | |
125 #boolean ARGUMENT --example | |
126 Out=RetreiveAndDestroy(opt,"^--","$","$",spec$LongName,Out,isValue,NbFound,spec$LongName) | |
127 opt=Out[["ARGUMENT"]][["opt"]] | |
128 NbFound=Out[["ARGUMENT"]][["NbFound"]] | |
129 Out[["ARGUMENT"]]=NULL | |
130 #short name ARGUMENT -t value OR boolean -t | |
131 Out=RetreiveAndDestroy(opt,"^-","$","next",spec$ShortName,Out,isValue,NbFound,spec$LongName) | |
132 opt=Out[["ARGUMENT"]][["opt"]] | |
133 NbFound=Out[["ARGUMENT"]][["NbFound"]] | |
134 Out[["ARGUMENT"]]=NULL | |
135 #Warn about non mapped ARGUMENTs | |
136 if(length(opt)>0){ | |
137 PosUnkArg=which(grepl("^-",opt)) | |
138 if(length(PosUnkArg)){ | |
139 message("Error, argument unreconized :","\n",paste(opt[PosUnkArg],collapse="\n"),"\n\n") | |
140 } | |
141 if(length(PosUnkArg)>0){ | |
142 opt=opt[PosUnkArg*-1] | |
143 } | |
144 } | |
145 #Arguments | |
146 Out[["ARGUMENT"]]=opt | |
147 | |
148 #Validation of ARGUMENTs | |
149 for(i in 1:DimSpec[1]){ | |
150 if(spec$Flag[i]=="0"){#verify boolean arguments | |
151 NbValue=length(Out[[spec$LongName[i]]]) | |
152 if(NbValue>1){ | |
153 message("Warning : ",spec$LongName[i]," found ",NbValue," times") | |
154 } | |
155 } | |
156 if(length(Out[[spec$LongName[i]]])==0){ | |
157 Out[[spec$LongName[i]]]=spec$Default[i] | |
158 } | |
159 library("methods") | |
160 Out[[spec$LongName[i]]]=as(Out[[spec$LongName[i]]],spec$Mod[i]) | |
161 } | |
162 | |
163 return(Out) | |
164 } | |
165 | |
166 # column 3: argument flag. 0=no argument, 1=required argument, 2=optional argument | |
167 | |
168 | |
169 | |
170 ### ========================================================================= | |
171 ### Ving's methods | |
172 ### ------------------------------------------------------------------------- | |
173 ### | |
174 | |
175 ##Converts the flag numbers into binary | |
176 integer.base.b = function(x, b=2){ | |
177 xi <- as.integer(x) | |
178 if(any(is.na(xi) | ((x-xi)!=0))) | |
179 print(list(ERROR="x not integer", x=x)) | |
180 N <- length(x) | |
181 xMax <- max(c(x,1)) | |
182 ndigits <- 11 | |
183 Base.b <- array(NA, dim=c(N, ndigits)) | |
184 for(i in 1:ndigits){#i <- 1 | |
185 Base.b[, ndigits-i+1] <- (x %% b) | |
186 x <- (x %/% b) | |
187 } | |
188 Base.b | |
189 } | |
190 | |
191 ##Checks if color used is an acceptable color | |
192 is.acceptable.color = function(character) { | |
193 tmp=try(col2rgb(character),TRUE) | |
194 return(class(tmp)!="try-error") | |
195 } | |
196 | |
197 ##Combines two data frame | |
198 listOfDataFrame2DataFrame = function(x,vertical=TRUE) { | |
199 if(length(x)==1) { | |
200 x[[1]] | |
201 } | |
202 else { | |
203 if(vertical) { | |
204 ncol = dim(x[[1]])[2] | |
205 vect=unlist(lapply(x,t)) | |
206 retour = as.data.frame(t(array(vect,dim=c(ncol,length(vect)/ncol))),stringsAsFactors=FALSE) | |
207 names(retour)=names(x[[1]]) | |
208 } | |
209 else { | |
210 nline=dim(x[[1]])[1] | |
211 vect=unlist(x) | |
212 retour=as.data.frame(array(vect,dim=c(nline,length(vect)/nline))) | |
213 names(retour) = unlist(lapply(x,names)) | |
214 } | |
215 retour | |
216 } | |
217 } | |
218 | |
219 # utilise la fonction scanBam d'une façon plus partique pour moi | |
220 extractFromBam = function(file,which,what) { | |
221 return(scanBam(file, param=ScanBamParam(which=which,what=what))[[1]][[1]]) | |
222 } | |
223 | |
224 ##Returns a list from cigar expression | |
225 interpreteCIGAR = function(cigar) { | |
226 cigar_un = strsplit(unique(cigar),split="") | |
227 n_cigar_un = length(cigar_un) | |
228 taille_cigar = list() | |
229 analise_cigar = function(cigar_) { | |
230 cigar_sortie = list() | |
231 acc = "" | |
232 for(j in 1:length(cigar_)) { | |
233 if(sum(cigar_[j]==as.character(0:9))==1) { | |
234 acc=paste(acc,cigar_[j],sep="") | |
235 } | |
236 else { | |
237 cigar_sortie[[length(cigar_sortie)+1]]=as.integer(acc) | |
238 cigar_sortie[[length(cigar_sortie)+1]]=cigar_[j] | |
239 acc="" | |
240 } | |
241 } | |
242 return(cigar_sortie) | |
243 } | |
244 cigar_interprete = lapply(cigar_un,analise_cigar) | |
245 names(cigar_interprete) = unique(cigar) | |
246 | |
247 return(cigar_interprete) | |
248 } | |
249 | |
250 # prend un CIGAR splités et retourne la taille occupé par le read sur la séquence génomique (introns compris) | |
251 calcule_longueur_cigar = function(cigar_) { | |
252 lon = 0 | |
253 N=length(cigar_) | |
254 for(j in seq(2,N,2)) { | |
255 if(cigar_[[j]]!="I") { | |
256 lon=lon+cigar_[[j-1]] | |
257 } | |
258 } | |
259 return(lon) | |
260 } | |
261 | |
262 # prend un CIGAR splités et retourne les positions | |
263 calcule_junction_cigar = function(cigar_) { | |
264 retour=list() | |
265 lon = 0 | |
266 N=length(cigar_) | |
267 for(j in seq(2,N,2)) { | |
268 if(cigar_[[j]]!="I") { | |
269 lon=lon+cigar_[[j-1]] | |
270 } | |
271 if(cigar_[[j]]=="N") { | |
272 retour[[length(retour)+1]]=c(lon-cigar_[[j-1]]+1,lon) | |
273 } | |
274 } | |
275 return(retour) | |
276 } | |
277 | |
278 ##Returns a list of numbers of single read with their coordinates | |
279 compresse_coordonnees = function(debut,fin) { | |
280 if(length(debut)==0) { | |
281 return(list(numeric(),numeric(),numeric())) | |
282 } | |
283 else { | |
284 tmp=sort(paste(debut,fin,sep="_")) | |
285 tmp_rle=rle(tmp) | |
286 poids=tmp_rle$lengths | |
287 values_split=strsplit(tmp_rle$values,split="_") | |
288 doit = function(j) { | |
289 return(as.integer(values_split[[j]][1])) | |
290 } | |
291 debut_uni=sapply(1:length(poids),doit) | |
292 doit = function(j) { | |
293 return(as.integer(values_split[[j]][2])) | |
294 } | |
295 fin_uni=sapply(1:length(poids),doit) | |
296 ordre_debut = order(debut_uni) | |
297 return(list(debut_uni[ordre_debut],fin_uni[ordre_debut],poids[ordre_debut])) | |
298 } | |
299 } | |
300 | |
301 RDataFileName = function(file) { | |
302 return(paste(file,".RData",sep="")) | |
303 } | |
304 | |
305 ##Function converts and extracts the infos from bamfile | |
306 readBam_ <-function(file_,insert_max_=2000,stranded_=TRUE,ncore_=1,libraryType_=c("standard","inverse"),fileNameRData_=NA,normalized_=NULL,chrName_=NULL,from_=1,to_=NULL) { | |
307 suppressPackageStartupMessages(require("Rsamtools")) | |
308 suppressPackageStartupMessages(require("GenomicRanges")) | |
309 ##Declaration of variables | |
310 flagstat = numeric(11) | |
311 names(flagstat)=c("total","duplicates","mapped","paired","read1","read2","properly paired","itself and mate mapped","singletons","mate mapped on a different chr","QC-failed") | |
312 genome_info=scanBamHeader(file_)[[1]]$targets | |
313 noms_chromosomes = names(genome_info) | |
314 longueur_chromosomes = as.integer(genome_info) | |
315 nombre_chromosomes = length(noms_chromosomes) | |
316 brin_F = list() | |
317 brin_R = list() | |
318 brin_F_junction = list() | |
319 brin_R_junction = list() | |
320 pas<-c(1,2,6,7) | |
321 i_zone = 0 | |
322 if(is.null(chrName_)) { | |
323 chrName__ = noms_chromosomes | |
324 } | |
325 else { | |
326 chrName__ = chrName_ | |
327 } | |
328 ##Fragments identification | |
329 for(i in (1:nombre_chromosomes)) { | |
330 i_zone = i_zone +1 | |
331 nom_chromo = noms_chromosomes[i] | |
332 lon_chromo = longueur_chromosomes[i] | |
333 | |
334 if(!(nom_chromo %in% chrName__)) { | |
335 brin_F[[i]] = list(numeric(),numeric(),numeric()) | |
336 brin_R[[i]] = list(numeric(),numeric(),numeric()) | |
337 brin_F_junction[[i]] = list(numeric(),numeric(),numeric()) | |
338 brin_R_junction[[i]] = list(numeric(),numeric(),numeric()) | |
339 } | |
340 else { | |
341 if(is.null(to_)) { | |
342 to_i=lon_chromo | |
343 } | |
344 else { | |
345 to_i=to_[min(i_zone,length(to_))] | |
346 } | |
347 from_i=from_[min(i_zone,length(from_))] | |
348 | |
349 commande = paste("RangesList(`",nom_chromo,"` = IRanges(",from_i,",",to_i,"))",sep="") | |
350 expr=try(parse(text=commande),TRUE) | |
351 ##Function used from GenomicRanges package | |
352 which = eval(expr) | |
353 what<-c("flag","mpos","cigar","mrnm","isize") | |
354 param<- ScanBamParam(what=what, which=which) | |
355 G_alignement<- readGAlignmentsFromBam(file=file_, param=param, use.names=TRUE) | |
356 ##Case of no reads on the chromosome | |
357 start=start(G_alignement) | |
358 if(length(start) == 0 ){ | |
359 brin_F[[i]] = list(numeric(),numeric(),numeric()) | |
360 brin_R[[i]] = list(numeric(),numeric(),numeric()) | |
361 brin_F_junction[[i]] = list(numeric(),numeric(),numeric()) | |
362 brin_R_junction[[i]] = list(numeric(),numeric(),numeric()) | |
363 } | |
364 else { | |
365 end=end(G_alignement) | |
366 strand=as.character(strand(G_alignement)) | |
367 flag=mcols(G_alignement)$flag | |
368 mpos=mcols(G_alignement)$mpos | |
369 cigar=mcols(G_alignement)$cigar | |
370 mrnm=mcols(G_alignement)$mrnm | |
371 isize=mcols(G_alignement)$isize | |
372 | |
373 first_read=integer.base.b(flag)[,5]==1 | |
374 strand[strand =="+" & !first_read ]="*" | |
375 strand[strand =="-" & !first_read ] ="+" | |
376 strand[strand =="*" & !first_read ] ="-" | |
377 | |
378 ##Case of pairend reads | |
379 is_on_same_chr = mrnm==nom_chromo | |
380 is_on_same_chr[is.na(is_on_same_chr)] = FALSE | |
381 is_paired = is_on_same_chr & abs(isize) <= insert_max_ | |
382 is_paired[first_read & strand=="+" & (isize<0 | isize>insert_max_)] = FALSE | |
383 is_paired[!first_read & strand=="+" & (isize>0 | isize < -insert_max_)] = FALSE | |
384 is_paired[first_read & strand=="-" & (isize>0 | isize < -insert_max_)] = FALSE | |
385 is_paired[!first_read & strand=="-" & (isize<0 | isize>insert_max_)] = FALSE | |
386 is_paired[is.na(is_paired)] = FALSE | |
387 | |
388 debut_fragment_paired_plus<-mpos[!first_read & strand =="+" & is_paired] | |
389 fin_fragment_paired_plus<-end[!first_read & strand=="+" & is_paired] | |
390 debut_fragment_paired_moins<-mpos[first_read & strand=="-" & is_paired] | |
391 fin_fragment_paired_moins<-end[first_read & strand =="-" & is_paired] | |
392 | |
393 ##Case of single reads | |
394 debut_fragment_singleton_plus = start[!is_paired & strand =="+"] | |
395 fin_fragment_singleton_plus = end[!is_paired & strand =="+"] | |
396 debut_fragment_singleton_moins=start[!is_paired & strand =="-"] | |
397 fin_fragment_singleton_moins= end[!is_paired & strand =="-"] | |
398 | |
399 ##Fragments | |
400 debut_frag_plus = c(debut_fragment_paired_plus,debut_fragment_singleton_plus) | |
401 fin_frag_plus = c(fin_fragment_paired_plus,fin_fragment_singleton_plus) | |
402 debut_frag_moins=c(debut_fragment_paired_moins,debut_fragment_singleton_moins) | |
403 fin_frag_moins = c(fin_fragment_paired_moins,fin_fragment_singleton_moins) | |
404 brin_F[[i]] = compresse_coordonnees(debut_frag_plus,fin_frag_plus) | |
405 brin_R[[i]] = compresse_coordonnees(debut_frag_moins,fin_frag_moins) | |
406 | |
407 ##CIGAR's interpreter | |
408 cigar_interprete = interpreteCIGAR(cigar) | |
409 longueur_cigar = lapply(cigar_interprete,calcule_longueur_cigar) | |
410 junction_cigar = lapply(cigar_interprete,calcule_junction_cigar) | |
411 debut_junction = numeric() | |
412 fin_junction = numeric() | |
413 brin_junction = numeric() | |
414 i_junction = 0 | |
415 ##junction read | |
416 for(j in 1:length(cigar)) { | |
417 junctions_ = junction_cigar[[cigar[j]]] | |
418 if(length(junctions_)){ | |
419 for(k in 1:length(junctions_)) { | |
420 i_junction = i_junction + 1 | |
421 debut_junction[i_junction] = start[j] + junctions_[[k]][1] - 1 | |
422 fin_junction[i_junction] = start[j] + junctions_[[k]][2] - 1 | |
423 brin_junction[i_junction] = strand[j] | |
424 } | |
425 } | |
426 } | |
427 if(i_junction==0) { | |
428 brin_F_junction[[i]] = list(numeric(),numeric(),numeric()) | |
429 brin_R_junction[[i]] = list(numeric(),numeric(),numeric()) | |
430 } | |
431 else { | |
432 brin_F_junction[[i]] = compresse_coordonnees(debut_junction[brin_junction=="+"],fin_junction[brin_junction=="+"]) | |
433 brin_R_junction[[i]] = compresse_coordonnees(debut_junction[brin_junction=="-"],fin_junction[brin_junction=="-"]) | |
434 } | |
435 ##Flagstat interpreter | |
436 flag_bits = integer.base.b(flag)#remplie les données stat pour un flag donné | |
437 | |
438 ##flagstat | |
439 ##total | |
440 flagstat[1] = flagstat[1] + sum(flag_bits[,2]==0) | |
441 ##duplicates | |
442 flagstat[2] = flagstat[2] + sum((flag_bits[,1]==1)&(flag_bits[,2]==0)) | |
443 ##mapped | |
444 flagstat[3] = flagstat[3] + sum((flag_bits[,9]==0)&(flag_bits[,2]==0)) | |
445 ##paired | |
446 flagstat[4] = flagstat[4] + sum((flag_bits[,11]==1)&(flag_bits[,2]==0)) | |
447 ##read1 | |
448 flagstat[5] = flagstat[5] + sum((flag_bits[,5]==1)&(flag_bits[,2]==0)) | |
449 ##read2 | |
450 flagstat[6] = flagstat[6] + sum((flag_bits[,4]==1)&(flag_bits[,2]==0)) | |
451 ##iself and mate mapped | |
452 flagstat[8] = flagstat[8] + sum((flag_bits[,11]==1)&(flag_bits[,9]==0)&(flag_bits[,8]==0)&(flag_bits[,2]==0)) | |
453 ##singletons | |
454 flagstat[9] = flagstat[9] + sum((flag_bits[,8]==1)&(flag_bits[,2]==0)) | |
455 ##QC-failed | |
456 flagstat[11] = flagstat[11] + sum(flag_bits[,2]==1) | |
457 ##flagstat | |
458 ##mate on a different chr | |
459 flagstat[10] = flagstat[10] + sum((!is_on_same_chr)&(flag_bits[,11]==1)&(flag_bits[,9]==0)&(flag_bits[,8]==0)&(flag_bits[,2]==0)) | |
460 ##flagstat | |
461 ##properly paired | |
462 flagstat[7] = flagstat[7] + sum(is_paired) | |
463 | |
464 } | |
465 } | |
466 } | |
467 ##Data storing | |
468 names(brin_F) = noms_chromosomes | |
469 names(brin_R) = noms_chromosomes | |
470 names(brin_F_junction) = noms_chromosomes | |
471 names(brin_R_junction) = noms_chromosomes | |
472 | |
473 bamHandler = list() | |
474 if(libraryType_[1]=="inverse") { | |
475 bamHandler[[1]] = brin_R | |
476 bamHandler[[2]] = brin_F | |
477 } | |
478 else { | |
479 bamHandler[[1]] = brin_F | |
480 bamHandler[[2]] = brin_R | |
481 } | |
482 bamHandler[[3]] = longueur_chromosomes | |
483 bamHandler[[4]] = flagstat | |
484 bamHandler[[5]] = stranded_ | |
485 | |
486 if(libraryType_[1]=="inverse") { | |
487 bamHandler[[6]] = brin_R_junction | |
488 bamHandler[[7]] = brin_F_junction | |
489 } | |
490 else { | |
491 bamHandler[[6]] = brin_F_junction | |
492 bamHandler[[7]] = brin_R_junction | |
493 } | |
494 bamHandler[[8]] = FALSE | |
495 if(!is.null(normalized_)) { | |
496 for( i in pas) { | |
497 for(j in 1:nombre_chromosomes){ | |
498 bamHandler[[i]][[j]][[4]]=normalized_*bamHandler[[i]][[j]][[3]] | |
499 } | |
500 } | |
501 bamHandler[[8]] = TRUE | |
502 } | |
503 | |
504 names(bamHandler) = c("F","R","chrLength","flagstat","stranded","junctions_F","junctions_R","norm") | |
505 if((is.null(chrName_))&(from_==1)&(is.null(to_))) { | |
506 if(is.null(fileNameRData_)|is.na(fileNameRData_)) { | |
507 save(bamHandler,file=RDataFileName(file_)) | |
508 } | |
509 else { | |
510 save(bamHandler,file=fileNameRData_) | |
511 } | |
512 } | |
513 return(bamHandler) | |
514 } | |
515 | |
516 chercheBamHandlerDansFichierRData = function(fichierRData) { | |
517 tmp=try(load(fichierRData),TRUE) | |
518 if(class(tmp)=="try-error") { | |
519 return(tmp) | |
520 } | |
521 else { | |
522 return(try(bamHandler,TRUE)) | |
523 } | |
524 } | |
525 | |
526 ##Public Function | |
527 readBam = function(file,insert_max=2000,stranded=TRUE,reload=FALSE,ncore=1,libraryType=c("standard","inverse"),normalized=NULL,chrName=NULL,from=1,to=NULL) { | |
528 if(length(file)==1) { | |
529 nom_fichier_RData = RDataFileName(file) | |
530 if(!file.exists(nom_fichier_RData)|reload) { | |
531 return(try(readBam_(file_=file,insert_max_=insert_max,stranded_=stranded,ncore_=ncore,libraryType_=libraryType,normalized_=normalized,chrName_=chrName,from_=from,to_=to),TRUE)) | |
532 } | |
533 else { | |
534 load(nom_fichier_RData) | |
535 return(bamHandler) | |
536 } | |
537 } | |
538 else { | |
539 require("multicore") | |
540 chargeBamsDansFichiersRData = function(oneFile) { | |
541 nom_fichier_RData = RDataFileName(oneFile) | |
542 if(!file.exists(nom_fichier_RData)|reload) { | |
543 bamHandler=try(readBam_(oneFile,insert_max_=insert_max,stranded_=stranded,libraryType_=libraryType,normalized_=normalized,chrName_=chrName,from_=from,to_=to),TRUE) | |
544 } | |
545 return(nom_fichier_RData) | |
546 } | |
547 fichiersRData = mclapply(file,chargeBamsDansFichiersRData,mc.cores=ncore) | |
548 gc() | |
549 return(lapply(fichiersRData,chercheBamHandlerDansFichierRData)) | |
550 } | |
551 } | |
552 | |
553 ##Extracts the signal from bamHandler objects | |
554 extractSignal = function(bamHandlerList,chrName,from=1, to=NULL,normalized_=FALSE) { | |
555 forward=list() | |
556 reverse=list() | |
557 chr=which(names(bamHandlerList[[1]]$F)==chrName) | |
558 if(is.null(to)){ | |
559 to=bamHandlerList[[1]]$chrLength[chr] | |
560 } | |
561 if(normalized_) { | |
562 end=4 | |
563 }else{ | |
564 end=3 | |
565 } | |
566 for(i in 1:length(bamHandlerList)){ | |
567 forward_=numeric(to-from+1) | |
568 which_read = which((bamHandlerList[[i]]$F[[chrName]][[2]]>=from)&(bamHandlerList[[i]]$F[[chrName]][[1]]<=to)) | |
569 n_reads=length(which_read) | |
570 if(n_reads>0) { | |
571 for(k in which_read) { | |
572 debut_read = max(1,bamHandlerList[[i]]$F[[chrName]][[1]][k]-from+1) | |
573 fin_read = min(bamHandlerList[[i]]$F[[chrName]][[2]][k]-from+1,to-from+1) | |
574 forward_[debut_read:fin_read]=forward_[debut_read:fin_read]+bamHandlerList[[i]]$F[[chrName]][[end]][k] | |
575 } | |
576 } | |
577 which_junctions = which((bamHandlerList[[i]]$junctions_F[[chrName]][[2]]>=from)&(bamHandlerList[[i]]$junctions_F[[chrName]][[1]]<=to)) | |
578 n_junctions=length(which_junctions) | |
579 if(n_junctions>0) { | |
580 for(k in which_junctions) { | |
581 debut_junction = max(1,bamHandlerList[[i]]$junctions_F[[chrName]][[1]][k]-from+1) | |
582 fin_junction = min(bamHandlerList[[i]]$junctions_F[[chrName]][[2]][k]-from+1,to-from+1) | |
583 forward_[debut_junction:fin_junction]=forward_[debut_junction:fin_junction]-bamHandlerList[[i]]$junctions_F[[chrName]][[end]][k] | |
584 } | |
585 } | |
586 | |
587 reverse_=numeric(to-from+1) | |
588 which_read = which((bamHandlerList[[i]]$R[[chrName]][[2]]>=from)&(bamHandlerList[[i]]$R[[chrName]][[1]]<=to)) | |
589 n_reads= length(which_read) | |
590 if(n_reads>0) { | |
591 for(k in which_read) { | |
592 debut_read = max(1,bamHandlerList[[i]]$R[[chrName]][[1]][k]-from+1) | |
593 fin_read = min(bamHandlerList[[i]]$R[[chrName]][[2]][k]-from+1,to-from+1) | |
594 reverse_[debut_read:fin_read]=reverse_[debut_read:fin_read]+bamHandlerList[[i]]$R[[chrName]][[end]][k] | |
595 } | |
596 } | |
597 which_junctions = which((bamHandlerList[[i]]$junctions_R[[chrName]][[2]]>=from)&(bamHandlerList[[i]]$junctions_R[[chrName]][[1]]<=to)) | |
598 n_junctions=length(which_junctions) | |
599 if(n_junctions>0) { | |
600 for(k in which_junctions) { | |
601 debut_junction = max(1,bamHandlerList[[i]]$junctions_R[[chrName]][[1]][k]-from+1) | |
602 fin_junction = min(bamHandlerList[[i]]$junctions_R[[chrName]][[2]][k]-from+1,to-from+1) | |
603 reverse_[debut_junction:fin_junction]=reverse_[debut_junction:fin_junction]-bamHandlerList[[i]]$junctions_R[[chrName]][[end]][k] | |
604 } | |
605 } | |
606 forward_[forward_<0]=0 | |
607 reverse_[reverse_<0]=0 | |
608 if(bamHandlerList[[i]]$stranded) { | |
609 forward[[i]]=forward_ | |
610 reverse[[i]]=reverse_ | |
611 } | |
612 else { | |
613 forward[[i]]=forward_+reverse_ | |
614 reverse[[i]]=numeric(to-from+1) | |
615 } | |
616 } | |
617 chr_ = list() | |
618 chr_$F = forward | |
619 chr_$R = reverse | |
620 return(chr_) | |
621 } | |
622 | |
623 ##Intern function for readGff function | |
624 my.read.lines2=function(fname) { | |
625 | |
626 s = file.info( fname )$size | |
627 buf = readChar( fname, s, useBytes=T) | |
628 strsplit( buf,"\n",fixed=T,useBytes=T)[[1]] | |
629 } | |
630 | |
631 ##Extracts the annotation infos from Gff file | |
632 readGff = function(file_in, from=1, to=Inf, chr=NULL, infoName=c("ID","Name","Parent","gene","Alias","orf_classification","Ontology_term","Note","GO")) { | |
633 tmp=try(my.read.lines2(file_in)) | |
634 if(!is.null(chr)) { | |
635 tmp1=grep(chr, tmp, value=TRUE,useBytes=T) | |
636 } | |
637 else { | |
638 tmp1=tmp | |
639 } | |
640 N=length(tmp1) | |
641 Chr = array() | |
642 Start= array() | |
643 Stop= array() | |
644 Strand=array() | |
645 Type=array() | |
646 info=list() | |
647 for(i in 1:length(infoName)) info[[i]] = array() | |
648 names(info)=infoName | |
649 j<-1 | |
650 for (i in 1:N) { | |
651 if(substr(tmp1[i],1,1)!="#") { | |
652 line_split=unlist(strsplit(tmp1[i],"\t",fixed=T,useBytes=T)) | |
653 if((as.integer(line_split[4])<=to) & (as.integer(line_split[5])>=from)) { | |
654 Chr[j] = line_split[1] | |
655 Start[j] = as.integer(line_split[4]) | |
656 Stop[j] = as.integer(line_split[5]) | |
657 Strand[j]=line_split[7] | |
658 Type[j] = line_split[3] | |
659 ninth=unlist(strsplit(line_split[9],";",fixed=T,useBytes=T)) | |
660 element_ninth_empty=rep(TRUE,length(infoName)) | |
661 for(element_ninth in ninth) { | |
662 element_ninth_split = unlist(strsplit(element_ninth,"=",fixed=T,useBytes=T)) | |
663 if(length(element_ninth_split)==2) { | |
664 if(element_ninth_split[1] %in% infoName) { | |
665 info[[element_ninth_split[1]]][j]=element_ninth_split[2] | |
666 element_ninth_empty[infoName==element_ninth_split[1]]=FALSE | |
667 } | |
668 } | |
669 } | |
670 for(infoName_ in infoName[element_ninth_empty]) { | |
671 info[[infoName_]][j]="." | |
672 } | |
673 j<-j+1 | |
674 } | |
675 } | |
676 } | |
677 retour = data.frame(Chr,Type,Start,Stop,Strand,info,stringsAsFactors = FALSE) | |
678 return(retour) | |
679 } | |
680 | |
681 ##Returns the classic visualisation | |
682 plotRNAseq = function(forward,reverse,debut_vue=1,fin_vue=length(forward),chr=NULL,annot=NULL,style=NULL,top=NULL,bottom=NULL,x="",y="",titre="",repeated=FALSE,name_flags="",decal=0,ataxises=NULL,classic_plus_color="navyblue",classic_minus_color="mediumvioletred",stranded=TRUE) { | |
683 if(repeated) { | |
684 forward_=numeric(fin_vue) | |
685 forward_[debut_vue:fin_vue]=forward | |
686 reverse_=numeric(fin_vue) | |
687 reverse_[debut_vue:fin_vue]=reverse | |
688 } | |
689 else { | |
690 forward_=forward | |
691 reverse_=reverse | |
692 } | |
693 if(is.null(top)) { | |
694 top=max(forward_[debut_vue:fin_vue]) | |
695 } | |
696 if(is.null(bottom)) { | |
697 bottom=max(reverse_[debut_vue:fin_vue]) | |
698 } | |
699 if(is.null(ataxises)) { | |
700 plot(c(debut_vue,fin_vue)+decal,c(-bottom,top),ylim=c(-bottom,top),xlab=x,ylab=y,main=titre,col="white",xaxs="i",cex.main=2,yaxt="n",cex.lab=1.8) | |
701 } | |
702 else { | |
703 plot(c(debut_vue,fin_vue)+decal,c(-bottom,top),ylim=c(-bottom,top),xlab=x,ylab=y,main=titre,col="white",xaxs="i",xaxt="n",cex.main=2,yaxt="n",cex.lab=1.8) | |
704 ataxisesLabels=as.character(ataxises) | |
705 ataxisesLabels[((1:length(ataxises))%%2)==0]="" | |
706 lim=c(-bottom,top) | |
707 ataxises_y=pretty(lim,n=4) | |
708 ataxisesLabels_y=as.character(abs(ataxises_y)) | |
709 axis(1,at=ataxises,labels=FALSE,cex.axis=2) | |
710 axis(1,at=ataxises,labels=ataxisesLabels,cex.axis=2,line=0.4,lwd=0) | |
711 axis(2, at=ataxises_y,labels=FALSE,cex.axis=2) | |
712 axis(2, at=ataxises_y,labels=ataxisesLabels_y,cex.axis=2,line=-0.4,lwd=0) | |
713 } | |
714 polygon(c(debut_vue,debut_vue:fin_vue,fin_vue)+decal,c(0,forward_[debut_vue:fin_vue],0),col=classic_plus_color,border=NA) | |
715 if(stranded){ | |
716 text(fin_vue+(fin_vue-debut_vue)*0.01,top/2,"+",xpd=NA,cex=3) | |
717 text(fin_vue+(fin_vue-debut_vue)*0.01,-bottom/2,"-",xpd=NA,cex=3) | |
718 text(fin_vue+(fin_vue-debut_vue)*0.025,(top-bottom)/2,"Strand",xpd=NA,cex=2,srt=-90) | |
719 polygon(c(debut_vue,debut_vue:fin_vue,fin_vue)+decal,c(0,-reverse_[debut_vue:fin_vue],0),col=classic_minus_color,border=NA) | |
720 abline(h=0,lwd=2) | |
721 abline(h=0,col="white") | |
722 } | |
723 #if(!is.null(chr)&!is.null(annot)&!is.null(style)) { | |
724 # N=dim(annot)[1] | |
725 #if(N>0) { | |
726 # for(i in 1:N) { | |
727 #if(annot$Strand[i]=="+") { | |
728 # arrows(annot$Start[i],top,annot$Stop[i],top,col=style$col[style$Type==annot$Type[i]],length=0.10) | |
729 #} | |
730 #if(annot$Strand[i]=="-") { | |
731 # arrows(annot$Stop[i],-bottom,annot$Start[i],-bottom,col=style$col[style$Type==annot$Type[i]],length=0.10) | |
732 #} | |
733 #if(annot$Strand[i]==".") { | |
734 # segments(annot$Start[i],top,annot$Stop[i],top,col=style$col[style$Type==annot$Type[i]]) | |
735 #segments(annot$Stop[i],-bottom,annot$Start[i],-bottom,col=style$col[style$Type==annot$Type[i]]) | |
736 #} | |
737 #} | |
738 #} | |
739 #} | |
740 if(name_flags!="") { | |
741 flags=try(get(name_flags),TRUE) | |
742 if(class(flags)!="try-error") { | |
743 f_=flags[(flags$Chr==chr)&(flags$Stop>=debut_vue)&(flags$Start<=fin_vue),] | |
744 N=dim(f_)[1] | |
745 points(f_$Start,rep(0,N),col=2,pch=19,cex=2) | |
746 } | |
747 } | |
748 if(decal<=0) { | |
749 lines(c(0,0),c(top,-bottom),col=1) | |
750 } | |
751 } | |
752 | |
753 ##Intern function for heatmap visualisation | |
754 paletteFromColors = function(colMin="blue",colMax="red",n=300,method=c("hsv","rgb")) { | |
755 colMinRGB = col2rgb(colMin)[,1]/255 | |
756 colMaxRGB = col2rgb(colMax)[,1]/255 | |
757 seqList = list() | |
758 if(method[1]=="rgb") { | |
759 for(i in 1:3) { | |
760 seqList[[i]]=seq(colMinRGB[i],colMaxRGB[i],length.out=n) | |
761 } | |
762 return(rgb(seqList[[1]],seqList[[2]],seqList[[3]])) | |
763 } | |
764 else { | |
765 colMinHSV = rgb2hsv(colMinRGB) | |
766 colMaxHSV = rgb2hsv(colMaxRGB) | |
767 for(i in 1:3) { | |
768 seqList[[i]]=seq(colMinHSV[i],colMaxHSV[i],length.out=n) | |
769 } | |
770 return(hsv(seqList[[1]],s=seqList[[2]],v=seqList[[3]])) | |
771 } | |
772 } | |
773 | |
774 ##Retuns the heatmap visualisation | |
775 myHeatMap = function(data,debut_vue,fin_vue,ataxises=NULL,lim=NULL,heatmap_max_color="#000055",heatmap_min_color="#FFFFAA",heatmap_palette_method="hsv",textOnLeftSide="") { | |
776 palette=paletteFromColors(heatmap_min_color,heatmap_max_color,method=heatmap_palette_method) | |
777 if(is.null(lim)) { | |
778 image(debut_vue:fin_vue,1:dim(data)[1],t(data), col = palette,xlab="",ylab="",xaxt="n",yaxt="n") | |
779 } | |
780 else { | |
781 image(debut_vue:fin_vue,1:dim(data)[1],t(data), col = palette,xlab="",ylab="",xaxt="n",yaxt="n",zlim=lim) | |
782 } | |
783 box() | |
784 if(is.null(ataxises)) { | |
785 axis(1) | |
786 } | |
787 else { | |
788 if(sum(!is.na(ataxises))!=0) { | |
789 axis(1,at=ataxises,labels=FALSE) | |
790 axis(3,at=ataxises,labels=FALSE) | |
791 } | |
792 } | |
793 if(sum(!is.na(ataxises))!=0) { | |
794 axis(2,at=1:dim(data)[1],labels=rownames(data),las=2,cex.axis=1) | |
795 } | |
796 text(fin_vue+(fin_vue-debut_vue)*0.015,(dim(data)[1]+1)/2,textOnLeftSide,xpd=NA,cex=1,srt=-90) | |
797 } | |
798 | |
799 ##Returns the title of visualisation | |
800 plotTitle=function(debut_vue=1,fin_vue=length(listForward[[1]]),chr=NULL,style=NULL) { | |
801 plot(c(0,1),c(0,1),col="white",ylab="",xlab="",fg="white",col.axis="white",xaxs="i",yaxs="i") | |
802 text(0,0.5,paste(chr,":",debut_vue,"-",fin_vue,sep=""),cex=2.2,adj=0) | |
803 } | |
804 | |
805 ##The main function of visualisation | |
806 plotVisu = function(file,typeVisu="classic",listForward,listReverse,which=1:length(listForward),stranded=TRUE, | |
807 debut_vue=1,fin_vue=length(listForward[[1]]),chr=NULL,annot=NULL,style=NULL,tops=NULL,bottoms=NULL,marks=NULL,strandMarks=NULL, | |
808 titres="",repeated=FALSE,name_flags="",decal=0,log=TRUE,classic_plus_color="navyblue",classic_minus_color="mediumvioletred", | |
809 heatmap_max_color="#FFFFAA",heatmap_min_color="#000055",heatmap_palette_method="hsv", | |
810 lines_samples_colors=c(1,3,4,2)[((0:length(listForward))%%4)+1],lines_samples_type_line=((0:length(listForward))%%4)+1, | |
811 smoothLength=trunc((fin_vue-debut_vue)/1200),annotation_color_by_strand=FALSE,annotation_placed_by_strand=FALSE) { | |
812 n_element_vue = length(which) | |
813 i_data = which[1] | |
814 forward_matrice=NULL | |
815 reverse_matrice=NULL | |
816 all_stranded = data[[i_data]]$stranded | |
817 if(!repeated) { | |
818 for(i in 1:n_element_vue) { | |
819 i_data = which[i] | |
820 forward_matrice = rbind(forward_matrice,forward[[i_data]][debut_vue:fin_vue]) | |
821 reverse_matrice = rbind(reverse_matrice,reverse[[i_data]][debut_vue:fin_vue]) | |
822 all_stranded = all_stranded & data[[i_data]]$stranded | |
823 } | |
824 } | |
825 else{ | |
826 for(i in 1:n_element_vue) { | |
827 i_data = which[i] | |
828 forward_matrice = rbind(forward_matrice,forward[[i_data]]) | |
829 reverse_matrice = rbind(reverse_matrice,reverse[[i_data]]) | |
830 all_stranded = all_stranded & data[[i_data]]$stranded | |
831 } | |
832 } | |
833 rownames(forward_matrice)=titres[which] | |
834 rownames(reverse_matrice)=titres[which] | |
835 for( i in 1:n_element_vue){ | |
836 if(smoothLength>1) { | |
837 lo=smooth(forward_matrice[i,],L=smoothLength) | |
838 forward_matrice[i,]=lo | |
839 los=smooth(reverse_matrice[i,],L=smoothLength) | |
840 reverse_matrice[i,]=los | |
841 } | |
842 } | |
843 if(is.null(annot)) { | |
844 heights_ann=NULL | |
845 } | |
846 else { | |
847 heights_ann=sizePlotAnnotation(annot=annot,chr=chr,debut=debut_vue,fin=fin_vue,style=style,annotation_placed_by_strand=annotation_placed_by_strand)*50 | |
848 } | |
849 ataxises=pretty((debut_vue:fin_vue)+decal,n=14) | |
850 if(log){ | |
851 label_scal="log2 tag densities" | |
852 }else{ | |
853 label_scal="tag densities" | |
854 } | |
855 if(debut_vue==1&decal==0) { | |
856 ataxises[1]=1 | |
857 }##Classic visualisation | |
858 if(typeVisu=="classic") { | |
859 height_panels = c(40,rep(200,n_element_vue),heights_ann) | |
860 png(file,1200,sum(height_panels)) | |
861 prev=par(no.readonly = TRUE) | |
862 n_panels=length(height_panels) | |
863 layout(matrix(1:n_panels,n_panels,1),heights=height_panels) | |
864 par(mar=c(0, 5, 0, 4)+0.1) | |
865 plotTitle(debut_vue=debut_vue,fin_vue=fin_vue,chr=chr) | |
866 par(mar=c(2.5, 5, 2.5, 4)+0.1) | |
867 for(element in 1:n_element_vue) { | |
868 i_data = which[element] | |
869 plotRNAseq(listForward[[i_data]],listReverse[[i_data]],debut_vue,fin_vue,chr,annot,style,top=tops[min(element,length(tops))],bottom=bottoms[min(element,length(bottoms))],y=label_scal,titre=titres[min(i_data,length(titres))],name_flags=name_flags,repeated=repeated,decal=decal,ataxises=ataxises,classic_plus_color=classic_plus_color,classic_minus_color=classic_minus_color,stranded=stranded) | |
870 if(!is.null(marks)) { | |
871 if(is.null(tops)) { | |
872 top=max(forward[[i_data]]) | |
873 } | |
874 else { | |
875 top=tops[min(element,length(tops))] | |
876 } | |
877 if(is.null(bottoms)) { | |
878 bottom=max(reverse[[i_data]]) | |
879 } | |
880 else { | |
881 bottom=bottoms[min(element,length(bottoms))] | |
882 } | |
883 if(is.null(strandMarks)) { | |
884 segments(marks,top,y1=-bottom,col=2) | |
885 } | |
886 else { | |
887 if(strandMarks==0|!data[[i_data]]$stranded) { | |
888 segments(marks,0,y1=top,col=2) | |
889 } | |
890 else { | |
891 if(strandMarks==1) { | |
892 segments(marks,0,y1=-bottom,col=2) | |
893 } | |
894 else { | |
895 segments(marks,top,y1=-bottom,col=2) | |
896 } | |
897 } | |
898 } | |
899 } | |
900 } | |
901 par(mar=c(0,5,1,4)+0.1) | |
902 if(!is.null(annot)) { | |
903 plot_annotation(annot,chr,debut=debut_vue,fin=fin_vue,style=style,textSize=1.5,annotation_color_by_strand=annotation_color_by_strand,annotation_placed_by_strand=annotation_placed_by_strand) | |
904 } | |
905 } | |
906 else {##Heatmap visualisation | |
907 if(typeVisu=="heatmap") { | |
908 if(stranded) { | |
909 height_panels=c(40,50*n_element_vue+28,heights_ann,50*n_element_vue+30,100) | |
910 } | |
911 else { | |
912 height_panels=c(40,50*n_element_vue+28,heights_ann,100) | |
913 } | |
914 png(file,1200,sum(height_panels)) | |
915 prev=par(no.readonly = TRUE) | |
916 n_panels=length(height_panels) | |
917 layout(matrix(1:n_panels,n_panels,1),heights=height_panels) | |
918 par(mar=c(0, 8, 0, 2)+0.1) | |
919 plotTitle(debut_vue=debut_vue,fin_vue=fin_vue,chr=chr) | |
920 limIntensity = c(0,max(forward_matrice,reverse_matrice,na.rm=TRUE)) | |
921 ataxisesLabels=as.character(ataxises) | |
922 ataxisesLabels[((1:length(ataxises))%%2)==0]="" | |
923 rownames(forward_matrice)=titres[which] | |
924 rownames(reverse_matrice)=titres[which] | |
925 par(mar=c(1, 8, 1.8, 2)+0.1,cex=1.1) | |
926 if(stranded) { | |
927 tmp="Plus strand" | |
928 } | |
929 else { | |
930 tmp="Both strands" | |
931 } | |
932 myHeatMap(forward_matrice[(dim(forward_matrice)[1]):1,],debut_vue+decal,fin_vue+decal,ataxises,lim=limIntensity,heatmap_max_color=heatmap_max_color,heatmap_min_color=heatmap_min_color,heatmap_palette_method=heatmap_palette_method,textOnLeftSide=tmp) | |
933 axis(3,at=ataxises,labels=FALSE,cex.axis=1.4) | |
934 axis(3,at=ataxises,labels=ataxisesLabels,cex.axis=1.4,line=-0.4,lwd=0) | |
935 if(!is.null(marks)) { | |
936 if(is.null(strandMarks)|!all_stranded) { | |
937 segments(marks,-2,y1=dim(forward_matrice)[1]+2,col=2,lwd=2,xpd=TRUE) | |
938 } | |
939 else { | |
940 if(strandMarks!=1) { | |
941 segments(marks,-2,y1=dim(forward_matrice)[1]+2,col=2,lwd=2,xpd=TRUE) | |
942 } | |
943 } | |
944 } | |
945 if(name_flags!="") { | |
946 flags=try(get(name_flags),TRUE) | |
947 if(class(flags)!="try-error") { | |
948 f_=flags[(flags$Chr==chr)&(flags$Stop>=debut_vue)&(flags$Start<=fin_vue),] | |
949 N=dim(f_)[1] | |
950 points(f_$Start,rep(0,N),col=2,pch=19,cex=2) | |
951 } | |
952 } | |
953 if(!is.null(annot)) { | |
954 par(mar=c(0,8,0,2)+0.1) | |
955 plot_annotation(annot,chr,debut=debut_vue,fin=fin_vue,style=style,textSize=0.9,annotation_color_by_strand=annotation_color_by_strand,annotation_placed_by_strand=annotation_placed_by_strand) | |
956 } | |
957 if(stranded) { | |
958 par(mar=c(1, 8, 0, 2)+0.1) | |
959 myHeatMap(reverse_matrice[(dim(reverse_matrice)[1]):1,],debut_vue+decal,fin_vue+decal,ataxises,lim=limIntensity,heatmap_max_color=heatmap_max_color,heatmap_min_color=heatmap_min_color,heatmap_palette_method=heatmap_palette_method,textOnLeftSide="Minus strand") | |
960 if(!is.null(marks)) { | |
961 if(is.null(strandMarks)|!all_stranded) { | |
962 segments(marks,-1,y1=dim(forward_matrice)[1]+1,col=2,lwd=2,xpd=TRUE) | |
963 } | |
964 else { | |
965 if(strandMarks!=0) { | |
966 segments(marks,-1,y1=dim(forward_matrice)[1]+1,col=2,lwd=2,xpd=TRUE) | |
967 } | |
968 } | |
969 } | |
970 axis(1,at=ataxises,labels=ataxisesLabels,cex.axis=1.4) | |
971 } | |
972 palette=matrix(seq(limIntensity[1],limIntensity[2],length.out=2000),1,2000) | |
973 rownames(palette)="0" | |
974 par(mar=c(2,18,3, 12)+0.1) | |
975 myHeatMap(palette,1,2000,ataxises=NA,heatmap_max_color=heatmap_max_color,heatmap_min_color=heatmap_min_color,heatmap_palette_method=heatmap_palette_method) | |
976 labelAxisHeatmapLegend=pretty(c(limIntensity[1],limIntensity[2]),n=7) | |
977 atAxisHeatmapLegend=1+((labelAxisHeatmapLegend-limIntensity[1])/(limIntensity[2]-limIntensity[1]))*1999 | |
978 axis(1,at=atAxisHeatmapLegend,labels=FALSE,cex.axis=1.4) | |
979 axis(1,at=atAxisHeatmapLegend,labels=labelAxisHeatmapLegend,cex.axis=1.4,line=-0.4,lwd=0) | |
980 text(1000,2,label_scal,xpd=NA,font=2,cex=1.4) | |
981 | |
982 }##Lines visualisation | |
983 else if(typeVisu=="lines"){ | |
984 legendSize = (floor(n_element_vue/2)+n_element_vue%%2)*40 | |
985 height_panels=c(40,legendSize,400,heights_ann) | |
986 n_panels=length(height_panels) | |
987 png(file,1200,sum(height_panels)) | |
988 prev=par(no.readonly = TRUE) | |
989 par(mar=c(0, 5, 0,4)+0.1,cex=1.1) | |
990 layout(matrix(c(1:n_panels),n_panels,4),heights=height_panels) | |
991 par(mar=c(0, 5, 0,4)+0.1) | |
992 plotTitle(debut_vue=debut_vue,fin_vue=fin_vue,chr=chr) | |
993 lines_legend(n_element_vue=n_element_vue,which,titres,lines_samples_colors=lines_samples_colors,lines_samples_type_line=lines_samples_type_line) | |
994 par(mar=c(3, 5, 0,4)+0.1) | |
995 plotlines(forward_matrice,reverse_matrice,which=which,debut_vue, fin_vue , chr, annot, style, tops, bottoms,marks,strandMarks,titres, repeated,name_flags,decal,ataxises=ataxises,n_element_vue=n_element_vue,y=label_scal,lines_samples_colors=lines_samples_colors,lines_samples_type_line=lines_samples_type_line,stranded=stranded) | |
996 par(mar=c(0,5,0,4)+0.1) | |
997 if(!is.null(annot)) { | |
998 plot_annotation(annot,chr,debut=debut_vue,fin=fin_vue,style=style,textSize=1.5,annotation_color_by_strand=annotation_color_by_strand,annotation_placed_by_strand=annotation_placed_by_strand) | |
999 } | |
1000 | |
1001 } | |
1002 } | |
1003 invisible(dev.off()) | |
1004 } | |
1005 | |
1006 ## | |
1007 smooth = function(X,L=10) { | |
1008 x_smooth=filter(X,rep(1,L)/L) | |
1009 x_smooth[is.na(x_smooth)]=0 | |
1010 return(x_smooth) | |
1011 } | |
1012 | |
1013 ##Returns lines visualisation | |
1014 plotlines = function(forward_matrice,reverse_matrice,which=1:length(forward), debut_vue = 1,fin_vue =length(forward), chr=NULL, annot=NULL, style=NULL, tops=NULL,bottoms=NULL,marks=NULL,strandMarks=NULL, titres="", repeated=FALSE,name_flags="",decal=0,ataxises=NULL,n_element_vue=length(which),y="",lines_samples_colors=c(1,3,4,2)[((0:length(forward))%%4)+1],lines_samples_type_line=((0:length(forward))%%4)+1,stranded=TRUE){ | |
1015 limIntensity = c(-max(bottoms),max(tops)) | |
1016 plot(c(debut_vue,fin_vue)+decal,limIntensity,col="white",ylab=y,main="",xaxs="i",xaxt="n",yaxt="n",cex.lab=1.2,xlab="",cex.lab=1.8) | |
1017 lty=lines_samples_type_line | |
1018 col=lines_samples_colors | |
1019 ataxises_y= pretty(limIntensity,n=8) | |
1020 for(i in ataxises) abline(v=i,lty=2,col="#808080") | |
1021 for(i in ataxises_y) abline(h=i,lty=2,col="#808080") | |
1022 for( i in 1:n_element_vue){ | |
1023 lo=forward_matrice[i,] | |
1024 lines((debut_vue:fin_vue)+decal,lo,type="l",lty =lty[i],col=col[i],lwd=2,xaxt="n") | |
1025 if(stranded) { | |
1026 los=-reverse_matrice[i,] | |
1027 lines((debut_vue:fin_vue)+decal,los,type="l",lty= lty[i],col=col[i],lwd=2,xaxt="n") | |
1028 } | |
1029 } | |
1030 ataxisesLabels=as.character(ataxises) | |
1031 ataxisesLabels[((1:length(ataxises))%%2)==0]="" | |
1032 ataxisesLabels_y=as.character(abs(ataxises_y)) | |
1033 ataxisesLabels_y[((1:length(ataxises_y))%%2)==0]="" | |
1034 axis(1,at=ataxises,labels=FALSE,cex.axis=2.6) | |
1035 axis(1,at=ataxises,labels=ataxisesLabels,cex.axis=2.6,line=0.6,lwd=0) | |
1036 axis(2, at=ataxises_y,labels=FALSE,cex.axis=2) | |
1037 axis(2, at=ataxises_y,labels=ataxisesLabels_y,cex.axis=2,line=-0.4,lwd=0) | |
1038 if(stranded) { | |
1039 abline(h=0,lwd=6) | |
1040 abline(h=0,col="white",lwd=4) | |
1041 text(fin_vue+(fin_vue-debut_vue)*0.01,tops/2,"+",xpd=NA,cex=3) | |
1042 text(fin_vue+(fin_vue-debut_vue)*0.01,-bottoms/2,"-",xpd=NA,cex=3) | |
1043 text(fin_vue+(fin_vue-debut_vue)*0.025,(tops-bottoms)/2,"Strand",xpd=NA,cex=2,srt=-90) | |
1044 } | |
1045 } | |
1046 | |
1047 ##Returns lines legend | |
1048 lines_legend=function(n_element_vue,which,titres,lines_samples_colors=c(1,3,4,2)[((0:n_element_vue)%%4)+1],lines_samples_type_line=((0:n_element_vue)%%4)+1){ | |
1049 lty=lines_samples_type_line | |
1050 col=lines_samples_colors | |
1051 n_y = floor(n_element_vue/2)+n_element_vue%%2 | |
1052 plot(c(0,4),c(0,-(n_y+1)),col="white", ylab="",xlab="",main="",fg="white",col.axis="white",yaxs="i") | |
1053 i_style=0 | |
1054 for(i in 1:n_y) { | |
1055 i_style=i_style+1 | |
1056 lines(c(1.5,2),-c(i,i),col=col[i_style],lty=lty[i_style],lwd=4) | |
1057 text(1.48,-i,titres[which[i_style]],cex=2.6,adj=1) | |
1058 } | |
1059 if(n_element_vue>1) { | |
1060 for(i in (n_y+1):n_element_vue) { | |
1061 i_style=i_style+1 | |
1062 lines(c(3.5,4),-c(i,i)+n_y,col=col[i_style],lty=lty[i_style],lwd=4) | |
1063 text(3.48,-i+n_y,titres[which[i_style]],cex=2.6,adj=1) | |
1064 } | |
1065 } | |
1066 } | |
1067 | |
1068 ##Returns the shape of plain arrow for the annotation | |
1069 plain_arrow = function(left,right,y,thickness=1,pickSize=(right-left)*0.1,pickSide=c("right","left","both","none"),col="blue") { | |
1070 if(pickSide[1]=="right") { | |
1071 pick_point = min(right - pickSize,right) | |
1072 polygon(c(left,pick_point,right,pick_point,left),c(y-thickness/2,y-thickness/2,y,y+thickness/2,y+thickness/2),col=col) | |
1073 } | |
1074 if(pickSide[1]=="left") { | |
1075 pick_point = max(left + pickSize,left) | |
1076 polygon(c(right,pick_point,left,pick_point,right),c(y-thickness/2,y-thickness/2,y,y+thickness/2,y+thickness/2),col=col) | |
1077 } | |
1078 if(pickSide[1]=="none") { | |
1079 polygon(c(right,left,left,right),c(y-thickness/2,y-thickness/2,y+thickness/2,y+thickness/2),col=col) | |
1080 } | |
1081 if(pickSide[1]=="both") { | |
1082 pick_point_1 = max(left + pickSize,left) | |
1083 pick_point_2 = min(right - pickSize,right) | |
1084 polygon(c(left,pick_point_1,pick_point_2,right,pick_point_2,pick_point_1),c(y,y-thickness/2,y-thickness/2,y,y+thickness/2,y+thickness/2),col=col) | |
1085 } | |
1086 } | |
1087 | |
1088 ##Returns the size of the panel of the annotation | |
1089 sizePlotAnnotation=function(annot,chr,debut,fin,style=NULL,annotation_placed_by_strand=FALSE) { | |
1090 left=c() | |
1091 right=c() | |
1092 annot_chr=annot[annot$Chr==chr,] | |
1093 unique_ID=unique(annot_chr$ID) | |
1094 for(j in 1:length(unique_ID)){ | |
1095 left[j]=min(annot_chr$Start[annot_chr$ID==unique_ID[j]]) | |
1096 right[j]=max(annot_chr$Stop[annot_chr$ID==unique_ID[j]]) | |
1097 } | |
1098 y_plot=parking(left,right) | |
1099 return(max(y_plot)-min(y_plot)+1) | |
1100 } | |
1101 | |
1102 ##Function to organise the annotation shapes to display | |
1103 parking = function(left,right,plus=rep(TRUE,length(left)),biggestOnesInside=TRUE) { | |
1104 y=rep(0,length(left)) | |
1105 if(sum(plus)>0) { | |
1106 left_plus=left[plus] | |
1107 right_plus=right[plus] | |
1108 y_plus=rep(0,sum(plus)) | |
1109 lengths_plus=right_plus-left_plus | |
1110 for(i in order(lengths_plus,decreasing=TRUE)) { | |
1111 otherleft_plus=left_plus | |
1112 otherleft_plus[i]=NA | |
1113 otherright_plus=right_plus | |
1114 otherright_plus[i]=NA | |
1115 placed=FALSE | |
1116 y_plus[i]=0.5 | |
1117 while(placed==FALSE) { | |
1118 placed=sum((right_plus[i]>otherleft_plus[y_plus==y_plus[i]])&(left_plus[i]<otherright_plus[y_plus==y_plus[i]]),na.rm=TRUE)==0 | |
1119 if(placed==FALSE) { | |
1120 y_plus[i]=y_plus[i]+1 | |
1121 } | |
1122 } | |
1123 } | |
1124 if(biggestOnesInside) { | |
1125 y[plus]=y_plus | |
1126 } | |
1127 else { | |
1128 y[plus]=sapply(y_plus,function(i) sort(unique(y_plus))[i==sort(unique(y_plus),decreasing=TRUE)]) | |
1129 } | |
1130 } | |
1131 if(sum(!plus)>0) { | |
1132 left_minus=left[!plus] | |
1133 right_minus=right[!plus] | |
1134 y_minus=rep(0,sum(!plus)) | |
1135 lengths_minus=right_minus-left_minus | |
1136 for(i in order(lengths_minus,decreasing=TRUE)) { | |
1137 otherleft_minus=left_minus | |
1138 otherleft_minus[i]=NA | |
1139 otherright_minus=right_minus | |
1140 otherright_minus[i]=NA | |
1141 placed=FALSE | |
1142 y_minus[i]=-0.5 | |
1143 while(placed==FALSE) { | |
1144 placed=sum((right_minus[i]>otherleft_minus[y_minus==y_minus[i]])&(left_minus[i]<otherright_minus[y_minus==y_minus[i]]),na.rm=TRUE)==0 | |
1145 if(placed==FALSE) { | |
1146 y_minus[i]=y_minus[i]-1 | |
1147 } | |
1148 } | |
1149 } | |
1150 if(biggestOnesInside) { | |
1151 y[!plus]=y_minus | |
1152 } | |
1153 else { | |
1154 y[!plus]=sapply(y_minus,function(i) sort(unique(y_minus))[i==sort(unique(y_minus),decreasing=TRUE)]) | |
1155 } | |
1156 } | |
1157 return(y) | |
1158 } | |
1159 | |
1160 ##Function to | |
1161 plot_annotation = function(annot,chr,debut,fin,style=NULL,textSize=par("cex"),annotation_color_by_strand=FALSE,annotation_placed_by_strand=FALSE) { | |
1162 left=c() | |
1163 right=c() | |
1164 annot_chr=annot[annot$Chr==chr,] | |
1165 N = dim(annot_chr)[1] | |
1166 strand=c("-","+") | |
1167 if(N>0) { | |
1168 unique_ID=unique(annot_chr$ID) | |
1169 par_prev=par(no.readonly=TRUE) | |
1170 for(j in 1:length(unique_ID)){ | |
1171 left[j]=min(annot_chr$Start[annot_chr$ID==unique_ID[j]]) | |
1172 right[j]=max(annot_chr$Stop[annot_chr$ID==unique_ID[j]]) | |
1173 } | |
1174 if(annotation_placed_by_strand) { | |
1175 y_plot=parking(left,right,annot_chr$Strand=="+",FALSE) | |
1176 } | |
1177 else { | |
1178 y_plot=parking(left,right,biggestOnesInside=FALSE) | |
1179 } | |
1180 plot(c(debut,fin),c(min(y_plot)-0.5,max(y_plot)+0.5),col="white",ylab="",xlab="",fg="white",col.axis="white",xaxs="i",yaxs="i") | |
1181 for(j in 1:length(unique_ID)){ | |
1182 annot_ID=annot_chr[annot_chr$ID==unique_ID[j],] | |
1183 x_text<-Inf | |
1184 for(i_annot_ID in 1:dim(annot_ID)[1]) { | |
1185 if(annot_ID$Strand[i_annot_ID]=="-") { | |
1186 orientation="left" | |
1187 } | |
1188 else { | |
1189 orientation="right" | |
1190 } | |
1191 iDraw = FALSE | |
1192 if(annot_ID$Strand[i_annot_ID]==".") { | |
1193 tmp="+" | |
1194 } | |
1195 else { | |
1196 tmp=annot_ID$Strand[i_annot_ID] | |
1197 } | |
1198 style_demande = style[style$Type==annot_ID$Type[i_annot_ID]&style$Strand==tmp,] | |
1199 x_text<-min(x_text,annot_ID$Start[i_annot_ID]) | |
1200 if(style_demande$shape=="box") { | |
1201 plain_arrow(annot_ID$Start[i_annot_ID],annot_ID$Stop[i_annot_ID],y_plot[j],thickness=0.5, pickSide=orientation,col=style_demande$col,pickSize=(fin-debut)*0.02) | |
1202 iDraw=TRUE | |
1203 } | |
1204 if(style_demande$shape =="line") { | |
1205 segments(annot_ID$Start[i_annot_ID],y_plot[j],annot_ID$Stop[i_annot_ID],y_plot[j],col=style_demande$col) | |
1206 iDraw=TRUE | |
1207 } | |
1208 if(style_demande$shape == "rectangle") { | |
1209 plain_arrow(annot_ID$Start[i_annot_ID],annot_ID$Stop[i_annot_ID],y_plot[j],thickness=0.5, pickSide="none",col=style_demande$col) | |
1210 iDraw=TRUE | |
1211 } | |
1212 if(style_demande$shape == "arrow") { | |
1213 arrowHeads = pretty(debut:fin,n=50) | |
1214 x<-c(annot_ID$Start[i_annot_ID],arrowHeads[arrowHeads>annot_ID$Start[i_annot_ID]&arrowHeads<annot_ID$Stop[i_annot_ID]],annot_ID$Stop[i_annot_ID]) | |
1215 if(annot_ID$Strand[i_annot_ID]=="-") { | |
1216 arrows(x[2:length(x)],y_plot[j],x[1:length(x)-1],col=style_demande$col,length=0.08) | |
1217 } | |
1218 else { | |
1219 arrows(x[1:length(x)-1],y_plot[j],x[2:length(x)],col=style_demande$col,length=0.08) | |
1220 } | |
1221 iDraw=TRUE | |
1222 } | |
1223 if(iDraw == FALSE) { | |
1224 segments(annot_ID$Start[i_annot_ID],y_plot[j],annot_ID$Stop[i_annot_ID],y_plot[j],col="black") | |
1225 } | |
1226 } | |
1227 text(max(x_text,debut),y_plot[j]+0.4,adj=0,labels=unique_ID[j],cex=textSize) | |
1228 } | |
1229 if(annotation_placed_by_strand) { | |
1230 abline(h=0.125,lty=2,col="black") | |
1231 } | |
1232 } | |
1233 } | |
1234 | |
1235 # produit la visualisation de la légende | |
1236 plotLegend= function(style,width_plot=400,length_arrows=100,type="horizontal") { | |
1237 if(type=="horizontal") { | |
1238 style_unique = unique(style$col) | |
1239 n_style = length(style_unique) | |
1240 plot(c(0,width_plot),c(1,2*n_style+1),col="white",ylab="",xlab="",xaxs="i",fg="white",col.axis="white",main="Legend") | |
1241 for(i in 1:n_style) { | |
1242 arrows(ceiling((width_plot-length_arrows)/2),2*(n_style-i+1),ceiling((width_plot+length_arrows)/2),2*(n_style-i+1),col=style_unique[i],length=0.10) | |
1243 text(ceiling(width_plot/2),2*(n_style-i+1)+0.5,paste(style$Type[style$col==style_unique[i]],collapse=", "),col=style_unique[i]) | |
1244 } | |
1245 } | |
1246 else { | |
1247 style_unique = unique(style$col) | |
1248 n_style = length(style_unique) | |
1249 length_arrows = width_plot/(n_style*1.25) | |
1250 inbetween = length_arrows/4 | |
1251 plot(c(0,width_plot),c(0,1),col="white",ylab="",xlab="",yaxs="i",xaxt="n",yaxt="n",bty="n",xaxs="i",col.axis="white",main="") | |
1252 text(width_plot/2,0.8,"Legend",cex=1.4,font=2) | |
1253 for(i in 1:n_style) { | |
1254 fleche_start = (length_arrows+inbetween)*(i-1) + inbetween/2 | |
1255 fleche_stop = (length_arrows+inbetween)*(i-1) + length_arrows + inbetween/2 | |
1256 # arrows(fleche_start,0.2,fleche_stop,0.2,col=style_unique[i],length=0.10) | |
1257 rect(fleche_start,0.2,fleche_stop,0.6,border=style_unique[i]) | |
1258 text((fleche_start+fleche_stop)/2,0.4,paste(style$Type[style$col==style_unique[i]],collapse=", "),col=style_unique[i],cex=1.2) | |
1259 } | |
1260 } | |
1261 } | |
1262 | |
1263 html2rgb = function(codeHTML) { | |
1264 chiffres=c(0:9,"A","B","C","D","E","F") | |
1265 codeHTMLsplit=unlist(strsplit(codeHTML,split="")) | |
1266 red = (((which(chiffres==codeHTMLsplit[2])-1)*16)+(which(chiffres==codeHTMLsplit[3])-1))/255 | |
1267 green = (((which(chiffres==codeHTMLsplit[4])-1)*16)+(which(chiffres==codeHTMLsplit[5])-1))/255 | |
1268 blue = (((which(chiffres==codeHTMLsplit[6])-1)*16)+(which(chiffres==codeHTMLsplit[7])-1))/255 | |
1269 return(c(red,green,blue)) | |
1270 } | |
1271 | |
1272 rgb2hsv = function(vectRGB) { | |
1273 Cmax=max(vectRGB) | |
1274 Cmin=min(vectRGB) | |
1275 delta=Cmax-Cmin | |
1276 if(delta==0) { | |
1277 H=0 | |
1278 } | |
1279 else { | |
1280 if(Cmax==vectRGB[1]) H=(1/6)*(((vectRGB[2]-vectRGB[3])/delta)%%6) | |
1281 if(Cmax==vectRGB[2]) H=(1/6)*(((vectRGB[3]-vectRGB[1])/delta)+2) | |
1282 if(Cmax==vectRGB[3]) H=(1/6)*(((vectRGB[1]-vectRGB[2])/delta)+4) | |
1283 } | |
1284 if(delta==0) { | |
1285 S=0 | |
1286 } | |
1287 else { | |
1288 S=delta/Cmax | |
1289 } | |
1290 V=Cmax | |
1291 | |
1292 return(c(H,S,V)) | |
1293 } | |
1294 | |
1295 ### ========================================================================= | |
1296 ### Ving's Main | |
1297 ### ------------------------------------------------------------------------- | |
1298 ### | |
1299 { | |
1300 if(version$major != "3" & version$minor != "0.2") { | |
1301 warning("You need 3.0.2 R Version to run the program") | |
1302 } | |
1303 tmp = unlist(strsplit(commandArgs()[4],split="="))[2] | |
1304 path_split = unlist(strsplit(tmp,split="/")) | |
1305 path = paste(path_split[1:(length(path_split)-1)],collapse="/") | |
1306 arg<-commandArgs(TRUE) | |
1307 if(length(arg)==0) { | |
1308 cat("Ving for visualisation of RNA seq data: | |
1309 Usage: | |
1310 ./visualiseur.R [options] <input> [<input2> <input3> ...] | |
1311 options: | |
1312 -o/--output <string> [default:./ouput.png] | |
1313 -v/--typeVisu <string> [default: classic ] (classic,lines,heatmap) | |
1314 -t/--description-data <string> [default: filename ] | |
1315 -n/--normalization-coefficient <string> [default: none ] | |
1316 -i/--inverseStrand | |
1317 -u/--unstranded | |
1318 -c/--chromosome-name <string> [default: first one ] | |
1319 -d/--start <integer> [default: 1 ] | |
1320 -f/--end <integer> [default: chr length] | |
1321 -a/--annotation <filename> | |
1322 -r/--typeTranscript <string> [default: all ] | |
1323 -C/--annotation-colors <string> [default: fabulous ] | |
1324 -s/--annotation-shapes <string> [default: box ] (box,line,rectangle,arrow) | |
1325 -l/--scale-log | |
1326 -y/--symetric-scale | |
1327 --classic-plus-color <string> [default: purple ] | |
1328 --classic-minus-color <string> [default: pink ] | |
1329 --heatmap-max-color <string> [default: green ] | |
1330 --heatmap-min-color <string> [default: yellow ] | |
1331 --heatmap-palette-method <string> [default: hsv ] (hsv,rgb) | |
1332 --lines-samples-colors <string> [default: black ] | |
1333 --lines-samples-type-line <string> [default: plain ] | |
1334 --annotation-color-by-strand <logical> [default: FALSE ] | |
1335 --annotation-placed-by-strand <logical> [default: FALSE ] | |
1336 \n\n") | |
1337 q("no") | |
1338 }else { | |
1339 tmp=suppressPackageStartupMessages(require("Rsamtools")) | |
1340 if(!tmp) { | |
1341 stop("Package Rsamtools required !!") | |
1342 } | |
1343 tmp=suppressPackageStartupMessages(require("GenomicRanges")) | |
1344 if(!tmp) { | |
1345 stop("Package GenomicRanges required !!") | |
1346 } | |
1347 optArgs=getopt( | |
1348 rbind( | |
1349 | |
1350 c('output','o', 1, 'character',"output.png"), | |
1351 c('typeVisu', 'v', 1, 'character', "classic"), | |
1352 c('description-data','t',1,'character',NA), | |
1353 c('chromosome-name', 'c', 1, 'character', NA), | |
1354 c('start', 'd', 1, 'numeric',1), | |
1355 c('end', 'f', 1, 'numeric',NA), | |
1356 c('annotation','a',1,'character',NA), | |
1357 c('typeTranscript','r',1,'character',NA), | |
1358 c('annotation-colors','C',1,'character',NA), | |
1359 c('annotation-shapes','s','1','character',NA), | |
1360 c('normalization-coefficient','n','1','character',NA), | |
1361 c('classic-plus-color',1,1,'character',"navyblue"), | |
1362 c('classic-minus-color',2,1,'character',"mediumvioletred"), | |
1363 c('heatmap-max-color',3,1,'character',"#000055"), | |
1364 c('heatmap-min-color',4,1,'character',"#FFFFAA"), | |
1365 c('heatmap-palette-method',7,1,'character',"hsv"), | |
1366 c('lines-samples-colors',5,1,'character',NA), | |
1367 c('lines-samples-type-line',6,1,'character',NA), | |
1368 c('scale-log', 'l',0,'logical',FALSE), | |
1369 c('inverseStrand','i',0,'logical', FALSE), | |
1370 c('unstranded','u',0,'logical', FALSE), | |
1371 c('symetric-scale','y',0,'logical', FALSE), | |
1372 c('annotation-color-by-strand',8,0,'logical',FALSE), | |
1373 c('annotation-placed-by-strand',9,0,'logical',FALSE) | |
1374 ) | |
1375 ) | |
1376 } | |
1377 ################### | |
1378 ## ARGUMENTS | |
1379 ################################################################################# | |
1380 files=optArgs$ARGUMENT | |
1381 ##Case file doesn't exist | |
1382 for( i in 1:length(files)) { | |
1383 if(!file.exists(files[i])) { | |
1384 stop(paste(files[i],"do not exists!","\n")) | |
1385 } | |
1386 } | |
1387 imagefile=optArgs$output | |
1388 typeVisu=optArgs$typeVisu | |
1389 description_data=optArgs$`description-data` | |
1390 chrName = optArgs$`chromosome-name`[1] | |
1391 start=optArgs$start[1] | |
1392 end=optArgs$end[1] | |
1393 annotation=optArgs$annotation | |
1394 typeTranscript=optArgs$typeTranscript | |
1395 colors=optArgs$`annotation-colors` | |
1396 shape_data=optArgs$`annotation-shapes` | |
1397 weight=optArgs$`normalization-coefficient` | |
1398 classic_plus_color=optArgs$`classic-plus-color` | |
1399 classic_minus_color=optArgs$`classic-minus-color` | |
1400 heatmap_max_color=optArgs$`heatmap-max-color` | |
1401 heatmap_min_color=optArgs$`heatmap-min-color` | |
1402 heatmap_palette_method=optArgs$`heatmap-palette-method` | |
1403 lines_samples_colors=optArgs$`lines-samples-colors` | |
1404 lines_samples_type_line=optArgs$`lines-samples-type-line` | |
1405 log=optArgs$`scale-log` | |
1406 inverseStrand=optArgs$inverseStrand | |
1407 unstranded=optArgs$unstranded | |
1408 symetric= optArgs$`symetric-scale` | |
1409 annotation_color_by_strand=optArgs$`annotation-color-by-strand` | |
1410 annotation_placed_by_strand=optArgs$`annotation-placed-by-strand` | |
1411 ################### | |
1412 ## MAIN | |
1413 ################################################################################### | |
1414 | |
1415 | |
1416 genome_info=scanBamHeader(files[1])[[1]]$targets | |
1417 noms_chromosomes = names(genome_info) | |
1418 longueur_chromosomes = as.integer(genome_info) | |
1419 nombre_chromosomes = length(noms_chromosomes) | |
1420 | |
1421 ##Case no chromosome specified | |
1422 if(sum(is.na(chrName))) { | |
1423 chrName = noms_chromosomes[1] | |
1424 cat("No chromosome specified, processing chromosome :",chrName,"\n") | |
1425 } | |
1426 | |
1427 | |
1428 ##Case false chromosome name | |
1429 if(!(chrName %in% noms_chromosomes)) { | |
1430 stop(paste("\"",chrName,"\" is not a proper chromosome name")) | |
1431 } | |
1432 | |
1433 if(is.na(end)) { | |
1434 end = longueur_chromosomes[chrName==noms_chromosomes] | |
1435 } | |
1436 | |
1437 if(start > end) { | |
1438 stop("The start is bigger than the end!") | |
1439 } | |
1440 | |
1441 ##Case asked coordinates outside the chromosome | |
1442 if(start<0|end>longueur_chromosomes[chrName==noms_chromosomes]) { | |
1443 stop("You are outside the chromosome") | |
1444 } | |
1445 | |
1446 if(sum(is.na(weight))>0) { | |
1447 normalized_data=NULL | |
1448 isnormalized=FALSE | |
1449 } | |
1450 else { | |
1451 isnormalized=TRUE | |
1452 normalized_data=unlist(strsplit(weight,split=",")) | |
1453 if(length(files)!=length(normalized_data)) { | |
1454 stop("Different number of files and weights ") | |
1455 } | |
1456 else { | |
1457 normalized_data=as.numeric(normalized_data) | |
1458 } | |
1459 } | |
1460 | |
1461 if(inverseStrand) { | |
1462 libraryType="inverse" | |
1463 } | |
1464 else { | |
1465 libraryType="standard" | |
1466 } | |
1467 ##Read the bam file and extract the infos | |
1468 doit=function(i,libraryType) { | |
1469 | |
1470 try(readBam_(files[i], libraryType=libraryType, chrName_=chrName, from_=start, to_=end,normalized_=normalized_data[i])) | |
1471 } | |
1472 data=lapply(1:length(files),doit,libraryType=libraryType) | |
1473 ctrl=unlist(lapply(data,class))=="try-error" | |
1474 | |
1475 if(sum(ctrl)>0) { | |
1476 for(i in which(ctrl)) { | |
1477 cat("Problem with file :",files[i],"\n") | |
1478 } | |
1479 stop("At least a file has problem") | |
1480 } | |
1481 | |
1482 ##Read the GFF file and extract the infos | |
1483 if(sum(is.na(annotation))==0) { | |
1484 gff=try(readGff(annotation[1],from=start,to=end,chr=chrName),TRUE) | |
1485 ctrl=class(gff)=="try-error" | |
1486 if(sum(ctrl)>0) { | |
1487 stop(paste("Problem with Gff file :",annotation,"\n")) | |
1488 } | |
1489 if(length(annotation)>1) { | |
1490 for(i in 2:length(annotation)) { | |
1491 gff1=try(readGff(annotation[i],from=start,to=end,chr=chrName),TRUE) | |
1492 ctrl=class(gff1)=="try-error" | |
1493 if(sum(ctrl)>0) { | |
1494 stop(paste("Problem with Gff file :",gff1,"\n")) | |
1495 } | |
1496 gff=rbind(gff,gff1) | |
1497 } | |
1498 } | |
1499 | |
1500 } | |
1501 else { | |
1502 gff=NA | |
1503 } | |
1504 | |
1505 | |
1506 if(sum(is.na(description_data))>0) { | |
1507 description_data = files | |
1508 } | |
1509 ##Case of different number of files and description | |
1510 if(length(description_data)!=length(files)) { | |
1511 stop("Different number of files and description") | |
1512 } | |
1513 | |
1514 if(sum(is.na(typeTranscript)>0)) { | |
1515 if(sum(is.na(annotation))==0) { | |
1516 typeTranscritSplit=unique(gff$Type) | |
1517 }else { | |
1518 typeTranscritSplit=NA | |
1519 } | |
1520 }else { | |
1521 typeTranscritSplit=unlist(strsplit(typeTranscript,split=",")) | |
1522 } | |
1523 | |
1524 if((sum(is.na(gff))>0)|(sum(is.na(typeTranscritSplit))>0)) { | |
1525 annot=NULL | |
1526 }else { | |
1527 annot = gff[gff$Type %in% typeTranscritSplit,] | |
1528 } | |
1529 | |
1530 ##check the colors | |
1531 | |
1532 if(sum(is.na(colors)>0)) { | |
1533 nTypeTranscrit = length(typeTranscritSplit) | |
1534 if(annotation_color_by_strand) { | |
1535 colorsSplit=c(classic_plus_color,classic_minus_color) | |
1536 } | |
1537 else { | |
1538 colorsSplit=rainbow(nTypeTranscrit+1)[1:nTypeTranscrit] | |
1539 } | |
1540 } | |
1541 else { | |
1542 colorsSplit=unlist(strsplit(colors,split=",")) | |
1543 } | |
1544 for(i in 1:length(colorsSplit)) { | |
1545 tmp=unlist(strsplit(colorsSplit[i],split="")) | |
1546 if(length(tmp)==6|length(tmp)==8) { | |
1547 if(sum(tmp %in% c(0:9,"A","B","C","D","E","F"))==length(tmp)) { | |
1548 colorsSplit[i] = paste("#",colorsSplit[i],sep="") | |
1549 } | |
1550 } | |
1551 } | |
1552 | |
1553 tmp=unlist(strsplit(classic_plus_color,split="")) | |
1554 if(length(tmp)==6|length(tmp)==8) { | |
1555 if(sum(tmp %in% c(0:9,"A","B","C","D","E","F"))==length(tmp)) { | |
1556 classic_plus_color = paste("#",classic_plus_color,sep="") | |
1557 } | |
1558 } | |
1559 tmp=unlist(strsplit(classic_minus_color,split="")) | |
1560 if(length(tmp)==6|length(tmp)==8) { | |
1561 if(sum(tmp %in% c(0:9,"A","B","C","D","E","F"))==length(tmp)) { | |
1562 classic_minus_color = paste("#",classic_minus_color,sep="") | |
1563 } | |
1564 } | |
1565 tmp=unlist(strsplit(heatmap_max_color,split="")) | |
1566 if(length(tmp)==6|length(tmp)==8) { | |
1567 if(sum(tmp %in% c(0:9,"A","B","C","D","E","F"))==length(tmp)) { | |
1568 heatmap_max_color = paste("#",heatmap_max_color,sep="") | |
1569 } | |
1570 } | |
1571 tmp=unlist(strsplit(heatmap_min_color,split="")) | |
1572 if(length(tmp)==6|length(tmp)==8) { | |
1573 if(sum(tmp %in% c(0:9,"A","B","C","D","E","F"))==length(tmp)) { | |
1574 heatmap_min_color = paste("#",heatmap_min_color,sep="") | |
1575 } | |
1576 } | |
1577 if(sum(is.na(lines_samples_colors))>0) { | |
1578 lines_samples_colors_split=c(1,3,4,2)[((0:(length(files)-1))%%4)+1] | |
1579 } | |
1580 else{ | |
1581 lines_samples_colors_split=unlist(strsplit(lines_samples_colors,split=",")) | |
1582 for(i in 1:length(lines_samples_colors_split)) { | |
1583 tmp=unlist(strsplit(lines_samples_colors_split[i],split="")) | |
1584 if(length(tmp)==6|length(tmp)==8) { | |
1585 if(sum(tmp %in% c(0:9,"A","B","C","D","E","F"))==length(tmp)) { | |
1586 lines_samples_colors_split[i] = paste("#",lines_samples_colors_split[i],sep="") | |
1587 } | |
1588 } | |
1589 } | |
1590 } | |
1591 colorToCheck=c(colorsSplit,classic_plus_color,classic_minus_color,heatmap_max_color,heatmap_min_color,lines_samples_colors_split) | |
1592 ctrl=sapply(colorToCheck,is.acceptable.color) | |
1593 if(sum(!ctrl)>0) { | |
1594 for(i in which(!ctrl)) { | |
1595 cat("\"",colorToCheck[i],"\" is not a proper color name.\n",sep="") | |
1596 } | |
1597 stop("At least one color has problem") | |
1598 } | |
1599 if(annotation_color_by_strand){ | |
1600 if(length(colorsSplit)>2) { | |
1601 stop("You have to specify two and only two colors!") | |
1602 } | |
1603 }else { | |
1604 if(length(typeTranscritSplit)!=length(colorsSplit)) { | |
1605 stop("Please specify the same number of transcript types and colors") | |
1606 } | |
1607 } | |
1608 ##check the line types | |
1609 if(sum(is.na(lines_samples_type_line))>0) { | |
1610 lines_samples_type_line_split=((0:(length(files)-1))%/%4)+1 | |
1611 } | |
1612 else { | |
1613 lines_samples_type_line_split=unlist(strsplit(lines_samples_type_line,split=",")) | |
1614 } | |
1615 if(typeVisu=="lines") { | |
1616 ctrl=sapply(lines_samples_type_line_split,function(x) { | |
1617 tmp=suppressWarnings(as.numeric(x)) | |
1618 if(!is.na(tmp)) { | |
1619 return((tmp==floor(tmp))&tmp>=1&tmp<=5) | |
1620 } | |
1621 else { | |
1622 return(FALSE) | |
1623 } | |
1624 } | |
1625 ) | |
1626 if(sum(!ctrl)>0) { | |
1627 for(i in which(!ctrl)) { | |
1628 cat("\"",lines_samples_type_line_split[i],"\" is not a proper line style.\n",sep="") | |
1629 } | |
1630 stop("At least one line style has problem") | |
1631 } | |
1632 lines_samples_type_line_split=as.integer(lines_samples_type_line_split) | |
1633 } | |
1634 ##check the shapes | |
1635 type_shape=rep(1,length(typeTranscritSplit)) | |
1636 | |
1637 if(sum(is.na(shape_data)>0)) { | |
1638 for( i in 1:length(typeTranscritSplit)) { | |
1639 type_shape[i]="box" | |
1640 } | |
1641 }else{ | |
1642 shape=unlist(strsplit(shape_data,split=",")) | |
1643 shape=as.array(shape) | |
1644 if(length(typeTranscritSplit)!=length(shape)) { | |
1645 stop("Please specify the same number of transcript types and shapes") | |
1646 }else { | |
1647 for( i in 1:length(typeTranscritSplit)){ | |
1648 type_shape[i]= shape[[i]] | |
1649 } | |
1650 } | |
1651 } | |
1652 | |
1653 | |
1654 ##Style for the annotation | |
1655 label=rep(1,length(typeTranscritSplit)) | |
1656 style=data.frame(Type=c(typeTranscritSplit,typeTranscritSplit),Strand=c(rep("+",length(typeTranscritSplit)),rep("-",length(typeTranscritSplit))),col=NA,shape=NA,label,stringsAsFactors = FALSE) | |
1657 for(i in 1:length(typeTranscritSplit)) { | |
1658 style$shape[style$Type==typeTranscritSplit[i]]=type_shape[i] | |
1659 } | |
1660 if(annotation_color_by_strand) { | |
1661 style$col[style$Strand=="+"]=colorsSplit[1] | |
1662 style$col[style$Strand=="-"]=colorsSplit[2] | |
1663 } | |
1664 else { | |
1665 for(i in 1:length(typeTranscritSplit)) { | |
1666 style$col[style$Type==typeTranscritSplit[i]]=colorsSplit[i] | |
1667 } | |
1668 } | |
1669 | |
1670 ##Main for visualisation | |
1671 databychr=extractSignal(data,chrName,from=start,to=end,normalized_=isnormalized) | |
1672 | |
1673 { | |
1674 reverse=list() | |
1675 forward=list() | |
1676 if(log) { | |
1677 for(i in 1:length(databychr$F)){ | |
1678 forward_ = numeric() | |
1679 tmp=log2(1+databychr$F[[i]]) | |
1680 forward_[1:length(tmp)]=tmp | |
1681 forward[[i]]=forward_ | |
1682 reverse_ = numeric() | |
1683 tmp=log2(1+databychr$R[[i]]) | |
1684 reverse_[1:length(tmp)]=tmp | |
1685 reverse[[i]]=reverse_ | |
1686 } | |
1687 } | |
1688 else { | |
1689 forward=databychr$F | |
1690 reverse=databychr$R | |
1691 } | |
1692 } | |
1693 | |
1694 smoothLength=max(trunc((end-start)/1200),1) | |
1695 if(unstranded) { | |
1696 for(i in 1:length(databychr$F)){ | |
1697 lo=smooth(forward[[i]]+reverse[[i]],L=smoothLength) | |
1698 forward[[i]]=lo | |
1699 los=rep(0,length(lo)) | |
1700 reverse[[i]]=los | |
1701 } | |
1702 } | |
1703 else { | |
1704 for(i in 1:length(databychr$F)){ | |
1705 lo=smooth(forward[[i]],L=smoothLength) | |
1706 forward[[i]]=lo | |
1707 los=smooth(reverse[[i]],L=smoothLength) | |
1708 reverse[[i]]=los | |
1709 } | |
1710 } | |
1711 | |
1712 group_maximum = rep(1,length(databychr$F)) | |
1713 max_forward = numeric(length(databychr$F)) | |
1714 max_reverse = numeric(length(databychr$F)) | |
1715 for(i_data in 1:length(databychr$F)) { | |
1716 max_forward[i_data]=max(forward[[i_data]],na.rm=TRUE) | |
1717 max_reverse[i_data]=max(reverse[[i_data]],na.rm=TRUE) | |
1718 } | |
1719 for(i_max in unique(group_maximum)) { | |
1720 max_forward[group_maximum==i_max]=max(max_forward[group_maximum==i_max],na.rm=TRUE) | |
1721 max_reverse[group_maximum==i_max]=max(max_reverse[group_maximum==i_max],na.rm=TRUE) | |
1722 } | |
1723 | |
1724 if(symetric){ | |
1725 for(i_data in 1:length(databychr$F)) { | |
1726 max_forward[i_data]=max(max_forward[i_data],max_reverse[i_data],na.rm=TRUE) | |
1727 } | |
1728 max_reverse=max_forward | |
1729 } | |
1730 | |
1731 plotVisu(imagefile,typeVisu=typeVisu,listForward=forward,listReverse=reverse, | |
1732 debut_vue=start,fin_vue=end,chr=chrName,annot=annot,repeated=TRUE, | |
1733 titres=description_data,name_flags="",style=style,log=log,stranded=!unstranded, | |
1734 tops=max_forward,bottoms=max_reverse, | |
1735 classic_plus_color=classic_plus_color,classic_minus_color=classic_minus_color, | |
1736 heatmap_max_color=heatmap_max_color,heatmap_min_color=heatmap_min_color,heatmap_palette_method=heatmap_palette_method, | |
1737 lines_samples_colors=lines_samples_colors_split,lines_samples_type_line=lines_samples_type_line_split, | |
1738 smoothLength=1,annotation_color_by_strand=annotation_color_by_strand,annotation_placed_by_strand=annotation_placed_by_strand) | |
1739 | |
1740 } |