Mercurial > repos > rlegendre > ving
changeset 4:5ed41b948030 draft
Uploaded
author | rlegendre |
---|---|
date | Tue, 29 Jul 2014 09:47:47 -0400 |
parents | d1993299c96e |
children | 0cec66d7f637 |
files | ving.R |
diffstat | 1 files changed, 291 insertions(+), 152 deletions(-) [+] |
line wrap: on
line diff
--- a/ving.R Tue Jul 29 05:49:34 2014 -0400 +++ b/ving.R Tue Jul 29 09:47:47 2014 -0400 @@ -1,24 +1,5 @@ #! /usr/bin/Rscript -# Il est interdit de mettre ARGUMENT comme argument long - - - -# spec contains at least 4 columns, as many as 5 columns. -# -# column 1: long name of flag -# -# column 2: short name of flag -# -# column 3: argument flag. 0=without argument, 1=with argument -# -# column 4: mode of argument. one of "logical", "integer", "double", -# "complex", "character", "" for arguments without value (return logical) -# -# column 5 (optional): description of ARGUMENT. -# - - RetreiveAndDestroy=function(opt,root,stem,regexp,SearchNames,Out,isValue,NbFound,StockNames){ Bool=lapply(paste(root,SearchNames,stem,sep=""),grepl,opt) names(Bool)=StockNames @@ -163,10 +144,6 @@ return(Out) } -# column 3: argument flag. 0=no argument, 1=required argument, 2=optional argument - - - ### ========================================================================= ### Ving's methods ### ------------------------------------------------------------------------- @@ -352,9 +329,8 @@ which = eval(expr) what<-c("flag","mpos","cigar","mrnm","isize") param<- ScanBamParam(what=what, which=which) - G_alignement<- readGAlignmentsFromBam(file=file_, param=param, use.names=TRUE) ##Case of no reads on the chromosome - start=start(G_alignement) + start=extractFromBam(file=file_,which=which,what="pos") if(length(start) == 0 ){ brin_F[[i]] = list(numeric(),numeric(),numeric()) brin_R[[i]] = list(numeric(),numeric(),numeric()) @@ -362,19 +338,29 @@ brin_R_junction[[i]] = list(numeric(),numeric(),numeric()) } else { - end=end(G_alignement) - strand=as.character(strand(G_alignement)) - flag=mcols(G_alignement)$flag - mpos=mcols(G_alignement)$mpos - cigar=mcols(G_alignement)$cigar - mrnm=mcols(G_alignement)$mrnm - isize=mcols(G_alignement)$isize + strand=extractFromBam(file=file_,which=which,what="strand") + flag=extractFromBam(file=file_,which=which,what="flag") + mpos=extractFromBam(file=file_,which=which,what="mpos") + cigar=extractFromBam(file=file_,which=which,what="cigar") + mrnm=extractFromBam(file=file_,which=which,what="mrnm") + isize=extractFromBam(file=file_,which=which,what="isize") first_read=integer.base.b(flag)[,5]==1 strand[strand =="+" & !first_read ]="*" strand[strand =="-" & !first_read ] ="+" strand[strand =="*" & !first_read ] ="-" +##CIGAR's interpreter + cigar_interprete = interpreteCIGAR(cigar) + longueur_cigar = lapply(cigar_interprete,calcule_longueur_cigar) + junction_cigar = lapply(cigar_interprete,calcule_junction_cigar) + debut_junction = numeric() + fin_junction = numeric() + brin_junction = numeric() + i_junction = 0 + + end=start+sapply(1:length(cigar),function(j) longueur_cigar[[cigar[j]]]) + ##Case of pairend reads is_on_same_chr = mrnm==nom_chromo is_on_same_chr[is.na(is_on_same_chr)] = FALSE @@ -404,14 +390,6 @@ brin_F[[i]] = compresse_coordonnees(debut_frag_plus,fin_frag_plus) brin_R[[i]] = compresse_coordonnees(debut_frag_moins,fin_frag_moins) -##CIGAR's interpreter - cigar_interprete = interpreteCIGAR(cigar) - longueur_cigar = lapply(cigar_interprete,calcule_longueur_cigar) - junction_cigar = lapply(cigar_interprete,calcule_junction_cigar) - debut_junction = numeric() - fin_junction = numeric() - brin_junction = numeric() - i_junction = 0 ##junction read for(j in 1:length(cigar)) { junctions_ = junction_cigar[[cigar[j]]] @@ -420,7 +398,7 @@ i_junction = i_junction + 1 debut_junction[i_junction] = start[j] + junctions_[[k]][1] - 1 fin_junction[i_junction] = start[j] + junctions_[[k]][2] - 1 - brin_junction[i_junction] = strand[j] + brin_junction[i_junction] = as.character(strand[j]) } } } @@ -550,6 +528,124 @@ } } +## Returns an adjusted bamHandler object +ajustBam = function(bamHandler,coeff=1) { + if(class(bamHandler)=="try-error") { + return(bamHandler) + } + else { + bamHandler_ = bamHandler + L = length(bamHandler_$F) + for(i in 1:L) { + bamHandler_$F[[i]][[4]]=bamHandler_$F[[i]][[3]]*coeff + bamHandler_$R[[i]][[4]]=bamHandler_$R[[i]][[3]]*coeff + bamHandler_$junctions_F[[i]][[4]]=bamHandler_$junctions_F[[i]][[3]]*coeff + bamHandler_$junctions_R[[i]][[4]]=bamHandler_$junctions_R[[i]][[3]]*coeff + } + bamHandler_$norm=TRUE + return(bamHandler_) + } + } + +##Returns an ajusted bamHandler +normalizeBams = function(data,poids,which=1:length(poids),relative=TRUE) { + N=length(data) + j=0 + retour = list() + for(i in 1:N) { + + if(i %in% which) { + j = j + 1 + if(relative) { + coeff=min(poids)/poids[j] + } + else { + coeff=poids[j] + } + retour[[i]] = ajustBam(data[[i]],coeff) + } + else { + retour[[i]] = data[[i]] + } + } + return(retour) +} + +##Returns the sum of two bamHandler objects +addBam = function(bamHandler1,bamHandler2) { + if(class(bamHandler1)=="try-error"|class(bamHandler2)=="try-error") { + if(class(bamHandler1)=="try-error") { + return(bamHandler2) + } + else { + return(bamHandler1) + } + } + else { + brin_F = list() + junctions_brin_F = list() + brin_R = list() + junctions_brin_R = list() + L = length(bamHandler1$F) + bamHandler = list() + if(bamHandler1$norm != bamHandler2$norm ){ + warning(expr="Two different bam files(normalized and non normalized)!!",immediate.=TRUE) + } + for(i in 1:L) { + brin_F[[i]] = list(c(bamHandler1$F[[i]][[1]],bamHandler2$F[[i]][[1]]),c(bamHandler1$F[[i]][[2]],bamHandler2$F[[i]][[2]]),c(bamHandler1$F[[i]][[3]],bamHandler2$F[[i]][[3]])) + junctions_brin_F[[i]] = list(c(bamHandler1$junctions_F[[i]][[1]],bamHandler2$junctions_F[[i]][[1]]),c(bamHandler1$junctions_F[[i]][[2]],bamHandler2$junctions_F[[i]][[2]]),c(bamHandler1$junctions_F[[i]][[3]],bamHandler2$junctions_F[[i]][[3]])) + brin_R[[i]] = list(c(bamHandler1$R[[i]][[1]],bamHandler2$R[[i]][[1]]),c(bamHandler1$R[[i]][[2]],bamHandler2$R[[i]][[2]]),c(bamHandler1$R[[i]][[3]],bamHandler2$R[[i]][[3]])) + junctions_brin_R[[i]] = list(c(bamHandler1$junctions_R[[i]][[1]],bamHandler2$junctions_R[[i]][[1]]),c(bamHandler1$junctions_R[[i]][[2]],bamHandler2$junctions_R[[i]][[2]]),c(bamHandler1$junctions_R[[i]][[3]],bamHandler2$junctions_R[[i]][[3]])) + if(bamHandler1$norm & bamHandler2$norm){ + brin_F[[i]][[4]] = c(bamHandler1$F[[i]][[4]],bamHandler2$F[[i]][[4]]) + junctions_brin_F[[i]][[4]] = c(bamHandler1$junctions_F[[i]][[4]],bamHandler2$junctions_F[[i]][[4]]) + brin_R[[i]][[4]] = c(bamHandler1$R[[i]][[4]],bamHandler2$R[[i]][[4]]) + junctions_brin_R[[i]][[4]] = c(bamHandler1$junctions_R[[i]][[4]],bamHandler2$junctions_R[[i]][[4]]) + } + } + + names(brin_F) = names(bamHandler1$F) + names(brin_R) = names(bamHandler2$R) + names(junctions_brin_F) = names(bamHandler1$junctions_F) + names(junctions_brin_R) = names(bamHandler2$junctions_R) + bamHandler[[1]] = brin_F + bamHandler[[2]] = brin_R + bamHandler[[3]] = bamHandler1[[3]] + bamHandler[[4]] = bamHandler1[[4]] + bamHandler2[[4]] + bamHandler[[5]] = bamHandler1[[5]] & bamHandler2[[5]] + bamHandler[[6]] = junctions_brin_F + bamHandler[[7]] = junctions_brin_R + bamHandler[[8]] = bamHandler1$norm & bamHandler2$norm + names(bamHandler) = c("F","R","chrLength","flagstat","stranded","junctions_F","junctions_R","norm") + return(bamHandler) + } +} + +##Returns the sum of a list of bamHandler objects +sumBam = function(...) { + args = list(...) + retour = args[[1]] + if(length(args)>1) { + for(i in 2:length(args)) { + retour = addBam(retour,args[[i]]) + } + } + return(retour) +} + +##Returns the mean of bamHandler objects +meanBam = function(...) { + args = list(...) + bamsReduced=normalizeBams(args,rep(1,length(args))/length(args),relative=FALSE) + retour = bamsReduced[[1]] + if(length(bamsReduced)>1) { + for(i in 2:length(bamsReduced)) { + retour = addBam(retour,bamsReduced[[i]]) + } + } + return(retour) +} + ##Extracts the signal from bamHandler objects extractSignal = function(bamHandlerList,chrName,from=1, to=NULL,normalized_=FALSE) { forward=list() @@ -560,23 +656,24 @@ } if(normalized_) { end=4 - }else{ + } + else { end=3 } for(i in 1:length(bamHandlerList)){ - forward_=numeric(to-from+1) - which_read = which((bamHandlerList[[i]]$F[[chrName]][[2]]>=from)&(bamHandlerList[[i]]$F[[chrName]][[1]]<=to)) - n_reads=length(which_read) - if(n_reads>0) { + forward_=numeric(to-from+1) + which_read = which((bamHandlerList[[i]]$F[[chrName]][[2]]>=from)&(bamHandlerList[[i]]$F[[chrName]][[1]]<=to)) + n_reads=length(which_read) + if(n_reads>0) { for(k in which_read) { debut_read = max(1,bamHandlerList[[i]]$F[[chrName]][[1]][k]-from+1) fin_read = min(bamHandlerList[[i]]$F[[chrName]][[2]][k]-from+1,to-from+1) forward_[debut_read:fin_read]=forward_[debut_read:fin_read]+bamHandlerList[[i]]$F[[chrName]][[end]][k] } } - which_junctions = which((bamHandlerList[[i]]$junctions_F[[chrName]][[2]]>=from)&(bamHandlerList[[i]]$junctions_F[[chrName]][[1]]<=to)) - n_junctions=length(which_junctions) - if(n_junctions>0) { + which_junctions = which((bamHandlerList[[i]]$junctions_F[[chrName]][[2]]>=from)&(bamHandlerList[[i]]$junctions_F[[chrName]][[1]]<=to)) + n_junctions=length(which_junctions) + if(n_junctions>0) { for(k in which_junctions) { debut_junction = max(1,bamHandlerList[[i]]$junctions_F[[chrName]][[1]][k]-from+1) fin_junction = min(bamHandlerList[[i]]$junctions_F[[chrName]][[2]][k]-from+1,to-from+1) @@ -587,17 +684,17 @@ reverse_=numeric(to-from+1) which_read = which((bamHandlerList[[i]]$R[[chrName]][[2]]>=from)&(bamHandlerList[[i]]$R[[chrName]][[1]]<=to)) n_reads= length(which_read) - if(n_reads>0) { - for(k in which_read) { - debut_read = max(1,bamHandlerList[[i]]$R[[chrName]][[1]][k]-from+1) - fin_read = min(bamHandlerList[[i]]$R[[chrName]][[2]][k]-from+1,to-from+1) - reverse_[debut_read:fin_read]=reverse_[debut_read:fin_read]+bamHandlerList[[i]]$R[[chrName]][[end]][k] + if(n_reads>0) { + for(k in which_read) { + debut_read = max(1,bamHandlerList[[i]]$R[[chrName]][[1]][k]-from+1) + fin_read = min(bamHandlerList[[i]]$R[[chrName]][[2]][k]-from+1,to-from+1) + reverse_[debut_read:fin_read]=reverse_[debut_read:fin_read]+bamHandlerList[[i]]$R[[chrName]][[end]][k] } } - which_junctions = which((bamHandlerList[[i]]$junctions_R[[chrName]][[2]]>=from)&(bamHandlerList[[i]]$junctions_R[[chrName]][[1]]<=to)) - n_junctions=length(which_junctions) - if(n_junctions>0) { - for(k in which_junctions) { + which_junctions = which((bamHandlerList[[i]]$junctions_R[[chrName]][[2]]>=from)&(bamHandlerList[[i]]$junctions_R[[chrName]][[1]]<=to)) + n_junctions=length(which_junctions) + if(n_junctions>0) { + for(k in which_junctions) { debut_junction = max(1,bamHandlerList[[i]]$junctions_R[[chrName]][[1]][k]-from+1) fin_junction = min(bamHandlerList[[i]]$junctions_R[[chrName]][[2]][k]-from+1,to-from+1) reverse_[debut_junction:fin_junction]=reverse_[debut_junction:fin_junction]-bamHandlerList[[i]]$junctions_R[[chrName]][[end]][k] @@ -613,13 +710,18 @@ forward[[i]]=forward_+reverse_ reverse[[i]]=numeric(to-from+1) } - } + } chr_ = list() chr_$F = forward chr_$R = reverse return(chr_) } +##Intern function for +totalReads = function(bamHandler) { + return(bamHandler[[4]][1]+bamHandler[[4]][11]) +} + ##Intern function for readGff function my.read.lines2=function(fname) { @@ -805,10 +907,16 @@ ##The main function of visualisation plotVisu = function(file,typeVisu="classic",listForward,listReverse,which=1:length(listForward),stranded=TRUE, debut_vue=1,fin_vue=length(listForward[[1]]),chr=NULL,annot=NULL,style=NULL,tops=NULL,bottoms=NULL,marks=NULL,strandMarks=NULL, -titres="",repeated=FALSE,name_flags="",decal=0,log=TRUE,classic_plus_color="navyblue",classic_minus_color="mediumvioletred", -heatmap_max_color="#FFFFAA",heatmap_min_color="#000055",heatmap_palette_method="hsv", -lines_samples_colors=c(1,3,4,2)[((0:length(listForward))%%4)+1],lines_samples_type_line=((0:length(listForward))%%4)+1, +titres="",repeated=FALSE,name_flags="",decal=0,log=TRUE,classic_plus_color="navyblue",classic_minus_color="deeppink3", +heatmap_max_color="#000055",heatmap_min_color="#FFFFAA",heatmap_palette_method="hsv", +lines_samples_colors=c(1,3,4,2)[((0:length(listForward))%%4)+1],lines_samples_type_line=((0:length(listForward))%/%4)+1, smoothLength=trunc((fin_vue-debut_vue)/1200),annotation_color_by_strand=FALSE,annotation_placed_by_strand=FALSE) { + if(fin_vue-debut_vue+1>1000000) { + png(file,1200,400) + plot(0:1,0:1,fg="white",col="white",axes=FALSE,frame=FALSE,xlab="",ylab="") + text(0.5,0.5,"Window too long !",cex=5) + } + else { n_element_vue = length(which) i_data = which[1] forward_matrice=NULL @@ -817,16 +925,16 @@ if(!repeated) { for(i in 1:n_element_vue) { i_data = which[i] - forward_matrice = rbind(forward_matrice,forward[[i_data]][debut_vue:fin_vue]) - reverse_matrice = rbind(reverse_matrice,reverse[[i_data]][debut_vue:fin_vue]) + forward_matrice = rbind(forward_matrice,listForward[[i_data]][debut_vue:fin_vue]) + reverse_matrice = rbind(reverse_matrice,listReverse[[i_data]][debut_vue:fin_vue]) all_stranded = all_stranded & data[[i_data]]$stranded } } else{ for(i in 1:n_element_vue) { i_data = which[i] - forward_matrice = rbind(forward_matrice,forward[[i_data]]) - reverse_matrice = rbind(reverse_matrice,reverse[[i_data]]) + forward_matrice = rbind(forward_matrice,listForward[[i_data]]) + reverse_matrice = rbind(reverse_matrice,listReverse[[i_data]]) all_stranded = all_stranded & data[[i_data]]$stranded } } @@ -842,9 +950,11 @@ } if(is.null(annot)) { heights_ann=NULL + annot_selec=NULL } else { - heights_ann=sizePlotAnnotation(annot=annot,chr=chr,debut=debut_vue,fin=fin_vue,style=style,annotation_placed_by_strand=annotation_placed_by_strand)*50 + annot_selec=annot[(annot$Chr==chr)&(annot$Stop>=debut_vue)&(annot$Start<=fin_vue),] + heights_ann=sizePlotAnnotation(annot=annot_selec,chr=chr,debut=debut_vue,fin=fin_vue,style=style,annotation_placed_by_strand=annotation_placed_by_strand)*50 } ataxises=pretty((debut_vue:fin_vue)+decal,n=14) if(log){ @@ -866,47 +976,47 @@ par(mar=c(2.5, 5, 2.5, 4)+0.1) for(element in 1:n_element_vue) { i_data = which[element] - plotRNAseq(listForward[[i_data]],listReverse[[i_data]],debut_vue,fin_vue,chr,annot,style,top=tops[min(element,length(tops))],bottom=bottoms[min(element,length(bottoms))],y=label_scal,titre=titres[min(i_data,length(titres))],name_flags=name_flags,repeated=repeated,decal=decal,ataxises=ataxises,classic_plus_color=classic_plus_color,classic_minus_color=classic_minus_color,stranded=stranded) + plotRNAseq(listForward[[i_data]],listReverse[[i_data]],debut_vue,fin_vue,chr,annot_selec,style,top=tops[min(element,length(tops))],bottom=bottoms[min(element,length(bottoms))],y=label_scal,titre=titres[min(i_data,length(titres))],name_flags=name_flags,repeated=repeated,decal=decal,ataxises=ataxises,classic_plus_color=classic_plus_color,classic_minus_color=classic_minus_color,stranded=stranded) if(!is.null(marks)) { if(is.null(tops)) { - top=max(forward[[i_data]]) + top=max(listForward[[i_data]]) } else { top=tops[min(element,length(tops))] } - if(is.null(bottoms)) { - bottom=max(reverse[[i_data]]) - } - else { - bottom=bottoms[min(element,length(bottoms))] - } - if(is.null(strandMarks)) { - segments(marks,top,y1=-bottom,col=2) - } - else { - if(strandMarks==0|!data[[i_data]]$stranded) { - segments(marks,0,y1=top,col=2) + if(is.null(bottoms)) { + bottom=max(listReverse[[i_data]]) + } + else { + bottom=bottoms[min(element,length(bottoms))] + } + if(is.null(strandMarks)) { + segments(marks,top,y1=-bottom,col=2) } else { - if(strandMarks==1) { - segments(marks,0,y1=-bottom,col=2) + if(strandMarks==0|!stranded) { + segments(marks,0,y1=top,col=2) } else { - segments(marks,top,y1=-bottom,col=2) + if(strandMarks==1) { + segments(marks,0,y1=-bottom,col=2) + } + else { + segments(marks,top,y1=-bottom,col=2) + } } } } - } } par(mar=c(0,5,1,4)+0.1) - if(!is.null(annot)) { - plot_annotation(annot,chr,debut=debut_vue,fin=fin_vue,style=style,textSize=1.5,annotation_color_by_strand=annotation_color_by_strand,annotation_placed_by_strand=annotation_placed_by_strand) + if(!is.null(annot_selec)) { + plot_annotation(annot_selec,chr,debut=debut_vue,fin=fin_vue,style=style,textSize=1.5,annotation_color_by_strand=annotation_color_by_strand,annotation_placed_by_strand=annotation_placed_by_strand) } } else {##Heatmap visualisation if(typeVisu=="heatmap") { if(stranded) { - height_panels=c(40,50*n_element_vue+28,heights_ann,50*n_element_vue+30,100) + height_panels=c(40,50*n_element_vue+28,heights_ann,50*n_element_vue+10,100) } else { height_panels=c(40,50*n_element_vue+28,heights_ann,100) @@ -918,6 +1028,9 @@ par(mar=c(0, 8, 0, 2)+0.1) plotTitle(debut_vue=debut_vue,fin_vue=fin_vue,chr=chr) limIntensity = c(0,max(forward_matrice,reverse_matrice,na.rm=TRUE)) + if(limIntensity[2]==0) { + limIntensity[2]=0.01 + } ataxisesLabels=as.character(ataxises) ataxisesLabels[((1:length(ataxises))%%2)==0]="" rownames(forward_matrice)=titres[which] @@ -950,9 +1063,9 @@ points(f_$Start,rep(0,N),col=2,pch=19,cex=2) } } - if(!is.null(annot)) { + if(!is.null(annot_selec)) { par(mar=c(0,8,0,2)+0.1) - plot_annotation(annot,chr,debut=debut_vue,fin=fin_vue,style=style,textSize=0.9,annotation_color_by_strand=annotation_color_by_strand,annotation_placed_by_strand=annotation_placed_by_strand) + plot_annotation(annot_selec,chr,debut=debut_vue,fin=fin_vue,style=style,textSize=0.9,annotation_color_by_strand=annotation_color_by_strand,annotation_placed_by_strand=annotation_placed_by_strand) } if(stranded) { par(mar=c(1, 8, 0, 2)+0.1) @@ -994,12 +1107,13 @@ par(mar=c(3, 5, 0,4)+0.1) plotlines(forward_matrice,reverse_matrice,which=which,debut_vue, fin_vue , chr, annot, style, tops, bottoms,marks,strandMarks,titres, repeated,name_flags,decal,ataxises=ataxises,n_element_vue=n_element_vue,y=label_scal,lines_samples_colors=lines_samples_colors,lines_samples_type_line=lines_samples_type_line,stranded=stranded) par(mar=c(0,5,0,4)+0.1) - if(!is.null(annot)) { - plot_annotation(annot,chr,debut=debut_vue,fin=fin_vue,style=style,textSize=1.5,annotation_color_by_strand=annotation_color_by_strand,annotation_placed_by_strand=annotation_placed_by_strand) + if(!is.null(annot_selec)) { + plot_annotation(annot_selec,chr,debut=debut_vue,fin=fin_vue,style=style,textSize=1.5,annotation_color_by_strand=annotation_color_by_strand,annotation_placed_by_strand=annotation_placed_by_strand) } } } + } invisible(dev.off()) } @@ -1042,6 +1156,36 @@ text(fin_vue+(fin_vue-debut_vue)*0.01,-bottoms/2,"-",xpd=NA,cex=3) text(fin_vue+(fin_vue-debut_vue)*0.025,(tops-bottoms)/2,"Strand",xpd=NA,cex=2,srt=-90) } + if(!is.null(marks)) { + if(is.null(tops)) { + top=max(listForward[[i_data]]) + } + else { + top=max(tops) + } + if(is.null(bottoms)) { + bottom=max(listReverse[[i_data]]) + } + else { + bottom=max(bottoms) + } + if(is.null(strandMarks)) { + segments(marks,top,y1=-bottom,col=2) + } + else { + if(strandMarks==0|!stranded) { + segments(marks,0,y1=top,col=2) + } + else { + if(strandMarks==1) { + segments(marks,0,y1=-bottom,col=2) + } + else { + segments(marks,top,y1=-bottom,col=2) + } + } + } + } } ##Returns lines legend @@ -1067,20 +1211,21 @@ ##Returns the shape of plain arrow for the annotation plain_arrow = function(left,right,y,thickness=1,pickSize=(right-left)*0.1,pickSide=c("right","left","both","none"),col="blue") { + middle=(left+right)/2 if(pickSide[1]=="right") { - pick_point = min(right - pickSize,right) + pick_point = max(right - pickSize,middle) polygon(c(left,pick_point,right,pick_point,left),c(y-thickness/2,y-thickness/2,y,y+thickness/2,y+thickness/2),col=col) } if(pickSide[1]=="left") { - pick_point = max(left + pickSize,left) + pick_point = min(left + pickSize,middle) polygon(c(right,pick_point,left,pick_point,right),c(y-thickness/2,y-thickness/2,y,y+thickness/2,y+thickness/2),col=col) } if(pickSide[1]=="none") { polygon(c(right,left,left,right),c(y-thickness/2,y-thickness/2,y+thickness/2,y+thickness/2),col=col) } if(pickSide[1]=="both") { - pick_point_1 = max(left + pickSize,left) - pick_point_2 = min(right - pickSize,right) + pick_point_1 = min(left + pickSize,middle) + pick_point_2 = max(right - pickSize,middle) polygon(c(left,pick_point_1,pick_point_2,right,pick_point_2,pick_point_1),c(y,y-thickness/2,y-thickness/2,y,y+thickness/2,y+thickness/2),col=col) } } @@ -1232,34 +1377,6 @@ } } -# produit la visualisation de la légende -plotLegend= function(style,width_plot=400,length_arrows=100,type="horizontal") { - if(type=="horizontal") { - style_unique = unique(style$col) - n_style = length(style_unique) - plot(c(0,width_plot),c(1,2*n_style+1),col="white",ylab="",xlab="",xaxs="i",fg="white",col.axis="white",main="Legend") - for(i in 1:n_style) { - arrows(ceiling((width_plot-length_arrows)/2),2*(n_style-i+1),ceiling((width_plot+length_arrows)/2),2*(n_style-i+1),col=style_unique[i],length=0.10) - text(ceiling(width_plot/2),2*(n_style-i+1)+0.5,paste(style$Type[style$col==style_unique[i]],collapse=", "),col=style_unique[i]) - } - } - else { - style_unique = unique(style$col) - n_style = length(style_unique) - length_arrows = width_plot/(n_style*1.25) - inbetween = length_arrows/4 - plot(c(0,width_plot),c(0,1),col="white",ylab="",xlab="",yaxs="i",xaxt="n",yaxt="n",bty="n",xaxs="i",col.axis="white",main="") - text(width_plot/2,0.8,"Legend",cex=1.4,font=2) - for(i in 1:n_style) { - fleche_start = (length_arrows+inbetween)*(i-1) + inbetween/2 - fleche_stop = (length_arrows+inbetween)*(i-1) + length_arrows + inbetween/2 -# arrows(fleche_start,0.2,fleche_stop,0.2,col=style_unique[i],length=0.10) - rect(fleche_start,0.2,fleche_stop,0.6,border=style_unique[i]) - text((fleche_start+fleche_stop)/2,0.4,paste(style$Type[style$col==style_unique[i]],collapse=", "),col=style_unique[i],cex=1.2) - } - } -} - html2rgb = function(codeHTML) { chiffres=c(0:9,"A","B","C","D","E","F") codeHTMLsplit=unlist(strsplit(codeHTML,split="")) @@ -1298,42 +1415,44 @@ ### { if(version$major != "3" & version$minor != "0.2") { - warning("You need 3.0.2 R Version to run the program") + write("Ving has been developped on R 3.0.2, +therefore it might misbehave here.",stderr()) } tmp = unlist(strsplit(commandArgs()[4],split="="))[2] path_split = unlist(strsplit(tmp,split="/")) path = paste(path_split[1:(length(path_split)-1)],collapse="/") arg<-commandArgs(TRUE) if(length(arg)==0) { - cat("Ving for visualisation of RNA seq data: + write("Ving for visualisation of RNA seq data: Usage: ./visualiseur.R [options] <input> [<input2> <input3> ...] options: -o/--output <string> [default:./ouput.png] -v/--typeVisu <string> [default: classic ] (classic,lines,heatmap) - -t/--description-data <string> [default: filename ] + -t/--description-data <string> [default: <filename>] -n/--normalization-coefficient <string> [default: none ] -i/--inverseStrand -u/--unstranded -c/--chromosome-name <string> [default: first one ] - -d/--start <integer> [default: 1 ] - -f/--end <integer> [default: chr length] + -S/--start <integer> [default: 1 ] + -E/--end <integer> [default: chr length] -a/--annotation <filename> - -r/--typeTranscript <string> [default: all ] + -r/--typeTranscript <string> [default: <all> ] -C/--annotation-colors <string> [default: fabulous ] -s/--annotation-shapes <string> [default: box ] (box,line,rectangle,arrow) -l/--scale-log -y/--symetric-scale - --classic-plus-color <string> [default: purple ] - --classic-minus-color <string> [default: pink ] - --heatmap-max-color <string> [default: green ] - --heatmap-min-color <string> [default: yellow ] + --classic-plus-color <string> [default: navyblue ] + --classic-minus-color <string> [default: deeppink3 ] + --heatmap-max-color <string> [default: 000055 ] + --heatmap-min-color <string> [default: FFFFAA ] --heatmap-palette-method <string> [default: hsv ] (hsv,rgb) --lines-samples-colors <string> [default: black ] --lines-samples-type-line <string> [default: plain ] --annotation-color-by-strand <logical> [default: FALSE ] --annotation-placed-by-strand <logical> [default: FALSE ] - \n\n") + -L/--smoothLength <integer> [default: NA ] + ",stdout()) q("no") }else { tmp=suppressPackageStartupMessages(require("Rsamtools")) @@ -1351,15 +1470,15 @@ c('typeVisu', 'v', 1, 'character', "classic"), c('description-data','t',1,'character',NA), c('chromosome-name', 'c', 1, 'character', NA), - c('start', 'd', 1, 'numeric',1), - c('end', 'f', 1, 'numeric',NA), + c('start', 'S', 1, 'numeric',1), + c('end', 'E', 1, 'numeric',NA), c('annotation','a',1,'character',NA), c('typeTranscript','r',1,'character',NA), c('annotation-colors','C',1,'character',NA), c('annotation-shapes','s','1','character',NA), c('normalization-coefficient','n','1','character',NA), c('classic-plus-color',1,1,'character',"navyblue"), - c('classic-minus-color',2,1,'character',"mediumvioletred"), + c('classic-minus-color',2,1,'character',"deeppink3"), c('heatmap-max-color',3,1,'character',"#000055"), c('heatmap-min-color',4,1,'character',"#FFFFAA"), c('heatmap-palette-method',7,1,'character',"hsv"), @@ -1370,7 +1489,8 @@ c('unstranded','u',0,'logical', FALSE), c('symetric-scale','y',0,'logical', FALSE), c('annotation-color-by-strand',8,0,'logical',FALSE), - c('annotation-placed-by-strand',9,0,'logical',FALSE) + c('annotation-placed-by-strand',9,0,'logical',FALSE), + c('smoothLength','L',1,'numeric',NA) ) ) } @@ -1381,7 +1501,7 @@ ##Case file doesn't exist for( i in 1:length(files)) { if(!file.exists(files[i])) { - stop(paste(files[i],"do not exists!","\n")) + stop(paste(files[i],"do not exist!","\n")) } } imagefile=optArgs$output @@ -1408,6 +1528,7 @@ symetric= optArgs$`symetric-scale` annotation_color_by_strand=optArgs$`annotation-color-by-strand` annotation_placed_by_strand=optArgs$`annotation-placed-by-strand` +smoothLength=optArgs$`smoothLength` ################### ## MAIN ################################################################################### @@ -1421,7 +1542,7 @@ ##Case no chromosome specified if(sum(is.na(chrName))) { chrName = noms_chromosomes[1] - cat("No chromosome specified, processing chromosome :",chrName,"\n") + write(paste("No chromosome specified, processing chromosome :",chrName,"\n",sep=""),stderr()) } @@ -1474,7 +1595,7 @@ if(sum(ctrl)>0) { for(i in which(ctrl)) { - cat("Problem with file :",files[i],"\n") + write(paste("Problem with file :",files[i],"\n",sep=""),stderr()) } stop("At least a file has problem") } @@ -1484,32 +1605,45 @@ gff=try(readGff(annotation[1],from=start,to=end,chr=chrName),TRUE) ctrl=class(gff)=="try-error" if(sum(ctrl)>0) { - stop(paste("Problem with Gff file :",annotation,"\n")) + stop(paste("Problem with Gff file :",annotation,"\n")) } if(length(annotation)>1) { for(i in 2:length(annotation)) { gff1=try(readGff(annotation[i],from=start,to=end,chr=chrName),TRUE) ctrl=class(gff1)=="try-error" if(sum(ctrl)>0) { - stop(paste("Problem with Gff file :",gff1,"\n")) + stop(paste("Problem with Gff file :",gff1,"\n")) } gff=rbind(gff,gff1) } } - } else { gff=NA } - if(sum(is.na(description_data))>0) { description_data = files } -##Case of different number of files and description + + ##Case of different number of files and description if(length(description_data)!=length(files)) { stop("Different number of files and description") } + +## pooling bam files (if necessary) + description_data_unique=unique(description_data) + data_pooled=list() + for(i_descri in description_data_unique) { + i_data_voulu=which(description_data==i_descri) + data_pooled_=data[[i_data_voulu[1]]] + if(length(i_data_voulu)>1) { + for(i_i_data_voulu in 2:length(i_data_voulu)) { + data_pooled_=addBam(data_pooled_,data[[i_data_voulu[i_i_data_voulu]]]) + } + } + data_pooled[[i_descri]]=data_pooled_ + } if(sum(is.na(typeTranscript)>0)) { if(sum(is.na(annotation))==0) { @@ -1592,7 +1726,7 @@ ctrl=sapply(colorToCheck,is.acceptable.color) if(sum(!ctrl)>0) { for(i in which(!ctrl)) { - cat("\"",colorToCheck[i],"\" is not a proper color name.\n",sep="") + write(paste("\"",colorToCheck[i],"\" is not a proper color name.\n",sep=""),stderr()) } stop("At least one color has problem") } @@ -1625,7 +1759,7 @@ ) if(sum(!ctrl)>0) { for(i in which(!ctrl)) { - cat("\"",lines_samples_type_line_split[i],"\" is not a proper line style.\n",sep="") + write(paste("\"",lines_samples_type_line_split[i],"\" is not a proper line style.\n",sep=""),stderr()) } stop("At least one line style has problem") } @@ -1668,7 +1802,7 @@ } ##Main for visualisation -databychr=extractSignal(data,chrName,from=start,to=end,normalized_=isnormalized) +databychr=extractSignal(data_pooled,chrName,from=start,to=end,normalized_=isnormalized) { reverse=list() @@ -1691,7 +1825,12 @@ } } -smoothLength=max(trunc((end-start)/1200),1) +if(sum(is.na(smoothLength))>0) { + smoothLength=max(trunc((end-start)/1200),1) +} +else { + smoothLength=smoothLength[1] +} if(unstranded) { for(i in 1:length(databychr$F)){ lo=smooth(forward[[i]]+reverse[[i]],L=smoothLength) @@ -1730,7 +1869,7 @@ plotVisu(imagefile,typeVisu=typeVisu,listForward=forward,listReverse=reverse, debut_vue=start,fin_vue=end,chr=chrName,annot=annot,repeated=TRUE, -titres=description_data,name_flags="",style=style,log=log,stranded=!unstranded, +titres=description_data_unique,name_flags="",style=style,log=log,stranded=!unstranded, tops=max_forward,bottoms=max_reverse, classic_plus_color=classic_plus_color,classic_minus_color=classic_minus_color, heatmap_max_color=heatmap_max_color,heatmap_min_color=heatmap_min_color,heatmap_palette_method=heatmap_palette_method,