comparison lib.r @ 25:230f0bc9e792 draft

planemo upload commit a634879c0e651eb0eb7b435a107ee40cf30524fa
author lecorguille
date Fri, 10 Feb 2017 11:11:27 -0500
parents 2a4a811c663d
children f769635d55f9
comparison
equal deleted inserted replaced
24:10176a940ec6 25:230f0bc9e792
1 # lib.r version="2.2.1" 1 # lib.r version="2.2.1"
2 2
3 #@author G. Le Corguille
3 #The function create a pdf from the different png generated by diffreport 4 #The function create a pdf from the different png generated by diffreport
4 diffreport_png2pdf <- function(filebase, new_file_path) { 5 diffreport_png2pdf <- function(filebase, new_file_path) {
5 6
6 pdfEicOutput = paste(new_file_path,filebase,"-eic_visible_pdf",sep="") 7 pdfEicOutput = paste(new_file_path,filebase,"-eic_visible_pdf",sep="")
7 pdfBoxOutput = paste(new_file_path,filebase,"-box_visible_pdf",sep="") 8 pdfBoxOutput = paste(new_file_path,filebase,"-box_visible_pdf",sep="")
8 9
9 system(paste("gm convert ",filebase,"_eic/*.png ",filebase,"_eic.pdf",sep="")) 10 system(paste("gm convert ",filebase,"_eic/*.png ",filebase,"_eic.pdf",sep=""))
10 system(paste("gm convert ",filebase,"_box/*.png ",filebase,"_box.pdf",sep="")) 11 system(paste("gm convert ",filebase,"_box/*.png ",filebase,"_box.pdf",sep=""))
11 12
12 file.copy(paste(filebase,"_eic.pdf",sep=""), pdfEicOutput) 13 file.copy(paste(filebase,"_eic.pdf",sep=""), pdfEicOutput)
13 file.copy(paste(filebase,"_box.pdf",sep=""), pdfBoxOutput) 14 file.copy(paste(filebase,"_box.pdf",sep=""), pdfBoxOutput)
15 }
16
17 #@author G. Le Corguille
18 #This function convert if it is required the Retention Time in minutes
19 RTSecondToMinute <- function(variableMetadata, convertRTMinute) {
20 if (convertRTMinute){
21 #converting the retention times (seconds) into minutes
22 print("converting the retention times into minutes in the variableMetadata")
23 variableMetadata[,"rt"]=variableMetadata[,"rt"]/60
24 variableMetadata[,"rtmin"]=variableMetadata[,"rtmin"]/60
25 variableMetadata[,"rtmax"]=variableMetadata[,"rtmax"]/60
26 }
27 return (variableMetadata)
28 }
29
30 #@author G. Le Corguille
31 #This function format ions identifiers
32 formatIonIdentifiers <- function(variableMetadata, numDigitsRT=0, numDigitsMZ=0) {
33 splitDeco = strsplit(as.character(variableMetadata$name),"_")
34 idsDeco = sapply(splitDeco, function(x) { deco=unlist(x)[2]; if (is.na(deco)) return ("") else return(paste0("_",deco)) })
35 namecustom = make.unique(paste0("M",round(variableMetadata[,"mz"],numDigitsMZ),"T",round(variableMetadata[,"rt"],numDigitsRT),idsDeco))
36 variableMetadata=cbind(name=variableMetadata$name, namecustom=namecustom, variableMetadata[,!(colnames(variableMetadata) %in% c("name"))])
37 return(variableMetadata)
14 } 38 }
15 39
16 #The function annotateDiffreport without the corr function which bugs 40 #The function annotateDiffreport without the corr function which bugs
17 annotatediff <- function(xset=xset, listArguments=listArguments, variableMetadataOutput="variableMetadata.tsv", dataMatrixOutput="dataMatrix.tsv",new_file_path=NULL) { 41 annotatediff <- function(xset=xset, listArguments=listArguments, variableMetadataOutput="variableMetadata.tsv", dataMatrixOutput="dataMatrix.tsv",new_file_path=NULL) {
18 # Resolve the bug with x11, with the function png 42 # Resolve the bug with x11, with the function png
19 options(bitmapType='cairo') 43 options(bitmapType='cairo')
20 44
21 #Check if the fillpeaks step has been done previously, if it hasn't, there is an error message and the execution is stopped. 45 #Check if the fillpeaks step has been done previously, if it hasn't, there is an error message and the execution is stopped.
22 res=try(is.null(xset@filled)) 46 res=try(is.null(xset@filled))
23 47
24 # ------ annot ------- 48 # ------ annot -------
25 listArguments[["calcCiS"]]=as.logical(listArguments[["calcCiS"]]) 49 listArguments[["calcCiS"]]=as.logical(listArguments[["calcCiS"]])
26 listArguments[["calcIso"]]=as.logical(listArguments[["calcIso"]]) 50 listArguments[["calcIso"]]=as.logical(listArguments[["calcIso"]])
27 listArguments[["calcCaS"]]=as.logical(listArguments[["calcCaS"]]) 51 listArguments[["calcCaS"]]=as.logical(listArguments[["calcCaS"]])
28 52
29 #graphMethod parameter bugs where this parameter is not defined in quick=true 53 # common parameters
30 if(listArguments[["quick"]]==TRUE) { 54 listArguments4annotate = list(object=xset,
31 xa= annotate(object=xset,nSlaves=1,sigma=listArguments[["sigma"]],perfwhm=listArguments[["perfwhm"]],maxcharge=listArguments[["maxcharge"]],maxiso=listArguments[["maxiso"]],minfrac=listArguments[["minfrac"]],ppm=listArguments[["ppm"]],mzabs=listArguments[["mzabs"]],quick=listArguments[["quick"]],polarity=listArguments[["polarity"]],max_peaks=listArguments[["max_peaks"]],intval=listArguments[["intval"]]) 55 nSlaves=listArguments[["nSlaves"]],sigma=listArguments[["sigma"]],perfwhm=listArguments[["perfwhm"]],
32 } 56 maxcharge=listArguments[["maxcharge"]],maxiso=listArguments[["maxiso"]],minfrac=listArguments[["minfrac"]],
33 else { 57 ppm=listArguments[["ppm"]],mzabs=listArguments[["mzabs"]],quick=listArguments[["quick"]],
34 xa= annotate(object=xset,nSlaves=1,sigma=listArguments[["sigma"]],perfwhm=listArguments[["perfwhm"]],graphMethod=listArguments[["graphMethod"]],cor_eic_th=listArguments[["cor_eic_th"]],pval=listArguments[["pval"]],calcCiS=listArguments[["calcCiS"]],calcIso=listArguments[["calcIso"]],calcCaS=listArguments[["calcCaS"]],multiplier=listArguments[["multiplier"]],maxcharge=listArguments[["maxcharge"]],maxiso=listArguments[["maxiso"]],minfrac=listArguments[["minfrac"]],ppm=listArguments[["ppm"]],mzabs=listArguments[["mzabs"]],quick=listArguments[["quick"]],polarity=listArguments[["polarity"]],max_peaks=listArguments[["max_peaks"]],intval=listArguments[["intval"]]) 58 polarity=listArguments[["polarity"]],max_peaks=listArguments[["max_peaks"]],intval=listArguments[["intval"]])
35 59
36 } 60 # quick == FALSE
37 peakList=getPeaklist(xa,intval=listArguments[["intval"]]) 61 if(listArguments[["quick"]]==FALSE) {
38 peakList=cbind(groupnames(xa@xcmsSet),peakList); colnames(peakList)[1] = c("name"); 62 listArguments4annotate = append(listArguments4annotate,
39 63 list(graphMethod=listArguments[["graphMethod"]],cor_eic_th=listArguments[["cor_eic_th"]],pval=listArguments[["pval"]],
40 64 calcCiS=listArguments[["calcCiS"]],calcIso=listArguments[["calcIso"]],calcCaS=listArguments[["calcCaS"]]))
41 # --- Multi condition : diffreport --- 65 # no ruleset
42 diffrep=NULL 66 if (!is.null(listArguments[["multiplier"]])) {
43 if (!is.null(listArguments[["runDiffreport"]]) & nlevels(sampclass(xset))>=2) { 67 listArguments4annotate = append(listArguments4annotate,
68 list(multiplier=listArguments[["multiplier"]]))
69 }
70 # ruleset
71 else {
72 rulset=read.table(listArguments[["rules"]], h=T, sep=";")
73 if (ncol(rulset) < 4) rulset=read.table(listArguments[["rules"]], h=T, sep="\t")
74 if (ncol(rulset) < 4) rulset=read.table(listArguments[["rules"]], h=T, sep=",")
75 if (ncol(rulset) < 4) {
76 error_message="Your ruleset file seems not well formatted. The column separators accepted are ; , and tabulation"
77 print(error_message)
78 stop(error_message)
79 }
80
81 listArguments4annotate = append(listArguments4annotate,
82 list(rules=rulset))
83 }
84 }
85
86
87 # launch annotate
88 xa = do.call("annotate", listArguments4annotate)
89 peakList=getPeaklist(xa,intval=listArguments[["intval"]])
90 peakList=cbind(groupnames(xa@xcmsSet),peakList); colnames(peakList)[1] = c("name");
91
92 # --- dataMatrix ---
93 dataMatrix = peakList[,(make.names(colnames(peakList)) %in% c("name", make.names(sampnames(xa@xcmsSet))))]
94 write.table(dataMatrix, sep="\t", quote=FALSE, row.names=FALSE, file=dataMatrixOutput)
95
96
97 # --- Multi condition : diffreport ---
98 diffrep=NULL
99 if (!is.null(listArguments[["runDiffreport"]]) & nlevels(sampclass(xset))>=2) {
44 #Check if the fillpeaks step has been done previously, if it hasn't, there is an error message and the execution is stopped. 100 #Check if the fillpeaks step has been done previously, if it hasn't, there is an error message and the execution is stopped.
45 res=try(is.null(xset@filled)) 101 res=try(is.null(xset@filled))
46 classes=levels(sampclass(xset)) 102 classes=levels(sampclass(xset))
47 x=1:(length(classes)-1) 103 x=1:(length(classes)-1)
48 for (i in seq(along=x) ) { 104 for (i in seq(along=x) ) {
49 y=1:(length(classes)) 105 y=1:(length(classes))
50 for (n in seq(along=y)){ 106 for (n in seq(along=y)){
51 if(i+n <= length(classes)){ 107 if(i+n <= length(classes)){
52 filebase=paste(classes[i],class2=classes[i+n],sep="-vs-") 108 filebase=paste(classes[i],class2=classes[i+n],sep="-vs-")
53 109
54 diffrep=diffreport(object=xset,class1=classes[i],class2=classes[i+n],filebase=filebase,eicmax=listArguments[["eicmax"]],eicwidth=listArguments[["eicwidth"]],sortpval=TRUE,value=listArguments[["value"]],h=listArguments[["h"]],w=listArguments[["w"]],mzdec=listArguments[["mzdec"]]) 110 diffrep=diffreport(object=xset,class1=classes[i],class2=classes[i+n],filebase=filebase,eicmax=listArguments[["eicmax"]],eicwidth=listArguments[["eicwidth"]],sortpval=TRUE,value=listArguments[["value"]],h=listArguments[["h"]],w=listArguments[["w"]],mzdec=listArguments[["mzdec"]])
55 #combines results 111
56 diffreportTSV=merge(peakList, diffrep[,c("name","fold","tstat","pvalue")], by.x="name", by.y="name", sort=F) 112 diffrepOri = diffrep
57 diffreportTSV=cbind(diffreportTSV[,!(colnames(diffreportTSV) %in% c(sampnames(xa@xcmsSet)))],diffreportTSV[,(colnames(diffreportTSV) %in% c(sampnames(xa@xcmsSet)))]) 113
58 114 # renamming of the column rtmed to rt to fit with camera peaklist function output
59 if(listArguments[["sortpval"]]){ 115 colnames(diffrep)[colnames(diffrep)=="rtmed"] <- "rt"
60 diffreportTSV=diffreportTSV[order(diffreportTSV$pvalue), ] 116 colnames(diffrep)[colnames(diffrep)=="mzmed"] <- "mz"
61 } 117
62 118 # combines results and reorder columns
63 if (listArguments[["convert_param"]]){ 119 diffrep = merge(peakList, diffrep[,c("name","fold","tstat","pvalue")], by.x="name", by.y="name", sort=F)
64 #converting the retention times (seconds) into minutes 120 diffrep = cbind(diffrep[,!(colnames(diffrep) %in% c(sampnames(xa@xcmsSet)))],diffrep[,(colnames(diffrep) %in% c(sampnames(xa@xcmsSet)))])
65 diffreportTSV$rt=diffreportTSV$rt/60;diffreportTSV$rtmin=diffreportTSV$rtmin/60; diffreportTSV$rtmax=diffreportTSV$rtmax/60; 121
66 } 122 diffrep = RTSecondToMinute(diffrep, listArguments[["convertRTMinute"]])
67 write.table(diffreportTSV, sep="\t", quote=FALSE, row.names=FALSE, file=paste(new_file_path,filebase,"-tabular_visible_tabular",sep="")) 123 diffrep = formatIonIdentifiers(diffrep, numDigitsRT=listArguments[["numDigitsRT"]], numDigitsMZ=listArguments[["numDigitsMZ"]])
68 124
69 if (listArguments[["eicmax"]] != 0) { 125 if(listArguments[["sortpval"]]){
70 diffreport_png2pdf(filebase, new_file_path) 126 diffrep=diffrep[order(diffrep$pvalue), ]
71 } 127 }
72 } 128
73 } 129 write.table(diffrep, sep="\t", quote=FALSE, row.names=FALSE, file=paste(new_file_path,filebase,"-tabular_visible_tabular",sep=""))
74 } 130
75 } 131 if (listArguments[["eicmax"]] != 0) {
76 132 diffreport_png2pdf(filebase, new_file_path)
77 133 }
78 134 }
79 135 }
80 # --- variableMetadata --- 136 }
81 variableMetadata=peakList[,!(make.names(colnames(peakList)) %in% c(make.names(sampnames(xa@xcmsSet))))] 137 }
82 # if we have 2 conditions, we keep stat of diffrep 138
83 if (!is.null(listArguments[["runDiffreport"]]) & nlevels(sampclass(xset))==2) { 139
84 variableMetadata = merge(variableMetadata, diffrep[,c("name","fold","tstat","pvalue")],by.x="name", by.y="name", sort=F) 140 # --- variableMetadata ---
85 if(exists("listArguments[[\"sortpval\"]]")){ 141 variableMetadata=peakList[,!(make.names(colnames(peakList)) %in% c(make.names(sampnames(xa@xcmsSet))))]
86 variableMetadata=variableMetadata[order(variableMetadata$pvalue), ] 142 variableMetadata = RTSecondToMinute(variableMetadata, listArguments[["convertRTMinute"]])
87 } 143 variableMetadata = formatIonIdentifiers(variableMetadata, numDigitsRT=listArguments[["numDigitsRT"]], numDigitsMZ=listArguments[["numDigitsMZ"]])
88 } 144 # if we have 2 conditions, we keep stat of diffrep
89 145 if (!is.null(listArguments[["runDiffreport"]]) & nlevels(sampclass(xset))==2) {
90 variableMetadataOri=variableMetadata 146 variableMetadata = merge(variableMetadata, diffrep[,c("name","fold","tstat","pvalue")],by.x="name", by.y="name", sort=F)
91 if (listArguments[["convert_param"]]){ 147 if(exists("listArguments[[\"sortpval\"]]")){
92 #converting the retention times (seconds) into minutes 148 variableMetadata=variableMetadata[order(variableMetadata$pvalue), ]
93 print("converting the retention times into minutes in the variableMetadata") 149 }
94 variableMetadata$rt=variableMetadata$rt/60;variableMetadata$rtmin=variableMetadata$rtmin/60; variableMetadata$rtmax=variableMetadata$rtmax/60; 150 }
95 } 151
96 #Transform metabolites name 152 variableMetadataOri=variableMetadata
97 variableMetadata$name= paste("M",round(variableMetadata$mz,digits=listArguments[["num_digits"]]),"T",round(variableMetadata$rt),sep="") 153 write.table(variableMetadata, sep="\t", quote=FALSE, row.names=FALSE, file=variableMetadataOutput)
98 write.table(variableMetadata, sep="\t", quote=FALSE, row.names=FALSE, file=variableMetadataOutput) 154
99 155 return(list("xa"=xa,"diffrep"=diffrepOri,"variableMetadata"=variableMetadataOri));
100 # --- dataMatrix --- 156
101 dataMatrix = peakList[,(make.names(colnames(peakList)) %in% c(make.names(sampnames(xa@xcmsSet))))] 157 }
102 dataMatrix=cbind(peakList$name,dataMatrix); colnames(dataMatrix)[1] = c("name"); 158
103 159
104 if (listArguments[["convert_param"]]){ 160 combinexsAnnos_function <- function(xaP, xaN, listOFlistArgumentsP,listOFlistArgumentsN, diffrepP=NULL,diffrepN=NULL,pos=TRUE,tol=2,ruleset=NULL,keep_meta=TRUE, intval="into", convertRTMinute=F, numDigitsMZ=0, numDigitsRT=0, variableMetadataOutput="variableMetadata.tsv"){
105 #converting the retention times (seconds) into minutes 161
106 print("converting the retention times into minutes in the dataMatrix ids") 162 #Load the two Rdata to extract the xset objects from positive and negative mode
107 peakList$rt=peakList$rt/60 163 cat("\tObject xset from positive mode\n")
108 } 164 print(xaP)
109 dataMatrix$name= paste("M",round(peakList$mz,digits=listArguments[["num_digits"]]),"T",round(peakList$rt),sep="") 165 cat("\n")
110 write.table(dataMatrix, sep="\t", quote=FALSE, row.names=FALSE, file=dataMatrixOutput) 166
111 167 cat("\tObject xset from negative mode\n")
112 return(list("xa"=xa,"diffrep"=diffrep,"variableMetadata"=variableMetadataOri)); 168 print(xaN)
113 169 cat("\n")
114 } 170
115 171 cat("\n")
116 172 cat("\tCombining...\n")
117 combinexsAnnos_function <- function(xaP, xaN, listOFlistArgumentsP,listOFlistArgumentsN, diffrepP=NULL,diffrepN=NULL,convert_param=FALSE,pos=TRUE,tol=2,ruleset=NULL,keep_meta=TRUE, variableMetadataOutput="variableMetadata.tsv"){ 173 #Convert the string to numeric for creating matrix
118 174 row=as.numeric(strsplit(ruleset,",")[[1]][1])
119 #Load the two Rdata to extract the xset objects from positive and negative mode 175 column=as.numeric(strsplit(ruleset,",")[[1]][2])
120 cat("\tObject xset from positive mode\n") 176 ruleset=cbind(row,column)
121 print(xaP) 177 #Test if the file comes from an older version tool
122 cat("\n") 178 if ((!is.null(xaP)) & (!is.null(xaN))) {
123 179 #Launch the combinexsannos function from CAMERA
124 cat("\tObject xset from negative mode\n") 180 cAnnot=combinexsAnnos(xaP, xaN,pos=pos,tol=tol,ruleset=ruleset)
125 print(xaN) 181 } else {
126 cat("\n") 182 stop("You must relauch the CAMERA.annotate step with the lastest version.")
127 183 }
128 cat("\n") 184
129 cat("\tCombining...\n") 185 if(pos){
130 #Convert the string to numeric for creating matrix 186 xa=xaP
131 row=as.numeric(strsplit(ruleset,",")[[1]][1]) 187 listOFlistArgumentsP=listOFlistArguments
132 column=as.numeric(strsplit(ruleset,",")[[1]][2]) 188 mode="neg. Mode"
133 ruleset=cbind(row,column) 189 } else {
134 #Test if the file comes from an older version tool 190 xa=xaN
135 if ((!is.null(xaP)) & (!is.null(xaN))) { 191 listOFlistArgumentsN=listOFlistArguments
136 #Launch the combinexsannos function from CAMERA 192 mode="pos. Mode"
137 cAnnot=combinexsAnnos(xaP, xaN,pos=pos,tol=tol,ruleset=ruleset) 193 }
138 } else { 194
139 stop("You must relauch the CAMERA.annotate step with the lastest version.") 195 peakList=getPeaklist(xa,intval=intval)
140 } 196 peakList=cbind(groupnames(xa@xcmsSet),peakList); colnames(peakList)[1] = c("name");
141 197 variableMetadata=cbind(peakList, cAnnot[, c("isotopes", "adduct", "pcgroup",mode)]);
142 198 variableMetadata=variableMetadata[,!(colnames(variableMetadata) %in% c(sampnames(xa@xcmsSet)))]
143 199
144 if(pos){ 200 #Test if there are more than two classes (conditions)
145 xa=xaP 201 if ( nlevels(sampclass(xaP@xcmsSet))==2 & (!is.null(diffrepN)) & (!is.null(diffrepP))) {
146 listOFlistArgumentsP=listOFlistArguments 202 diffrepP = diffrepP[,c("name","fold","tstat","pvalue")]; colnames(diffrepP) = paste("P.",colnames(diffrepP),sep="")
147 mode="neg. Mode" 203 diffrepN = diffrepN[,c("name","fold","tstat","pvalue")]; colnames(diffrepN) = paste("N.",colnames(diffrepN),sep="")
148 } else { 204
149 xa=xaN 205 variableMetadata = merge(variableMetadata, diffrepP, by.x="name", by.y="P.name")
150 listOFlistArgumentsN=listOFlistArguments 206 variableMetadata = merge(variableMetadata, diffrepN, by.x="name", by.y="N.name")
151 mode="pos. Mode" 207 }
152 } 208
153 intval = "into"; for (steps in names(listOFlistArguments)) { if (!is.null(listOFlistArguments[[steps]]$intval)) intval = listOFlistArguments[[steps]]$intval } 209 rownames(variableMetadata) = NULL
154 peakList=getPeaklist(xa,intval=intval) 210 #TODO: checker
155 peakList=cbind(groupnames(xa@xcmsSet),peakList); colnames(peakList)[1] = c("name"); 211 #colnames(variableMetadata)[1:2] = c("name","mz/rt");
156 variableMetadata=cbind(peakList, cAnnot[, c("isotopes", "adduct", "pcgroup",mode)]); 212
157 variableMetadata=variableMetadata[,!(colnames(variableMetadata) %in% c(sampnames(xa@xcmsSet)))] 213 variableMetadata = RTSecondToMinute(variableMetadata, convertRTMinute)
158 214 variableMetadata = formatIonIdentifiers(variableMetadata, numDigitsRT=numDigitsRT, numDigitsMZ=numDigitsMZ)
159 #Test if there are more than two classes (conditions) 215
160 if ( nlevels(sampclass(xaP@xcmsSet))==2 & (!is.null(diffrepN)) & (!is.null(diffrepP))) { 216 #If the user want to keep only the metabolites which match a difference
161 diffrepP = diffrepP[,c("name","fold","tstat","pvalue")]; colnames(diffrepP) = paste("P.",colnames(diffrepP),sep="") 217 if(keep_meta){
162 diffrepN = diffrepN[,c("name","fold","tstat","pvalue")]; colnames(diffrepN) = paste("N.",colnames(diffrepN),sep="") 218 variableMetadata=variableMetadata[variableMetadata[,c(mode)]!="",]
163 219 }
164 variableMetadata = merge(variableMetadata, diffrepP, by.x="name", by.y="P.name") 220
165 variableMetadata = merge(variableMetadata, diffrepN, by.x="name", by.y="N.name") 221 #Write the output into a tsv file
166 } 222 write.table(variableMetadata, sep="\t", quote=FALSE, row.names=FALSE, file=variableMetadataOutput)
167 223 return(variableMetadata);
168 rownames(variableMetadata) = NULL 224
169 #TODO: checker 225 }
170 #colnames(variableMetadata)[1:2] = c("name","mz/rt");
171
172 #If the user want to convert the retention times (seconds) into minutes.
173 if (listArguments[["convert_param"]]){
174 #converting the retention times (seconds) into minutes
175 cat("\tConverting the retention times into minutes\n")
176 variableMetadata$rtmed=cAnnot$rt/60; variableMetadata$rtmin=cAnnot$rtmin/60; variableMetadata$rtmax=cAnnot$rtmax/60;
177 }
178
179 #If the user want to keep only the metabolites which match a difference
180 if(keep_meta){
181 variableMetadata=variableMetadata[variableMetadata[,c(mode)]!="",]
182 }
183
184 #Write the output into a tsv file
185 write.table(variableMetadata, sep="\t", quote=FALSE, row.names=FALSE, file=variableMetadataOutput)
186 return(variableMetadata);
187
188 }