comparison lib.r @ 15:c04568596f40 draft

planemo upload for repository https://github.com/workflow4metabolomics/xcms commit 08e7f269a5c59687a7768be8db5fcb4e4d736093
author lecorguille
date Mon, 30 Jan 2017 08:51:40 -0500
parents 55923d170538
children 20a75ba4345b
comparison
equal deleted inserted replaced
14:55923d170538 15:c04568596f40
1 # lib.r version="2.3"
2 #Authors ABiMS TEAM 1 #Authors ABiMS TEAM
3 #Lib.r for Galaxy Workflow4Metabo 2 #Lib.r for Galaxy Workflow4Metabolomics xcms tools
4 #version 2.3 3 #
5 #Based on lib.r 2.1 4 #version 2.4: lecorguille
6 #Modifications made by Guitton Yann 5 # add getPeaklistW4M
7 #2.3 Note 6 #version 2.3: yguitton
8 #correction for empty PDF when only 1 class 7 # correction for empty PDF when only 1 class
9 #2.2 Note 8 #version 2.2
10 #correct bug in Base Peak Chromatogram (BPC) option, not only TIC when scanrange used in xcmsSet 9 # correct bug in Base Peak Chromatogram (BPC) option, not only TIC when scanrange used in xcmsSet
11 #Note if scanrange is used a warning is prompted in R console but do not stop PDF generation 10 # Note if scanrange is used a warning is prompted in R console but do not stop PDF generation
12 11 #version 2.1: yguitton
13 12 # Modifications made by Guitton Yann
14 13
14
15 #@author G. Le Corguille
16 #This function convert if it is required the Retention Time in minutes
17 RTSecondToMinute <- function(variableMetadata, convertRTMinute) {
18 if (convertRTMinute){
19 #converting the retention times (seconds) into minutes
20 print("converting the retention times into minutes in the variableMetadata")
21 variableMetadata[,"rt"]=variableMetadata[,"rt"]/60
22 variableMetadata[,"rtmin"]=variableMetadata[,"rtmin"]/60
23 variableMetadata[,"rtmax"]=variableMetadata[,"rtmax"]/60
24 }
25 return (variableMetadata)
26 }
27
28 #@author G. Le Corguille
29 #This function format ions identifiers
30 formatIonIdentifiers <- function(dataData, numDigitsRT=0, numDigitsMZ=0) {
31 return(make.unique(paste0("M",round(dataData[,"mz"],numDigitsMZ),"T",round(dataData[,"rt"],numDigitsRT))))
32 }
33
34 #@author G. Le Corguille
35 # value: intensity values to be used into, maxo or intb
36 getPeaklistW4M <- function(xset, intval="into",convertRTMinute=F,numDigitsMZ=4,numDigitsRT=0,variableMetadataOutput,dataMatrixOutput) {
37 groups <- xset@groups
38 values <- groupval(xset, "medret", value=intval)
39
40 # renamming of the column rtmed to rt to fit with camera peaklist function output
41 colnames(groups)[colnames(groups)=="rtmed"] <- "rt"
42 colnames(groups)[colnames(groups)=="mzmed"] <- "mz"
43
44 ids <- formatIonIdentifiers(groups, numDigitsRT=numDigitsRT, numDigitsMZ=numDigitsMZ)
45 groups = RTSecondToMinute(groups, convertRTMinute)
46
47 rownames(groups) = ids
48 rownames(values) = ids
49
50 #@TODO: add "name" as the first column name
51 #colnames(groups)[1] = "name"
52 #colnames(values)[1] = "name"
53
54 write.table(groups, file=variableMetadataOutput,sep="\t",quote=F,row.names = T,col.names = NA)
55 write.table(values, file=dataMatrixOutput,sep="\t",quote=F,row.names = T,col.names = NA)
56 }
15 57
16 #@author Y. Guitton 58 #@author Y. Guitton
17 getBPC <- function(file,rtcor=NULL, ...) { 59 getBPC <- function(file,rtcor=NULL, ...) {
18 object <- xcmsRaw(file) 60 object <- xcmsRaw(file)
19 sel <- profRange(object, ...) 61 sel <- profRange(object, ...)
45 87
46 88
47 for (j in 1:N) { 89 for (j in 1:N) {
48 90
49 TIC[[j]] <- getBPC(files[j]) 91 TIC[[j]] <- getBPC(files[j])
50 #good for raw 92 #good for raw
51 # seems strange for corrected 93 # seems strange for corrected
52 #errors if scanrange used in xcmsSetgeneration 94 #errors if scanrange used in xcmsSetgeneration
53 if (!is.null(xcmsSet) && rt == "corrected") 95 if (!is.null(xcmsSet) && rt == "corrected")
54 rtcor <- xcmsSet@rt$corrected[[j]] else 96 rtcor <- xcmsSet@rt$corrected[[j]] else
55 rtcor <- NULL 97 rtcor <- NULL
56 98
57 TIC[[j]] <- getBPC(files[j],rtcor=rtcor) 99 TIC[[j]] <- getBPC(files[j],rtcor=rtcor)
58 # TIC[[j]][,1]<-rtcor 100 # TIC[[j]][,1]<-rtcor
59 } 101 }
60 102
61 103
69 ylim = range(sapply(TIC, function(x) range(x[,2]))) 111 ylim = range(sapply(TIC, function(x) range(x[,2])))
70 ylim = c(-ylim[2], ylim[2]) 112 ylim = c(-ylim[2], ylim[2])
71 113
72 114
73 ##plot start 115 ##plot start
74 116
75 if (length(class)>2){ 117 if (length(class)>2){
76 for (k in 1:(length(class)-1)){ 118 for (k in 1:(length(class)-1)){
77 for (l in (k+1):length(class)){ 119 for (l in (k+1):length(class)){
78 #print(paste(class[k],"vs",class[l],sep=" ")) 120 #print(paste(class[k],"vs",class[l],sep=" "))
79 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",class[k]," vs ",class[l], sep=""), xlab = "Retention Time (min)", ylab = "BPC") 121 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",class[k]," vs ",class[l], sep=""), xlab = "Retention Time (min)", ylab = "BPC")
80 colvect<-NULL 122 colvect<-NULL
81 for (j in 1:length(classnames[[k]])) { 123 for (j in 1:length(classnames[[k]])) {
82 tic <- TIC[[classnames[[k]][j]]] 124 tic <- TIC[[classnames[[k]][j]]]
83 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") 125 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l")
115 colvect<-append(colvect,cols[classnames[[l]][j]]) 157 colvect<-append(colvect,cols[classnames[[l]][j]])
116 } 158 }
117 legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) 159 legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch)
118 160
119 }#end length ==2 161 }#end length ==2
120 162
121 #case where only one class 163 #case where only one class
122 if (length(class)==1){ 164 if (length(class)==1){
123 k=1 165 k=1
124 ylim = range(sapply(TIC, function(x) range(x[,2]))) 166 ylim = range(sapply(TIC, function(x) range(x[,2])))
125 colvect<-NULL 167 colvect<-NULL
129 tic <- TIC[[classnames[[k]][j]]] 171 tic <- TIC[[classnames[[k]][j]]]
130 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") 172 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l")
131 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") 173 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l")
132 colvect<-append(colvect,cols[classnames[[k]][j]]) 174 colvect<-append(colvect,cols[classnames[[k]][j]])
133 } 175 }
134 176
135 legend("topright",paste(basename(files[c(classnames[[k]])])), col = colvect, lty = lty, pch = pch) 177 legend("topright",paste(basename(files[c(classnames[[k]])])), col = colvect, lty = lty, pch = pch)
136 178
137 }#end length ==1 179 }#end length ==1
138 180
139 dev.off() #pdf(pdfname,w=16,h=10) 181 dev.off() #pdf(pdfname,w=16,h=10)
140 182
141 invisible(TIC) 183 invisible(TIC)
142 } 184 }
143 185
172 214
173 classnames<-vector("list",length(class)) 215 classnames<-vector("list",length(class))
174 for (i in 1:length(class)){ 216 for (i in 1:length(class)){
175 classnames[[i]]<-which( xcmsSet@phenoData[,1]==class[i]) 217 classnames[[i]]<-which( xcmsSet@phenoData[,1]==class[i])
176 } 218 }
177 219
178 N <- length(files) 220 N <- length(files)
179 TIC <- vector("list",N) 221 TIC <- vector("list",N)
180 222
181 for (i in 1:N) { 223 for (i in 1:N) {
182 if (!is.null(xcmsSet) && rt == "corrected") 224 if (!is.null(xcmsSet) && rt == "corrected")
197 239
198 ##plot start 240 ##plot start
199 if (length(class)>2){ 241 if (length(class)>2){
200 for (k in 1:(length(class)-1)){ 242 for (k in 1:(length(class)-1)){
201 for (l in (k+1):length(class)){ 243 for (l in (k+1):length(class)){
202 #print(paste(class[k],"vs",class[l],sep=" ")) 244 #print(paste(class[k],"vs",class[l],sep=" "))
203 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",class[k]," vs ",class[l], sep=""), xlab = "Retention Time (min)", ylab = "TIC") 245 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",class[k]," vs ",class[l], sep=""), xlab = "Retention Time (min)", ylab = "TIC")
204 colvect<-NULL 246 colvect<-NULL
205 for (j in 1:length(classnames[[k]])) { 247 for (j in 1:length(classnames[[k]])) {
206 248
207 tic <- TIC[[classnames[[k]][j]]] 249 tic <- TIC[[classnames[[k]][j]]]
238 colvect<-append(colvect,cols[classnames[[l]][j]]) 280 colvect<-append(colvect,cols[classnames[[l]][j]])
239 } 281 }
240 legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) 282 legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch)
241 283
242 }#end length ==2 284 }#end length ==2
243 285
244 #case where only one class 286 #case where only one class
245 if (length(class)==1){ 287 if (length(class)==1){
246 k=1 288 k=1
247 ylim = range(sapply(TIC, function(x) range(x[,2]))) 289 ylim = range(sapply(TIC, function(x) range(x[,2])))
248 290
249 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",class[k], sep=""), xlab = "Retention Time (min)", ylab = "TIC") 291 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",class[k], sep=""), xlab = "Retention Time (min)", ylab = "TIC")
250 colvect<-NULL 292 colvect<-NULL
251 for (j in 1:length(classnames[[k]])) { 293 for (j in 1:length(classnames[[k]])) {
252 tic <- TIC[[classnames[[k]][j]]] 294 tic <- TIC[[classnames[[k]][j]]]
253 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") 295 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l")
254 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") 296 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l")
255 colvect<-append(colvect,cols[classnames[[k]][j]]) 297 colvect<-append(colvect,cols[classnames[[k]][j]])
256 } 298 }
257 299
258 legend("topright",paste(basename(files[c(classnames[[k]])])), col = colvect, lty = lty, pch = pch) 300 legend("topright",paste(basename(files[c(classnames[[k]])])), col = colvect, lty = lty, pch = pch)
259 301
260 }#end length ==1 302 }#end length ==1
261 303
262 dev.off() #pdf(pdfname,w=16,h=10) 304 dev.off() #pdf(pdfname,w=16,h=10)
263 305
264 invisible(TIC) 306 invisible(TIC)
265 } 307 }
266 308
275 317
276 #Create the sampleMetada dataframe 318 #Create the sampleMetada dataframe
277 sampleMetadata=xset@phenoData 319 sampleMetadata=xset@phenoData
278 sampleNamesOrigin=rownames(sampleMetadata) 320 sampleNamesOrigin=rownames(sampleMetadata)
279 sampleNamesMakeNames=make.names(sampleNamesOrigin) 321 sampleNamesMakeNames=make.names(sampleNamesOrigin)
280 322
281 if (any(duplicated(sampleNamesMakeNames))) { 323 if (any(duplicated(sampleNamesMakeNames))) {
282 write("\n\nERROR: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names().\nIn your case, at least two columns after the renaming obtain the same name, thus XCMS will collapse those columns per name.", stderr()) 324 write("\n\nERROR: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names().\nIn your case, at least two columns after the renaming obtain the same name, thus XCMS will collapse those columns per name.", stderr())
283 for (sampleName in sampleNamesOrigin) { 325 for (sampleName in sampleNamesOrigin) {
284 write(paste(sampleName,"\t->\t",make.names(sampleName)),stderr()) 326 write(paste(sampleName,"\t->\t",make.names(sampleName)),stderr())
285 } 327 }
323 library(tools) 365 library(tools)
324 samplename=file_path_sans_ext(filename) 366 samplename=file_path_sans_ext(filename)
325 367
326 #Set the polarity attribute 368 #Set the polarity attribute
327 sampleMetadata$polarity[sampleMetadata$sampleMetadata==samplename]=polarity 369 sampleMetadata$polarity[sampleMetadata$sampleMetadata==samplename]=polarity
328 370
329 #Delete xcmsRaw object because it creates a bug for the fillpeaks step 371 #Delete xcmsRaw object because it creates a bug for the fillpeaks step
330 rm(xcmsRaw) 372 rm(xcmsRaw)
331 } 373 }
332 374
333 } 375 }
359 # WHAT IS ON THE FILESYSTEM 401 # WHAT IS ON THE FILESYSTEM
360 filesystem_filepaths=system(paste("find $PWD/",directory," -not -name '\\.*' -not -path '*conda-env*' -type f -name \"*\"", sep=""), intern=T) 402 filesystem_filepaths=system(paste("find $PWD/",directory," -not -name '\\.*' -not -path '*conda-env*' -type f -name \"*\"", sep=""), intern=T)
361 filesystem_filepaths=filesystem_filepaths[grep(filepattern, filesystem_filepaths, perl=T)] 403 filesystem_filepaths=filesystem_filepaths[grep(filepattern, filesystem_filepaths, perl=T)]
362 404
363 # COMPARISON 405 # COMPARISON
364 if (!is.na(table(filesystem_filepaths %in% files)["FALSE"])) { 406 if (!is.na(table(filesystem_filepaths %in% files)["FALSE"])) {
365 write("\n\nERROR: List of the files which will not be imported by xcmsSet",stderr()) 407 write("\n\nERROR: List of the files which will not be imported by xcmsSet",stderr())
366 write(filesystem_filepaths[!(filesystem_filepaths %in% files)],stderr()) 408 write(filesystem_filepaths[!(filesystem_filepaths %in% files)],stderr())
367 stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.") 409 stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.")
368 410
369 } 411 }
385 #message=paste("The following mzXML or mzML file is incorrect, please check these files first:",capture) 427 #message=paste("The following mzXML or mzML file is incorrect, please check these files first:",capture)
386 write("\n\nERROR: The following mzXML or mzML file(s) are incorrect, please check these files first:", stderr()) 428 write("\n\nERROR: The following mzXML or mzML file(s) are incorrect, please check these files first:", stderr())
387 write(capture, stderr()) 429 write(capture, stderr())
388 stop("ERROR: xcmsSet cannot continue with incorrect mzXML or mzML files") 430 stop("ERROR: xcmsSet cannot continue with incorrect mzXML or mzML files")
389 } 431 }
390 432
391 } 433 }
392 434
393 435
394 ## 436 ##
395 ## This function check if XML contain special characters 437 ## This function check if XML contain special characters
397 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM 439 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM
398 deleteXmlBadCharacters<- function (directory) { 440 deleteXmlBadCharacters<- function (directory) {
399 cat("Checking Non ASCII characters in the XML...\n") 441 cat("Checking Non ASCII characters in the XML...\n")
400 442
401 processed=F 443 processed=F
402 l=system( paste("find",directory, "-not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'"),intern=TRUE) 444 l=system( paste("find",directory, "-not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'"),intern=TRUE)
403 for (i in l){ 445 for (i in l){
404 cmd=paste("LC_ALL=C grep '[^ -~]' \"",i,"\"",sep="") 446 cmd=paste("LC_ALL=C grep '[^ -~]' \"",i,"\"",sep="")
405 capture=suppressWarnings(system(cmd,intern=TRUE)) 447 capture=suppressWarnings(system(cmd,intern=TRUE))
406 if (length(capture)>0){ 448 if (length(capture)>0){
407 cmd=paste("perl -i -pe 's/[^[:ascii:]]//g;'",i) 449 cmd=paste("perl -i -pe 's/[^[:ascii:]]//g;'",i)
408 print( paste("WARNING: Non ASCII characters have been removed from the ",i,"file") ) 450 print( paste("WARNING: Non ASCII characters have been removed from the ",i,"file") )
409 c=system(cmd,intern=TRUE) 451 c=system(cmd,intern=TRUE)
410 capture="" 452 capture=""
411 processed=T 453 processed=T
412 } 454 }
413 } 455 }
414 if (processed) cat("\n\n") 456 if (processed) cat("\n\n")
415 return(processed) 457 return(processed)
416 } 458 }
417 459
418 460
419 ## 461 ##
420 ## This function will compute MD5 checksum to check the data integrity 462 ## This function will compute MD5 checksum to check the data integrity
421 ## 463 ##
422 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr 464 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr
423 getMd5sum <- function (directory) { 465 getMd5sum <- function (directory) {
424 cat("Compute md5 checksum...\n") 466 cat("Compute md5 checksum...\n")
435 477
436 #cat("\n\n") 478 #cat("\n\n")
437 479
438 return(as.matrix(md5sum(files))) 480 return(as.matrix(md5sum(files)))
439 } 481 }
440