Mercurial > repos > lecorguille > xcms_retcor
comparison lib.r @ 15:c04568596f40 draft
planemo upload for repository https://github.com/workflow4metabolomics/xcms commit 08e7f269a5c59687a7768be8db5fcb4e4d736093
author | lecorguille |
---|---|
date | Mon, 30 Jan 2017 08:51:40 -0500 |
parents | 55923d170538 |
children | 20a75ba4345b |
comparison
equal
deleted
inserted
replaced
14:55923d170538 | 15:c04568596f40 |
---|---|
1 # lib.r version="2.3" | |
2 #Authors ABiMS TEAM | 1 #Authors ABiMS TEAM |
3 #Lib.r for Galaxy Workflow4Metabo | 2 #Lib.r for Galaxy Workflow4Metabolomics xcms tools |
4 #version 2.3 | 3 # |
5 #Based on lib.r 2.1 | 4 #version 2.4: lecorguille |
6 #Modifications made by Guitton Yann | 5 # add getPeaklistW4M |
7 #2.3 Note | 6 #version 2.3: yguitton |
8 #correction for empty PDF when only 1 class | 7 # correction for empty PDF when only 1 class |
9 #2.2 Note | 8 #version 2.2 |
10 #correct bug in Base Peak Chromatogram (BPC) option, not only TIC when scanrange used in xcmsSet | 9 # correct bug in Base Peak Chromatogram (BPC) option, not only TIC when scanrange used in xcmsSet |
11 #Note if scanrange is used a warning is prompted in R console but do not stop PDF generation | 10 # Note if scanrange is used a warning is prompted in R console but do not stop PDF generation |
12 | 11 #version 2.1: yguitton |
13 | 12 # Modifications made by Guitton Yann |
14 | 13 |
14 | |
15 #@author G. Le Corguille | |
16 #This function convert if it is required the Retention Time in minutes | |
17 RTSecondToMinute <- function(variableMetadata, convertRTMinute) { | |
18 if (convertRTMinute){ | |
19 #converting the retention times (seconds) into minutes | |
20 print("converting the retention times into minutes in the variableMetadata") | |
21 variableMetadata[,"rt"]=variableMetadata[,"rt"]/60 | |
22 variableMetadata[,"rtmin"]=variableMetadata[,"rtmin"]/60 | |
23 variableMetadata[,"rtmax"]=variableMetadata[,"rtmax"]/60 | |
24 } | |
25 return (variableMetadata) | |
26 } | |
27 | |
28 #@author G. Le Corguille | |
29 #This function format ions identifiers | |
30 formatIonIdentifiers <- function(dataData, numDigitsRT=0, numDigitsMZ=0) { | |
31 return(make.unique(paste0("M",round(dataData[,"mz"],numDigitsMZ),"T",round(dataData[,"rt"],numDigitsRT)))) | |
32 } | |
33 | |
34 #@author G. Le Corguille | |
35 # value: intensity values to be used into, maxo or intb | |
36 getPeaklistW4M <- function(xset, intval="into",convertRTMinute=F,numDigitsMZ=4,numDigitsRT=0,variableMetadataOutput,dataMatrixOutput) { | |
37 groups <- xset@groups | |
38 values <- groupval(xset, "medret", value=intval) | |
39 | |
40 # renamming of the column rtmed to rt to fit with camera peaklist function output | |
41 colnames(groups)[colnames(groups)=="rtmed"] <- "rt" | |
42 colnames(groups)[colnames(groups)=="mzmed"] <- "mz" | |
43 | |
44 ids <- formatIonIdentifiers(groups, numDigitsRT=numDigitsRT, numDigitsMZ=numDigitsMZ) | |
45 groups = RTSecondToMinute(groups, convertRTMinute) | |
46 | |
47 rownames(groups) = ids | |
48 rownames(values) = ids | |
49 | |
50 #@TODO: add "name" as the first column name | |
51 #colnames(groups)[1] = "name" | |
52 #colnames(values)[1] = "name" | |
53 | |
54 write.table(groups, file=variableMetadataOutput,sep="\t",quote=F,row.names = T,col.names = NA) | |
55 write.table(values, file=dataMatrixOutput,sep="\t",quote=F,row.names = T,col.names = NA) | |
56 } | |
15 | 57 |
16 #@author Y. Guitton | 58 #@author Y. Guitton |
17 getBPC <- function(file,rtcor=NULL, ...) { | 59 getBPC <- function(file,rtcor=NULL, ...) { |
18 object <- xcmsRaw(file) | 60 object <- xcmsRaw(file) |
19 sel <- profRange(object, ...) | 61 sel <- profRange(object, ...) |
45 | 87 |
46 | 88 |
47 for (j in 1:N) { | 89 for (j in 1:N) { |
48 | 90 |
49 TIC[[j]] <- getBPC(files[j]) | 91 TIC[[j]] <- getBPC(files[j]) |
50 #good for raw | 92 #good for raw |
51 # seems strange for corrected | 93 # seems strange for corrected |
52 #errors if scanrange used in xcmsSetgeneration | 94 #errors if scanrange used in xcmsSetgeneration |
53 if (!is.null(xcmsSet) && rt == "corrected") | 95 if (!is.null(xcmsSet) && rt == "corrected") |
54 rtcor <- xcmsSet@rt$corrected[[j]] else | 96 rtcor <- xcmsSet@rt$corrected[[j]] else |
55 rtcor <- NULL | 97 rtcor <- NULL |
56 | 98 |
57 TIC[[j]] <- getBPC(files[j],rtcor=rtcor) | 99 TIC[[j]] <- getBPC(files[j],rtcor=rtcor) |
58 # TIC[[j]][,1]<-rtcor | 100 # TIC[[j]][,1]<-rtcor |
59 } | 101 } |
60 | 102 |
61 | 103 |
69 ylim = range(sapply(TIC, function(x) range(x[,2]))) | 111 ylim = range(sapply(TIC, function(x) range(x[,2]))) |
70 ylim = c(-ylim[2], ylim[2]) | 112 ylim = c(-ylim[2], ylim[2]) |
71 | 113 |
72 | 114 |
73 ##plot start | 115 ##plot start |
74 | 116 |
75 if (length(class)>2){ | 117 if (length(class)>2){ |
76 for (k in 1:(length(class)-1)){ | 118 for (k in 1:(length(class)-1)){ |
77 for (l in (k+1):length(class)){ | 119 for (l in (k+1):length(class)){ |
78 #print(paste(class[k],"vs",class[l],sep=" ")) | 120 #print(paste(class[k],"vs",class[l],sep=" ")) |
79 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",class[k]," vs ",class[l], sep=""), xlab = "Retention Time (min)", ylab = "BPC") | 121 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",class[k]," vs ",class[l], sep=""), xlab = "Retention Time (min)", ylab = "BPC") |
80 colvect<-NULL | 122 colvect<-NULL |
81 for (j in 1:length(classnames[[k]])) { | 123 for (j in 1:length(classnames[[k]])) { |
82 tic <- TIC[[classnames[[k]][j]]] | 124 tic <- TIC[[classnames[[k]][j]]] |
83 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") | 125 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") |
115 colvect<-append(colvect,cols[classnames[[l]][j]]) | 157 colvect<-append(colvect,cols[classnames[[l]][j]]) |
116 } | 158 } |
117 legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) | 159 legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) |
118 | 160 |
119 }#end length ==2 | 161 }#end length ==2 |
120 | 162 |
121 #case where only one class | 163 #case where only one class |
122 if (length(class)==1){ | 164 if (length(class)==1){ |
123 k=1 | 165 k=1 |
124 ylim = range(sapply(TIC, function(x) range(x[,2]))) | 166 ylim = range(sapply(TIC, function(x) range(x[,2]))) |
125 colvect<-NULL | 167 colvect<-NULL |
129 tic <- TIC[[classnames[[k]][j]]] | 171 tic <- TIC[[classnames[[k]][j]]] |
130 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") | 172 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") |
131 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") | 173 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") |
132 colvect<-append(colvect,cols[classnames[[k]][j]]) | 174 colvect<-append(colvect,cols[classnames[[k]][j]]) |
133 } | 175 } |
134 | 176 |
135 legend("topright",paste(basename(files[c(classnames[[k]])])), col = colvect, lty = lty, pch = pch) | 177 legend("topright",paste(basename(files[c(classnames[[k]])])), col = colvect, lty = lty, pch = pch) |
136 | 178 |
137 }#end length ==1 | 179 }#end length ==1 |
138 | 180 |
139 dev.off() #pdf(pdfname,w=16,h=10) | 181 dev.off() #pdf(pdfname,w=16,h=10) |
140 | 182 |
141 invisible(TIC) | 183 invisible(TIC) |
142 } | 184 } |
143 | 185 |
172 | 214 |
173 classnames<-vector("list",length(class)) | 215 classnames<-vector("list",length(class)) |
174 for (i in 1:length(class)){ | 216 for (i in 1:length(class)){ |
175 classnames[[i]]<-which( xcmsSet@phenoData[,1]==class[i]) | 217 classnames[[i]]<-which( xcmsSet@phenoData[,1]==class[i]) |
176 } | 218 } |
177 | 219 |
178 N <- length(files) | 220 N <- length(files) |
179 TIC <- vector("list",N) | 221 TIC <- vector("list",N) |
180 | 222 |
181 for (i in 1:N) { | 223 for (i in 1:N) { |
182 if (!is.null(xcmsSet) && rt == "corrected") | 224 if (!is.null(xcmsSet) && rt == "corrected") |
197 | 239 |
198 ##plot start | 240 ##plot start |
199 if (length(class)>2){ | 241 if (length(class)>2){ |
200 for (k in 1:(length(class)-1)){ | 242 for (k in 1:(length(class)-1)){ |
201 for (l in (k+1):length(class)){ | 243 for (l in (k+1):length(class)){ |
202 #print(paste(class[k],"vs",class[l],sep=" ")) | 244 #print(paste(class[k],"vs",class[l],sep=" ")) |
203 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",class[k]," vs ",class[l], sep=""), xlab = "Retention Time (min)", ylab = "TIC") | 245 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",class[k]," vs ",class[l], sep=""), xlab = "Retention Time (min)", ylab = "TIC") |
204 colvect<-NULL | 246 colvect<-NULL |
205 for (j in 1:length(classnames[[k]])) { | 247 for (j in 1:length(classnames[[k]])) { |
206 | 248 |
207 tic <- TIC[[classnames[[k]][j]]] | 249 tic <- TIC[[classnames[[k]][j]]] |
238 colvect<-append(colvect,cols[classnames[[l]][j]]) | 280 colvect<-append(colvect,cols[classnames[[l]][j]]) |
239 } | 281 } |
240 legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) | 282 legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) |
241 | 283 |
242 }#end length ==2 | 284 }#end length ==2 |
243 | 285 |
244 #case where only one class | 286 #case where only one class |
245 if (length(class)==1){ | 287 if (length(class)==1){ |
246 k=1 | 288 k=1 |
247 ylim = range(sapply(TIC, function(x) range(x[,2]))) | 289 ylim = range(sapply(TIC, function(x) range(x[,2]))) |
248 | 290 |
249 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",class[k], sep=""), xlab = "Retention Time (min)", ylab = "TIC") | 291 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",class[k], sep=""), xlab = "Retention Time (min)", ylab = "TIC") |
250 colvect<-NULL | 292 colvect<-NULL |
251 for (j in 1:length(classnames[[k]])) { | 293 for (j in 1:length(classnames[[k]])) { |
252 tic <- TIC[[classnames[[k]][j]]] | 294 tic <- TIC[[classnames[[k]][j]]] |
253 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") | 295 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") |
254 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") | 296 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") |
255 colvect<-append(colvect,cols[classnames[[k]][j]]) | 297 colvect<-append(colvect,cols[classnames[[k]][j]]) |
256 } | 298 } |
257 | 299 |
258 legend("topright",paste(basename(files[c(classnames[[k]])])), col = colvect, lty = lty, pch = pch) | 300 legend("topright",paste(basename(files[c(classnames[[k]])])), col = colvect, lty = lty, pch = pch) |
259 | 301 |
260 }#end length ==1 | 302 }#end length ==1 |
261 | 303 |
262 dev.off() #pdf(pdfname,w=16,h=10) | 304 dev.off() #pdf(pdfname,w=16,h=10) |
263 | 305 |
264 invisible(TIC) | 306 invisible(TIC) |
265 } | 307 } |
266 | 308 |
275 | 317 |
276 #Create the sampleMetada dataframe | 318 #Create the sampleMetada dataframe |
277 sampleMetadata=xset@phenoData | 319 sampleMetadata=xset@phenoData |
278 sampleNamesOrigin=rownames(sampleMetadata) | 320 sampleNamesOrigin=rownames(sampleMetadata) |
279 sampleNamesMakeNames=make.names(sampleNamesOrigin) | 321 sampleNamesMakeNames=make.names(sampleNamesOrigin) |
280 | 322 |
281 if (any(duplicated(sampleNamesMakeNames))) { | 323 if (any(duplicated(sampleNamesMakeNames))) { |
282 write("\n\nERROR: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names().\nIn your case, at least two columns after the renaming obtain the same name, thus XCMS will collapse those columns per name.", stderr()) | 324 write("\n\nERROR: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names().\nIn your case, at least two columns after the renaming obtain the same name, thus XCMS will collapse those columns per name.", stderr()) |
283 for (sampleName in sampleNamesOrigin) { | 325 for (sampleName in sampleNamesOrigin) { |
284 write(paste(sampleName,"\t->\t",make.names(sampleName)),stderr()) | 326 write(paste(sampleName,"\t->\t",make.names(sampleName)),stderr()) |
285 } | 327 } |
323 library(tools) | 365 library(tools) |
324 samplename=file_path_sans_ext(filename) | 366 samplename=file_path_sans_ext(filename) |
325 | 367 |
326 #Set the polarity attribute | 368 #Set the polarity attribute |
327 sampleMetadata$polarity[sampleMetadata$sampleMetadata==samplename]=polarity | 369 sampleMetadata$polarity[sampleMetadata$sampleMetadata==samplename]=polarity |
328 | 370 |
329 #Delete xcmsRaw object because it creates a bug for the fillpeaks step | 371 #Delete xcmsRaw object because it creates a bug for the fillpeaks step |
330 rm(xcmsRaw) | 372 rm(xcmsRaw) |
331 } | 373 } |
332 | 374 |
333 } | 375 } |
359 # WHAT IS ON THE FILESYSTEM | 401 # WHAT IS ON THE FILESYSTEM |
360 filesystem_filepaths=system(paste("find $PWD/",directory," -not -name '\\.*' -not -path '*conda-env*' -type f -name \"*\"", sep=""), intern=T) | 402 filesystem_filepaths=system(paste("find $PWD/",directory," -not -name '\\.*' -not -path '*conda-env*' -type f -name \"*\"", sep=""), intern=T) |
361 filesystem_filepaths=filesystem_filepaths[grep(filepattern, filesystem_filepaths, perl=T)] | 403 filesystem_filepaths=filesystem_filepaths[grep(filepattern, filesystem_filepaths, perl=T)] |
362 | 404 |
363 # COMPARISON | 405 # COMPARISON |
364 if (!is.na(table(filesystem_filepaths %in% files)["FALSE"])) { | 406 if (!is.na(table(filesystem_filepaths %in% files)["FALSE"])) { |
365 write("\n\nERROR: List of the files which will not be imported by xcmsSet",stderr()) | 407 write("\n\nERROR: List of the files which will not be imported by xcmsSet",stderr()) |
366 write(filesystem_filepaths[!(filesystem_filepaths %in% files)],stderr()) | 408 write(filesystem_filepaths[!(filesystem_filepaths %in% files)],stderr()) |
367 stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.") | 409 stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.") |
368 | 410 |
369 } | 411 } |
385 #message=paste("The following mzXML or mzML file is incorrect, please check these files first:",capture) | 427 #message=paste("The following mzXML or mzML file is incorrect, please check these files first:",capture) |
386 write("\n\nERROR: The following mzXML or mzML file(s) are incorrect, please check these files first:", stderr()) | 428 write("\n\nERROR: The following mzXML or mzML file(s) are incorrect, please check these files first:", stderr()) |
387 write(capture, stderr()) | 429 write(capture, stderr()) |
388 stop("ERROR: xcmsSet cannot continue with incorrect mzXML or mzML files") | 430 stop("ERROR: xcmsSet cannot continue with incorrect mzXML or mzML files") |
389 } | 431 } |
390 | 432 |
391 } | 433 } |
392 | 434 |
393 | 435 |
394 ## | 436 ## |
395 ## This function check if XML contain special characters | 437 ## This function check if XML contain special characters |
397 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM | 439 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM |
398 deleteXmlBadCharacters<- function (directory) { | 440 deleteXmlBadCharacters<- function (directory) { |
399 cat("Checking Non ASCII characters in the XML...\n") | 441 cat("Checking Non ASCII characters in the XML...\n") |
400 | 442 |
401 processed=F | 443 processed=F |
402 l=system( paste("find",directory, "-not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'"),intern=TRUE) | 444 l=system( paste("find",directory, "-not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'"),intern=TRUE) |
403 for (i in l){ | 445 for (i in l){ |
404 cmd=paste("LC_ALL=C grep '[^ -~]' \"",i,"\"",sep="") | 446 cmd=paste("LC_ALL=C grep '[^ -~]' \"",i,"\"",sep="") |
405 capture=suppressWarnings(system(cmd,intern=TRUE)) | 447 capture=suppressWarnings(system(cmd,intern=TRUE)) |
406 if (length(capture)>0){ | 448 if (length(capture)>0){ |
407 cmd=paste("perl -i -pe 's/[^[:ascii:]]//g;'",i) | 449 cmd=paste("perl -i -pe 's/[^[:ascii:]]//g;'",i) |
408 print( paste("WARNING: Non ASCII characters have been removed from the ",i,"file") ) | 450 print( paste("WARNING: Non ASCII characters have been removed from the ",i,"file") ) |
409 c=system(cmd,intern=TRUE) | 451 c=system(cmd,intern=TRUE) |
410 capture="" | 452 capture="" |
411 processed=T | 453 processed=T |
412 } | 454 } |
413 } | 455 } |
414 if (processed) cat("\n\n") | 456 if (processed) cat("\n\n") |
415 return(processed) | 457 return(processed) |
416 } | 458 } |
417 | 459 |
418 | 460 |
419 ## | 461 ## |
420 ## This function will compute MD5 checksum to check the data integrity | 462 ## This function will compute MD5 checksum to check the data integrity |
421 ## | 463 ## |
422 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr | 464 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr |
423 getMd5sum <- function (directory) { | 465 getMd5sum <- function (directory) { |
424 cat("Compute md5 checksum...\n") | 466 cat("Compute md5 checksum...\n") |
435 | 477 |
436 #cat("\n\n") | 478 #cat("\n\n") |
437 | 479 |
438 return(as.matrix(md5sum(files))) | 480 return(as.matrix(md5sum(files))) |
439 } | 481 } |
440 |