comparison lib.r @ 0:3f0a218e2ebc draft default tip

planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a
author lecorguille
date Tue, 03 Apr 2018 11:45:58 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:3f0a218e2ebc
1 #@authors ABiMS TEAM, Y. Guitton
2 # lib.r for Galaxy Workflow4Metabolomics xcms tools
3
4 #@author G. Le Corguille
5 # solve an issue with batch if arguments are logical TRUE/FALSE
6 parseCommandArgs <- function(...) {
7 args <- batch::parseCommandArgs(...)
8 for (key in names(args)) {
9 if (args[key] %in% c("TRUE","FALSE"))
10 args[key] = as.logical(args[key])
11 }
12 return(args)
13 }
14
15 #@author G. Le Corguille
16 # This function will
17 # - load the packages
18 # - display the sessionInfo
19 loadAndDisplayPackages <- function(pkgs) {
20 for(pkg in pkgs) suppressPackageStartupMessages( stopifnot( library(pkg, quietly=TRUE, logical.return=TRUE, character.only=TRUE)))
21
22 sessioninfo = sessionInfo()
23 cat(sessioninfo$R.version$version.string,"\n")
24 cat("Main packages:\n")
25 for (pkg in names(sessioninfo$otherPkgs)) { cat(paste(pkg,packageVersion(pkg)),"\t") }; cat("\n")
26 cat("Other loaded packages:\n")
27 for (pkg in names(sessioninfo$loadedOnly)) { cat(paste(pkg,packageVersion(pkg)),"\t") }; cat("\n")
28 }
29
30 #@author G. Le Corguille
31 # This function merge several xdata into one.
32 mergeXData <- function(args) {
33 for(image in args$images) {
34 load(image)
35 # Handle infiles
36 if (!exists("singlefile")) singlefile <- NULL
37 if (!exists("zipfile")) zipfile <- NULL
38 rawFilePath <- getRawfilePathFromArguments(singlefile, zipfile, args)
39 zipfile <- rawFilePath$zipfile
40 singlefile <- rawFilePath$singlefile
41 retrieveRawfileInTheWorkingDirectory(singlefile, zipfile)
42 if (exists("raw_data")) xdata <- raw_data
43 if (!exists("xdata")) stop("\n\nERROR: The RData doesn't contain any object called 'xdata'. This RData should have been created by an old version of XMCS 2.*")
44 cat(sampleNamesList$sampleNamesOrigin,"\n")
45 if (!exists("xdata_merged")) {
46 xdata_merged <- xdata
47 singlefile_merged <- singlefile
48 md5sumList_merged <- md5sumList
49 sampleNamesList_merged <- sampleNamesList
50 } else {
51 if (is(xdata, "XCMSnExp")) xdata_merged <- c(xdata_merged,xdata)
52 else if (is(xdata, "OnDiskMSnExp")) xdata_merged <- .concatenate_OnDiskMSnExp(xdata_merged,xdata)
53 else stop("\n\nERROR: The RData either a OnDiskMSnExp object called raw_data or a XCMSnExp object called xdata")
54 singlefile_merged <- c(singlefile_merged,singlefile)
55 md5sumList_merged$origin <- rbind(md5sumList_merged$origin,md5sumList$origin)
56 sampleNamesList_merged$sampleNamesOrigin <- c(sampleNamesList_merged$sampleNamesOrigin,sampleNamesList$sampleNamesOrigin)
57 sampleNamesList_merged$sampleNamesMakeNames <- c(sampleNamesList_merged$sampleNamesMakeNames,sampleNamesList$sampleNamesMakeNames)
58 }
59 }
60 rm(image)
61 xdata <- xdata_merged; rm(xdata_merged)
62 singlefile <- singlefile_merged; rm(singlefile_merged)
63 md5sumList <- md5sumList_merged; rm(md5sumList_merged)
64 sampleNamesList <- sampleNamesList_merged; rm(sampleNamesList_merged)
65
66 if (!is.null(args$sampleMetadata)) {
67 cat("\tXSET PHENODATA SETTING...\n")
68 sampleMetadataFile <- args$sampleMetadata
69 sampleMetadata <- getDataFrameFromFile(sampleMetadataFile, header=F)
70 xdata@phenoData@data$sample_group=sampleMetadata$V2[match(xdata@phenoData@data$sample_name,sampleMetadata$V1)]
71
72 if (any(is.na(pData(xdata)$sample_group))) {
73 sample_missing <- pData(xdata)$sample_name[is.na(pData(xdata)$sample_group)]
74 error_message <- paste("Those samples are missing in your sampleMetadata:", paste(sample_missing, collapse=" "))
75 print(error_message)
76 stop(error_message)
77 }
78 }
79 return(list("xdata"=xdata, "singlefile"=singlefile, "md5sumList"=md5sumList,"sampleNamesList"=sampleNamesList))
80 }
81
82 #@author G. Le Corguille
83 # This function convert if it is required the Retention Time in minutes
84 RTSecondToMinute <- function(variableMetadata, convertRTMinute) {
85 if (convertRTMinute){
86 #converting the retention times (seconds) into minutes
87 print("converting the retention times into minutes in the variableMetadata")
88 variableMetadata[,"rt"] <- variableMetadata[,"rt"]/60
89 variableMetadata[,"rtmin"] <- variableMetadata[,"rtmin"]/60
90 variableMetadata[,"rtmax"] <- variableMetadata[,"rtmax"]/60
91 }
92 return (variableMetadata)
93 }
94
95 #@author G. Le Corguille
96 # This function format ions identifiers
97 formatIonIdentifiers <- function(variableMetadata, numDigitsRT=0, numDigitsMZ=0) {
98 splitDeco <- strsplit(as.character(variableMetadata$name),"_")
99 idsDeco <- sapply(splitDeco, function(x) { deco=unlist(x)[2]; if (is.na(deco)) return ("") else return(paste0("_",deco)) })
100 namecustom <- make.unique(paste0("M",round(variableMetadata[,"mz"],numDigitsMZ),"T",round(variableMetadata[,"rt"],numDigitsRT),idsDeco))
101 variableMetadata <- cbind(name=variableMetadata$name, namecustom=namecustom, variableMetadata[,!(colnames(variableMetadata) %in% c("name"))])
102 return(variableMetadata)
103 }
104
105 #@author G. Le Corguille
106 # Draw the plotChromPeakDensity 3 per page in a pdf file
107 getPlotChromPeakDensity <- function(xdata, mzdigit=4) {
108 pdf(file="plotChromPeakDensity.pdf", width=16, height=12)
109
110 par(mfrow = c(3, 1), mar = c(4, 4, 1, 0.5))
111
112 group_colors <- brewer.pal(3, "Set1")[1:length(unique(xdata$sample_group))]
113 names(group_colors) <- unique(xdata$sample_group)
114
115 xlim <- c(min(featureDefinitions(xdata)$rtmin), max(featureDefinitions(xdata)$rtmax))
116 for (i in 1:nrow(featureDefinitions(xdata))) {
117 mzmin = featureDefinitions(xdata)[i,]$mzmin
118 mzmax = featureDefinitions(xdata)[i,]$mzmax
119 plotChromPeakDensity(xdata, mz=c(mzmin,mzmax), col=group_colors, pch=16, xlim=xlim, main=paste(round(mzmin,mzdigit),round(mzmax,mzdigit)))
120 legend("topright", legend=names(group_colors), col=group_colors, cex=0.8, lty=1)
121 }
122
123 dev.off()
124 }
125
126 #@author G. Le Corguille
127 # Draw the plotChromPeakDensity 3 per page in a pdf file
128 getPlotAdjustedRtime <- function(xdata) {
129
130 pdf(file="raw_vs_adjusted_rt.pdf", width=16, height=12)
131
132 # Color by group
133 group_colors <- brewer.pal(3, "Set1")[1:length(unique(xdata$sample_group))]
134 if (length(group_colors) > 1) {
135 names(group_colors) <- unique(xdata$sample_group)
136 plotAdjustedRtime(xdata, col = group_colors[xdata$sample_group])
137 legend("topright", legend=names(group_colors), col=group_colors, cex=0.8, lty=1)
138 }
139
140 # Color by sample
141 plotAdjustedRtime(xdata, col = rainbow(length(xdata@phenoData@data$sample_name)))
142 legend("topright", legend=xdata@phenoData@data$sample_name, col=rainbow(length(xdata@phenoData@data$sample_name)), cex=0.8, lty=1)
143
144 dev.off()
145 }
146
147 #@author G. Le Corguille
148 # value: intensity values to be used into, maxo or intb
149 getPeaklistW4M <- function(xdata, intval="into", convertRTMinute=F, numDigitsMZ=4, numDigitsRT=0, variableMetadataOutput, dataMatrixOutput) {
150 dataMatrix <- featureValues(xdata, method="medret", value=intval)
151 colnames(dataMatrix) <- tools::file_path_sans_ext(colnames(dataMatrix))
152 dataMatrix = cbind(name=groupnamesW4M(xdata), dataMatrix)
153 variableMetadata <- featureDefinitions(xdata)
154 colnames(variableMetadata)[1] = "mz"; colnames(variableMetadata)[4] = "rt"
155 variableMetadata = data.frame(name=groupnamesW4M(xdata), variableMetadata)
156
157 variableMetadata <- RTSecondToMinute(variableMetadata, convertRTMinute)
158 variableMetadata <- formatIonIdentifiers(variableMetadata, numDigitsRT=numDigitsRT, numDigitsMZ=numDigitsMZ)
159
160 write.table(variableMetadata, file=variableMetadataOutput,sep="\t",quote=F,row.names=F)
161 write.table(dataMatrix, file=dataMatrixOutput,sep="\t",quote=F,row.names=F)
162
163 }
164
165 #@author G. Le Corguille
166 # It allow different of field separators
167 getDataFrameFromFile <- function(filename, header=T) {
168 myDataFrame <- read.table(filename, header=header, sep=";", stringsAsFactors=F)
169 if (ncol(myDataFrame) < 2) myDataFrame <- read.table(filename, header=header, sep="\t", stringsAsFactors=F)
170 if (ncol(myDataFrame) < 2) myDataFrame <- read.table(filename, header=header, sep=",", stringsAsFactors=F)
171 if (ncol(myDataFrame) < 2) {
172 error_message="Your tabular file seems not well formatted. The column separators accepted are ; , and tabulation"
173 print(error_message)
174 stop(error_message)
175 }
176 return(myDataFrame)
177 }
178
179 getPlotChromatogram <- function(xdata, pdfname="Chromatogram.pdf", aggregationFun = "max") {
180
181 chrom <- chromatogram(xdata, aggregationFun = aggregationFun)
182 if (aggregationFun == "sum")
183 type="Total Ion Chromatograms"
184 else
185 type="Base Peak Intensity Chromatograms"
186
187 adjusted="Raw"
188 if (hasAdjustedRtime(xdata))
189 adjusted="Adjusted"
190
191 main <- paste(type,":",adjusted,"data")
192
193 pdf(pdfname, width=16, height=10)
194
195 # Color by group
196 group_colors <- brewer.pal(3, "Set1")[1:length(unique(xdata$sample_group))]
197 if (length(group_colors) > 1) {
198 names(group_colors) <- unique(xdata$sample_group)
199 plot(chrom, col = group_colors[chrom$sample_group], main=main)
200 legend("topright", legend=names(group_colors), col=group_colors, cex=0.8, lty=1)
201 }
202
203 # Color by sample
204 plot(chrom, col = rainbow(length(xdata@phenoData@data$sample_name)), main=main)
205 legend("topright", legend=xdata@phenoData@data$sample_name, col=rainbow(length(xdata@phenoData@data$sample_name)), cex=0.8, lty=1)
206
207 dev.off()
208 }
209
210 #@author G. Le Corguille
211 getPlotTICs <- function(xdata, pdfname="TICs.pdf") {
212 getPlotChromatogram(xdata, pdfname, aggregationFun = "sum")
213 }
214
215 #@author G. Le Corguille
216 getPlotBPIs <- function(xdata, pdfname="BPIs.pdf") {
217 getPlotChromatogram(xdata, pdfname, aggregationFun = "max")
218 }
219
220
221 # Get the polarities from all the samples of a condition
222 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM
223 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM
224 getSampleMetadata <- function(xdata=NULL, sampleMetadataOutput="sampleMetadata.tsv") {
225 cat("Creating the sampleMetadata file...\n")
226
227 #Create the sampleMetada dataframe
228 sampleMetadata <- xdata@phenoData@data
229 rownames(sampleMetadata) <- NULL
230 colnames(sampleMetadata) <- c("sampleMetadata", "class")
231
232 sampleNamesOrigin <- sampleMetadata$sampleMetadata
233 sampleNamesMakeNames <- make.names(sampleNamesOrigin)
234
235 if (any(duplicated(sampleNamesMakeNames))) {
236 write("\n\nERROR: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names().\nIn your case, at least two columns after the renaming obtain the same name, thus XCMS will collapse those columns per name.", stderr())
237 for (sampleName in sampleNamesOrigin) {
238 write(paste(sampleName,"\t->\t",make.names(sampleName)),stderr())
239 }
240 stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.")
241 }
242
243 if (!all(sampleNamesOrigin == sampleNamesMakeNames)) {
244 cat("\n\nWARNING: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names()\nIn your case, one or more sample names will be renamed in the sampleMetadata and dataMatrix files:\n")
245 for (sampleName in sampleNamesOrigin) {
246 cat(paste(sampleName,"\t->\t",make.names(sampleName),"\n"))
247 }
248 }
249
250 sampleMetadata$sampleMetadata <- sampleNamesMakeNames
251
252
253 #For each sample file, the following actions are done
254 for (fileIdx in 1:length(fileNames(xdata))) {
255 #Check if the file is in the CDF format
256 if (!mzR:::netCDFIsFile(fileNames(xdata))) {
257
258 # If the column isn't exist, with add one filled with NA
259 if (is.null(sampleMetadata$polarity)) sampleMetadata$polarity <- NA
260
261 #Extract the polarity (a list of polarities)
262 polarity <- fData(xdata)[fData(xdata)$fileIdx == fileIdx,"polarity"]
263 #Verify if all the scans have the same polarity
264 uniq_list <- unique(polarity)
265 if (length(uniq_list)>1){
266 polarity <- "mixed"
267 } else {
268 polarity <- as.character(uniq_list)
269 }
270
271 #Set the polarity attribute
272 sampleMetadata$polarity[fileIdx] <- polarity
273 }
274
275 }
276
277 write.table(sampleMetadata, sep="\t", quote=FALSE, row.names=FALSE, file=sampleMetadataOutput)
278
279 return(list("sampleNamesOrigin"=sampleNamesOrigin, "sampleNamesMakeNames"=sampleNamesMakeNames))
280
281 }
282
283
284 # This function check if xcms will found all the files
285 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM
286 checkFilesCompatibilityWithXcms <- function(directory) {
287 cat("Checking files filenames compatibilities with xmcs...\n")
288 # WHAT XCMS WILL FIND
289 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]")
290 filepattern <- paste(paste("\\.", filepattern, "$", sep=""),collapse="|")
291 info <- file.info(directory)
292 listed <- list.files(directory[info$isdir], pattern=filepattern, recursive=TRUE, full.names=TRUE)
293 files <- c(directory[!info$isdir], listed)
294 files_abs <- file.path(getwd(), files)
295 exists <- file.exists(files_abs)
296 files[exists] <- files_abs[exists]
297 files[exists] <- sub("//","/",files[exists])
298
299 # WHAT IS ON THE FILESYSTEM
300 filesystem_filepaths <- system(paste0("find \"$PWD/",directory,"\" -not -name '\\.*' -not -path '*conda-env*' -type f -name \"*\""), intern=T)
301 filesystem_filepaths <- filesystem_filepaths[grep(filepattern, filesystem_filepaths, perl=T)]
302
303 # COMPARISON
304 if (!is.na(table(filesystem_filepaths %in% files)["FALSE"])) {
305 write("\n\nERROR: List of the files which will not be imported by xcmsSet",stderr())
306 write(filesystem_filepaths[!(filesystem_filepaths %in% files)],stderr())
307 stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.")
308 }
309 }
310
311
312 #This function list the compatible files within the directory as xcms did
313 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM
314 getMSFiles <- function (directory) {
315 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]")
316 filepattern <- paste(paste("\\.", filepattern, "$", sep=""),collapse="|")
317 info <- file.info(directory)
318 listed <- list.files(directory[info$isdir], pattern=filepattern,recursive=TRUE, full.names=TRUE)
319 files <- c(directory[!info$isdir], listed)
320 exists <- file.exists(files)
321 files <- files[exists]
322 return(files)
323 }
324
325 # This function check if XML contains special caracters. It also checks integrity and completness.
326 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM
327 checkXmlStructure <- function (directory) {
328 cat("Checking XML structure...\n")
329
330 cmd <- paste0("IFS=$'\n'; for xml in $(find '",directory,"' -not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'); do if [ $(xmllint --nonet --noout \"$xml\" 2> /dev/null; echo $?) -gt 0 ]; then echo $xml;fi; done;")
331 capture <- system(cmd, intern=TRUE)
332
333 if (length(capture)>0){
334 #message=paste("The following mzXML or mzML file is incorrect, please check these files first:",capture)
335 write("\n\nERROR: The following mzXML or mzML file(s) are incorrect, please check these files first:", stderr())
336 write(capture, stderr())
337 stop("ERROR: xcmsSet cannot continue with incorrect mzXML or mzML files")
338 }
339
340 }
341
342
343 # This function check if XML contain special characters
344 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM
345 deleteXmlBadCharacters<- function (directory) {
346 cat("Checking Non ASCII characters in the XML...\n")
347
348 processed <- F
349 l <- system( paste0("find '",directory, "' -not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'"), intern=TRUE)
350 for (i in l){
351 cmd <- paste("LC_ALL=C grep '[^ -~]' \"", i, "\"", sep="")
352 capture <- suppressWarnings(system(cmd, intern=TRUE))
353 if (length(capture)>0){
354 cmd <- paste("perl -i -pe 's/[^[:ascii:]]//g;'",i)
355 print( paste("WARNING: Non ASCII characters have been removed from the ",i,"file") )
356 c <- system(cmd, intern=TRUE)
357 capture <- ""
358 processed <- T
359 }
360 }
361 if (processed) cat("\n\n")
362 return(processed)
363 }
364
365
366 # This function will compute MD5 checksum to check the data integrity
367 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr
368 getMd5sum <- function (directory) {
369 cat("Compute md5 checksum...\n")
370 # WHAT XCMS WILL FIND
371 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]")
372 filepattern <- paste(paste("\\.", filepattern, "$", sep=""),collapse="|")
373 info <- file.info(directory)
374 listed <- list.files(directory[info$isdir], pattern=filepattern, recursive=TRUE, full.names=TRUE)
375 files <- c(directory[!info$isdir], listed)
376 exists <- file.exists(files)
377 files <- files[exists]
378
379 library(tools)
380
381 #cat("\n\n")
382
383 return(as.matrix(md5sum(files)))
384 }
385
386
387 # This function get the raw file path from the arguments
388 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr
389 getRawfilePathFromArguments <- function(singlefile, zipfile, args) {
390 if (!is.null(args$zipfile)) zipfile <- args$zipfile
391 if (!is.null(args$zipfilePositive)) zipfile <- args$zipfilePositive
392 if (!is.null(args$zipfileNegative)) zipfile <- args$zipfileNegative
393
394 if (!is.null(args$singlefile_galaxyPath)) {
395 singlefile_galaxyPaths <- args$singlefile_galaxyPath;
396 singlefile_sampleNames <- args$singlefile_sampleName
397 }
398 if (!is.null(args$singlefile_galaxyPathPositive)) {
399 singlefile_galaxyPaths <- args$singlefile_galaxyPathPositive;
400 singlefile_sampleNames <- args$singlefile_sampleNamePositive
401 }
402 if (!is.null(args$singlefile_galaxyPathNegative)) {
403 singlefile_galaxyPaths <- args$singlefile_galaxyPathNegative;
404 singlefile_sampleNames <- args$singlefile_sampleNameNegative
405 }
406 if (exists("singlefile_galaxyPaths")){
407 singlefile_galaxyPaths <- unlist(strsplit(singlefile_galaxyPaths,"\\|"))
408 singlefile_sampleNames <- unlist(strsplit(singlefile_sampleNames,"\\|"))
409
410 singlefile <- NULL
411 for (singlefile_galaxyPath_i in seq(1:length(singlefile_galaxyPaths))) {
412 singlefile_galaxyPath <- singlefile_galaxyPaths[singlefile_galaxyPath_i]
413 singlefile_sampleName <- singlefile_sampleNames[singlefile_galaxyPath_i]
414 singlefile[[singlefile_sampleName]] <- singlefile_galaxyPath
415 }
416 }
417 return(list(zipfile=zipfile, singlefile=singlefile))
418 }
419
420
421 # This function retrieve the raw file in the working directory
422 # - if zipfile: unzip the file with its directory tree
423 # - if singlefiles: set symlink with the good filename
424 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr
425 retrieveRawfileInTheWorkingDirectory <- function(singlefile, zipfile) {
426 if(!is.null(singlefile) && (length("singlefile")>0)) {
427 for (singlefile_sampleName in names(singlefile)) {
428 singlefile_galaxyPath <- singlefile[[singlefile_sampleName]]
429 if(!file.exists(singlefile_galaxyPath)){
430 error_message <- paste("Cannot access the sample:",singlefile_sampleName,"located:",singlefile_galaxyPath,". Please, contact your administrator ... if you have one!")
431 print(error_message); stop(error_message)
432 }
433
434 if (!suppressWarnings( try (file.link(singlefile_galaxyPath, singlefile_sampleName), silent=T)))
435 file.copy(singlefile_galaxyPath, singlefile_sampleName)
436
437 }
438 directory <- "."
439
440 }
441 if(!is.null(zipfile) && (zipfile != "")) {
442 if(!file.exists(zipfile)){
443 error_message <- paste("Cannot access the Zip file:",zipfile,". Please, contact your administrator ... if you have one!")
444 print(error_message)
445 stop(error_message)
446 }
447
448 #list all file in the zip file
449 #zip_files <- unzip(zipfile,list=T)[,"Name"]
450
451 #unzip
452 suppressWarnings(unzip(zipfile, unzip="unzip"))
453
454 #get the directory name
455 suppressWarnings(filesInZip <- unzip(zipfile, list=T))
456 directories <- unique(unlist(lapply(strsplit(filesInZip$Name,"/"), function(x) x[1])))
457 directories <- directories[!(directories %in% c("__MACOSX")) & file.info(directories)$isdir]
458 directory <- "."
459 if (length(directories) == 1) directory <- directories
460
461 cat("files_root_directory\t",directory,"\n")
462
463 }
464 return (directory)
465 }
466
467
468 # This function retrieve a xset like object
469 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr
470 getxcmsSetObject <- function(xobject) {
471 # XCMS 1.x
472 if (class(xobject) == "xcmsSet")
473 return (xobject)
474 # XCMS 3.x
475 if (class(xobject) == "XCMSnExp") {
476 # Get the legacy xcmsSet object
477 suppressWarnings(xset <- as(xobject, 'xcmsSet'))
478 sampclass(xset) <- xset@phenoData$sample_group
479 return (xset)
480 }
481 }
482
483
484 #@TODO: remove this function as soon as we can use xcms 3.x.x from Bioconductor 3.7
485 # https://github.com/sneumann/xcms/issues/250
486 groupnamesW4M <- function(xdata, mzdec = 0, rtdec = 0) {
487 mzfmt <- paste("%.", mzdec, "f", sep = "")
488 rtfmt <- paste("%.", rtdec, "f", sep = "")
489
490 gnames <- paste("M", sprintf(mzfmt, featureDefinitions(xdata)[,"mzmed"]), "T",
491 sprintf(rtfmt, featureDefinitions(xdata)[,"rtmed"]), sep = "")
492
493 if (any(dup <- duplicated(gnames)))
494 for (dupname in unique(gnames[dup])) {
495 dupidx <- which(gnames == dupname)
496 gnames[dupidx] <- paste(gnames[dupidx], seq(along = dupidx), sep = "_")
497 }
498
499 return (gnames)
500 }
501
502 #@TODO: remove this function as soon as we can use xcms 3.x.x from Bioconductor 3.7
503 # https://github.com/sneumann/xcms/issues/247
504 .concatenate_XCMSnExp <- function(...) {
505 x <- list(...)
506 if (length(x) == 0)
507 return(NULL)
508 if (length(x) == 1)
509 return(x[[1]])
510 ## Check that all are XCMSnExp objects.
511 if (!all(unlist(lapply(x, function(z) is(z, "XCMSnExp")))))
512 stop("All passed objects should be 'XCMSnExp' objects")
513 new_x <- as(.concatenate_OnDiskMSnExp(...), "XCMSnExp")
514 ## If any of the XCMSnExp has alignment results or detected features drop
515 ## them!
516 x <- lapply(x, function(z) {
517 if (hasAdjustedRtime(z)) {
518 z <- dropAdjustedRtime(z)
519 warning("Adjusted retention times found, had to drop them.")
520 }
521 if (hasFeatures(z)) {
522 z <- dropFeatureDefinitions(z)
523 warning("Feature definitions found, had to drop them.")
524 }
525 z
526 })
527 ## Combine peaks
528 fls <- lapply(x, fileNames)
529 startidx <- cumsum(lengths(fls))
530 pks <- lapply(x, chromPeaks)
531 procH <- lapply(x, processHistory)
532 for (i in 2:length(fls)) {
533 pks[[i]][, "sample"] <- pks[[i]][, "sample"] + startidx[i - 1]
534 procH[[i]] <- lapply(procH[[i]], function(z) {
535 z@fileIndex <- as.integer(z@fileIndex + startidx[i - 1])
536 z
537 })
538 }
539 pks <- do.call(rbind, pks)
540 new_x@.processHistory <- unlist(procH)
541 chromPeaks(new_x) <- pks
542 if (validObject(new_x))
543 new_x
544 }
545
546 #@TODO: remove this function as soon as we can use xcms 3.x.x from Bioconductor 3.7
547 # https://github.com/sneumann/xcms/issues/247
548 .concatenate_OnDiskMSnExp <- function(...) {
549 x <- list(...)
550 if (length(x) == 0)
551 return(NULL)
552 if (length(x) == 1)
553 return(x[[1]])
554 ## Check that all are XCMSnExp objects.
555 if (!all(unlist(lapply(x, function(z) is(z, "OnDiskMSnExp")))))
556 stop("All passed objects should be 'OnDiskMSnExp' objects")
557 ## Check processingQueue
558 procQ <- lapply(x, function(z) z@spectraProcessingQueue)
559 new_procQ <- procQ[[1]]
560 is_ok <- unlist(lapply(procQ, function(z)
561 !is.character(all.equal(new_procQ, z))
562 ))
563 if (any(!is_ok)) {
564 warning("Processing queues from the submitted objects differ! ",
565 "Dropping the processing queue.")
566 new_procQ <- list()
567 }
568 ## processingData
569 fls <- lapply(x, function(z) z@processingData@files)
570 startidx <- cumsum(lengths(fls))
571 ## featureData
572 featd <- lapply(x, fData)
573 ## Have to update the file index and the spectrum names.
574 for (i in 2:length(featd)) {
575 featd[[i]]$fileIdx <- featd[[i]]$fileIdx + startidx[i - 1]
576 rownames(featd[[i]]) <- MSnbase:::formatFileSpectrumNames(
577 fileIds = featd[[i]]$fileIdx,
578 spectrumIds = featd[[i]]$spIdx,
579 nSpectra = nrow(featd[[i]]),
580 nFiles = length(unlist(fls))
581 )
582 }
583 featd <- do.call(rbind, featd)
584 featd$spectrum <- 1:nrow(featd)
585 ## experimentData
586 expdata <- lapply(x, function(z) {
587 ed <- z@experimentData
588 data.frame(instrumentManufacturer = ed@instrumentManufacturer,
589 instrumentModel = ed@instrumentModel,
590 ionSource = ed@ionSource,
591 analyser = ed@analyser,
592 detectorType = ed@detectorType,
593 stringsAsFactors = FALSE)
594 })
595 expdata <- do.call(rbind, expdata)
596 expdata <- new("MIAPE",
597 instrumentManufacturer = expdata$instrumentManufacturer,
598 instrumentModel = expdata$instrumentModel,
599 ionSource = expdata$ionSource,
600 analyser = expdata$analyser,
601 detectorType = expdata$detectorType)
602
603 ## protocolData
604 protodata <- lapply(x, function(z) z@protocolData)
605 if (any(unlist(lapply(protodata, nrow)) > 0))
606 warning("Found non-empty protocol data, but merging protocol data is",
607 " currently not supported. Skipped.")
608 ## phenoData
609 pdata <- do.call(rbind, lapply(x, pData))
610 res <- new(
611 "OnDiskMSnExp",
612 phenoData = new("NAnnotatedDataFrame", data = pdata),
613 featureData = new("AnnotatedDataFrame", featd),
614 processingData = new("MSnProcess",
615 processing = paste0("Concatenated [", date(), "]"),
616 files = unlist(fls), smoothed = NA),
617 experimentData = expdata,
618 spectraProcessingQueue = new_procQ)
619 if (validObject(res))
620 res
621 }
622
623 #@TODO: remove this function as soon as we can use xcms 3.x.x from Bioconductor 3.7
624 # https://github.com/sneumann/xcms/issues/247
625 c.XCMSnExp <- function(...) {
626 .concatenate_XCMSnExp(...)
627 }
628
629 #@TODO: remove this function as soon as we can use xcms 3.x.x from Bioconductor 3.7
630 # https://github.com/sneumann/xcms/issues/247
631 c.MSnbase <- function(...) {
632 .concatenate_OnDiskMSnExp(...)
633 }