Mercurial > repos > lecorguille > xcms_plot_chromatogram
comparison lib.r @ 0:fe1f0f16d9e6 draft
planemo upload for repository https://github.com/workflow4metabolomics/xcms commit cfb08142b6bfb78002b4e0d7775adb1a58e66c33-dirty
author | lecorguille |
---|---|
date | Thu, 08 Mar 2018 02:48:44 -0500 |
parents | |
children | e6fdadaf039e |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:fe1f0f16d9e6 |
---|---|
1 #@authors ABiMS TEAM, Y. Guitton | |
2 # lib.r for Galaxy Workflow4Metabolomics xcms tools | |
3 | |
4 #@author G. Le Corguille | |
5 # solve an issue with batch if arguments are logical TRUE/FALSE | |
6 parseCommandArgs <- function(...) { | |
7 args <- batch::parseCommandArgs(...) | |
8 for (key in names(args)) { | |
9 if (args[key] %in% c("TRUE","FALSE")) | |
10 args[key] = as.logical(args[key]) | |
11 } | |
12 return(args) | |
13 } | |
14 | |
15 #@author G. Le Corguille | |
16 # This function will | |
17 # - load the packages | |
18 # - display the sessionInfo | |
19 loadAndDisplayPackages <- function(pkgs) { | |
20 for(pkg in pkgs) suppressPackageStartupMessages( stopifnot( library(pkg, quietly=TRUE, logical.return=TRUE, character.only=TRUE))) | |
21 | |
22 sessioninfo = sessionInfo() | |
23 cat(sessioninfo$R.version$version.string,"\n") | |
24 cat("Main packages:\n") | |
25 for (pkg in names(sessioninfo$otherPkgs)) { cat(paste(pkg,packageVersion(pkg)),"\t") }; cat("\n") | |
26 cat("Other loaded packages:\n") | |
27 for (pkg in names(sessioninfo$loadedOnly)) { cat(paste(pkg,packageVersion(pkg)),"\t") }; cat("\n") | |
28 } | |
29 | |
30 #@author G. Le Corguille | |
31 # This function convert if it is required the Retention Time in minutes | |
32 RTSecondToMinute <- function(variableMetadata, convertRTMinute) { | |
33 if (convertRTMinute){ | |
34 #converting the retention times (seconds) into minutes | |
35 print("converting the retention times into minutes in the variableMetadata") | |
36 variableMetadata[,"rt"] <- variableMetadata[,"rt"]/60 | |
37 variableMetadata[,"rtmin"] <- variableMetadata[,"rtmin"]/60 | |
38 variableMetadata[,"rtmax"] <- variableMetadata[,"rtmax"]/60 | |
39 } | |
40 return (variableMetadata) | |
41 } | |
42 | |
43 #@author G. Le Corguille | |
44 # This function format ions identifiers | |
45 formatIonIdentifiers <- function(variableMetadata, numDigitsRT=0, numDigitsMZ=0) { | |
46 splitDeco <- strsplit(as.character(variableMetadata$name),"_") | |
47 idsDeco <- sapply(splitDeco, function(x) { deco=unlist(x)[2]; if (is.na(deco)) return ("") else return(paste0("_",deco)) }) | |
48 namecustom <- make.unique(paste0("M",round(variableMetadata[,"mz"],numDigitsMZ),"T",round(variableMetadata[,"rt"],numDigitsRT),idsDeco)) | |
49 variableMetadata <- cbind(name=variableMetadata$name, namecustom=namecustom, variableMetadata[,!(colnames(variableMetadata) %in% c("name"))]) | |
50 return(variableMetadata) | |
51 } | |
52 | |
53 #@author G. Le Corguille | |
54 # Draw the plotChromPeakDensity 3 per page in a pdf file | |
55 getPlotChromPeakDensity <- function(xdata, mzdigit=4) { | |
56 pdf(file="plotChromPeakDensity.pdf", width=16, height=12) | |
57 | |
58 par(mfrow = c(3, 1), mar = c(4, 4, 1, 0.5)) | |
59 | |
60 group_colors <- brewer.pal(3, "Set1")[1:length(unique(xdata$sample_group))] | |
61 names(group_colors) <- unique(xdata$sample_group) | |
62 | |
63 xlim <- c(min(featureDefinitions(xdata)$rtmin), max(featureDefinitions(xdata)$rtmax)) | |
64 for (i in 1:nrow(featureDefinitions(xdata))) { | |
65 mzmin = featureDefinitions(xdata)[i,]$mzmin | |
66 mzmax = featureDefinitions(xdata)[i,]$mzmax | |
67 plotChromPeakDensity(xdata, mz=c(mzmin,mzmax), col=group_colors, pch=16, xlim=xlim, main=paste(round(mzmin,mzdigit),round(mzmax,mzdigit))) | |
68 legend("topright", legend=names(group_colors), col=group_colors, cex=0.8, lty=1) | |
69 } | |
70 | |
71 dev.off() | |
72 } | |
73 | |
74 #@author G. Le Corguille | |
75 # Draw the plotChromPeakDensity 3 per page in a pdf file | |
76 getPlotAdjustedRtime <- function(xdata) { | |
77 | |
78 pdf(file="raw_vs_adjusted_rt.pdf", width=16, height=12) | |
79 | |
80 # Color by group | |
81 group_colors <- brewer.pal(3, "Set1")[1:length(unique(xdata$sample_group))] | |
82 names(group_colors) <- unique(xdata$sample_group) | |
83 plotAdjustedRtime(xdata, col = group_colors[xdata$sample_group]) | |
84 legend("topright", legend=names(group_colors), col=group_colors, cex=0.8, lty=1) | |
85 | |
86 # Color by sample | |
87 plotAdjustedRtime(xdata, col = rainbow(length(xdata@phenoData@data$sample_name))) | |
88 legend("topright", legend=xdata@phenoData@data$sample_name, col=rainbow(length(xdata@phenoData@data$sample_name)), cex=0.8, lty=1) | |
89 | |
90 dev.off() | |
91 } | |
92 | |
93 #@author G. Le Corguille | |
94 # value: intensity values to be used into, maxo or intb | |
95 getPeaklistW4M <- function(xdata, intval="into", convertRTMinute=F, numDigitsMZ=4, numDigitsRT=0, variableMetadataOutput, dataMatrixOutput) { | |
96 dataMatrix <- featureValues(xdata, method="medret", value=intval) | |
97 colnames(dataMatrix) <- tools::file_path_sans_ext(colnames(dataMatrix)) | |
98 dataMatrix = cbind(name=groupnamesW4M(xdata), dataMatrix) | |
99 variableMetadata <- featureDefinitions(xdata) | |
100 colnames(variableMetadata)[1] = "mz"; colnames(variableMetadata)[4] = "rt" | |
101 variableMetadata = data.frame(name=groupnamesW4M(xdata), variableMetadata) | |
102 | |
103 variableMetadata <- RTSecondToMinute(variableMetadata, convertRTMinute) | |
104 variableMetadata <- formatIonIdentifiers(variableMetadata, numDigitsRT=numDigitsRT, numDigitsMZ=numDigitsMZ) | |
105 | |
106 write.table(variableMetadata, file=variableMetadataOutput,sep="\t",quote=F,row.names=F) | |
107 write.table(dataMatrix, file=dataMatrixOutput,sep="\t",quote=F,row.names=F) | |
108 | |
109 } | |
110 | |
111 #@author G. Le Corguille | |
112 getPlotChromatogram <- function(xdata, pdfname="Chromatogram.pdf", aggregationFun = "max") { | |
113 | |
114 chrom <- chromatogram(xdata, aggregationFun = aggregationFun) | |
115 if (aggregationFun == "sum") | |
116 type="Total Ion Chromatograms" | |
117 else | |
118 type="Base Peak Intensity Chromatograms" | |
119 | |
120 adjusted="Raw" | |
121 if (hasAdjustedRtime(xdata)) | |
122 adjusted="Adjusted" | |
123 | |
124 main <- paste(type,":",adjusted,"data") | |
125 | |
126 pdf(pdfname, width=16, height=10) | |
127 | |
128 # Color by group | |
129 group_colors <- brewer.pal(3, "Set1")[1:length(unique(xdata$sample_group))] | |
130 names(group_colors) <- unique(xdata$sample_group) | |
131 plot(chrom, col = group_colors[chrom$sample_group], main=main) | |
132 legend("topright", legend=names(group_colors), col=group_colors, cex=0.8, lty=1) | |
133 | |
134 # Color by sample | |
135 plot(chrom, col = rainbow(length(xdata@phenoData@data$sample_name)), main=main) | |
136 legend("topright", legend=xdata@phenoData@data$sample_name, col=rainbow(length(xdata@phenoData@data$sample_name)), cex=0.8, lty=1) | |
137 | |
138 dev.off() | |
139 } | |
140 | |
141 #@author G. Le Corguille | |
142 getPlotTICs <- function(xdata, pdfname="TICs.pdf") { | |
143 getPlotChromatogram(xdata, pdfname, aggregationFun = "sum") | |
144 } | |
145 | |
146 #@author G. Le Corguille | |
147 getPlotBPIs <- function(xdata, pdfname="BPIs.pdf") { | |
148 getPlotChromatogram(xdata, pdfname, aggregationFun = "max") | |
149 } | |
150 | |
151 | |
152 # Get the polarities from all the samples of a condition | |
153 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM | |
154 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM | |
155 getSampleMetadata <- function(xdata=NULL, sampleMetadataOutput="sampleMetadata.tsv") { | |
156 cat("Creating the sampleMetadata file...\n") | |
157 | |
158 #Create the sampleMetada dataframe | |
159 sampleMetadata <- xdata@phenoData@data | |
160 rownames(sampleMetadata) <- NULL | |
161 colnames(sampleMetadata) <- c("sampleMetadata", "class") | |
162 | |
163 sampleNamesOrigin <- sampleMetadata$sampleMetadata | |
164 sampleNamesMakeNames <- make.names(sampleNamesOrigin) | |
165 | |
166 if (any(duplicated(sampleNamesMakeNames))) { | |
167 write("\n\nERROR: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names().\nIn your case, at least two columns after the renaming obtain the same name, thus XCMS will collapse those columns per name.", stderr()) | |
168 for (sampleName in sampleNamesOrigin) { | |
169 write(paste(sampleName,"\t->\t",make.names(sampleName)),stderr()) | |
170 } | |
171 stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.") | |
172 } | |
173 | |
174 if (!all(sampleNamesOrigin == sampleNamesMakeNames)) { | |
175 cat("\n\nWARNING: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names()\nIn your case, one or more sample names will be renamed in the sampleMetadata and dataMatrix files:\n") | |
176 for (sampleName in sampleNamesOrigin) { | |
177 cat(paste(sampleName,"\t->\t",make.names(sampleName),"\n")) | |
178 } | |
179 } | |
180 | |
181 sampleMetadata$sampleMetadata <- sampleNamesMakeNames | |
182 | |
183 | |
184 #For each sample file, the following actions are done | |
185 for (fileIdx in 1:length(fileNames(xdata))) { | |
186 #Check if the file is in the CDF format | |
187 if (!mzR:::netCDFIsFile(fileNames(xdata))) { | |
188 | |
189 # If the column isn't exist, with add one filled with NA | |
190 if (is.null(sampleMetadata$polarity)) sampleMetadata$polarity <- NA | |
191 | |
192 #Extract the polarity (a list of polarities) | |
193 polarity <- fData(xdata)[fData(xdata)$fileIdx == fileIdx,"polarity"] | |
194 #Verify if all the scans have the same polarity | |
195 uniq_list <- unique(polarity) | |
196 if (length(uniq_list)>1){ | |
197 polarity <- "mixed" | |
198 } else { | |
199 polarity <- as.character(uniq_list) | |
200 } | |
201 | |
202 #Set the polarity attribute | |
203 sampleMetadata$polarity[fileIdx] <- polarity | |
204 } | |
205 | |
206 } | |
207 | |
208 write.table(sampleMetadata, sep="\t", quote=FALSE, row.names=FALSE, file=sampleMetadataOutput) | |
209 | |
210 return(list("sampleNamesOrigin"=sampleNamesOrigin, "sampleNamesMakeNames"=sampleNamesMakeNames)) | |
211 | |
212 } | |
213 | |
214 | |
215 # This function check if xcms will found all the files | |
216 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM | |
217 checkFilesCompatibilityWithXcms <- function(directory) { | |
218 cat("Checking files filenames compatibilities with xmcs...\n") | |
219 # WHAT XCMS WILL FIND | |
220 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") | |
221 filepattern <- paste(paste("\\.", filepattern, "$", sep=""),collapse="|") | |
222 info <- file.info(directory) | |
223 listed <- list.files(directory[info$isdir], pattern=filepattern, recursive=TRUE, full.names=TRUE) | |
224 files <- c(directory[!info$isdir], listed) | |
225 files_abs <- file.path(getwd(), files) | |
226 exists <- file.exists(files_abs) | |
227 files[exists] <- files_abs[exists] | |
228 files[exists] <- sub("//","/",files[exists]) | |
229 | |
230 # WHAT IS ON THE FILESYSTEM | |
231 filesystem_filepaths <- system(paste0("find \"$PWD/",directory,"\" -not -name '\\.*' -not -path '*conda-env*' -type f -name \"*\""), intern=T) | |
232 filesystem_filepaths <- filesystem_filepaths[grep(filepattern, filesystem_filepaths, perl=T)] | |
233 | |
234 # COMPARISON | |
235 if (!is.na(table(filesystem_filepaths %in% files)["FALSE"])) { | |
236 write("\n\nERROR: List of the files which will not be imported by xcmsSet",stderr()) | |
237 write(filesystem_filepaths[!(filesystem_filepaths %in% files)],stderr()) | |
238 stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.") | |
239 } | |
240 } | |
241 | |
242 | |
243 #This function list the compatible files within the directory as xcms did | |
244 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM | |
245 getMSFiles <- function (directory) { | |
246 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") | |
247 filepattern <- paste(paste("\\.", filepattern, "$", sep=""),collapse="|") | |
248 info <- file.info(directory) | |
249 listed <- list.files(directory[info$isdir], pattern=filepattern,recursive=TRUE, full.names=TRUE) | |
250 files <- c(directory[!info$isdir], listed) | |
251 exists <- file.exists(files) | |
252 files <- files[exists] | |
253 return(files) | |
254 } | |
255 | |
256 # This function check if XML contains special caracters. It also checks integrity and completness. | |
257 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM | |
258 checkXmlStructure <- function (directory) { | |
259 cat("Checking XML structure...\n") | |
260 | |
261 cmd <- paste0("IFS=$'\n'; for xml in $(find '",directory,"' -not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'); do if [ $(xmllint --nonet --noout \"$xml\" 2> /dev/null; echo $?) -gt 0 ]; then echo $xml;fi; done;") | |
262 capture <- system(cmd, intern=TRUE) | |
263 | |
264 if (length(capture)>0){ | |
265 #message=paste("The following mzXML or mzML file is incorrect, please check these files first:",capture) | |
266 write("\n\nERROR: The following mzXML or mzML file(s) are incorrect, please check these files first:", stderr()) | |
267 write(capture, stderr()) | |
268 stop("ERROR: xcmsSet cannot continue with incorrect mzXML or mzML files") | |
269 } | |
270 | |
271 } | |
272 | |
273 | |
274 # This function check if XML contain special characters | |
275 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM | |
276 deleteXmlBadCharacters<- function (directory) { | |
277 cat("Checking Non ASCII characters in the XML...\n") | |
278 | |
279 processed <- F | |
280 l <- system( paste0("find '",directory, "' -not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'"), intern=TRUE) | |
281 for (i in l){ | |
282 cmd <- paste("LC_ALL=C grep '[^ -~]' \"", i, "\"", sep="") | |
283 capture <- suppressWarnings(system(cmd, intern=TRUE)) | |
284 if (length(capture)>0){ | |
285 cmd <- paste("perl -i -pe 's/[^[:ascii:]]//g;'",i) | |
286 print( paste("WARNING: Non ASCII characters have been removed from the ",i,"file") ) | |
287 c <- system(cmd, intern=TRUE) | |
288 capture <- "" | |
289 processed <- T | |
290 } | |
291 } | |
292 if (processed) cat("\n\n") | |
293 return(processed) | |
294 } | |
295 | |
296 | |
297 # This function will compute MD5 checksum to check the data integrity | |
298 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr | |
299 getMd5sum <- function (directory) { | |
300 cat("Compute md5 checksum...\n") | |
301 # WHAT XCMS WILL FIND | |
302 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") | |
303 filepattern <- paste(paste("\\.", filepattern, "$", sep=""),collapse="|") | |
304 info <- file.info(directory) | |
305 listed <- list.files(directory[info$isdir], pattern=filepattern, recursive=TRUE, full.names=TRUE) | |
306 files <- c(directory[!info$isdir], listed) | |
307 exists <- file.exists(files) | |
308 files <- files[exists] | |
309 | |
310 library(tools) | |
311 | |
312 #cat("\n\n") | |
313 | |
314 return(as.matrix(md5sum(files))) | |
315 } | |
316 | |
317 | |
318 # This function get the raw file path from the arguments | |
319 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr | |
320 getRawfilePathFromArguments <- function(singlefile, zipfile, args) { | |
321 if (!is.null(args$zipfile)) zipfile <- args$zipfile | |
322 if (!is.null(args$zipfilePositive)) zipfile <- args$zipfilePositive | |
323 if (!is.null(args$zipfileNegative)) zipfile <- args$zipfileNegative | |
324 | |
325 if (!is.null(args$singlefile_galaxyPath)) { | |
326 singlefile_galaxyPaths <- args$singlefile_galaxyPath; | |
327 singlefile_sampleNames <- args$singlefile_sampleName | |
328 } | |
329 if (!is.null(args$singlefile_galaxyPathPositive)) { | |
330 singlefile_galaxyPaths <- args$singlefile_galaxyPathPositive; | |
331 singlefile_sampleNames <- args$singlefile_sampleNamePositive | |
332 } | |
333 if (!is.null(args$singlefile_galaxyPathNegative)) { | |
334 singlefile_galaxyPaths <- args$singlefile_galaxyPathNegative; | |
335 singlefile_sampleNames <- args$singlefile_sampleNameNegative | |
336 } | |
337 if (exists("singlefile_galaxyPaths")){ | |
338 singlefile_galaxyPaths <- unlist(strsplit(singlefile_galaxyPaths,"\\|")) | |
339 singlefile_sampleNames <- unlist(strsplit(singlefile_sampleNames,"\\|")) | |
340 | |
341 singlefile <- NULL | |
342 for (singlefile_galaxyPath_i in seq(1:length(singlefile_galaxyPaths))) { | |
343 singlefile_galaxyPath <- singlefile_galaxyPaths[singlefile_galaxyPath_i] | |
344 singlefile_sampleName <- singlefile_sampleNames[singlefile_galaxyPath_i] | |
345 singlefile[[singlefile_sampleName]] <- singlefile_galaxyPath | |
346 } | |
347 } | |
348 for (argument in c("zipfile","zipfilePositive","zipfileNegative","singlefile_galaxyPath","singlefile_sampleName","singlefile_galaxyPathPositive","singlefile_sampleNamePositive","singlefile_galaxyPathNegative","singlefile_sampleNameNegative")) { | |
349 args[[argument]] <- NULL | |
350 } | |
351 return(list(zipfile=zipfile, singlefile=singlefile, args=args)) | |
352 } | |
353 | |
354 | |
355 # This function retrieve the raw file in the working directory | |
356 # - if zipfile: unzip the file with its directory tree | |
357 # - if singlefiles: set symlink with the good filename | |
358 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr | |
359 retrieveRawfileInTheWorkingDirectory <- function(singlefile, zipfile) { | |
360 if(!is.null(singlefile) && (length("singlefile")>0)) { | |
361 for (singlefile_sampleName in names(singlefile)) { | |
362 singlefile_galaxyPath <- singlefile[[singlefile_sampleName]] | |
363 if(!file.exists(singlefile_galaxyPath)){ | |
364 error_message <- paste("Cannot access the sample:",singlefile_sampleName,"located:",singlefile_galaxyPath,". Please, contact your administrator ... if you have one!") | |
365 print(error_message); stop(error_message) | |
366 } | |
367 | |
368 if (!suppressWarnings( try (file.link(singlefile_galaxyPath, singlefile_sampleName), silent=T))) | |
369 file.copy(singlefile_galaxyPath, singlefile_sampleName) | |
370 | |
371 } | |
372 directory <- "." | |
373 | |
374 } | |
375 if(!is.null(zipfile) && (zipfile != "")) { | |
376 if(!file.exists(zipfile)){ | |
377 error_message <- paste("Cannot access the Zip file:",zipfile,". Please, contact your administrator ... if you have one!") | |
378 print(error_message) | |
379 stop(error_message) | |
380 } | |
381 | |
382 #list all file in the zip file | |
383 #zip_files <- unzip(zipfile,list=T)[,"Name"] | |
384 | |
385 #unzip | |
386 suppressWarnings(unzip(zipfile, unzip="unzip")) | |
387 | |
388 #get the directory name | |
389 suppressWarnings(filesInZip <- unzip(zipfile, list=T)) | |
390 directories <- unique(unlist(lapply(strsplit(filesInZip$Name,"/"), function(x) x[1]))) | |
391 directories <- directories[!(directories %in% c("__MACOSX")) & file.info(directories)$isdir] | |
392 directory <- "." | |
393 if (length(directories) == 1) directory <- directories | |
394 | |
395 cat("files_root_directory\t",directory,"\n") | |
396 | |
397 } | |
398 return (directory) | |
399 } | |
400 | |
401 | |
402 # This function retrieve a xset like object | |
403 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr | |
404 getxcmsSetObject <- function(xobject) { | |
405 # XCMS 1.x | |
406 if (class(xobject) == "xcmsSet") | |
407 return (xobject) | |
408 # XCMS 3.x | |
409 if (class(xobject) == "XCMSnExp") { | |
410 # Get the legacy xcmsSet object | |
411 suppressWarnings(xset <- as(xobject, 'xcmsSet')) | |
412 sampclass(xset) <- xset@phenoData$sample_group | |
413 return (xset) | |
414 } | |
415 } | |
416 | |
417 | |
418 #@TODO: remove this function as soon as we can use xcms 3.x.x from Bioconductor 3.7 | |
419 # https://github.com/sneumann/xcms/issues/250 | |
420 groupnamesW4M <- function(xdata, mzdec = 0, rtdec = 0) { | |
421 mzfmt <- paste("%.", mzdec, "f", sep = "") | |
422 rtfmt <- paste("%.", rtdec, "f", sep = "") | |
423 | |
424 gnames <- paste("M", sprintf(mzfmt, featureDefinitions(xdata)[,"mzmed"]), "T", | |
425 sprintf(rtfmt, featureDefinitions(xdata)[,"rtmed"]), sep = "") | |
426 | |
427 if (any(dup <- duplicated(gnames))) | |
428 for (dupname in unique(gnames[dup])) { | |
429 dupidx <- which(gnames == dupname) | |
430 gnames[dupidx] <- paste(gnames[dupidx], seq(along = dupidx), sep = "_") | |
431 } | |
432 | |
433 return (gnames) | |
434 } | |
435 | |
436 #@TODO: remove this function as soon as we can use xcms 3.x.x from Bioconductor 3.7 | |
437 # https://github.com/sneumann/xcms/issues/247 | |
438 .concatenate_XCMSnExp <- function(...) { | |
439 x <- list(...) | |
440 if (length(x) == 0) | |
441 return(NULL) | |
442 if (length(x) == 1) | |
443 return(x[[1]]) | |
444 ## Check that all are XCMSnExp objects. | |
445 if (!all(unlist(lapply(x, function(z) is(z, "XCMSnExp"))))) | |
446 stop("All passed objects should be 'XCMSnExp' objects") | |
447 new_x <- as(.concatenate_OnDiskMSnExp(...), "XCMSnExp") | |
448 ## If any of the XCMSnExp has alignment results or detected features drop | |
449 ## them! | |
450 x <- lapply(x, function(z) { | |
451 if (hasAdjustedRtime(z)) { | |
452 z <- dropAdjustedRtime(z) | |
453 warning("Adjusted retention times found, had to drop them.") | |
454 } | |
455 if (hasFeatures(z)) { | |
456 z <- dropFeatureDefinitions(z) | |
457 warning("Feature definitions found, had to drop them.") | |
458 } | |
459 z | |
460 }) | |
461 ## Combine peaks | |
462 fls <- lapply(x, fileNames) | |
463 startidx <- cumsum(lengths(fls)) | |
464 pks <- lapply(x, chromPeaks) | |
465 procH <- lapply(x, processHistory) | |
466 for (i in 2:length(fls)) { | |
467 pks[[i]][, "sample"] <- pks[[i]][, "sample"] + startidx[i - 1] | |
468 procH[[i]] <- lapply(procH[[i]], function(z) { | |
469 z@fileIndex <- as.integer(z@fileIndex + startidx[i - 1]) | |
470 z | |
471 }) | |
472 } | |
473 pks <- do.call(rbind, pks) | |
474 new_x@.processHistory <- unlist(procH) | |
475 chromPeaks(new_x) <- pks | |
476 if (validObject(new_x)) | |
477 new_x | |
478 } | |
479 | |
480 #@TODO: remove this function as soon as we can use xcms 3.x.x from Bioconductor 3.7 | |
481 # https://github.com/sneumann/xcms/issues/247 | |
482 .concatenate_OnDiskMSnExp <- function(...) { | |
483 x <- list(...) | |
484 if (length(x) == 0) | |
485 return(NULL) | |
486 if (length(x) == 1) | |
487 return(x[[1]]) | |
488 ## Check that all are XCMSnExp objects. | |
489 if (!all(unlist(lapply(x, function(z) is(z, "OnDiskMSnExp"))))) | |
490 stop("All passed objects should be 'OnDiskMSnExp' objects") | |
491 ## Check processingQueue | |
492 procQ <- lapply(x, function(z) z@spectraProcessingQueue) | |
493 new_procQ <- procQ[[1]] | |
494 is_ok <- unlist(lapply(procQ, function(z) | |
495 !is.character(all.equal(new_procQ, z)) | |
496 )) | |
497 if (any(!is_ok)) { | |
498 warning("Processing queues from the submitted objects differ! ", | |
499 "Dropping the processing queue.") | |
500 new_procQ <- list() | |
501 } | |
502 ## processingData | |
503 fls <- lapply(x, function(z) z@processingData@files) | |
504 startidx <- cumsum(lengths(fls)) | |
505 ## featureData | |
506 featd <- lapply(x, fData) | |
507 ## Have to update the file index and the spectrum names. | |
508 for (i in 2:length(featd)) { | |
509 featd[[i]]$fileIdx <- featd[[i]]$fileIdx + startidx[i - 1] | |
510 rownames(featd[[i]]) <- MSnbase:::formatFileSpectrumNames( | |
511 fileIds = featd[[i]]$fileIdx, | |
512 spectrumIds = featd[[i]]$spIdx, | |
513 nSpectra = nrow(featd[[i]]), | |
514 nFiles = length(unlist(fls)) | |
515 ) | |
516 } | |
517 featd <- do.call(rbind, featd) | |
518 featd$spectrum <- 1:nrow(featd) | |
519 ## experimentData | |
520 expdata <- lapply(x, function(z) { | |
521 ed <- z@experimentData | |
522 data.frame(instrumentManufacturer = ed@instrumentManufacturer, | |
523 instrumentModel = ed@instrumentModel, | |
524 ionSource = ed@ionSource, | |
525 analyser = ed@analyser, | |
526 detectorType = ed@detectorType, | |
527 stringsAsFactors = FALSE) | |
528 }) | |
529 expdata <- do.call(rbind, expdata) | |
530 expdata <- new("MIAPE", | |
531 instrumentManufacturer = expdata$instrumentManufacturer, | |
532 instrumentModel = expdata$instrumentModel, | |
533 ionSource = expdata$ionSource, | |
534 analyser = expdata$analyser, | |
535 detectorType = expdata$detectorType) | |
536 | |
537 ## protocolData | |
538 protodata <- lapply(x, function(z) z@protocolData) | |
539 if (any(unlist(lapply(protodata, nrow)) > 0)) | |
540 warning("Found non-empty protocol data, but merging protocol data is", | |
541 " currently not supported. Skipped.") | |
542 ## phenoData | |
543 pdata <- do.call(rbind, lapply(x, pData)) | |
544 res <- new( | |
545 "OnDiskMSnExp", | |
546 phenoData = new("NAnnotatedDataFrame", data = pdata), | |
547 featureData = new("AnnotatedDataFrame", featd), | |
548 processingData = new("MSnProcess", | |
549 processing = paste0("Concatenated [", date(), "]"), | |
550 files = unlist(fls), smoothed = NA), | |
551 experimentData = expdata, | |
552 spectraProcessingQueue = new_procQ) | |
553 if (validObject(res)) | |
554 res | |
555 } | |
556 | |
557 #@TODO: remove this function as soon as we can use xcms 3.x.x from Bioconductor 3.7 | |
558 # https://github.com/sneumann/xcms/issues/247 | |
559 c.XCMSnExp <- function(...) { | |
560 .concatenate_XCMSnExp(...) | |
561 } |