Mercurial > repos > lecorguille > mnsbase_readmsdata
comparison lib.r @ 0:3f0a218e2ebc draft default tip
planemo upload for repository https://github.com/workflow4metabolomics/xcms commit f01148783819c37e474790dbd56619862960448a
| author | lecorguille | 
|---|---|
| date | Tue, 03 Apr 2018 11:45:58 -0400 | 
| parents | |
| children | 
   comparison
  equal
  deleted
  inserted
  replaced
| -1:000000000000 | 0:3f0a218e2ebc | 
|---|---|
| 1 #@authors ABiMS TEAM, Y. Guitton | |
| 2 # lib.r for Galaxy Workflow4Metabolomics xcms tools | |
| 3 | |
| 4 #@author G. Le Corguille | |
| 5 # solve an issue with batch if arguments are logical TRUE/FALSE | |
| 6 parseCommandArgs <- function(...) { | |
| 7 args <- batch::parseCommandArgs(...) | |
| 8 for (key in names(args)) { | |
| 9 if (args[key] %in% c("TRUE","FALSE")) | |
| 10 args[key] = as.logical(args[key]) | |
| 11 } | |
| 12 return(args) | |
| 13 } | |
| 14 | |
| 15 #@author G. Le Corguille | |
| 16 # This function will | |
| 17 # - load the packages | |
| 18 # - display the sessionInfo | |
| 19 loadAndDisplayPackages <- function(pkgs) { | |
| 20 for(pkg in pkgs) suppressPackageStartupMessages( stopifnot( library(pkg, quietly=TRUE, logical.return=TRUE, character.only=TRUE))) | |
| 21 | |
| 22 sessioninfo = sessionInfo() | |
| 23 cat(sessioninfo$R.version$version.string,"\n") | |
| 24 cat("Main packages:\n") | |
| 25 for (pkg in names(sessioninfo$otherPkgs)) { cat(paste(pkg,packageVersion(pkg)),"\t") }; cat("\n") | |
| 26 cat("Other loaded packages:\n") | |
| 27 for (pkg in names(sessioninfo$loadedOnly)) { cat(paste(pkg,packageVersion(pkg)),"\t") }; cat("\n") | |
| 28 } | |
| 29 | |
| 30 #@author G. Le Corguille | |
| 31 # This function merge several xdata into one. | |
| 32 mergeXData <- function(args) { | |
| 33 for(image in args$images) { | |
| 34 load(image) | |
| 35 # Handle infiles | |
| 36 if (!exists("singlefile")) singlefile <- NULL | |
| 37 if (!exists("zipfile")) zipfile <- NULL | |
| 38 rawFilePath <- getRawfilePathFromArguments(singlefile, zipfile, args) | |
| 39 zipfile <- rawFilePath$zipfile | |
| 40 singlefile <- rawFilePath$singlefile | |
| 41 retrieveRawfileInTheWorkingDirectory(singlefile, zipfile) | |
| 42 if (exists("raw_data")) xdata <- raw_data | |
| 43 if (!exists("xdata")) stop("\n\nERROR: The RData doesn't contain any object called 'xdata'. This RData should have been created by an old version of XMCS 2.*") | |
| 44 cat(sampleNamesList$sampleNamesOrigin,"\n") | |
| 45 if (!exists("xdata_merged")) { | |
| 46 xdata_merged <- xdata | |
| 47 singlefile_merged <- singlefile | |
| 48 md5sumList_merged <- md5sumList | |
| 49 sampleNamesList_merged <- sampleNamesList | |
| 50 } else { | |
| 51 if (is(xdata, "XCMSnExp")) xdata_merged <- c(xdata_merged,xdata) | |
| 52 else if (is(xdata, "OnDiskMSnExp")) xdata_merged <- .concatenate_OnDiskMSnExp(xdata_merged,xdata) | |
| 53 else stop("\n\nERROR: The RData either a OnDiskMSnExp object called raw_data or a XCMSnExp object called xdata") | |
| 54 singlefile_merged <- c(singlefile_merged,singlefile) | |
| 55 md5sumList_merged$origin <- rbind(md5sumList_merged$origin,md5sumList$origin) | |
| 56 sampleNamesList_merged$sampleNamesOrigin <- c(sampleNamesList_merged$sampleNamesOrigin,sampleNamesList$sampleNamesOrigin) | |
| 57 sampleNamesList_merged$sampleNamesMakeNames <- c(sampleNamesList_merged$sampleNamesMakeNames,sampleNamesList$sampleNamesMakeNames) | |
| 58 } | |
| 59 } | |
| 60 rm(image) | |
| 61 xdata <- xdata_merged; rm(xdata_merged) | |
| 62 singlefile <- singlefile_merged; rm(singlefile_merged) | |
| 63 md5sumList <- md5sumList_merged; rm(md5sumList_merged) | |
| 64 sampleNamesList <- sampleNamesList_merged; rm(sampleNamesList_merged) | |
| 65 | |
| 66 if (!is.null(args$sampleMetadata)) { | |
| 67 cat("\tXSET PHENODATA SETTING...\n") | |
| 68 sampleMetadataFile <- args$sampleMetadata | |
| 69 sampleMetadata <- getDataFrameFromFile(sampleMetadataFile, header=F) | |
| 70 xdata@phenoData@data$sample_group=sampleMetadata$V2[match(xdata@phenoData@data$sample_name,sampleMetadata$V1)] | |
| 71 | |
| 72 if (any(is.na(pData(xdata)$sample_group))) { | |
| 73 sample_missing <- pData(xdata)$sample_name[is.na(pData(xdata)$sample_group)] | |
| 74 error_message <- paste("Those samples are missing in your sampleMetadata:", paste(sample_missing, collapse=" ")) | |
| 75 print(error_message) | |
| 76 stop(error_message) | |
| 77 } | |
| 78 } | |
| 79 return(list("xdata"=xdata, "singlefile"=singlefile, "md5sumList"=md5sumList,"sampleNamesList"=sampleNamesList)) | |
| 80 } | |
| 81 | |
| 82 #@author G. Le Corguille | |
| 83 # This function convert if it is required the Retention Time in minutes | |
| 84 RTSecondToMinute <- function(variableMetadata, convertRTMinute) { | |
| 85 if (convertRTMinute){ | |
| 86 #converting the retention times (seconds) into minutes | |
| 87 print("converting the retention times into minutes in the variableMetadata") | |
| 88 variableMetadata[,"rt"] <- variableMetadata[,"rt"]/60 | |
| 89 variableMetadata[,"rtmin"] <- variableMetadata[,"rtmin"]/60 | |
| 90 variableMetadata[,"rtmax"] <- variableMetadata[,"rtmax"]/60 | |
| 91 } | |
| 92 return (variableMetadata) | |
| 93 } | |
| 94 | |
| 95 #@author G. Le Corguille | |
| 96 # This function format ions identifiers | |
| 97 formatIonIdentifiers <- function(variableMetadata, numDigitsRT=0, numDigitsMZ=0) { | |
| 98 splitDeco <- strsplit(as.character(variableMetadata$name),"_") | |
| 99 idsDeco <- sapply(splitDeco, function(x) { deco=unlist(x)[2]; if (is.na(deco)) return ("") else return(paste0("_",deco)) }) | |
| 100 namecustom <- make.unique(paste0("M",round(variableMetadata[,"mz"],numDigitsMZ),"T",round(variableMetadata[,"rt"],numDigitsRT),idsDeco)) | |
| 101 variableMetadata <- cbind(name=variableMetadata$name, namecustom=namecustom, variableMetadata[,!(colnames(variableMetadata) %in% c("name"))]) | |
| 102 return(variableMetadata) | |
| 103 } | |
| 104 | |
| 105 #@author G. Le Corguille | |
| 106 # Draw the plotChromPeakDensity 3 per page in a pdf file | |
| 107 getPlotChromPeakDensity <- function(xdata, mzdigit=4) { | |
| 108 pdf(file="plotChromPeakDensity.pdf", width=16, height=12) | |
| 109 | |
| 110 par(mfrow = c(3, 1), mar = c(4, 4, 1, 0.5)) | |
| 111 | |
| 112 group_colors <- brewer.pal(3, "Set1")[1:length(unique(xdata$sample_group))] | |
| 113 names(group_colors) <- unique(xdata$sample_group) | |
| 114 | |
| 115 xlim <- c(min(featureDefinitions(xdata)$rtmin), max(featureDefinitions(xdata)$rtmax)) | |
| 116 for (i in 1:nrow(featureDefinitions(xdata))) { | |
| 117 mzmin = featureDefinitions(xdata)[i,]$mzmin | |
| 118 mzmax = featureDefinitions(xdata)[i,]$mzmax | |
| 119 plotChromPeakDensity(xdata, mz=c(mzmin,mzmax), col=group_colors, pch=16, xlim=xlim, main=paste(round(mzmin,mzdigit),round(mzmax,mzdigit))) | |
| 120 legend("topright", legend=names(group_colors), col=group_colors, cex=0.8, lty=1) | |
| 121 } | |
| 122 | |
| 123 dev.off() | |
| 124 } | |
| 125 | |
| 126 #@author G. Le Corguille | |
| 127 # Draw the plotChromPeakDensity 3 per page in a pdf file | |
| 128 getPlotAdjustedRtime <- function(xdata) { | |
| 129 | |
| 130 pdf(file="raw_vs_adjusted_rt.pdf", width=16, height=12) | |
| 131 | |
| 132 # Color by group | |
| 133 group_colors <- brewer.pal(3, "Set1")[1:length(unique(xdata$sample_group))] | |
| 134 if (length(group_colors) > 1) { | |
| 135 names(group_colors) <- unique(xdata$sample_group) | |
| 136 plotAdjustedRtime(xdata, col = group_colors[xdata$sample_group]) | |
| 137 legend("topright", legend=names(group_colors), col=group_colors, cex=0.8, lty=1) | |
| 138 } | |
| 139 | |
| 140 # Color by sample | |
| 141 plotAdjustedRtime(xdata, col = rainbow(length(xdata@phenoData@data$sample_name))) | |
| 142 legend("topright", legend=xdata@phenoData@data$sample_name, col=rainbow(length(xdata@phenoData@data$sample_name)), cex=0.8, lty=1) | |
| 143 | |
| 144 dev.off() | |
| 145 } | |
| 146 | |
| 147 #@author G. Le Corguille | |
| 148 # value: intensity values to be used into, maxo or intb | |
| 149 getPeaklistW4M <- function(xdata, intval="into", convertRTMinute=F, numDigitsMZ=4, numDigitsRT=0, variableMetadataOutput, dataMatrixOutput) { | |
| 150 dataMatrix <- featureValues(xdata, method="medret", value=intval) | |
| 151 colnames(dataMatrix) <- tools::file_path_sans_ext(colnames(dataMatrix)) | |
| 152 dataMatrix = cbind(name=groupnamesW4M(xdata), dataMatrix) | |
| 153 variableMetadata <- featureDefinitions(xdata) | |
| 154 colnames(variableMetadata)[1] = "mz"; colnames(variableMetadata)[4] = "rt" | |
| 155 variableMetadata = data.frame(name=groupnamesW4M(xdata), variableMetadata) | |
| 156 | |
| 157 variableMetadata <- RTSecondToMinute(variableMetadata, convertRTMinute) | |
| 158 variableMetadata <- formatIonIdentifiers(variableMetadata, numDigitsRT=numDigitsRT, numDigitsMZ=numDigitsMZ) | |
| 159 | |
| 160 write.table(variableMetadata, file=variableMetadataOutput,sep="\t",quote=F,row.names=F) | |
| 161 write.table(dataMatrix, file=dataMatrixOutput,sep="\t",quote=F,row.names=F) | |
| 162 | |
| 163 } | |
| 164 | |
| 165 #@author G. Le Corguille | |
| 166 # It allow different of field separators | |
| 167 getDataFrameFromFile <- function(filename, header=T) { | |
| 168 myDataFrame <- read.table(filename, header=header, sep=";", stringsAsFactors=F) | |
| 169 if (ncol(myDataFrame) < 2) myDataFrame <- read.table(filename, header=header, sep="\t", stringsAsFactors=F) | |
| 170 if (ncol(myDataFrame) < 2) myDataFrame <- read.table(filename, header=header, sep=",", stringsAsFactors=F) | |
| 171 if (ncol(myDataFrame) < 2) { | |
| 172 error_message="Your tabular file seems not well formatted. The column separators accepted are ; , and tabulation" | |
| 173 print(error_message) | |
| 174 stop(error_message) | |
| 175 } | |
| 176 return(myDataFrame) | |
| 177 } | |
| 178 | |
| 179 getPlotChromatogram <- function(xdata, pdfname="Chromatogram.pdf", aggregationFun = "max") { | |
| 180 | |
| 181 chrom <- chromatogram(xdata, aggregationFun = aggregationFun) | |
| 182 if (aggregationFun == "sum") | |
| 183 type="Total Ion Chromatograms" | |
| 184 else | |
| 185 type="Base Peak Intensity Chromatograms" | |
| 186 | |
| 187 adjusted="Raw" | |
| 188 if (hasAdjustedRtime(xdata)) | |
| 189 adjusted="Adjusted" | |
| 190 | |
| 191 main <- paste(type,":",adjusted,"data") | |
| 192 | |
| 193 pdf(pdfname, width=16, height=10) | |
| 194 | |
| 195 # Color by group | |
| 196 group_colors <- brewer.pal(3, "Set1")[1:length(unique(xdata$sample_group))] | |
| 197 if (length(group_colors) > 1) { | |
| 198 names(group_colors) <- unique(xdata$sample_group) | |
| 199 plot(chrom, col = group_colors[chrom$sample_group], main=main) | |
| 200 legend("topright", legend=names(group_colors), col=group_colors, cex=0.8, lty=1) | |
| 201 } | |
| 202 | |
| 203 # Color by sample | |
| 204 plot(chrom, col = rainbow(length(xdata@phenoData@data$sample_name)), main=main) | |
| 205 legend("topright", legend=xdata@phenoData@data$sample_name, col=rainbow(length(xdata@phenoData@data$sample_name)), cex=0.8, lty=1) | |
| 206 | |
| 207 dev.off() | |
| 208 } | |
| 209 | |
| 210 #@author G. Le Corguille | |
| 211 getPlotTICs <- function(xdata, pdfname="TICs.pdf") { | |
| 212 getPlotChromatogram(xdata, pdfname, aggregationFun = "sum") | |
| 213 } | |
| 214 | |
| 215 #@author G. Le Corguille | |
| 216 getPlotBPIs <- function(xdata, pdfname="BPIs.pdf") { | |
| 217 getPlotChromatogram(xdata, pdfname, aggregationFun = "max") | |
| 218 } | |
| 219 | |
| 220 | |
| 221 # Get the polarities from all the samples of a condition | |
| 222 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM | |
| 223 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM | |
| 224 getSampleMetadata <- function(xdata=NULL, sampleMetadataOutput="sampleMetadata.tsv") { | |
| 225 cat("Creating the sampleMetadata file...\n") | |
| 226 | |
| 227 #Create the sampleMetada dataframe | |
| 228 sampleMetadata <- xdata@phenoData@data | |
| 229 rownames(sampleMetadata) <- NULL | |
| 230 colnames(sampleMetadata) <- c("sampleMetadata", "class") | |
| 231 | |
| 232 sampleNamesOrigin <- sampleMetadata$sampleMetadata | |
| 233 sampleNamesMakeNames <- make.names(sampleNamesOrigin) | |
| 234 | |
| 235 if (any(duplicated(sampleNamesMakeNames))) { | |
| 236 write("\n\nERROR: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names().\nIn your case, at least two columns after the renaming obtain the same name, thus XCMS will collapse those columns per name.", stderr()) | |
| 237 for (sampleName in sampleNamesOrigin) { | |
| 238 write(paste(sampleName,"\t->\t",make.names(sampleName)),stderr()) | |
| 239 } | |
| 240 stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.") | |
| 241 } | |
| 242 | |
| 243 if (!all(sampleNamesOrigin == sampleNamesMakeNames)) { | |
| 244 cat("\n\nWARNING: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names()\nIn your case, one or more sample names will be renamed in the sampleMetadata and dataMatrix files:\n") | |
| 245 for (sampleName in sampleNamesOrigin) { | |
| 246 cat(paste(sampleName,"\t->\t",make.names(sampleName),"\n")) | |
| 247 } | |
| 248 } | |
| 249 | |
| 250 sampleMetadata$sampleMetadata <- sampleNamesMakeNames | |
| 251 | |
| 252 | |
| 253 #For each sample file, the following actions are done | |
| 254 for (fileIdx in 1:length(fileNames(xdata))) { | |
| 255 #Check if the file is in the CDF format | |
| 256 if (!mzR:::netCDFIsFile(fileNames(xdata))) { | |
| 257 | |
| 258 # If the column isn't exist, with add one filled with NA | |
| 259 if (is.null(sampleMetadata$polarity)) sampleMetadata$polarity <- NA | |
| 260 | |
| 261 #Extract the polarity (a list of polarities) | |
| 262 polarity <- fData(xdata)[fData(xdata)$fileIdx == fileIdx,"polarity"] | |
| 263 #Verify if all the scans have the same polarity | |
| 264 uniq_list <- unique(polarity) | |
| 265 if (length(uniq_list)>1){ | |
| 266 polarity <- "mixed" | |
| 267 } else { | |
| 268 polarity <- as.character(uniq_list) | |
| 269 } | |
| 270 | |
| 271 #Set the polarity attribute | |
| 272 sampleMetadata$polarity[fileIdx] <- polarity | |
| 273 } | |
| 274 | |
| 275 } | |
| 276 | |
| 277 write.table(sampleMetadata, sep="\t", quote=FALSE, row.names=FALSE, file=sampleMetadataOutput) | |
| 278 | |
| 279 return(list("sampleNamesOrigin"=sampleNamesOrigin, "sampleNamesMakeNames"=sampleNamesMakeNames)) | |
| 280 | |
| 281 } | |
| 282 | |
| 283 | |
| 284 # This function check if xcms will found all the files | |
| 285 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM | |
| 286 checkFilesCompatibilityWithXcms <- function(directory) { | |
| 287 cat("Checking files filenames compatibilities with xmcs...\n") | |
| 288 # WHAT XCMS WILL FIND | |
| 289 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") | |
| 290 filepattern <- paste(paste("\\.", filepattern, "$", sep=""),collapse="|") | |
| 291 info <- file.info(directory) | |
| 292 listed <- list.files(directory[info$isdir], pattern=filepattern, recursive=TRUE, full.names=TRUE) | |
| 293 files <- c(directory[!info$isdir], listed) | |
| 294 files_abs <- file.path(getwd(), files) | |
| 295 exists <- file.exists(files_abs) | |
| 296 files[exists] <- files_abs[exists] | |
| 297 files[exists] <- sub("//","/",files[exists]) | |
| 298 | |
| 299 # WHAT IS ON THE FILESYSTEM | |
| 300 filesystem_filepaths <- system(paste0("find \"$PWD/",directory,"\" -not -name '\\.*' -not -path '*conda-env*' -type f -name \"*\""), intern=T) | |
| 301 filesystem_filepaths <- filesystem_filepaths[grep(filepattern, filesystem_filepaths, perl=T)] | |
| 302 | |
| 303 # COMPARISON | |
| 304 if (!is.na(table(filesystem_filepaths %in% files)["FALSE"])) { | |
| 305 write("\n\nERROR: List of the files which will not be imported by xcmsSet",stderr()) | |
| 306 write(filesystem_filepaths[!(filesystem_filepaths %in% files)],stderr()) | |
| 307 stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.") | |
| 308 } | |
| 309 } | |
| 310 | |
| 311 | |
| 312 #This function list the compatible files within the directory as xcms did | |
| 313 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM | |
| 314 getMSFiles <- function (directory) { | |
| 315 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") | |
| 316 filepattern <- paste(paste("\\.", filepattern, "$", sep=""),collapse="|") | |
| 317 info <- file.info(directory) | |
| 318 listed <- list.files(directory[info$isdir], pattern=filepattern,recursive=TRUE, full.names=TRUE) | |
| 319 files <- c(directory[!info$isdir], listed) | |
| 320 exists <- file.exists(files) | |
| 321 files <- files[exists] | |
| 322 return(files) | |
| 323 } | |
| 324 | |
| 325 # This function check if XML contains special caracters. It also checks integrity and completness. | |
| 326 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM | |
| 327 checkXmlStructure <- function (directory) { | |
| 328 cat("Checking XML structure...\n") | |
| 329 | |
| 330 cmd <- paste0("IFS=$'\n'; for xml in $(find '",directory,"' -not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'); do if [ $(xmllint --nonet --noout \"$xml\" 2> /dev/null; echo $?) -gt 0 ]; then echo $xml;fi; done;") | |
| 331 capture <- system(cmd, intern=TRUE) | |
| 332 | |
| 333 if (length(capture)>0){ | |
| 334 #message=paste("The following mzXML or mzML file is incorrect, please check these files first:",capture) | |
| 335 write("\n\nERROR: The following mzXML or mzML file(s) are incorrect, please check these files first:", stderr()) | |
| 336 write(capture, stderr()) | |
| 337 stop("ERROR: xcmsSet cannot continue with incorrect mzXML or mzML files") | |
| 338 } | |
| 339 | |
| 340 } | |
| 341 | |
| 342 | |
| 343 # This function check if XML contain special characters | |
| 344 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM | |
| 345 deleteXmlBadCharacters<- function (directory) { | |
| 346 cat("Checking Non ASCII characters in the XML...\n") | |
| 347 | |
| 348 processed <- F | |
| 349 l <- system( paste0("find '",directory, "' -not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'"), intern=TRUE) | |
| 350 for (i in l){ | |
| 351 cmd <- paste("LC_ALL=C grep '[^ -~]' \"", i, "\"", sep="") | |
| 352 capture <- suppressWarnings(system(cmd, intern=TRUE)) | |
| 353 if (length(capture)>0){ | |
| 354 cmd <- paste("perl -i -pe 's/[^[:ascii:]]//g;'",i) | |
| 355 print( paste("WARNING: Non ASCII characters have been removed from the ",i,"file") ) | |
| 356 c <- system(cmd, intern=TRUE) | |
| 357 capture <- "" | |
| 358 processed <- T | |
| 359 } | |
| 360 } | |
| 361 if (processed) cat("\n\n") | |
| 362 return(processed) | |
| 363 } | |
| 364 | |
| 365 | |
| 366 # This function will compute MD5 checksum to check the data integrity | |
| 367 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr | |
| 368 getMd5sum <- function (directory) { | |
| 369 cat("Compute md5 checksum...\n") | |
| 370 # WHAT XCMS WILL FIND | |
| 371 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") | |
| 372 filepattern <- paste(paste("\\.", filepattern, "$", sep=""),collapse="|") | |
| 373 info <- file.info(directory) | |
| 374 listed <- list.files(directory[info$isdir], pattern=filepattern, recursive=TRUE, full.names=TRUE) | |
| 375 files <- c(directory[!info$isdir], listed) | |
| 376 exists <- file.exists(files) | |
| 377 files <- files[exists] | |
| 378 | |
| 379 library(tools) | |
| 380 | |
| 381 #cat("\n\n") | |
| 382 | |
| 383 return(as.matrix(md5sum(files))) | |
| 384 } | |
| 385 | |
| 386 | |
| 387 # This function get the raw file path from the arguments | |
| 388 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr | |
| 389 getRawfilePathFromArguments <- function(singlefile, zipfile, args) { | |
| 390 if (!is.null(args$zipfile)) zipfile <- args$zipfile | |
| 391 if (!is.null(args$zipfilePositive)) zipfile <- args$zipfilePositive | |
| 392 if (!is.null(args$zipfileNegative)) zipfile <- args$zipfileNegative | |
| 393 | |
| 394 if (!is.null(args$singlefile_galaxyPath)) { | |
| 395 singlefile_galaxyPaths <- args$singlefile_galaxyPath; | |
| 396 singlefile_sampleNames <- args$singlefile_sampleName | |
| 397 } | |
| 398 if (!is.null(args$singlefile_galaxyPathPositive)) { | |
| 399 singlefile_galaxyPaths <- args$singlefile_galaxyPathPositive; | |
| 400 singlefile_sampleNames <- args$singlefile_sampleNamePositive | |
| 401 } | |
| 402 if (!is.null(args$singlefile_galaxyPathNegative)) { | |
| 403 singlefile_galaxyPaths <- args$singlefile_galaxyPathNegative; | |
| 404 singlefile_sampleNames <- args$singlefile_sampleNameNegative | |
| 405 } | |
| 406 if (exists("singlefile_galaxyPaths")){ | |
| 407 singlefile_galaxyPaths <- unlist(strsplit(singlefile_galaxyPaths,"\\|")) | |
| 408 singlefile_sampleNames <- unlist(strsplit(singlefile_sampleNames,"\\|")) | |
| 409 | |
| 410 singlefile <- NULL | |
| 411 for (singlefile_galaxyPath_i in seq(1:length(singlefile_galaxyPaths))) { | |
| 412 singlefile_galaxyPath <- singlefile_galaxyPaths[singlefile_galaxyPath_i] | |
| 413 singlefile_sampleName <- singlefile_sampleNames[singlefile_galaxyPath_i] | |
| 414 singlefile[[singlefile_sampleName]] <- singlefile_galaxyPath | |
| 415 } | |
| 416 } | |
| 417 return(list(zipfile=zipfile, singlefile=singlefile)) | |
| 418 } | |
| 419 | |
| 420 | |
| 421 # This function retrieve the raw file in the working directory | |
| 422 # - if zipfile: unzip the file with its directory tree | |
| 423 # - if singlefiles: set symlink with the good filename | |
| 424 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr | |
| 425 retrieveRawfileInTheWorkingDirectory <- function(singlefile, zipfile) { | |
| 426 if(!is.null(singlefile) && (length("singlefile")>0)) { | |
| 427 for (singlefile_sampleName in names(singlefile)) { | |
| 428 singlefile_galaxyPath <- singlefile[[singlefile_sampleName]] | |
| 429 if(!file.exists(singlefile_galaxyPath)){ | |
| 430 error_message <- paste("Cannot access the sample:",singlefile_sampleName,"located:",singlefile_galaxyPath,". Please, contact your administrator ... if you have one!") | |
| 431 print(error_message); stop(error_message) | |
| 432 } | |
| 433 | |
| 434 if (!suppressWarnings( try (file.link(singlefile_galaxyPath, singlefile_sampleName), silent=T))) | |
| 435 file.copy(singlefile_galaxyPath, singlefile_sampleName) | |
| 436 | |
| 437 } | |
| 438 directory <- "." | |
| 439 | |
| 440 } | |
| 441 if(!is.null(zipfile) && (zipfile != "")) { | |
| 442 if(!file.exists(zipfile)){ | |
| 443 error_message <- paste("Cannot access the Zip file:",zipfile,". Please, contact your administrator ... if you have one!") | |
| 444 print(error_message) | |
| 445 stop(error_message) | |
| 446 } | |
| 447 | |
| 448 #list all file in the zip file | |
| 449 #zip_files <- unzip(zipfile,list=T)[,"Name"] | |
| 450 | |
| 451 #unzip | |
| 452 suppressWarnings(unzip(zipfile, unzip="unzip")) | |
| 453 | |
| 454 #get the directory name | |
| 455 suppressWarnings(filesInZip <- unzip(zipfile, list=T)) | |
| 456 directories <- unique(unlist(lapply(strsplit(filesInZip$Name,"/"), function(x) x[1]))) | |
| 457 directories <- directories[!(directories %in% c("__MACOSX")) & file.info(directories)$isdir] | |
| 458 directory <- "." | |
| 459 if (length(directories) == 1) directory <- directories | |
| 460 | |
| 461 cat("files_root_directory\t",directory,"\n") | |
| 462 | |
| 463 } | |
| 464 return (directory) | |
| 465 } | |
| 466 | |
| 467 | |
| 468 # This function retrieve a xset like object | |
| 469 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr | |
| 470 getxcmsSetObject <- function(xobject) { | |
| 471 # XCMS 1.x | |
| 472 if (class(xobject) == "xcmsSet") | |
| 473 return (xobject) | |
| 474 # XCMS 3.x | |
| 475 if (class(xobject) == "XCMSnExp") { | |
| 476 # Get the legacy xcmsSet object | |
| 477 suppressWarnings(xset <- as(xobject, 'xcmsSet')) | |
| 478 sampclass(xset) <- xset@phenoData$sample_group | |
| 479 return (xset) | |
| 480 } | |
| 481 } | |
| 482 | |
| 483 | |
| 484 #@TODO: remove this function as soon as we can use xcms 3.x.x from Bioconductor 3.7 | |
| 485 # https://github.com/sneumann/xcms/issues/250 | |
| 486 groupnamesW4M <- function(xdata, mzdec = 0, rtdec = 0) { | |
| 487 mzfmt <- paste("%.", mzdec, "f", sep = "") | |
| 488 rtfmt <- paste("%.", rtdec, "f", sep = "") | |
| 489 | |
| 490 gnames <- paste("M", sprintf(mzfmt, featureDefinitions(xdata)[,"mzmed"]), "T", | |
| 491 sprintf(rtfmt, featureDefinitions(xdata)[,"rtmed"]), sep = "") | |
| 492 | |
| 493 if (any(dup <- duplicated(gnames))) | |
| 494 for (dupname in unique(gnames[dup])) { | |
| 495 dupidx <- which(gnames == dupname) | |
| 496 gnames[dupidx] <- paste(gnames[dupidx], seq(along = dupidx), sep = "_") | |
| 497 } | |
| 498 | |
| 499 return (gnames) | |
| 500 } | |
| 501 | |
| 502 #@TODO: remove this function as soon as we can use xcms 3.x.x from Bioconductor 3.7 | |
| 503 # https://github.com/sneumann/xcms/issues/247 | |
| 504 .concatenate_XCMSnExp <- function(...) { | |
| 505 x <- list(...) | |
| 506 if (length(x) == 0) | |
| 507 return(NULL) | |
| 508 if (length(x) == 1) | |
| 509 return(x[[1]]) | |
| 510 ## Check that all are XCMSnExp objects. | |
| 511 if (!all(unlist(lapply(x, function(z) is(z, "XCMSnExp"))))) | |
| 512 stop("All passed objects should be 'XCMSnExp' objects") | |
| 513 new_x <- as(.concatenate_OnDiskMSnExp(...), "XCMSnExp") | |
| 514 ## If any of the XCMSnExp has alignment results or detected features drop | |
| 515 ## them! | |
| 516 x <- lapply(x, function(z) { | |
| 517 if (hasAdjustedRtime(z)) { | |
| 518 z <- dropAdjustedRtime(z) | |
| 519 warning("Adjusted retention times found, had to drop them.") | |
| 520 } | |
| 521 if (hasFeatures(z)) { | |
| 522 z <- dropFeatureDefinitions(z) | |
| 523 warning("Feature definitions found, had to drop them.") | |
| 524 } | |
| 525 z | |
| 526 }) | |
| 527 ## Combine peaks | |
| 528 fls <- lapply(x, fileNames) | |
| 529 startidx <- cumsum(lengths(fls)) | |
| 530 pks <- lapply(x, chromPeaks) | |
| 531 procH <- lapply(x, processHistory) | |
| 532 for (i in 2:length(fls)) { | |
| 533 pks[[i]][, "sample"] <- pks[[i]][, "sample"] + startidx[i - 1] | |
| 534 procH[[i]] <- lapply(procH[[i]], function(z) { | |
| 535 z@fileIndex <- as.integer(z@fileIndex + startidx[i - 1]) | |
| 536 z | |
| 537 }) | |
| 538 } | |
| 539 pks <- do.call(rbind, pks) | |
| 540 new_x@.processHistory <- unlist(procH) | |
| 541 chromPeaks(new_x) <- pks | |
| 542 if (validObject(new_x)) | |
| 543 new_x | |
| 544 } | |
| 545 | |
| 546 #@TODO: remove this function as soon as we can use xcms 3.x.x from Bioconductor 3.7 | |
| 547 # https://github.com/sneumann/xcms/issues/247 | |
| 548 .concatenate_OnDiskMSnExp <- function(...) { | |
| 549 x <- list(...) | |
| 550 if (length(x) == 0) | |
| 551 return(NULL) | |
| 552 if (length(x) == 1) | |
| 553 return(x[[1]]) | |
| 554 ## Check that all are XCMSnExp objects. | |
| 555 if (!all(unlist(lapply(x, function(z) is(z, "OnDiskMSnExp"))))) | |
| 556 stop("All passed objects should be 'OnDiskMSnExp' objects") | |
| 557 ## Check processingQueue | |
| 558 procQ <- lapply(x, function(z) z@spectraProcessingQueue) | |
| 559 new_procQ <- procQ[[1]] | |
| 560 is_ok <- unlist(lapply(procQ, function(z) | |
| 561 !is.character(all.equal(new_procQ, z)) | |
| 562 )) | |
| 563 if (any(!is_ok)) { | |
| 564 warning("Processing queues from the submitted objects differ! ", | |
| 565 "Dropping the processing queue.") | |
| 566 new_procQ <- list() | |
| 567 } | |
| 568 ## processingData | |
| 569 fls <- lapply(x, function(z) z@processingData@files) | |
| 570 startidx <- cumsum(lengths(fls)) | |
| 571 ## featureData | |
| 572 featd <- lapply(x, fData) | |
| 573 ## Have to update the file index and the spectrum names. | |
| 574 for (i in 2:length(featd)) { | |
| 575 featd[[i]]$fileIdx <- featd[[i]]$fileIdx + startidx[i - 1] | |
| 576 rownames(featd[[i]]) <- MSnbase:::formatFileSpectrumNames( | |
| 577 fileIds = featd[[i]]$fileIdx, | |
| 578 spectrumIds = featd[[i]]$spIdx, | |
| 579 nSpectra = nrow(featd[[i]]), | |
| 580 nFiles = length(unlist(fls)) | |
| 581 ) | |
| 582 } | |
| 583 featd <- do.call(rbind, featd) | |
| 584 featd$spectrum <- 1:nrow(featd) | |
| 585 ## experimentData | |
| 586 expdata <- lapply(x, function(z) { | |
| 587 ed <- z@experimentData | |
| 588 data.frame(instrumentManufacturer = ed@instrumentManufacturer, | |
| 589 instrumentModel = ed@instrumentModel, | |
| 590 ionSource = ed@ionSource, | |
| 591 analyser = ed@analyser, | |
| 592 detectorType = ed@detectorType, | |
| 593 stringsAsFactors = FALSE) | |
| 594 }) | |
| 595 expdata <- do.call(rbind, expdata) | |
| 596 expdata <- new("MIAPE", | |
| 597 instrumentManufacturer = expdata$instrumentManufacturer, | |
| 598 instrumentModel = expdata$instrumentModel, | |
| 599 ionSource = expdata$ionSource, | |
| 600 analyser = expdata$analyser, | |
| 601 detectorType = expdata$detectorType) | |
| 602 | |
| 603 ## protocolData | |
| 604 protodata <- lapply(x, function(z) z@protocolData) | |
| 605 if (any(unlist(lapply(protodata, nrow)) > 0)) | |
| 606 warning("Found non-empty protocol data, but merging protocol data is", | |
| 607 " currently not supported. Skipped.") | |
| 608 ## phenoData | |
| 609 pdata <- do.call(rbind, lapply(x, pData)) | |
| 610 res <- new( | |
| 611 "OnDiskMSnExp", | |
| 612 phenoData = new("NAnnotatedDataFrame", data = pdata), | |
| 613 featureData = new("AnnotatedDataFrame", featd), | |
| 614 processingData = new("MSnProcess", | |
| 615 processing = paste0("Concatenated [", date(), "]"), | |
| 616 files = unlist(fls), smoothed = NA), | |
| 617 experimentData = expdata, | |
| 618 spectraProcessingQueue = new_procQ) | |
| 619 if (validObject(res)) | |
| 620 res | |
| 621 } | |
| 622 | |
| 623 #@TODO: remove this function as soon as we can use xcms 3.x.x from Bioconductor 3.7 | |
| 624 # https://github.com/sneumann/xcms/issues/247 | |
| 625 c.XCMSnExp <- function(...) { | |
| 626 .concatenate_XCMSnExp(...) | |
| 627 } | |
| 628 | |
| 629 #@TODO: remove this function as soon as we can use xcms 3.x.x from Bioconductor 3.7 | |
| 630 # https://github.com/sneumann/xcms/issues/247 | |
| 631 c.MSnbase <- function(...) { | |
| 632 .concatenate_OnDiskMSnExp(...) | |
| 633 } | 
