Mercurial > repos > lecorguille > xcms_fillpeaks
comparison xcms.r @ 16:5f61570177e2 draft
planemo upload for repository https://github.com/workflow4metabolomics/xcms commit 22c4e92909198328fc7439ff47e4546a273eb907
| author | lecorguille | 
|---|---|
| date | Sun, 05 Feb 2017 08:57:59 -0500 | 
| parents | 1c78d61fd646 | 
| children | 98d0d4b62503 | 
   comparison
  equal
  deleted
  inserted
  replaced
| 15:1c78d61fd646 | 16:5f61570177e2 | 
|---|---|
| 13 # ----- PACKAGE ----- | 13 # ----- PACKAGE ----- | 
| 14 cat("\tPACKAGE INFO\n") | 14 cat("\tPACKAGE INFO\n") | 
| 15 #pkgs=c("xcms","batch") | 15 #pkgs=c("xcms","batch") | 
| 16 pkgs=c("parallel","BiocGenerics", "Biobase", "Rcpp", "mzR", "xcms","snow","batch") | 16 pkgs=c("parallel","BiocGenerics", "Biobase", "Rcpp", "mzR", "xcms","snow","batch") | 
| 17 for(pkg in pkgs) { | 17 for(pkg in pkgs) { | 
| 18 suppressPackageStartupMessages( stopifnot( library(pkg, quietly=TRUE, logical.return=TRUE, character.only=TRUE))) | 18 suppressPackageStartupMessages( stopifnot( library(pkg, quietly=TRUE, logical.return=TRUE, character.only=TRUE))) | 
| 19 cat(pkg,"\t",as.character(packageVersion(pkg)),"\n",sep="") | 19 cat(pkg,"\t",as.character(packageVersion(pkg)),"\n",sep="") | 
| 20 } | 20 } | 
| 21 source_local <- function(fname){ argv <- commandArgs(trailingOnly = FALSE); base_dir <- dirname(substring(argv[grep("--file=", argv)], 8)); source(paste(base_dir, fname, sep="/")) } | 21 source_local <- function(fname){ argv <- commandArgs(trailingOnly = FALSE); base_dir <- dirname(substring(argv[grep("--file=", argv)], 8)); source(paste(base_dir, fname, sep="/")) } | 
| 22 cat("\n\n"); | 22 cat("\n\n"); | 
| 23 | 23 | 
| 24 | 24 | 
| 36 # ----- ARGUMENTS PROCESSING ----- | 36 # ----- ARGUMENTS PROCESSING ----- | 
| 37 cat("\tINFILE PROCESSING INFO\n") | 37 cat("\tINFILE PROCESSING INFO\n") | 
| 38 | 38 | 
| 39 #image is an .RData file necessary to use xset variable given by previous tools | 39 #image is an .RData file necessary to use xset variable given by previous tools | 
| 40 if (!is.null(listArguments[["image"]])){ | 40 if (!is.null(listArguments[["image"]])){ | 
| 41 load(listArguments[["image"]]); listArguments[["image"]]=NULL | 41 load(listArguments[["image"]]); listArguments[["image"]]=NULL | 
| 42 } | 42 } | 
| 43 | 43 | 
| 44 #Import the different functions | 44 #Import the different functions | 
| 45 source_local("lib.r") | 45 source_local("lib.r") | 
| 46 | 46 | 
| 59 #saving the common parameters | 59 #saving the common parameters | 
| 60 thefunction = listArguments[["xfunction"]]; listArguments[["xfunction"]]=NULL #delete from the list of arguments | 60 thefunction = listArguments[["xfunction"]]; listArguments[["xfunction"]]=NULL #delete from the list of arguments | 
| 61 | 61 | 
| 62 xsetRdataOutput = paste(thefunction,"RData",sep=".") | 62 xsetRdataOutput = paste(thefunction,"RData",sep=".") | 
| 63 if (!is.null(listArguments[["xsetRdataOutput"]])){ | 63 if (!is.null(listArguments[["xsetRdataOutput"]])){ | 
| 64 xsetRdataOutput = listArguments[["xsetRdataOutput"]]; listArguments[["xsetRdataOutput"]]=NULL | 64 xsetRdataOutput = listArguments[["xsetRdataOutput"]]; listArguments[["xsetRdataOutput"]]=NULL | 
| 65 } | 65 } | 
| 66 | 66 | 
| 67 #saving the specific parameters | 67 #saving the specific parameters | 
| 68 rplotspdf = "Rplots.pdf" | 68 rplotspdf = "Rplots.pdf" | 
| 69 if (!is.null(listArguments[["rplotspdf"]])){ | 69 if (!is.null(listArguments[["rplotspdf"]])){ | 
| 70 rplotspdf = listArguments[["rplotspdf"]]; listArguments[["rplotspdf"]]=NULL | 70 rplotspdf = listArguments[["rplotspdf"]]; listArguments[["rplotspdf"]]=NULL | 
| 71 } | 71 } | 
| 72 sampleMetadataOutput = "sampleMetadata.tsv" | 72 sampleMetadataOutput = "sampleMetadata.tsv" | 
| 73 if (!is.null(listArguments[["sampleMetadataOutput"]])){ | 73 if (!is.null(listArguments[["sampleMetadataOutput"]])){ | 
| 74 sampleMetadataOutput = listArguments[["sampleMetadataOutput"]]; listArguments[["sampleMetadataOutput"]]=NULL | 74 sampleMetadataOutput = listArguments[["sampleMetadataOutput"]]; listArguments[["sampleMetadataOutput"]]=NULL | 
| 75 } | 75 } | 
| 76 variableMetadataOutput = "variableMetadata.tsv" | 76 variableMetadataOutput = "variableMetadata.tsv" | 
| 77 if (!is.null(listArguments[["variableMetadataOutput"]])){ | 77 if (!is.null(listArguments[["variableMetadataOutput"]])){ | 
| 78 variableMetadataOutput = listArguments[["variableMetadataOutput"]]; listArguments[["variableMetadataOutput"]]=NULL | 78 variableMetadataOutput = listArguments[["variableMetadataOutput"]]; listArguments[["variableMetadataOutput"]]=NULL | 
| 79 } | 79 } | 
| 80 dataMatrixOutput = "dataMatrix.tsv" | 80 dataMatrixOutput = "dataMatrix.tsv" | 
| 81 if (!is.null(listArguments[["dataMatrixOutput"]])){ | 81 if (!is.null(listArguments[["dataMatrixOutput"]])){ | 
| 82 dataMatrixOutput = listArguments[["dataMatrixOutput"]]; listArguments[["dataMatrixOutput"]]=NULL | 82 dataMatrixOutput = listArguments[["dataMatrixOutput"]]; listArguments[["dataMatrixOutput"]]=NULL | 
| 83 } | 83 } | 
| 84 if (!is.null(listArguments[["convertRTMinute"]])){ | 84 if (!is.null(listArguments[["convertRTMinute"]])){ | 
| 85 convertRTMinute = listArguments[["convertRTMinute"]]; listArguments[["convertRTMinute"]]=NULL | 85 convertRTMinute = listArguments[["convertRTMinute"]]; listArguments[["convertRTMinute"]]=NULL | 
| 86 } | 86 } | 
| 87 if (!is.null(listArguments[["numDigitsMZ"]])){ | 87 if (!is.null(listArguments[["numDigitsMZ"]])){ | 
| 88 numDigitsMZ = listArguments[["numDigitsMZ"]]; listArguments[["numDigitsMZ"]]=NULL | 88 numDigitsMZ = listArguments[["numDigitsMZ"]]; listArguments[["numDigitsMZ"]]=NULL | 
| 89 } | 89 } | 
| 90 if (!is.null(listArguments[["numDigitsRT"]])){ | 90 if (!is.null(listArguments[["numDigitsRT"]])){ | 
| 91 numDigitsRT = listArguments[["numDigitsRT"]]; listArguments[["numDigitsRT"]]=NULL | 91 numDigitsRT = listArguments[["numDigitsRT"]]; listArguments[["numDigitsRT"]]=NULL | 
| 92 } | 92 } | 
| 93 if (!is.null(listArguments[["intval"]])){ | 93 if (!is.null(listArguments[["intval"]])){ | 
| 94 intval = listArguments[["intval"]]; listArguments[["intval"]]=NULL | 94 intval = listArguments[["intval"]]; listArguments[["intval"]]=NULL | 
| 95 } | 95 } | 
| 96 | 96 | 
| 97 if (thefunction %in% c("xcmsSet","retcor")) { | 97 if (thefunction %in% c("xcmsSet","retcor")) { | 
| 98 ticspdf = listArguments[["ticspdf"]]; listArguments[["ticspdf"]]=NULL | 98 ticspdf = listArguments[["ticspdf"]]; listArguments[["ticspdf"]]=NULL | 
| 99 bicspdf = listArguments[["bicspdf"]]; listArguments[["bicspdf"]]=NULL | 99 bicspdf = listArguments[["bicspdf"]]; listArguments[["bicspdf"]]=NULL | 
| 100 } | 100 } | 
| 101 | 101 | 
| 102 #necessary to unzip .zip file uploaded to Galaxy | 102 #necessary to unzip .zip file uploaded to Galaxy | 
| 103 #thanks to the .zip file it's possible to upload many files at the same time while conserving the tree hierarchy of directories | 103 #thanks to the .zip file it's possible to upload many files at the same time while conserving the tree hierarchy of directories | 
| 104 | 104 | 
| 105 | 105 | 
| 106 if (!is.null(listArguments[["zipfile"]])){ | 106 if (!is.null(listArguments[["zipfile"]])){ | 
| 107 zipfile= listArguments[["zipfile"]]; listArguments[["zipfile"]]=NULL | 107 zipfile= listArguments[["zipfile"]]; listArguments[["zipfile"]]=NULL | 
| 108 } | 108 } | 
| 109 | 109 | 
| 110 if (!is.null(listArguments[["library"]])){ | 110 if (!is.null(listArguments[["singlefile_galaxyPath"]])){ | 
| 111 directory=listArguments[["library"]]; listArguments[["library"]]=NULL | 111 singlefile_galaxyPaths = unlist(strsplit(listArguments[["singlefile_galaxyPath"]],",")); listArguments[["singlefile_galaxyPath"]]=NULL | 
| 112 if(!file.exists(directory)){ | 112 singlefile_sampleNames = unlist(strsplit(listArguments[["singlefile_sampleName"]],",")); listArguments[["singlefile_sampleName"]]=NULL | 
| 113 error_message=paste("Cannot access the directory:",directory,". Please verify if the directory exists or not.") | 113 | 
| 114 print(error_message) | 114 singlefile=NULL | 
| 115 stop(error_message) | 115 for (singlefile_galaxyPath_i in seq(1:length(singlefile_galaxyPaths))) { | 
| 116 } | 116 singlefile_galaxyPath=singlefile_galaxyPaths[singlefile_galaxyPath_i] | 
| 117 singlefile_sampleName=singlefile_sampleNames[singlefile_galaxyPath_i] | |
| 118 singlefile[[singlefile_sampleName]] = singlefile_galaxyPath | |
| 119 } | |
| 117 } | 120 } | 
| 118 | 121 | 
| 119 # We unzip automatically the chromatograms from the zip files. | 122 # We unzip automatically the chromatograms from the zip files. | 
| 120 if (thefunction %in% c("xcmsSet","retcor","fillPeaks")) { | 123 if (thefunction %in% c("xcmsSet","retcor","fillPeaks")) { | 
| 121 if(exists("zipfile") && (zipfile!="")) { | 124 if(exists("singlefile") && (length("singlefile")>0)) { | 
| 122 if(!file.exists(zipfile)){ | 125 for (singlefile_sampleName in names(singlefile)) { | 
| 123 error_message=paste("Cannot access the Zip file:",zipfile,". Please, contact your administrator ... if you have one!") | 126 singlefile_galaxyPath = singlefile[[singlefile_sampleName]] | 
| 124 print(error_message) | 127 if(!file.exists(singlefile_galaxyPath)){ | 
| 125 stop(error_message) | 128 error_message=paste("Cannot access the sample:",singlefile_sampleName,"located:",singlefile_galaxyPath,". Please, contact your administrator ... if you have one!") | 
| 126 } | 129 print(error_message); stop(error_message) | 
| 127 | 130 } | 
| 128 #list all file in the zip file | 131 | 
| 129 #zip_files=unzip(zipfile,list=T)[,"Name"] | 132 file.symlink(singlefile_galaxyPath,singlefile_sampleName) | 
| 130 | 133 } | 
| 131 | 134 directory = "." | 
| 132 #unzip | 135 | 
| 133 suppressWarnings(unzip(zipfile, unzip="unzip")) | 136 md5sumList=list("origin"=getMd5sum(directory)) | 
| 134 | 137 | 
| 135 #get the directory name | 138 } | 
| 136 filesInZip=unzip(zipfile, list=T); | 139 if(exists("zipfile") && (zipfile!="")) { | 
| 137 directories=unique(unlist(lapply(strsplit(filesInZip$Name,"/"), function(x) x[1]))); | 140 if(!file.exists(zipfile)){ | 
| 138 directories=directories[!(directories %in% c("__MACOSX")) & file.info(directories)$isdir] | 141 error_message=paste("Cannot access the Zip file:",zipfile,". Please, contact your administrator ... if you have one!") | 
| 139 directory = "." | 142 print(error_message) | 
| 140 if (length(directories) == 1) directory = directories | 143 stop(error_message) | 
| 141 | 144 } | 
| 142 cat("files_root_directory\t",directory,"\n") | 145 | 
| 143 | 146 #list all file in the zip file | 
| 144 # | 147 #zip_files=unzip(zipfile,list=T)[,"Name"] | 
| 145 md5sumList=list("origin"=getMd5sum(directory)) | 148 | 
| 146 | 149 #unzip | 
| 147 # Check and fix if there are non ASCII characters. If so, they will be removed from the *mzXML mzML files. | 150 suppressWarnings(unzip(zipfile, unzip="unzip")) | 
| 148 # Remove because can create issue with some clean files | 151 | 
| 149 #@TODO: fix me | 152 #get the directory name | 
| 150 #if (deleteXmlBadCharacters(directory)) { | 153 filesInZip=unzip(zipfile, list=T); | 
| 151 # md5sumList=list("removalBadCharacters"=getMd5sum(directory)) | 154 directories=unique(unlist(lapply(strsplit(filesInZip$Name,"/"), function(x) x[1]))); | 
| 152 #} | 155 directories=directories[!(directories %in% c("__MACOSX")) & file.info(directories)$isdir] | 
| 153 | 156 directory = "." | 
| 154 } | 157 if (length(directories) == 1) directory = directories | 
| 158 | |
| 159 cat("files_root_directory\t",directory,"\n") | |
| 160 | |
| 161 md5sumList=list("origin"=getMd5sum(directory)) | |
| 162 } | |
| 155 } | 163 } | 
| 156 | 164 | 
| 157 #addition of the directory to the list of arguments in the first position | 165 #addition of the directory to the list of arguments in the first position | 
| 158 if (thefunction == "xcmsSet") { | 166 if (thefunction == "xcmsSet") { | 
| 159 checkXmlStructure(directory) | 167 checkXmlStructure(directory) | 
| 160 checkFilesCompatibilityWithXcms(directory) | 168 checkFilesCompatibilityWithXcms(directory) | 
| 161 listArguments=append(directory, listArguments) | 169 listArguments=append(directory, listArguments) | 
| 162 } | 170 } | 
| 163 | 171 | 
| 164 | 172 | 
| 165 #addition of xset object to the list of arguments in the first position | 173 #addition of xset object to the list of arguments in the first position | 
| 166 if (exists("xset")){ | 174 if (exists("xset")){ | 
| 167 listArguments=append(list(xset), listArguments) | 175 listArguments=append(list(xset), listArguments) | 
| 168 } | 176 } | 
| 169 | 177 | 
| 170 cat("\n\n") | 178 cat("\n\n") | 
| 171 | |
| 172 | |
| 173 | 179 | 
| 174 | 180 | 
| 175 | 181 | 
| 176 | 182 | 
| 177 # ----- MAIN PROCESSING INFO ----- | 183 # ----- MAIN PROCESSING INFO ----- | 
| 179 | 185 | 
| 180 | 186 | 
| 181 #Verification of a group step before doing the fillpeaks job. | 187 #Verification of a group step before doing the fillpeaks job. | 
| 182 | 188 | 
| 183 if (thefunction == "fillPeaks") { | 189 if (thefunction == "fillPeaks") { | 
| 184 res=try(is.null(groupnames(xset))) | 190 res=try(is.null(groupnames(xset))) | 
| 185 if (class(res) == "try-error"){ | 191 if (class(res) == "try-error"){ | 
| 186 error<-geterrmessage() | 192 error<-geterrmessage() | 
| 187 write(error, stderr()) | 193 write(error, stderr()) | 
| 188 stop("You must always do a group step after a retcor. Otherwise it won't work for the fillpeaks step") | 194 stop("You must always do a group step after a retcor. Otherwise it won't work for the fillpeaks step") | 
| 189 } | 195 } | 
| 190 | 196 | 
| 191 } | 197 } | 
| 192 | 198 | 
| 193 #change the default display settings | 199 #change the default display settings | 
| 194 #dev.new(file="Rplots.pdf", width=16, height=12) | 200 #dev.new(file="Rplots.pdf", width=16, height=12) | 
| 195 pdf(file=rplotspdf, width=16, height=12) | 201 pdf(file=rplotspdf, width=16, height=12) | 
| 196 if (thefunction == "group") { | 202 if (thefunction == "group") { | 
| 197 par(mfrow=c(2,2)) | 203 par(mfrow=c(2,2)) | 
| 198 } | 204 } | 
| 199 #else if (thefunction == "retcor") { | 205 #else if (thefunction == "retcor") { | 
| 200 #try to change the legend display | 206 #try to change the legend display | 
| 201 # par(xpd=NA) | 207 # par(xpd=NA) | 
| 202 # par(xpd=T, mar=par()$mar+c(0,0,0,4)) | 208 # par(xpd=T, mar=par()$mar+c(0,0,0,4)) | 
| 213 | 219 | 
| 214 dev.off() #dev.new(file="Rplots.pdf", width=16, height=12) | 220 dev.off() #dev.new(file="Rplots.pdf", width=16, height=12) | 
| 215 | 221 | 
| 216 if (thefunction == "xcmsSet") { | 222 if (thefunction == "xcmsSet") { | 
| 217 | 223 | 
| 218 #transform the files' absolute paths into relative paths | 224 #transform the files' absolute paths into relative paths | 
| 219 xset@filepaths<-sub(paste(getwd(),"/",sep="") ,"", xset@filepaths) | 225 xset@filepaths<-sub(paste(getwd(),"/",sep="") ,"", xset@filepaths) | 
| 220 | 226 | 
| 221 if(exists("zipfile") && (zipfile!="")) { | 227 if(exists("zipfile") && (zipfile!="")) { | 
| 222 | 228 | 
| 223 #Modify the samples names (erase the path) | 229 #Modify the samples names (erase the path) | 
| 224 for(i in 1:length(sampnames(xset))){ | 230 for(i in 1:length(sampnames(xset))){ | 
| 225 | 231 | 
| 226 sample_name=unlist(strsplit(sampnames(xset)[i], "/")) | 232 sample_name=unlist(strsplit(sampnames(xset)[i], "/")) | 
| 227 sample_name=sample_name[length(sample_name)] | 233 sample_name=sample_name[length(sample_name)] | 
| 228 sample_name= unlist(strsplit(sample_name,"[.]"))[1] | 234 sample_name= unlist(strsplit(sample_name,"[.]"))[1] | 
| 229 sampnames(xset)[i]=sample_name | 235 sampnames(xset)[i]=sample_name | 
| 230 | 236 | 
| 231 } | 237 } | 
| 232 | 238 | 
| 233 } | 239 } | 
| 234 | 240 | 
| 235 } | 241 } | 
| 236 | 242 | 
| 237 # -- TIC -- | 243 # -- TIC -- | 
| 238 if (thefunction == "xcmsSet") { | 244 if (thefunction == "xcmsSet") { | 
| 239 cat("\t\tGET TIC GRAPH\n") | 245 cat("\t\tGET TIC GRAPH\n") | 
| 240 sampleNamesList = getSampleMetadata(xcmsSet=xset, sampleMetadataOutput=sampleMetadataOutput) | 246 sampleNamesList = getSampleMetadata(xcmsSet=xset, sampleMetadataOutput=sampleMetadataOutput) | 
| 241 getTICs(xcmsSet=xset, pdfname=ticspdf,rt="raw") | 247 getTICs(xcmsSet=xset, pdfname=ticspdf,rt="raw") | 
| 242 getBPCs(xcmsSet=xset,rt="raw",pdfname=bicspdf) | 248 getBPCs(xcmsSet=xset,rt="raw",pdfname=bicspdf) | 
| 243 } else if (thefunction == "retcor") { | 249 } else if (thefunction == "retcor") { | 
| 244 cat("\t\tGET TIC GRAPH\n") | 250 cat("\t\tGET TIC GRAPH\n") | 
| 245 getTICs(xcmsSet=xset, pdfname=ticspdf,rt="corrected") | 251 getTICs(xcmsSet=xset, pdfname=ticspdf,rt="corrected") | 
| 246 getBPCs(xcmsSet=xset,rt="corrected",pdfname=bicspdf) | 252 getBPCs(xcmsSet=xset,rt="corrected",pdfname=bicspdf) | 
| 247 } | 253 } | 
| 248 | 254 | 
| 249 if (thefunction == "fillPeaks") { | 255 if (thefunction == "fillPeaks") { | 
| 250 cat("\t\tGET THE PEAK LIST\n") | 256 cat("\t\tGET THE PEAK LIST\n") | 
| 251 getPeaklistW4M(xset,intval,convertRTMinute,numDigitsMZ,numDigitsRT,variableMetadataOutput,dataMatrixOutput) | 257 getPeaklistW4M(xset,intval,convertRTMinute,numDigitsMZ,numDigitsRT,variableMetadataOutput,dataMatrixOutput) | 
| 252 } | 258 } | 
| 253 | 259 | 
| 254 | 260 | 
| 255 cat("\n\n") | 261 cat("\n\n") | 
| 256 | 262 | 
| 260 print(xset) | 266 print(xset) | 
| 261 #delete the parameters to avoid the passage to the next tool in .RData image | 267 #delete the parameters to avoid the passage to the next tool in .RData image | 
| 262 | 268 | 
| 263 | 269 | 
| 264 #saving R data in .Rdata file to save the variables used in the present tool | 270 #saving R data in .Rdata file to save the variables used in the present tool | 
| 265 objects2save = c("xset","zipfile","listOFlistArguments","md5sumList","sampleNamesList") | 271 objects2save = c("xset","zipfile","singlefile","listOFlistArguments","md5sumList","sampleNamesList") | 
| 266 save(list=objects2save[objects2save %in% ls()], file=xsetRdataOutput) | 272 save(list=objects2save[objects2save %in% ls()], file=xsetRdataOutput) | 
| 267 | 273 | 
| 268 cat("\n\n") | 274 cat("\n\n") | 
| 269 | 275 | 
| 270 | 276 | 
