comparison xcms.r @ 16:20a75ba4345b draft

planemo upload for repository https://github.com/workflow4metabolomics/xcms commit 22c4e92909198328fc7439ff47e4546a273eb907
author lecorguille
date Sun, 05 Feb 2017 08:57:02 -0500
parents c04568596f40
children 3bd1e74d4abc
comparison
equal deleted inserted replaced
15:c04568596f40 16:20a75ba4345b
13 # ----- PACKAGE ----- 13 # ----- PACKAGE -----
14 cat("\tPACKAGE INFO\n") 14 cat("\tPACKAGE INFO\n")
15 #pkgs=c("xcms","batch") 15 #pkgs=c("xcms","batch")
16 pkgs=c("parallel","BiocGenerics", "Biobase", "Rcpp", "mzR", "xcms","snow","batch") 16 pkgs=c("parallel","BiocGenerics", "Biobase", "Rcpp", "mzR", "xcms","snow","batch")
17 for(pkg in pkgs) { 17 for(pkg in pkgs) {
18 suppressPackageStartupMessages( stopifnot( library(pkg, quietly=TRUE, logical.return=TRUE, character.only=TRUE))) 18 suppressPackageStartupMessages( stopifnot( library(pkg, quietly=TRUE, logical.return=TRUE, character.only=TRUE)))
19 cat(pkg,"\t",as.character(packageVersion(pkg)),"\n",sep="") 19 cat(pkg,"\t",as.character(packageVersion(pkg)),"\n",sep="")
20 } 20 }
21 source_local <- function(fname){ argv <- commandArgs(trailingOnly = FALSE); base_dir <- dirname(substring(argv[grep("--file=", argv)], 8)); source(paste(base_dir, fname, sep="/")) } 21 source_local <- function(fname){ argv <- commandArgs(trailingOnly = FALSE); base_dir <- dirname(substring(argv[grep("--file=", argv)], 8)); source(paste(base_dir, fname, sep="/")) }
22 cat("\n\n"); 22 cat("\n\n");
23 23
24 24
36 # ----- ARGUMENTS PROCESSING ----- 36 # ----- ARGUMENTS PROCESSING -----
37 cat("\tINFILE PROCESSING INFO\n") 37 cat("\tINFILE PROCESSING INFO\n")
38 38
39 #image is an .RData file necessary to use xset variable given by previous tools 39 #image is an .RData file necessary to use xset variable given by previous tools
40 if (!is.null(listArguments[["image"]])){ 40 if (!is.null(listArguments[["image"]])){
41 load(listArguments[["image"]]); listArguments[["image"]]=NULL 41 load(listArguments[["image"]]); listArguments[["image"]]=NULL
42 } 42 }
43 43
44 #Import the different functions 44 #Import the different functions
45 source_local("lib.r") 45 source_local("lib.r")
46 46
59 #saving the common parameters 59 #saving the common parameters
60 thefunction = listArguments[["xfunction"]]; listArguments[["xfunction"]]=NULL #delete from the list of arguments 60 thefunction = listArguments[["xfunction"]]; listArguments[["xfunction"]]=NULL #delete from the list of arguments
61 61
62 xsetRdataOutput = paste(thefunction,"RData",sep=".") 62 xsetRdataOutput = paste(thefunction,"RData",sep=".")
63 if (!is.null(listArguments[["xsetRdataOutput"]])){ 63 if (!is.null(listArguments[["xsetRdataOutput"]])){
64 xsetRdataOutput = listArguments[["xsetRdataOutput"]]; listArguments[["xsetRdataOutput"]]=NULL 64 xsetRdataOutput = listArguments[["xsetRdataOutput"]]; listArguments[["xsetRdataOutput"]]=NULL
65 } 65 }
66 66
67 #saving the specific parameters 67 #saving the specific parameters
68 rplotspdf = "Rplots.pdf" 68 rplotspdf = "Rplots.pdf"
69 if (!is.null(listArguments[["rplotspdf"]])){ 69 if (!is.null(listArguments[["rplotspdf"]])){
70 rplotspdf = listArguments[["rplotspdf"]]; listArguments[["rplotspdf"]]=NULL 70 rplotspdf = listArguments[["rplotspdf"]]; listArguments[["rplotspdf"]]=NULL
71 } 71 }
72 sampleMetadataOutput = "sampleMetadata.tsv" 72 sampleMetadataOutput = "sampleMetadata.tsv"
73 if (!is.null(listArguments[["sampleMetadataOutput"]])){ 73 if (!is.null(listArguments[["sampleMetadataOutput"]])){
74 sampleMetadataOutput = listArguments[["sampleMetadataOutput"]]; listArguments[["sampleMetadataOutput"]]=NULL 74 sampleMetadataOutput = listArguments[["sampleMetadataOutput"]]; listArguments[["sampleMetadataOutput"]]=NULL
75 } 75 }
76 variableMetadataOutput = "variableMetadata.tsv" 76 variableMetadataOutput = "variableMetadata.tsv"
77 if (!is.null(listArguments[["variableMetadataOutput"]])){ 77 if (!is.null(listArguments[["variableMetadataOutput"]])){
78 variableMetadataOutput = listArguments[["variableMetadataOutput"]]; listArguments[["variableMetadataOutput"]]=NULL 78 variableMetadataOutput = listArguments[["variableMetadataOutput"]]; listArguments[["variableMetadataOutput"]]=NULL
79 } 79 }
80 dataMatrixOutput = "dataMatrix.tsv" 80 dataMatrixOutput = "dataMatrix.tsv"
81 if (!is.null(listArguments[["dataMatrixOutput"]])){ 81 if (!is.null(listArguments[["dataMatrixOutput"]])){
82 dataMatrixOutput = listArguments[["dataMatrixOutput"]]; listArguments[["dataMatrixOutput"]]=NULL 82 dataMatrixOutput = listArguments[["dataMatrixOutput"]]; listArguments[["dataMatrixOutput"]]=NULL
83 } 83 }
84 if (!is.null(listArguments[["convertRTMinute"]])){ 84 if (!is.null(listArguments[["convertRTMinute"]])){
85 convertRTMinute = listArguments[["convertRTMinute"]]; listArguments[["convertRTMinute"]]=NULL 85 convertRTMinute = listArguments[["convertRTMinute"]]; listArguments[["convertRTMinute"]]=NULL
86 } 86 }
87 if (!is.null(listArguments[["numDigitsMZ"]])){ 87 if (!is.null(listArguments[["numDigitsMZ"]])){
88 numDigitsMZ = listArguments[["numDigitsMZ"]]; listArguments[["numDigitsMZ"]]=NULL 88 numDigitsMZ = listArguments[["numDigitsMZ"]]; listArguments[["numDigitsMZ"]]=NULL
89 } 89 }
90 if (!is.null(listArguments[["numDigitsRT"]])){ 90 if (!is.null(listArguments[["numDigitsRT"]])){
91 numDigitsRT = listArguments[["numDigitsRT"]]; listArguments[["numDigitsRT"]]=NULL 91 numDigitsRT = listArguments[["numDigitsRT"]]; listArguments[["numDigitsRT"]]=NULL
92 } 92 }
93 if (!is.null(listArguments[["intval"]])){ 93 if (!is.null(listArguments[["intval"]])){
94 intval = listArguments[["intval"]]; listArguments[["intval"]]=NULL 94 intval = listArguments[["intval"]]; listArguments[["intval"]]=NULL
95 } 95 }
96 96
97 if (thefunction %in% c("xcmsSet","retcor")) { 97 if (thefunction %in% c("xcmsSet","retcor")) {
98 ticspdf = listArguments[["ticspdf"]]; listArguments[["ticspdf"]]=NULL 98 ticspdf = listArguments[["ticspdf"]]; listArguments[["ticspdf"]]=NULL
99 bicspdf = listArguments[["bicspdf"]]; listArguments[["bicspdf"]]=NULL 99 bicspdf = listArguments[["bicspdf"]]; listArguments[["bicspdf"]]=NULL
100 } 100 }
101 101
102 #necessary to unzip .zip file uploaded to Galaxy 102 #necessary to unzip .zip file uploaded to Galaxy
103 #thanks to the .zip file it's possible to upload many files at the same time while conserving the tree hierarchy of directories 103 #thanks to the .zip file it's possible to upload many files at the same time while conserving the tree hierarchy of directories
104 104
105 105
106 if (!is.null(listArguments[["zipfile"]])){ 106 if (!is.null(listArguments[["zipfile"]])){
107 zipfile= listArguments[["zipfile"]]; listArguments[["zipfile"]]=NULL 107 zipfile= listArguments[["zipfile"]]; listArguments[["zipfile"]]=NULL
108 } 108 }
109 109
110 if (!is.null(listArguments[["library"]])){ 110 if (!is.null(listArguments[["singlefile_galaxyPath"]])){
111 directory=listArguments[["library"]]; listArguments[["library"]]=NULL 111 singlefile_galaxyPaths = unlist(strsplit(listArguments[["singlefile_galaxyPath"]],",")); listArguments[["singlefile_galaxyPath"]]=NULL
112 if(!file.exists(directory)){ 112 singlefile_sampleNames = unlist(strsplit(listArguments[["singlefile_sampleName"]],",")); listArguments[["singlefile_sampleName"]]=NULL
113 error_message=paste("Cannot access the directory:",directory,". Please verify if the directory exists or not.") 113
114 print(error_message) 114 singlefile=NULL
115 stop(error_message) 115 for (singlefile_galaxyPath_i in seq(1:length(singlefile_galaxyPaths))) {
116 } 116 singlefile_galaxyPath=singlefile_galaxyPaths[singlefile_galaxyPath_i]
117 singlefile_sampleName=singlefile_sampleNames[singlefile_galaxyPath_i]
118 singlefile[[singlefile_sampleName]] = singlefile_galaxyPath
119 }
117 } 120 }
118 121
119 # We unzip automatically the chromatograms from the zip files. 122 # We unzip automatically the chromatograms from the zip files.
120 if (thefunction %in% c("xcmsSet","retcor","fillPeaks")) { 123 if (thefunction %in% c("xcmsSet","retcor","fillPeaks")) {
121 if(exists("zipfile") && (zipfile!="")) { 124 if(exists("singlefile") && (length("singlefile")>0)) {
122 if(!file.exists(zipfile)){ 125 for (singlefile_sampleName in names(singlefile)) {
123 error_message=paste("Cannot access the Zip file:",zipfile,". Please, contact your administrator ... if you have one!") 126 singlefile_galaxyPath = singlefile[[singlefile_sampleName]]
124 print(error_message) 127 if(!file.exists(singlefile_galaxyPath)){
125 stop(error_message) 128 error_message=paste("Cannot access the sample:",singlefile_sampleName,"located:",singlefile_galaxyPath,". Please, contact your administrator ... if you have one!")
126 } 129 print(error_message); stop(error_message)
127 130 }
128 #list all file in the zip file 131
129 #zip_files=unzip(zipfile,list=T)[,"Name"] 132 file.symlink(singlefile_galaxyPath,singlefile_sampleName)
130 133 }
131 134 directory = "."
132 #unzip 135
133 suppressWarnings(unzip(zipfile, unzip="unzip")) 136 md5sumList=list("origin"=getMd5sum(directory))
134 137
135 #get the directory name 138 }
136 filesInZip=unzip(zipfile, list=T); 139 if(exists("zipfile") && (zipfile!="")) {
137 directories=unique(unlist(lapply(strsplit(filesInZip$Name,"/"), function(x) x[1]))); 140 if(!file.exists(zipfile)){
138 directories=directories[!(directories %in% c("__MACOSX")) & file.info(directories)$isdir] 141 error_message=paste("Cannot access the Zip file:",zipfile,". Please, contact your administrator ... if you have one!")
139 directory = "." 142 print(error_message)
140 if (length(directories) == 1) directory = directories 143 stop(error_message)
141 144 }
142 cat("files_root_directory\t",directory,"\n") 145
143 146 #list all file in the zip file
144 # 147 #zip_files=unzip(zipfile,list=T)[,"Name"]
145 md5sumList=list("origin"=getMd5sum(directory)) 148
146 149 #unzip
147 # Check and fix if there are non ASCII characters. If so, they will be removed from the *mzXML mzML files. 150 suppressWarnings(unzip(zipfile, unzip="unzip"))
148 # Remove because can create issue with some clean files 151
149 #@TODO: fix me 152 #get the directory name
150 #if (deleteXmlBadCharacters(directory)) { 153 filesInZip=unzip(zipfile, list=T);
151 # md5sumList=list("removalBadCharacters"=getMd5sum(directory)) 154 directories=unique(unlist(lapply(strsplit(filesInZip$Name,"/"), function(x) x[1])));
152 #} 155 directories=directories[!(directories %in% c("__MACOSX")) & file.info(directories)$isdir]
153 156 directory = "."
154 } 157 if (length(directories) == 1) directory = directories
158
159 cat("files_root_directory\t",directory,"\n")
160
161 md5sumList=list("origin"=getMd5sum(directory))
162 }
155 } 163 }
156 164
157 #addition of the directory to the list of arguments in the first position 165 #addition of the directory to the list of arguments in the first position
158 if (thefunction == "xcmsSet") { 166 if (thefunction == "xcmsSet") {
159 checkXmlStructure(directory) 167 checkXmlStructure(directory)
160 checkFilesCompatibilityWithXcms(directory) 168 checkFilesCompatibilityWithXcms(directory)
161 listArguments=append(directory, listArguments) 169 listArguments=append(directory, listArguments)
162 } 170 }
163 171
164 172
165 #addition of xset object to the list of arguments in the first position 173 #addition of xset object to the list of arguments in the first position
166 if (exists("xset")){ 174 if (exists("xset")){
167 listArguments=append(list(xset), listArguments) 175 listArguments=append(list(xset), listArguments)
168 } 176 }
169 177
170 cat("\n\n") 178 cat("\n\n")
171
172
173 179
174 180
175 181
176 182
177 # ----- MAIN PROCESSING INFO ----- 183 # ----- MAIN PROCESSING INFO -----
179 185
180 186
181 #Verification of a group step before doing the fillpeaks job. 187 #Verification of a group step before doing the fillpeaks job.
182 188
183 if (thefunction == "fillPeaks") { 189 if (thefunction == "fillPeaks") {
184 res=try(is.null(groupnames(xset))) 190 res=try(is.null(groupnames(xset)))
185 if (class(res) == "try-error"){ 191 if (class(res) == "try-error"){
186 error<-geterrmessage() 192 error<-geterrmessage()
187 write(error, stderr()) 193 write(error, stderr())
188 stop("You must always do a group step after a retcor. Otherwise it won't work for the fillpeaks step") 194 stop("You must always do a group step after a retcor. Otherwise it won't work for the fillpeaks step")
189 } 195 }
190 196
191 } 197 }
192 198
193 #change the default display settings 199 #change the default display settings
194 #dev.new(file="Rplots.pdf", width=16, height=12) 200 #dev.new(file="Rplots.pdf", width=16, height=12)
195 pdf(file=rplotspdf, width=16, height=12) 201 pdf(file=rplotspdf, width=16, height=12)
196 if (thefunction == "group") { 202 if (thefunction == "group") {
197 par(mfrow=c(2,2)) 203 par(mfrow=c(2,2))
198 } 204 }
199 #else if (thefunction == "retcor") { 205 #else if (thefunction == "retcor") {
200 #try to change the legend display 206 #try to change the legend display
201 # par(xpd=NA) 207 # par(xpd=NA)
202 # par(xpd=T, mar=par()$mar+c(0,0,0,4)) 208 # par(xpd=T, mar=par()$mar+c(0,0,0,4))
213 219
214 dev.off() #dev.new(file="Rplots.pdf", width=16, height=12) 220 dev.off() #dev.new(file="Rplots.pdf", width=16, height=12)
215 221
216 if (thefunction == "xcmsSet") { 222 if (thefunction == "xcmsSet") {
217 223
218 #transform the absolute file paths into relative paths 224 #transform the absolute file paths into relative paths
219 xset@filepaths<-sub(paste(getwd(),"/",sep="") ,"", xset@filepaths) 225 xset@filepaths<-sub(paste(getwd(),"/",sep="") ,"", xset@filepaths)
220 226
221 if(exists("zipfile") && (zipfile!="")) { 227 if(exists("zipfile") && (zipfile!="")) {
222 228
223 #Modify the samples names (erase the path) 229 #Modify the samples names (erase the path)
224 for(i in 1:length(sampnames(xset))){ 230 for(i in 1:length(sampnames(xset))){
225 231
226 sample_name=unlist(strsplit(sampnames(xset)[i], "/")) 232 sample_name=unlist(strsplit(sampnames(xset)[i], "/"))
227 sample_name=sample_name[length(sample_name)] 233 sample_name=sample_name[length(sample_name)]
228 sample_name= unlist(strsplit(sample_name,"[.]"))[1] 234 sample_name= unlist(strsplit(sample_name,"[.]"))[1]
229 sampnames(xset)[i]=sample_name 235 sampnames(xset)[i]=sample_name
230 236
231 } 237 }
232 238
233 } 239 }
234 240
235 } 241 }
236 242
237 # -- TIC -- 243 # -- TIC --
238 if (thefunction == "xcmsSet") { 244 if (thefunction == "xcmsSet") {
239 cat("\t\tGET TIC GRAPH\n") 245 cat("\t\tGET TIC GRAPH\n")
240 sampleNamesList = getSampleMetadata(xcmsSet=xset, sampleMetadataOutput=sampleMetadataOutput) 246 sampleNamesList = getSampleMetadata(xcmsSet=xset, sampleMetadataOutput=sampleMetadataOutput)
241 getTICs(xcmsSet=xset, pdfname=ticspdf,rt="raw") 247 getTICs(xcmsSet=xset, pdfname=ticspdf,rt="raw")
242 getBPCs(xcmsSet=xset,rt="raw",pdfname=bicspdf) 248 getBPCs(xcmsSet=xset,rt="raw",pdfname=bicspdf)
243 } else if (thefunction == "retcor") { 249 } else if (thefunction == "retcor") {
244 cat("\t\tGET TIC GRAPH\n") 250 cat("\t\tGET TIC GRAPH\n")
245 getTICs(xcmsSet=xset, pdfname=ticspdf,rt="corrected") 251 getTICs(xcmsSet=xset, pdfname=ticspdf,rt="corrected")
246 getBPCs(xcmsSet=xset,rt="corrected",pdfname=bicspdf) 252 getBPCs(xcmsSet=xset,rt="corrected",pdfname=bicspdf)
247 } 253 }
248 254
249 if (thefunction == "fillPeaks") { 255 if (thefunction == "fillPeaks") {
250 cat("\t\tGET THE PEAK LIST\n") 256 cat("\t\tGET THE PEAK LIST\n")
251 getPeaklistW4M(xset,intval,convertRTMinute,numDigitsMZ,numDigitsRT,variableMetadataOutput,dataMatrixOutput) 257 getPeaklistW4M(xset,intval,convertRTMinute,numDigitsMZ,numDigitsRT,variableMetadataOutput,dataMatrixOutput)
252 } 258 }
253 259
254 260
255 cat("\n\n") 261 cat("\n\n")
256 262
260 print(xset) 266 print(xset)
261 #delete the parameters to avoid passing them to the next tool in the .RData image 267 #delete the parameters to avoid passing them to the next tool in the .RData image
262 268
263 269
264 #saving R data in .Rdata file to save the variables used in the present tool 270 #saving R data in .Rdata file to save the variables used in the present tool
265 objects2save = c("xset","zipfile","listOFlistArguments","md5sumList","sampleNamesList") 271 objects2save = c("xset","zipfile","singlefile","listOFlistArguments","md5sumList","sampleNamesList")
266 save(list=objects2save[objects2save %in% ls()], file=xsetRdataOutput) 272 save(list=objects2save[objects2save %in% ls()], file=xsetRdataOutput)
267 273
268 cat("\n\n") 274 cat("\n\n")
269 275
270 276