comparison lib.r @ 30:4d6f4cd7c3ef draft

planemo upload for repository https://github.com/workflow4metabolomics/xcms commit e384d6dd5f410799ec211f73bca0b5d5d7bc651e
author lecorguille
date Thu, 01 Mar 2018 04:16:45 -0500
parents c013ed353a2f
children 281786a7b9a2
comparison
equal deleted inserted replaced
29:c013ed353a2f 30:4d6f4cd7c3ef
1 #Authors ABiMS TEAM 1 #@authors ABiMS TEAM, Y. Guitton
2 #Lib.r for Galaxy Workflow4Metabolomics xcms tools 2 # lib.r for Galaxy Workflow4Metabolomics xcms tools
3 #
4 #version 2.4: lecorguille
5 # add getPeaklistW4M
6 #version 2.3: yguitton
7 # correction for empty PDF when only 1 class
8 #version 2.2
9 # correct bug in Base Peak Chromatogram (BPC) option, not only TIC when scanrange used in xcmsSet
10 # Note if scanrange is used a warning is prompted in R console but do not stop PDF generation
11 #version 2.1: yguitton
12 # Modifications made by Guitton Yann
13
14 3
15 #@author G. Le Corguille 4 #@author G. Le Corguille
16 #This function convert if it is required the Retention Time in minutes 5 # solve an issue with batch if arguments are logical TRUE/FALSE
6 parseCommandArgs <- function(...) {
7 args <- batch::parseCommandArgs(...)
8 for (key in names(args)) {
9 if (args[key] %in% c("TRUE","FALSE"))
10 args[key] = as.logical(args[key])
11 }
12 return(args)
13 }
14
15 #@author G. Le Corguille
16 # This function will
17 # - load the packages
18 # - display the sessionInfo
19 loadAndDisplayPackages <- function(pkgs) {
20 for(pkg in pkgs) suppressPackageStartupMessages( stopifnot( library(pkg, quietly=TRUE, logical.return=TRUE, character.only=TRUE)))
21
22 sessioninfo = sessionInfo()
23 cat(sessioninfo$R.version$version.string,"\n")
24 cat("Main packages:\n")
25 for (pkg in names(sessioninfo$otherPkgs)) { cat(paste(pkg,packageVersion(pkg)),"\t") }; cat("\n")
26 cat("Other loaded packages:\n")
27 for (pkg in names(sessioninfo$loadedOnly)) { cat(paste(pkg,packageVersion(pkg)),"\t") }; cat("\n")
28 }
29
30 #@author G. Le Corguille
31 # This function convert if it is required the Retention Time in minutes
17 RTSecondToMinute <- function(variableMetadata, convertRTMinute) { 32 RTSecondToMinute <- function(variableMetadata, convertRTMinute) {
18 if (convertRTMinute){ 33 if (convertRTMinute){
19 #converting the retention times (seconds) into minutes 34 #converting the retention times (seconds) into minutes
20 print("converting the retention times into minutes in the variableMetadata") 35 print("converting the retention times into minutes in the variableMetadata")
21 variableMetadata[,"rt"]=variableMetadata[,"rt"]/60 36 variableMetadata[,"rt"] <- variableMetadata[,"rt"]/60
22 variableMetadata[,"rtmin"]=variableMetadata[,"rtmin"]/60 37 variableMetadata[,"rtmin"] <- variableMetadata[,"rtmin"]/60
23 variableMetadata[,"rtmax"]=variableMetadata[,"rtmax"]/60 38 variableMetadata[,"rtmax"] <- variableMetadata[,"rtmax"]/60
24 } 39 }
25 return (variableMetadata) 40 return (variableMetadata)
26 } 41 }
27 42
28 #@author G. Le Corguille 43 #@author G. Le Corguille
29 #This function format ions identifiers 44 # This function format ions identifiers
30 formatIonIdentifiers <- function(variableMetadata, numDigitsRT=0, numDigitsMZ=0) { 45 formatIonIdentifiers <- function(variableMetadata, numDigitsRT=0, numDigitsMZ=0) {
31 splitDeco = strsplit(as.character(variableMetadata$name),"_") 46 splitDeco <- strsplit(as.character(variableMetadata$name),"_")
32 idsDeco = sapply(splitDeco, function(x) { deco=unlist(x)[2]; if (is.na(deco)) return ("") else return(paste0("_",deco)) }) 47 idsDeco <- sapply(splitDeco, function(x) { deco=unlist(x)[2]; if (is.na(deco)) return ("") else return(paste0("_",deco)) })
33 namecustom = make.unique(paste0("M",round(variableMetadata[,"mz"],numDigitsMZ),"T",round(variableMetadata[,"rt"],numDigitsRT),idsDeco)) 48 namecustom <- make.unique(paste0("M",round(variableMetadata[,"mz"],numDigitsMZ),"T",round(variableMetadata[,"rt"],numDigitsRT),idsDeco))
34 variableMetadata=cbind(name=variableMetadata$name, namecustom=namecustom, variableMetadata[,!(colnames(variableMetadata) %in% c("name"))]) 49 variableMetadata <- cbind(name=variableMetadata$name, namecustom=namecustom, variableMetadata[,!(colnames(variableMetadata) %in% c("name"))])
35 return(variableMetadata) 50 return(variableMetadata)
51 }
52
53 #@author G. Le Corguille
54 # Draw the plotChromPeakDensity 3 per page in a pdf file
55 getPlotChromPeakDensity <- function(xdata) {
56 pdf(file="plotChromPeakDensity.pdf", width=16, height=12)
57
58 par(mfrow = c(3, 1), mar = c(4, 4, 1, 0.5))
59
60 group_colors <- brewer.pal(3, "Set1")[1:length(unique(xdata$sample_group))]
61 names(group_colors) <- unique(xdata$sample_group)
62
63 xlim <- c(min(featureDefinitions(xdata)$rtmin), max(featureDefinitions(xdata)$rtmax))
64 for (i in 1:nrow(featureDefinitions(xdata))) {
65 plotChromPeakDensity(xdata, mz=c(featureDefinitions(xdata)[i,]$mzmin,featureDefinitions(xdata)[i,]$mzmax), col=group_colors, pch=16, xlim=xlim)
66 legend("topright", legend=names(group_colors), col=group_colors, cex=0.8, lty=1)
67 }
68
69 dev.off()
70 }
71
72 #@author G. Le Corguille
73 # Draw the plotChromPeakDensity 3 per page in a pdf file
74 getPlotAdjustedRtime <- function(xdata) {
75 pdf(file="raw_vs_adjusted_rt.pdf", width=16, height=12)
76 # Color by group
77 group_colors <- brewer.pal(3, "Set1")[1:length(unique(xdata$sample_group))]
78 names(group_colors) <- unique(xdata$sample_group)
79 plotAdjustedRtime(xdata, col = group_colors[xdata$sample_group])
80 legend("topright", legend=names(group_colors), col=group_colors, cex=0.8, lty=1)
81 # Color by sample
82 plotAdjustedRtime(xdata, col = rainbow(length(xdata@phenoData@data$sample_name)))
83 legend("topright", legend=xdata@phenoData@data$sample_name, col=rainbow(length(xdata@phenoData@data$sample_name)), cex=0.8, lty=1)
84 dev.off()
36 } 85 }
37 86
38 #@author G. Le Corguille 87 #@author G. Le Corguille
39 # value: intensity values to be used into, maxo or intb 88 # value: intensity values to be used into, maxo or intb
40 getPeaklistW4M <- function(xset, intval="into",convertRTMinute=F,numDigitsMZ=4,numDigitsRT=0,variableMetadataOutput,dataMatrixOutput) { 89 getPeaklistW4M <- function(xdata, intval="into", convertRTMinute=F, numDigitsMZ=4, numDigitsRT=0, variableMetadataOutput, dataMatrixOutput) {
41 variableMetadata_dataMatrix = peakTable(xset, method="medret", value=intval) 90 dataMatrix <- featureValues(xdata, method="medret", value=intval)
42 variableMetadata_dataMatrix = cbind(name=groupnames(xset),variableMetadata_dataMatrix) 91 colnames(dataMatrix) <- tools::file_path_sans_ext(colnames(dataMatrix))
43 92 dataMatrix = cbind(name=groupnamesW4M(xdata), dataMatrix)
44 dataMatrix = variableMetadata_dataMatrix[,(make.names(colnames(variableMetadata_dataMatrix)) %in% c("name", make.names(sampnames(xset))))] 93 variableMetadata <- featureDefinitions(xdata)
45 94 colnames(variableMetadata)[1] = "mz"; colnames(variableMetadata)[4] = "rt"
46 variableMetadata = variableMetadata_dataMatrix[,!(make.names(colnames(variableMetadata_dataMatrix)) %in% c(make.names(sampnames(xset))))] 95 variableMetadata = data.frame(name=groupnamesW4M(xdata), variableMetadata)
47 variableMetadata = RTSecondToMinute(variableMetadata, convertRTMinute) 96
48 variableMetadata = formatIonIdentifiers(variableMetadata, numDigitsRT=numDigitsRT, numDigitsMZ=numDigitsMZ) 97 variableMetadata <- RTSecondToMinute(variableMetadata, convertRTMinute)
98 variableMetadata <- formatIonIdentifiers(variableMetadata, numDigitsRT=numDigitsRT, numDigitsMZ=numDigitsMZ)
49 99
50 write.table(variableMetadata, file=variableMetadataOutput,sep="\t",quote=F,row.names=F) 100 write.table(variableMetadata, file=variableMetadataOutput,sep="\t",quote=F,row.names=F)
51 write.table(dataMatrix, file=dataMatrixOutput,sep="\t",quote=F,row.names=F) 101 write.table(dataMatrix, file=dataMatrixOutput,sep="\t",quote=F,row.names=F)
102
52 } 103 }
53 104
54 #@author Y. Guitton 105 #@author Y. Guitton
55 getBPC <- function(file,rtcor=NULL, ...) { 106 getBPC <- function(file,rtcor=NULL, ...) {
56 object <- xcmsRaw(file) 107 object <- xcmsRaw(file)
68 stop() 119 stop()
69 } else { 120 } else {
70 files <- filepaths(xcmsSet) 121 files <- filepaths(xcmsSet)
71 } 122 }
72 123
73 phenoDataClass<-as.vector(levels(xcmsSet@phenoData[,1])) #sometime phenoData have more than 1 column use first as class 124 phenoDataClass <- as.vector(levels(xcmsSet@phenoData[,"class"])) #sometime phenoData have more than 1 column use first as class
74 125
75 classnames<-vector("list",length(phenoDataClass)) 126 classnames <- vector("list",length(phenoDataClass))
76 for (i in 1:length(phenoDataClass)){ 127 for (i in 1:length(phenoDataClass)){
77 classnames[[i]]<-which( xcmsSet@phenoData[,1]==phenoDataClass[i]) 128 classnames[[i]] <- which( xcmsSet@phenoData[,"class"]==phenoDataClass[i])
78 } 129 }
79 130
80 N <- dim(phenoData(xcmsSet))[1] 131 N <- dim(phenoData(xcmsSet))[1]
81 132
82 TIC <- vector("list",N) 133 TIC <- vector("list",N)
99 150
100 151
101 152
102 pdf(pdfname,w=16,h=10) 153 pdf(pdfname,w=16,h=10)
103 cols <- rainbow(N) 154 cols <- rainbow(N)
104 lty = 1:N 155 lty <- 1:N
105 pch = 1:N 156 pch <- 1:N
106 #search for max x and max y in BPCs 157 #search for max x and max y in BPCs
107 xlim = range(sapply(TIC, function(x) range(x[,1]))) 158 xlim <- range(sapply(TIC, function(x) range(x[,1])))
108 ylim = range(sapply(TIC, function(x) range(x[,2]))) 159 ylim <- range(sapply(TIC, function(x) range(x[,2])))
109 ylim = c(-ylim[2], ylim[2]) 160 ylim <- c(-ylim[2], ylim[2])
110 161
111 162
112 ##plot start 163 ##plot start
113 164
114 if (length(phenoDataClass)>2){ 165 if (length(phenoDataClass)>2){
115 for (k in 1:(length(phenoDataClass)-1)){ 166 for (k in 1:(length(phenoDataClass)-1)){
116 for (l in (k+1):length(phenoDataClass)){ 167 for (l in (k+1):length(phenoDataClass)){
117 #print(paste(phenoDataClass[k],"vs",phenoDataClass[l],sep=" ")) 168 #print(paste(phenoDataClass[k],"vs",phenoDataClass[l],sep=" "))
118 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",phenoDataClass[k]," vs ",phenoDataClass[l], sep=""), xlab = "Retention Time (min)", ylab = "BPC") 169 plot(0, 0, type="n", xlim=xlim/60, ylim=ylim, main=paste("Base Peak Chromatograms \n","BPCs_",phenoDataClass[k]," vs ",phenoDataClass[l], sep=""), xlab="Retention Time (min)", ylab="BPC")
119 colvect<-NULL 170 colvect <- NULL
120 for (j in 1:length(classnames[[k]])) { 171 for (j in 1:length(classnames[[k]])) {
121 tic <- TIC[[classnames[[k]][j]]] 172 tic <- TIC[[classnames[[k]][j]]]
122 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") 173 # points(tic[,1]/60, tic[,2], col=cols[i], pch=pch[i], type="l")
123 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") 174 points(tic[,1]/60, tic[,2], col=cols[classnames[[k]][j]], pch=pch[classnames[[k]][j]], type="l")
124 colvect<-append(colvect,cols[classnames[[k]][j]]) 175 colvect <- append(colvect,cols[classnames[[k]][j]])
125 } 176 }
126 for (j in 1:length(classnames[[l]])) { 177 for (j in 1:length(classnames[[l]])) {
127 # i=class2names[j] 178 # i <- class2names[j]
128 tic <- TIC[[classnames[[l]][j]]] 179 tic <- TIC[[classnames[[l]][j]]]
129 points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") 180 points(tic[,1]/60, -tic[,2], col=cols[classnames[[l]][j]], pch=pch[classnames[[l]][j]], type="l")
130 colvect<-append(colvect,cols[classnames[[l]][j]]) 181 colvect <- append(colvect,cols[classnames[[l]][j]])
131 } 182 }
132 legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) 183 legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col=colvect, lty=lty, pch=pch)
133 } 184 }
134 } 185 }
135 }#end if length >2 186 }#end if length >2
136 187
137 if (length(phenoDataClass)==2){ 188 if (length(phenoDataClass)==2){
138 k=1 189 k <- 1
139 l=2 190 l <- 2
140 colvect<-NULL 191 colvect <- NULL
141 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",phenoDataClass[k],"vs",phenoDataClass[l], sep=""), xlab = "Retention Time (min)", ylab = "BPC") 192 plot(0, 0, type="n", xlim=xlim/60, ylim=ylim, main=paste("Base Peak Chromatograms \n","BPCs_",phenoDataClass[k],"vs",phenoDataClass[l], sep=""), xlab="Retention Time (min)", ylab="BPC")
142 193
143 for (j in 1:length(classnames[[k]])) { 194 for (j in 1:length(classnames[[k]])) {
144 195
145 tic <- TIC[[classnames[[k]][j]]] 196 tic <- TIC[[classnames[[k]][j]]]
146 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") 197 # points(tic[,1]/60, tic[,2], col=cols[i], pch=pch[i], type="l")
147 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") 198 points(tic[,1]/60, tic[,2], col=cols[classnames[[k]][j]], pch=pch[classnames[[k]][j]], type="l")
148 colvect<-append(colvect,cols[classnames[[k]][j]]) 199 colvect<-append(colvect,cols[classnames[[k]][j]])
149 } 200 }
150 for (j in 1:length(classnames[[l]])) { 201 for (j in 1:length(classnames[[l]])) {
151 # i=class2names[j] 202 # i <- class2names[j]
152 tic <- TIC[[classnames[[l]][j]]] 203 tic <- TIC[[classnames[[l]][j]]]
153 points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") 204 points(tic[,1]/60, -tic[,2], col=cols[classnames[[l]][j]], pch=pch[classnames[[l]][j]], type="l")
154 colvect<-append(colvect,cols[classnames[[l]][j]]) 205 colvect <- append(colvect,cols[classnames[[l]][j]])
155 } 206 }
156 legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) 207 legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col=colvect, lty=lty, pch=pch)
157 208
158 }#end length ==2 209 }#end length ==2
159 210
160 #case where only one class 211 #case where only one class
161 if (length(phenoDataClass)==1){ 212 if (length(phenoDataClass)==1){
162 k=1 213 k <- 1
163 ylim = range(sapply(TIC, function(x) range(x[,2]))) 214 ylim <- range(sapply(TIC, function(x) range(x[,2])))
164 colvect<-NULL 215 colvect <- NULL
165 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",phenoDataClass[k], sep=""), xlab = "Retention Time (min)", ylab = "BPC") 216 plot(0, 0, type="n", xlim=xlim/60, ylim=ylim, main=paste("Base Peak Chromatograms \n","BPCs_",phenoDataClass[k], sep=""), xlab="Retention Time (min)", ylab="BPC")
166 217
167 for (j in 1:length(classnames[[k]])) { 218 for (j in 1:length(classnames[[k]])) {
168 tic <- TIC[[classnames[[k]][j]]] 219 tic <- TIC[[classnames[[k]][j]]]
169 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") 220 # points(tic[,1]/60, tic[,2], col=cols[i], pch=pch[i], type="l")
170 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") 221 points(tic[,1]/60, tic[,2], col=cols[classnames[[k]][j]], pch=pch[classnames[[k]][j]], type="l")
171 colvect<-append(colvect,cols[classnames[[k]][j]]) 222 colvect <- append(colvect,cols[classnames[[k]][j]])
172 } 223 }
173 224
174 legend("topright",paste(basename(files[c(classnames[[k]])])), col = colvect, lty = lty, pch = pch) 225 legend("topright",paste(basename(files[c(classnames[[k]])])), col=colvect, lty=lty, pch=pch)
175 226
176 }#end length ==1 227 }#end length ==1
177 228
178 dev.off() #pdf(pdfname,w=16,h=10) 229 dev.off() #pdf(pdfname,w=16,h=10)
179 230
181 } 232 }
182 233
183 234
184 235
185 #@author Y. Guitton 236 #@author Y. Guitton
186 getTIC <- function(file,rtcor=NULL) { 237 getTIC <- function(file, rtcor=NULL) {
187 object <- xcmsRaw(file) 238 object <- xcmsRaw(file)
188 cbind(if (is.null(rtcor)) object@scantime else rtcor, rawEIC(object,mzrange=range(object@env$mz))$intensity) 239 cbind(if (is.null(rtcor)) object@scantime else rtcor, rawEIC(object, mzrange=range(object@env$mz))$intensity)
189 } 240 }
190 241
191 ## 242 #overlay TIC from all files in current folder or from xcmsSet, create pdf
192 ## overlay TIC from all files in current folder or from xcmsSet, create pdf
193 ##
194 #@author Y. Guitton 243 #@author Y. Guitton
195 getTICs <- function(xcmsSet=NULL,files=NULL, pdfname="TICs.pdf",rt=c("raw","corrected")) { 244 getTICs <- function(xcmsSet=NULL,files=NULL, pdfname="TICs.pdf", rt=c("raw","corrected")) {
196 cat("Creating TIC pdf...\n") 245 cat("Creating TIC pdf...\n")
197 246
198 if (is.null(xcmsSet)) { 247 if (is.null(xcmsSet)) {
199 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]", "[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") 248 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]", "[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]")
200 filepattern <- paste(paste("\\.", filepattern, "$", sep = ""), collapse = "|") 249 filepattern <- paste(paste("\\.", filepattern, "$", sep=""), collapse="|")
201 if (is.null(files)) 250 if (is.null(files))
202 files <- getwd() 251 files <- getwd()
203 info <- file.info(files) 252 info <- file.info(files)
204 listed <- list.files(files[info$isdir], pattern = filepattern, recursive = TRUE, full.names = TRUE) 253 listed <- list.files(files[info$isdir], pattern=filepattern, recursive=TRUE, full.names=TRUE)
205 files <- c(files[!info$isdir], listed) 254 files <- c(files[!info$isdir], listed)
206 } else { 255 } else {
207 files <- filepaths(xcmsSet) 256 files <- filepaths(xcmsSet)
208 } 257 }
209 258
210 phenoDataClass<-as.vector(levels(xcmsSet@phenoData[,1])) #sometime phenoData have more than 1 column use first as class 259 phenoDataClass <- as.vector(levels(xcmsSet@phenoData[,"class"])) #sometime phenoData have more than 1 column use first as class
211 classnames<-vector("list",length(phenoDataClass)) 260 classnames <- vector("list",length(phenoDataClass))
212 for (i in 1:length(phenoDataClass)){ 261 for (i in 1:length(phenoDataClass)){
213 classnames[[i]]<-which( xcmsSet@phenoData[,1]==phenoDataClass[i]) 262 classnames[[i]] <- which( xcmsSet@phenoData[,"class"]==phenoDataClass[i])
214 } 263 }
215 264
216 N <- length(files) 265 N <- length(files)
217 TIC <- vector("list",N) 266 TIC <- vector("list",N)
218 267
219 for (i in 1:N) { 268 for (i in 1:N) {
220 if (!is.null(xcmsSet) && rt == "corrected") 269 if (!is.null(xcmsSet) && rt == "corrected")
221 rtcor <- xcmsSet@rt$corrected[[i]] else 270 rtcor <- xcmsSet@rt$corrected[[i]] else
222 rtcor <- NULL 271 rtcor <- NULL
223 TIC[[i]] <- getTIC(files[i],rtcor=rtcor) 272 TIC[[i]] <- getTIC(files[i], rtcor=rtcor)
224 } 273 }
225 274
226 pdf(pdfname,w=16,h=10) 275 pdf(pdfname, w=16, h=10)
227 cols <- rainbow(N) 276 cols <- rainbow(N)
228 lty = 1:N 277 lty <- 1:N
229 pch = 1:N 278 pch <- 1:N
230 #search for max x and max y in TICs 279 #search for max x and max y in TICs
231 xlim = range(sapply(TIC, function(x) range(x[,1]))) 280 xlim <- range(sapply(TIC, function(x) range(x[,1])))
232 ylim = range(sapply(TIC, function(x) range(x[,2]))) 281 ylim <- range(sapply(TIC, function(x) range(x[,2])))
233 ylim = c(-ylim[2], ylim[2]) 282 ylim <- c(-ylim[2], ylim[2])
234 283
235 284
236 ##plot start 285 ##plot start
237 if (length(phenoDataClass)>2){ 286 if (length(phenoDataClass)>2){
238 for (k in 1:(length(phenoDataClass)-1)){ 287 for (k in 1:(length(phenoDataClass)-1)){
239 for (l in (k+1):length(phenoDataClass)){ 288 for (l in (k+1):length(phenoDataClass)){
240 #print(paste(phenoDataClass[k],"vs",phenoDataClass[l],sep=" ")) 289 #print(paste(phenoDataClass[k],"vs",phenoDataClass[l],sep=" "))
241 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",phenoDataClass[k]," vs ",phenoDataClass[l], sep=""), xlab = "Retention Time (min)", ylab = "TIC") 290 plot(0, 0, type="n", xlim=xlim/60, ylim=ylim, main=paste("Total Ion Chromatograms \n","TICs_",phenoDataClass[k]," vs ",phenoDataClass[l], sep=""), xlab="Retention Time (min)", ylab="TIC")
242 colvect<-NULL 291 colvect <- NULL
243 for (j in 1:length(classnames[[k]])) { 292 for (j in 1:length(classnames[[k]])) {
244 tic <- TIC[[classnames[[k]][j]]] 293 tic <- TIC[[classnames[[k]][j]]]
245 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") 294 # points(tic[,1]/60, tic[,2], col=cols[i], pch=pch[i], type="l")
246 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") 295 points(tic[,1]/60, tic[,2], col=cols[classnames[[k]][j]], pch=pch[classnames[[k]][j]], type="l")
247 colvect<-append(colvect,cols[classnames[[k]][j]]) 296 colvect <- append(colvect,cols[classnames[[k]][j]])
248 } 297 }
249 for (j in 1:length(classnames[[l]])) { 298 for (j in 1:length(classnames[[l]])) {
250 # i=class2names[j] 299 # i=class2names[j]
251 tic <- TIC[[classnames[[l]][j]]] 300 tic <- TIC[[classnames[[l]][j]]]
252 points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") 301 points(tic[,1]/60, -tic[,2], col=cols[classnames[[l]][j]], pch=pch[classnames[[l]][j]], type="l")
253 colvect<-append(colvect,cols[classnames[[l]][j]]) 302 colvect <- append(colvect,cols[classnames[[l]][j]])
254 } 303 }
255 legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) 304 legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col=colvect, lty=lty, pch=pch)
256 } 305 }
257 } 306 }
258 }#end if length >2 307 }#end if length >2
259 if (length(phenoDataClass)==2){ 308 if (length(phenoDataClass)==2){
260 k=1 309 k <- 1
261 l=2 310 l <- 2
262 311
263 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",phenoDataClass[k],"vs",phenoDataClass[l], sep=""), xlab = "Retention Time (min)", ylab = "TIC") 312 plot(0, 0, type="n", xlim=xlim/60, ylim=ylim, main=paste("Total Ion Chromatograms \n","TICs_",phenoDataClass[k],"vs",phenoDataClass[l], sep=""), xlab="Retention Time (min)", ylab="TIC")
264 colvect<-NULL 313 colvect <- NULL
265 for (j in 1:length(classnames[[k]])) { 314 for (j in 1:length(classnames[[k]])) {
266 tic <- TIC[[classnames[[k]][j]]] 315 tic <- TIC[[classnames[[k]][j]]]
267 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") 316 # points(tic[,1]/60, tic[,2], col=cols[i], pch=pch[i], type="l")
268 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") 317 points(tic[,1]/60, tic[,2], col=cols[classnames[[k]][j]], pch=pch[classnames[[k]][j]], type="l")
269 colvect<-append(colvect,cols[classnames[[k]][j]]) 318 colvect <- append(colvect,cols[classnames[[k]][j]])
270 } 319 }
271 for (j in 1:length(classnames[[l]])) { 320 for (j in 1:length(classnames[[l]])) {
272 # i=class2names[j] 321 # i <- class2names[j]
273 tic <- TIC[[classnames[[l]][j]]] 322 tic <- TIC[[classnames[[l]][j]]]
274 points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") 323 points(tic[,1]/60, -tic[,2], col=cols[classnames[[l]][j]], pch=pch[classnames[[l]][j]], type="l")
275 colvect<-append(colvect,cols[classnames[[l]][j]]) 324 colvect <- append(colvect,cols[classnames[[l]][j]])
276 } 325 }
277 legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) 326 legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col=colvect, lty=lty, pch=pch)
278 327
279 }#end length ==2 328 }#end length ==2
280 329
281 #case where only one class 330 #case where only one class
282 if (length(phenoDataClass)==1){ 331 if (length(phenoDataClass)==1){
283 k=1 332 k <- 1
284 ylim = range(sapply(TIC, function(x) range(x[,2]))) 333 ylim <- range(sapply(TIC, function(x) range(x[,2])))
285 334
286 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",phenoDataClass[k], sep=""), xlab = "Retention Time (min)", ylab = "TIC") 335 plot(0, 0, type="n", xlim=xlim/60, ylim=ylim, main=paste("Total Ion Chromatograms \n","TICs_",phenoDataClass[k], sep=""), xlab="Retention Time (min)", ylab="TIC")
287 colvect<-NULL 336 colvect <- NULL
288 for (j in 1:length(classnames[[k]])) { 337 for (j in 1:length(classnames[[k]])) {
289 tic <- TIC[[classnames[[k]][j]]] 338 tic <- TIC[[classnames[[k]][j]]]
290 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") 339 # points(tic[,1]/60, tic[,2], col=cols[i], pch=pch[i], type="l")
291 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") 340 points(tic[,1]/60, tic[,2], col=cols[classnames[[k]][j]], pch=pch[classnames[[k]][j]], type="l")
292 colvect<-append(colvect,cols[classnames[[k]][j]]) 341 colvect <- append(colvect,cols[classnames[[k]][j]])
293 } 342 }
294 343
295 legend("topright",paste(basename(files[c(classnames[[k]])])), col = colvect, lty = lty, pch = pch) 344 legend("topright",paste(basename(files[c(classnames[[k]])])), col=colvect, lty=lty, pch=pch)
296 345
297 }#end length ==1 346 }#end length ==1
298 347
299 dev.off() #pdf(pdfname,w=16,h=10) 348 dev.off() #pdf(pdfname,w=16,h=10)
300 349
301 invisible(TIC) 350 invisible(TIC)
302 } 351 }
303 352
304 353
305 354
306 ## 355 # Get the polarities from all the samples of a condition
307 ## Get the polarities from all the samples of a condition
308 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM 356 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM
309 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM 357 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM
310 getSampleMetadata <- function(xcmsSet=NULL, sampleMetadataOutput="sampleMetadata.tsv") { 358 getSampleMetadata <- function(xdata=NULL, sampleMetadataOutput="sampleMetadata.tsv") {
311 cat("Creating the sampleMetadata file...\n") 359 cat("Creating the sampleMetadata file...\n")
312 360
313 #Create the sampleMetada dataframe 361 #Create the sampleMetada dataframe
314 sampleMetadata=xset@phenoData 362 sampleMetadata <- xdata@phenoData@data
315 sampleNamesOrigin=rownames(sampleMetadata) 363 rownames(sampleMetadata) <- NULL
316 sampleNamesMakeNames=make.names(sampleNamesOrigin) 364 colnames(sampleMetadata) <- c("sampleMetadata", "class")
365
366 sampleNamesOrigin <- sampleMetadata$sampleMetadata
367 sampleNamesMakeNames <- make.names(sampleNamesOrigin)
317 368
318 if (any(duplicated(sampleNamesMakeNames))) { 369 if (any(duplicated(sampleNamesMakeNames))) {
319 write("\n\nERROR: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names().\nIn your case, at least two columns after the renaming obtain the same name, thus XCMS will collapse those columns per name.", stderr()) 370 write("\n\nERROR: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names().\nIn your case, at least two columns after the renaming obtain the same name, thus XCMS will collapse those columns per name.", stderr())
320 for (sampleName in sampleNamesOrigin) { 371 for (sampleName in sampleNamesOrigin) {
321 write(paste(sampleName,"\t->\t",make.names(sampleName)),stderr()) 372 write(paste(sampleName,"\t->\t",make.names(sampleName)),stderr())
328 for (sampleName in sampleNamesOrigin) { 379 for (sampleName in sampleNamesOrigin) {
329 cat(paste(sampleName,"\t->\t",make.names(sampleName),"\n")) 380 cat(paste(sampleName,"\t->\t",make.names(sampleName),"\n"))
330 } 381 }
331 } 382 }
332 383
333 sampleMetadata$sampleMetadata=sampleNamesMakeNames 384 sampleMetadata$sampleMetadata <- sampleNamesMakeNames
334 sampleMetadata=cbind(sampleMetadata["sampleMetadata"],sampleMetadata["class"]) #Reorder columns 385
335 rownames(sampleMetadata)=NULL 386
336
337 #Create a list of files name in the current directory
338 list_files=xset@filepaths
339 #For each sample file, the following actions are done 387 #For each sample file, the following actions are done
340 for (file in list_files){ 388 for (fileIdx in 1:length(fileNames(xdata))) {
341 #Check if the file is in the CDF format 389 #Check if the file is in the CDF format
342 if (!mzR:::netCDFIsFile(file)){ 390 if (!mzR:::netCDFIsFile(fileNames(xdata))) {
343 391
344 # If the column isn't exist, with add one filled with NA 392 # If the column isn't exist, with add one filled with NA
345 if (is.null(sampleMetadata$polarity)) sampleMetadata$polarity=NA 393 if (is.null(sampleMetadata$polarity)) sampleMetadata$polarity <- NA
346 394
347 #Create a simple xcmsRaw object for each sample
348 xcmsRaw=xcmsRaw(file)
349 #Extract the polarity (a list of polarities) 395 #Extract the polarity (a list of polarities)
350 polarity=xcmsRaw@polarity 396 polarity <- fData(xdata)[fData(xdata)$fileIdx == fileIdx,"polarity"]
351 #Verify if all the scans have the same polarity 397 #Verify if all the scans have the same polarity
352 uniq_list=unique(polarity) 398 uniq_list <- unique(polarity)
353 if (length(uniq_list)>1){ 399 if (length(uniq_list)>1){
354 polarity="mixed" 400 polarity <- "mixed"
355 } else { 401 } else {
356 polarity=as.character(uniq_list) 402 polarity <- as.character(uniq_list)
357 } 403 }
358 #Transforms the character to obtain only the sample name
359 filename=basename(file)
360 library(tools)
361 samplename=file_path_sans_ext(filename)
362 404
363 #Set the polarity attribute 405 #Set the polarity attribute
364 sampleMetadata$polarity[sampleMetadata$sampleMetadata==samplename]=polarity 406 sampleMetadata$polarity[fileIdx] <- polarity
365
366 #Delete xcmsRaw object because it creates a bug for the fillpeaks step
367 rm(xcmsRaw)
368 } 407 }
369 408
370 } 409 }
371 410
372 write.table(sampleMetadata, sep="\t", quote=FALSE, row.names=FALSE, file=sampleMetadataOutput) 411 write.table(sampleMetadata, sep="\t", quote=FALSE, row.names=FALSE, file=sampleMetadataOutput)
373 412
374 return(list("sampleNamesOrigin"=sampleNamesOrigin,"sampleNamesMakeNames"=sampleNamesMakeNames)) 413 return(list("sampleNamesOrigin"=sampleNamesOrigin, "sampleNamesMakeNames"=sampleNamesMakeNames))
375 414
376 } 415 }
377 416
378 417
379 ## 418 # This function check if xcms will found all the files
380 ## This function check if xcms will found all the files
381 ##
382 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM 419 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM
383 checkFilesCompatibilityWithXcms <- function(directory) { 420 checkFilesCompatibilityWithXcms <- function(directory) {
384 cat("Checking files filenames compatibilities with xmcs...\n") 421 cat("Checking files filenames compatibilities with xmcs...\n")
385 # WHAT XCMS WILL FIND 422 # WHAT XCMS WILL FIND
386 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") 423 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]")
387 filepattern <- paste(paste("\\.", filepattern, "$", sep = ""),collapse = "|") 424 filepattern <- paste(paste("\\.", filepattern, "$", sep=""),collapse="|")
388 info <- file.info(directory) 425 info <- file.info(directory)
389 listed <- list.files(directory[info$isdir], pattern = filepattern,recursive = TRUE, full.names = TRUE) 426 listed <- list.files(directory[info$isdir], pattern=filepattern, recursive=TRUE, full.names=TRUE)
390 files <- c(directory[!info$isdir], listed) 427 files <- c(directory[!info$isdir], listed)
391 files_abs <- file.path(getwd(), files) 428 files_abs <- file.path(getwd(), files)
392 exists <- file.exists(files_abs) 429 exists <- file.exists(files_abs)
393 files[exists] <- files_abs[exists] 430 files[exists] <- files_abs[exists]
394 files[exists] <- sub("//","/",files[exists]) 431 files[exists] <- sub("//","/",files[exists])
395 432
396 # WHAT IS ON THE FILESYSTEM 433 # WHAT IS ON THE FILESYSTEM
397 filesystem_filepaths=system(paste("find $PWD/",directory," -not -name '\\.*' -not -path '*conda-env*' -type f -name \"*\"", sep=""), intern=T) 434 filesystem_filepaths <- system(paste("find $PWD/",directory," -not -name '\\.*' -not -path '*conda-env*' -type f -name \"*\"", sep=""), intern=T)
398 filesystem_filepaths=filesystem_filepaths[grep(filepattern, filesystem_filepaths, perl=T)] 435 filesystem_filepaths <- filesystem_filepaths[grep(filepattern, filesystem_filepaths, perl=T)]
399 436
400 # COMPARISON 437 # COMPARISON
401 if (!is.na(table(filesystem_filepaths %in% files)["FALSE"])) { 438 if (!is.na(table(filesystem_filepaths %in% files)["FALSE"])) {
402 write("\n\nERROR: List of the files which will not be imported by xcmsSet",stderr()) 439 write("\n\nERROR: List of the files which will not be imported by xcmsSet",stderr())
403 write(filesystem_filepaths[!(filesystem_filepaths %in% files)],stderr()) 440 write(filesystem_filepaths[!(filesystem_filepaths %in% files)],stderr())
404 stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.") 441 stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.")
405 } 442 }
406 } 443 }
407 444
408 445
409 446 #This function list the compatible files within the directory as xcms did
410 ## 447 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM
411 ## This function check if XML contains special caracters. It also checks integrity and completness. 448 getMSFiles <- function (directory) {
412 ## 449 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]")
450 filepattern <- paste(paste("\\.", filepattern, "$", sep=""),collapse="|")
451 info <- file.info(directory)
452 listed <- list.files(directory[info$isdir], pattern=filepattern,recursive=TRUE, full.names=TRUE)
453 files <- c(directory[!info$isdir], listed)
454 exists <- file.exists(files)
455 files <- files[exists]
456 return(files)
457 }
458
459 # This function check if XML contains special caracters. It also checks integrity and completness.
413 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM 460 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM
414 checkXmlStructure <- function (directory) { 461 checkXmlStructure <- function (directory) {
415 cat("Checking XML structure...\n") 462 cat("Checking XML structure...\n")
416 463
417 cmd=paste("IFS=$'\n'; for xml in $(find",directory,"-not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'); do if [ $(xmllint --nonet --noout \"$xml\" 2> /dev/null; echo $?) -gt 0 ]; then echo $xml;fi; done;") 464 cmd <- paste("IFS=$'\n'; for xml in $(find",directory,"-not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'); do if [ $(xmllint --nonet --noout \"$xml\" 2> /dev/null; echo $?) -gt 0 ]; then echo $xml;fi; done;")
418 capture=system(cmd,intern=TRUE) 465 capture <- system(cmd, intern=TRUE)
419 466
420 if (length(capture)>0){ 467 if (length(capture)>0){
421 #message=paste("The following mzXML or mzML file is incorrect, please check these files first:",capture) 468 #message=paste("The following mzXML or mzML file is incorrect, please check these files first:",capture)
422 write("\n\nERROR: The following mzXML or mzML file(s) are incorrect, please check these files first:", stderr()) 469 write("\n\nERROR: The following mzXML or mzML file(s) are incorrect, please check these files first:", stderr())
423 write(capture, stderr()) 470 write(capture, stderr())
425 } 472 }
426 473
427 } 474 }
428 475
429 476
430 ## 477 # This function check if XML contain special characters
431 ## This function check if XML contain special characters
432 ##
433 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM 478 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM
434 deleteXmlBadCharacters<- function (directory) { 479 deleteXmlBadCharacters<- function (directory) {
435 cat("Checking Non ASCII characters in the XML...\n") 480 cat("Checking Non ASCII characters in the XML...\n")
436 481
437 processed=F 482 processed <- F
438 l=system( paste("find",directory, "-not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'"),intern=TRUE) 483 l <- system( paste("find",directory, "-not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'"), intern=TRUE)
439 for (i in l){ 484 for (i in l){
440 cmd=paste("LC_ALL=C grep '[^ -~]' \"",i,"\"",sep="") 485 cmd <- paste("LC_ALL=C grep '[^ -~]' \"", i, "\"", sep="")
441 capture=suppressWarnings(system(cmd,intern=TRUE)) 486 capture <- suppressWarnings(system(cmd, intern=TRUE))
442 if (length(capture)>0){ 487 if (length(capture)>0){
443 cmd=paste("perl -i -pe 's/[^[:ascii:]]//g;'",i) 488 cmd <- paste("perl -i -pe 's/[^[:ascii:]]//g;'",i)
444 print( paste("WARNING: Non ASCII characters have been removed from the ",i,"file") ) 489 print( paste("WARNING: Non ASCII characters have been removed from the ",i,"file") )
445 c=system(cmd,intern=TRUE) 490 c <- system(cmd, intern=TRUE)
446 capture="" 491 capture <- ""
447 processed=T 492 processed <- T
448 } 493 }
449 } 494 }
450 if (processed) cat("\n\n") 495 if (processed) cat("\n\n")
451 return(processed) 496 return(processed)
452 } 497 }
453 498
454 499
455 ## 500 # This function will compute MD5 checksum to check the data integrity
456 ## This function will compute MD5 checksum to check the data integrity
457 ##
458 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr 501 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr
459 getMd5sum <- function (directory) { 502 getMd5sum <- function (directory) {
460 cat("Compute md5 checksum...\n") 503 cat("Compute md5 checksum...\n")
461 # WHAT XCMS WILL FIND 504 # WHAT XCMS WILL FIND
462 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") 505 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]")
463 filepattern <- paste(paste("\\.", filepattern, "$", sep = ""),collapse = "|") 506 filepattern <- paste(paste("\\.", filepattern, "$", sep=""),collapse="|")
464 info <- file.info(directory) 507 info <- file.info(directory)
465 listed <- list.files(directory[info$isdir], pattern = filepattern,recursive = TRUE, full.names = TRUE) 508 listed <- list.files(directory[info$isdir], pattern=filepattern, recursive=TRUE, full.names=TRUE)
466 files <- c(directory[!info$isdir], listed) 509 files <- c(directory[!info$isdir], listed)
467 exists <- file.exists(files) 510 exists <- file.exists(files)
468 files <- files[exists] 511 files <- files[exists]
469 512
470 library(tools) 513 library(tools)
474 return(as.matrix(md5sum(files))) 517 return(as.matrix(md5sum(files)))
475 } 518 }
476 519
477 520
478 # This function get the raw file path from the arguments 521 # This function get the raw file path from the arguments
479 getRawfilePathFromArguments <- function(singlefile, zipfile, listArguments) { 522 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr
480 if (!is.null(listArguments[["zipfile"]])) zipfile = listArguments[["zipfile"]] 523 getRawfilePathFromArguments <- function(singlefile, zipfile, args) {
481 if (!is.null(listArguments[["zipfilePositive"]])) zipfile = listArguments[["zipfilePositive"]] 524 if (!is.null(args$zipfile)) zipfile <- args$zipfile
482 if (!is.null(listArguments[["zipfileNegative"]])) zipfile = listArguments[["zipfileNegative"]] 525 if (!is.null(args$zipfilePositive)) zipfile <- args$zipfilePositive
483 526 if (!is.null(args$zipfileNegative)) zipfile <- args$zipfileNegative
484 if (!is.null(listArguments[["singlefile_galaxyPath"]])) { 527
485 singlefile_galaxyPaths = listArguments[["singlefile_galaxyPath"]]; 528 if (!is.null(args$singlefile_galaxyPath)) {
486 singlefile_sampleNames = listArguments[["singlefile_sampleName"]] 529 singlefile_galaxyPaths <- args$singlefile_galaxyPath;
487 } 530 singlefile_sampleNames <- args$singlefile_sampleName
488 if (!is.null(listArguments[["singlefile_galaxyPathPositive"]])) { 531 }
489 singlefile_galaxyPaths = listArguments[["singlefile_galaxyPathPositive"]]; 532 if (!is.null(args$singlefile_galaxyPathPositive)) {
490 singlefile_sampleNames = listArguments[["singlefile_sampleNamePositive"]] 533 singlefile_galaxyPaths <- args$singlefile_galaxyPathPositive;
491 } 534 singlefile_sampleNames <- args$singlefile_sampleNamePositive
492 if (!is.null(listArguments[["singlefile_galaxyPathNegative"]])) { 535 }
493 singlefile_galaxyPaths = listArguments[["singlefile_galaxyPathNegative"]]; 536 if (!is.null(args$singlefile_galaxyPathNegative)) {
494 singlefile_sampleNames = listArguments[["singlefile_sampleNameNegative"]] 537 singlefile_galaxyPaths <- args$singlefile_galaxyPathNegative;
538 singlefile_sampleNames <- args$singlefile_sampleNameNegative
495 } 539 }
496 if (exists("singlefile_galaxyPaths")){ 540 if (exists("singlefile_galaxyPaths")){
497 singlefile_galaxyPaths = unlist(strsplit(singlefile_galaxyPaths,",")) 541 singlefile_galaxyPaths <- unlist(strsplit(singlefile_galaxyPaths,","))
498 singlefile_sampleNames = unlist(strsplit(singlefile_sampleNames,",")) 542 singlefile_sampleNames <- unlist(strsplit(singlefile_sampleNames,","))
499 543
500 singlefile=NULL 544 singlefile <- NULL
501 for (singlefile_galaxyPath_i in seq(1:length(singlefile_galaxyPaths))) { 545 for (singlefile_galaxyPath_i in seq(1:length(singlefile_galaxyPaths))) {
502 singlefile_galaxyPath=singlefile_galaxyPaths[singlefile_galaxyPath_i] 546 singlefile_galaxyPath <- singlefile_galaxyPaths[singlefile_galaxyPath_i]
503 singlefile_sampleName=singlefile_sampleNames[singlefile_galaxyPath_i] 547 singlefile_sampleName <- singlefile_sampleNames[singlefile_galaxyPath_i]
504 singlefile[[singlefile_sampleName]] = singlefile_galaxyPath 548 singlefile[[singlefile_sampleName]] <- singlefile_galaxyPath
505 } 549 }
506 } 550 }
507 for (argument in c("zipfile","zipfilePositive","zipfileNegative","singlefile_galaxyPath","singlefile_sampleName","singlefile_galaxyPathPositive","singlefile_sampleNamePositive","singlefile_galaxyPathNegative","singlefile_sampleNameNegative")) { 551 for (argument in c("zipfile","zipfilePositive","zipfileNegative","singlefile_galaxyPath","singlefile_sampleName","singlefile_galaxyPathPositive","singlefile_sampleNamePositive","singlefile_galaxyPathNegative","singlefile_sampleNameNegative")) {
508 listArguments[[argument]]=NULL 552 args[[argument]] <- NULL
509 } 553 }
510 return(list(zipfile=zipfile, singlefile=singlefile, listArguments=listArguments)) 554 return(list(zipfile=zipfile, singlefile=singlefile, args=args))
511 } 555 }
512 556
513 557
514 # This function retrieve the raw file in the working directory 558 # This function retrieve the raw file in the working directory
515 # - if zipfile: unzip the file with its directory tree 559 # - if zipfile: unzip the file with its directory tree
516 # - if singlefiles: set symlink with the good filename 560 # - if singlefiles: set symlink with the good filename
561 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr
517 retrieveRawfileInTheWorkingDirectory <- function(singlefile, zipfile) { 562 retrieveRawfileInTheWorkingDirectory <- function(singlefile, zipfile) {
518 if(!is.null(singlefile) && (length("singlefile")>0)) { 563 if(!is.null(singlefile) && (length("singlefile")>0)) {
519 for (singlefile_sampleName in names(singlefile)) { 564 for (singlefile_sampleName in names(singlefile)) {
520 singlefile_galaxyPath = singlefile[[singlefile_sampleName]] 565 singlefile_galaxyPath <- singlefile[[singlefile_sampleName]]
521 if(!file.exists(singlefile_galaxyPath)){ 566 if(!file.exists(singlefile_galaxyPath)){
522 error_message=paste("Cannot access the sample:",singlefile_sampleName,"located:",singlefile_galaxyPath,". Please, contact your administrator ... if you have one!") 567 error_message <- paste("Cannot access the sample:",singlefile_sampleName,"located:",singlefile_galaxyPath,". Please, contact your administrator ... if you have one!")
523 print(error_message); stop(error_message) 568 print(error_message); stop(error_message)
524 } 569 }
525 570
526 file.symlink(singlefile_galaxyPath,singlefile_sampleName) 571 if (!suppressWarnings( try (file.link(singlefile_galaxyPath, singlefile_sampleName), silent=T)))
527 } 572 file.copy(singlefile_galaxyPath, singlefile_sampleName)
528 directory = "." 573
529 574 }
530 } 575 directory <- "."
531 if(!is.null(zipfile) && (zipfile!="")) { 576
577 }
578 if(!is.null(zipfile) && (zipfile != "")) {
532 if(!file.exists(zipfile)){ 579 if(!file.exists(zipfile)){
533 error_message=paste("Cannot access the Zip file:",zipfile,". Please, contact your administrator ... if you have one!") 580 error_message <- paste("Cannot access the Zip file:",zipfile,". Please, contact your administrator ... if you have one!")
534 print(error_message) 581 print(error_message)
535 stop(error_message) 582 stop(error_message)
536 } 583 }
537 584
538 #list all file in the zip file 585 #list all file in the zip file
539 #zip_files=unzip(zipfile,list=T)[,"Name"] 586 #zip_files <- unzip(zipfile,list=T)[,"Name"]
540 587
541 #unzip 588 #unzip
542 suppressWarnings(unzip(zipfile, unzip="unzip")) 589 suppressWarnings(unzip(zipfile, unzip="unzip"))
543 590
544 #get the directory name 591 #get the directory name
545 filesInZip=unzip(zipfile, list=T); 592 suppressWarnings(filesInZip <- unzip(zipfile, list=T))
546 directories=unique(unlist(lapply(strsplit(filesInZip$Name,"/"), function(x) x[1]))); 593 directories <- unique(unlist(lapply(strsplit(filesInZip$Name,"/"), function(x) x[1])))
547 directories=directories[!(directories %in% c("__MACOSX")) & file.info(directories)$isdir] 594 directories <- directories[!(directories %in% c("__MACOSX")) & file.info(directories)$isdir]
548 directory = "." 595 directory <- "."
549 if (length(directories) == 1) directory = directories 596 if (length(directories) == 1) directory <- directories
550 597
551 cat("files_root_directory\t",directory,"\n") 598 cat("files_root_directory\t",directory,"\n")
552 599
553 } 600 }
554 return (directory) 601 return (directory)
555 } 602 }
603
604
605 # This function retrieve a xset like object
606 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr
607 getxcmsSetObject <- function(xobject) {
608 # XCMS 1.x
609 if (class(xobject) == "xcmsSet")
610 return (xobject)
611 # XCMS 3.x
612 if (class(xobject) == "XCMSnExp") {
613 # Get the legacy xcmsSet object
614 suppressWarnings(xset <- as(xobject, 'xcmsSet'))
615 sampclass(xset) <- xset@phenoData$sample_group
616 return (xset)
617 }
618 }
619
620
621 #@TODO: remove this function as soon as we can use xcms 3.x.x from Bioconductor 3.7
622 # https://github.com/sneumann/xcms/issues/250
623 groupnamesW4M <- function(xdata, mzdec = 0, rtdec = 0) {
624 mzfmt <- paste("%.", mzdec, "f", sep = "")
625 rtfmt <- paste("%.", rtdec, "f", sep = "")
626
627 gnames <- paste("M", sprintf(mzfmt, featureDefinitions(xdata)[,"mzmed"]), "T",
628 sprintf(rtfmt, featureDefinitions(xdata)[,"rtmed"]), sep = "")
629
630 if (any(dup <- duplicated(gnames)))
631 for (dupname in unique(gnames[dup])) {
632 dupidx <- which(gnames == dupname)
633 gnames[dupidx] <- paste(gnames[dupidx], seq(along = dupidx), sep = "_")
634 }
635
636 return (gnames)
637 }
638
639 #@TODO: remove this function as soon as we can use xcms 3.x.x from Bioconductor 3.7
640 # https://github.com/sneumann/xcms/issues/247
641 .concatenate_XCMSnExp <- function(...) {
642 x <- list(...)
643 if (length(x) == 0)
644 return(NULL)
645 if (length(x) == 1)
646 return(x[[1]])
647 ## Check that all are XCMSnExp objects.
648 if (!all(unlist(lapply(x, function(z) is(z, "XCMSnExp")))))
649 stop("All passed objects should be 'XCMSnExp' objects")
650 new_x <- as(.concatenate_OnDiskMSnExp(...), "XCMSnExp")
651 ## If any of the XCMSnExp has alignment results or detected features drop
652 ## them!
653 x <- lapply(x, function(z) {
654 if (hasAdjustedRtime(z)) {
655 z <- dropAdjustedRtime(z)
656 warning("Adjusted retention times found, had to drop them.")
657 }
658 if (hasFeatures(z)) {
659 z <- dropFeatureDefinitions(z)
660 warning("Feature definitions found, had to drop them.")
661 }
662 z
663 })
664 ## Combine peaks
665 fls <- lapply(x, fileNames)
666 startidx <- cumsum(lengths(fls))
667 pks <- lapply(x, chromPeaks)
668 procH <- lapply(x, processHistory)
669 for (i in 2:length(fls)) {
670 pks[[i]][, "sample"] <- pks[[i]][, "sample"] + startidx[i - 1]
671 procH[[i]] <- lapply(procH[[i]], function(z) {
672 z@fileIndex <- as.integer(z@fileIndex + startidx[i - 1])
673 z
674 })
675 }
676 pks <- do.call(rbind, pks)
677 new_x@.processHistory <- unlist(procH)
678 chromPeaks(new_x) <- pks
679 if (validObject(new_x))
680 new_x
681 }
682
683 #@TODO: remove this function as soon as we can use xcms 3.x.x from Bioconductor 3.7
684 # https://github.com/sneumann/xcms/issues/247
685 .concatenate_OnDiskMSnExp <- function(...) {
686 x <- list(...)
687 if (length(x) == 0)
688 return(NULL)
689 if (length(x) == 1)
690 return(x[[1]])
691 ## Check that all are XCMSnExp objects.
692 if (!all(unlist(lapply(x, function(z) is(z, "OnDiskMSnExp")))))
693 stop("All passed objects should be 'OnDiskMSnExp' objects")
694 ## Check processingQueue
695 procQ <- lapply(x, function(z) z@spectraProcessingQueue)
696 new_procQ <- procQ[[1]]
697 is_ok <- unlist(lapply(procQ, function(z)
698 !is.character(all.equal(new_procQ, z))
699 ))
700 if (any(!is_ok)) {
701 warning("Processing queues from the submitted objects differ! ",
702 "Dropping the processing queue.")
703 new_procQ <- list()
704 }
705 ## processingData
706 fls <- lapply(x, function(z) z@processingData@files)
707 startidx <- cumsum(lengths(fls))
708 ## featureData
709 featd <- lapply(x, fData)
710 ## Have to update the file index and the spectrum names.
711 for (i in 2:length(featd)) {
712 featd[[i]]$fileIdx <- featd[[i]]$fileIdx + startidx[i - 1]
713 rownames(featd[[i]]) <- MSnbase:::formatFileSpectrumNames(
714 fileIds = featd[[i]]$fileIdx,
715 spectrumIds = featd[[i]]$spIdx,
716 nSpectra = nrow(featd[[i]]),
717 nFiles = length(unlist(fls))
718 )
719 }
720 featd <- do.call(rbind, featd)
721 featd$spectrum <- 1:nrow(featd)
722 ## experimentData
723 expdata <- lapply(x, function(z) {
724 ed <- z@experimentData
725 data.frame(instrumentManufacturer = ed@instrumentManufacturer,
726 instrumentModel = ed@instrumentModel,
727 ionSource = ed@ionSource,
728 analyser = ed@analyser,
729 detectorType = ed@detectorType,
730 stringsAsFactors = FALSE)
731 })
732 expdata <- do.call(rbind, expdata)
733 expdata <- new("MIAPE",
734 instrumentManufacturer = expdata$instrumentManufacturer,
735 instrumentModel = expdata$instrumentModel,
736 ionSource = expdata$ionSource,
737 analyser = expdata$analyser,
738 detectorType = expdata$detectorType)
739
740 ## protocolData
741 protodata <- lapply(x, function(z) z@protocolData)
742 if (any(unlist(lapply(protodata, nrow)) > 0))
743 warning("Found non-empty protocol data, but merging protocol data is",
744 " currently not supported. Skipped.")
745 ## phenoData
746 pdata <- do.call(rbind, lapply(x, pData))
747 res <- new(
748 "OnDiskMSnExp",
749 phenoData = new("NAnnotatedDataFrame", data = pdata),
750 featureData = new("AnnotatedDataFrame", featd),
751 processingData = new("MSnProcess",
752 processing = paste0("Concatenated [", date(), "]"),
753 files = unlist(fls), smoothed = NA),
754 experimentData = expdata,
755 spectraProcessingQueue = new_procQ)
756 if (validObject(res))
757 res
758 }
759
760 #@TODO: remove this function as soon as we can use xcms 3.x.x from Bioconductor 3.7
761 # https://github.com/sneumann/xcms/issues/247
762 c.XCMSnExp <- function(...) {
763 .concatenate_XCMSnExp(...)
764 }