Mercurial > repos > lecorguille > xcms_retcor
comparison lib.r @ 30:4d6f4cd7c3ef draft
planemo upload for repository https://github.com/workflow4metabolomics/xcms commit e384d6dd5f410799ec211f73bca0b5d5d7bc651e
author | lecorguille |
---|---|
date | Thu, 01 Mar 2018 04:16:45 -0500 |
parents | c013ed353a2f |
children | 281786a7b9a2 |
comparison
equal
deleted
inserted
replaced
29:c013ed353a2f | 30:4d6f4cd7c3ef |
---|---|
1 #Authors ABiMS TEAM | 1 #@authors ABiMS TEAM, Y. Guitton |
2 #Lib.r for Galaxy Workflow4Metabolomics xcms tools | 2 # lib.r for Galaxy Workflow4Metabolomics xcms tools |
3 # | |
4 #version 2.4: lecorguille | |
5 # add getPeaklistW4M | |
6 #version 2.3: yguitton | |
7 # correction for empty PDF when only 1 class | |
8 #version 2.2 | |
9 # correct bug in Base Peak Chromatogram (BPC) option, not only TIC when scanrange used in xcmsSet | |
10 # Note if scanrange is used a warning is prompted in R console but do not stop PDF generation | |
11 #version 2.1: yguitton | |
12 # Modifications made by Guitton Yann | |
13 | |
14 | 3 |
15 #@author G. Le Corguille | 4 #@author G. Le Corguille |
16 #This function convert if it is required the Retention Time in minutes | 5 # solve an issue with batch if arguments are logical TRUE/FALSE |
6 parseCommandArgs <- function(...) { | |
7 args <- batch::parseCommandArgs(...) | |
8 for (key in names(args)) { | |
9 if (args[key] %in% c("TRUE","FALSE")) | |
10 args[key] = as.logical(args[key]) | |
11 } | |
12 return(args) | |
13 } | |
14 | |
15 #@author G. Le Corguille | |
16 # This function will | |
17 # - load the packages | |
18 # - display the sessionInfo | |
19 loadAndDisplayPackages <- function(pkgs) { | |
20 for(pkg in pkgs) suppressPackageStartupMessages( stopifnot( library(pkg, quietly=TRUE, logical.return=TRUE, character.only=TRUE))) | |
21 | |
22 sessioninfo = sessionInfo() | |
23 cat(sessioninfo$R.version$version.string,"\n") | |
24 cat("Main packages:\n") | |
25 for (pkg in names(sessioninfo$otherPkgs)) { cat(paste(pkg,packageVersion(pkg)),"\t") }; cat("\n") | |
26 cat("Other loaded packages:\n") | |
27 for (pkg in names(sessioninfo$loadedOnly)) { cat(paste(pkg,packageVersion(pkg)),"\t") }; cat("\n") | |
28 } | |
29 | |
30 #@author G. Le Corguille | |
31 # This function convert if it is required the Retention Time in minutes | |
17 RTSecondToMinute <- function(variableMetadata, convertRTMinute) { | 32 RTSecondToMinute <- function(variableMetadata, convertRTMinute) { |
18 if (convertRTMinute){ | 33 if (convertRTMinute){ |
19 #converting the retention times (seconds) into minutes | 34 #converting the retention times (seconds) into minutes |
20 print("converting the retention times into minutes in the variableMetadata") | 35 print("converting the retention times into minutes in the variableMetadata") |
21 variableMetadata[,"rt"]=variableMetadata[,"rt"]/60 | 36 variableMetadata[,"rt"] <- variableMetadata[,"rt"]/60 |
22 variableMetadata[,"rtmin"]=variableMetadata[,"rtmin"]/60 | 37 variableMetadata[,"rtmin"] <- variableMetadata[,"rtmin"]/60 |
23 variableMetadata[,"rtmax"]=variableMetadata[,"rtmax"]/60 | 38 variableMetadata[,"rtmax"] <- variableMetadata[,"rtmax"]/60 |
24 } | 39 } |
25 return (variableMetadata) | 40 return (variableMetadata) |
26 } | 41 } |
27 | 42 |
28 #@author G. Le Corguille | 43 #@author G. Le Corguille |
29 #This function format ions identifiers | 44 # This function format ions identifiers |
30 formatIonIdentifiers <- function(variableMetadata, numDigitsRT=0, numDigitsMZ=0) { | 45 formatIonIdentifiers <- function(variableMetadata, numDigitsRT=0, numDigitsMZ=0) { |
31 splitDeco = strsplit(as.character(variableMetadata$name),"_") | 46 splitDeco <- strsplit(as.character(variableMetadata$name),"_") |
32 idsDeco = sapply(splitDeco, function(x) { deco=unlist(x)[2]; if (is.na(deco)) return ("") else return(paste0("_",deco)) }) | 47 idsDeco <- sapply(splitDeco, function(x) { deco=unlist(x)[2]; if (is.na(deco)) return ("") else return(paste0("_",deco)) }) |
33 namecustom = make.unique(paste0("M",round(variableMetadata[,"mz"],numDigitsMZ),"T",round(variableMetadata[,"rt"],numDigitsRT),idsDeco)) | 48 namecustom <- make.unique(paste0("M",round(variableMetadata[,"mz"],numDigitsMZ),"T",round(variableMetadata[,"rt"],numDigitsRT),idsDeco)) |
34 variableMetadata=cbind(name=variableMetadata$name, namecustom=namecustom, variableMetadata[,!(colnames(variableMetadata) %in% c("name"))]) | 49 variableMetadata <- cbind(name=variableMetadata$name, namecustom=namecustom, variableMetadata[,!(colnames(variableMetadata) %in% c("name"))]) |
35 return(variableMetadata) | 50 return(variableMetadata) |
51 } | |
52 | |
53 #@author G. Le Corguille | |
54 # Draw the plotChromPeakDensity 3 per page in a pdf file | |
55 getPlotChromPeakDensity <- function(xdata) { | |
56 pdf(file="plotChromPeakDensity.pdf", width=16, height=12) | |
57 | |
58 par(mfrow = c(3, 1), mar = c(4, 4, 1, 0.5)) | |
59 | |
60 group_colors <- brewer.pal(3, "Set1")[1:length(unique(xdata$sample_group))] | |
61 names(group_colors) <- unique(xdata$sample_group) | |
62 | |
63 xlim <- c(min(featureDefinitions(xdata)$rtmin), max(featureDefinitions(xdata)$rtmax)) | |
64 for (i in 1:nrow(featureDefinitions(xdata))) { | |
65 plotChromPeakDensity(xdata, mz=c(featureDefinitions(xdata)[i,]$mzmin,featureDefinitions(xdata)[i,]$mzmax), col=group_colors, pch=16, xlim=xlim) | |
66 legend("topright", legend=names(group_colors), col=group_colors, cex=0.8, lty=1) | |
67 } | |
68 | |
69 dev.off() | |
70 } | |
71 | |
72 #@author G. Le Corguille | |
73 # Draw the plotChromPeakDensity 3 per page in a pdf file | |
74 getPlotAdjustedRtime <- function(xdata) { | |
75 pdf(file="raw_vs_adjusted_rt.pdf", width=16, height=12) | |
76 # Color by group | |
77 group_colors <- brewer.pal(3, "Set1")[1:length(unique(xdata$sample_group))] | |
78 names(group_colors) <- unique(xdata$sample_group) | |
79 plotAdjustedRtime(xdata, col = group_colors[xdata$sample_group]) | |
80 legend("topright", legend=names(group_colors), col=group_colors, cex=0.8, lty=1) | |
81 # Color by sample | |
82 plotAdjustedRtime(xdata, col = rainbow(length(xdata@phenoData@data$sample_name))) | |
83 legend("topright", legend=xdata@phenoData@data$sample_name, col=rainbow(length(xdata@phenoData@data$sample_name)), cex=0.8, lty=1) | |
84 dev.off() | |
36 } | 85 } |
37 | 86 |
38 #@author G. Le Corguille | 87 #@author G. Le Corguille |
39 # value: intensity values to be used into, maxo or intb | 88 # value: intensity values to be used into, maxo or intb |
40 getPeaklistW4M <- function(xset, intval="into",convertRTMinute=F,numDigitsMZ=4,numDigitsRT=0,variableMetadataOutput,dataMatrixOutput) { | 89 getPeaklistW4M <- function(xdata, intval="into", convertRTMinute=F, numDigitsMZ=4, numDigitsRT=0, variableMetadataOutput, dataMatrixOutput) { |
41 variableMetadata_dataMatrix = peakTable(xset, method="medret", value=intval) | 90 dataMatrix <- featureValues(xdata, method="medret", value=intval) |
42 variableMetadata_dataMatrix = cbind(name=groupnames(xset),variableMetadata_dataMatrix) | 91 colnames(dataMatrix) <- tools::file_path_sans_ext(colnames(dataMatrix)) |
43 | 92 dataMatrix = cbind(name=groupnamesW4M(xdata), dataMatrix) |
44 dataMatrix = variableMetadata_dataMatrix[,(make.names(colnames(variableMetadata_dataMatrix)) %in% c("name", make.names(sampnames(xset))))] | 93 variableMetadata <- featureDefinitions(xdata) |
45 | 94 colnames(variableMetadata)[1] = "mz"; colnames(variableMetadata)[4] = "rt" |
46 variableMetadata = variableMetadata_dataMatrix[,!(make.names(colnames(variableMetadata_dataMatrix)) %in% c(make.names(sampnames(xset))))] | 95 variableMetadata = data.frame(name=groupnamesW4M(xdata), variableMetadata) |
47 variableMetadata = RTSecondToMinute(variableMetadata, convertRTMinute) | 96 |
48 variableMetadata = formatIonIdentifiers(variableMetadata, numDigitsRT=numDigitsRT, numDigitsMZ=numDigitsMZ) | 97 variableMetadata <- RTSecondToMinute(variableMetadata, convertRTMinute) |
98 variableMetadata <- formatIonIdentifiers(variableMetadata, numDigitsRT=numDigitsRT, numDigitsMZ=numDigitsMZ) | |
49 | 99 |
50 write.table(variableMetadata, file=variableMetadataOutput,sep="\t",quote=F,row.names=F) | 100 write.table(variableMetadata, file=variableMetadataOutput,sep="\t",quote=F,row.names=F) |
51 write.table(dataMatrix, file=dataMatrixOutput,sep="\t",quote=F,row.names=F) | 101 write.table(dataMatrix, file=dataMatrixOutput,sep="\t",quote=F,row.names=F) |
102 | |
52 } | 103 } |
53 | 104 |
54 #@author Y. Guitton | 105 #@author Y. Guitton |
55 getBPC <- function(file,rtcor=NULL, ...) { | 106 getBPC <- function(file,rtcor=NULL, ...) { |
56 object <- xcmsRaw(file) | 107 object <- xcmsRaw(file) |
68 stop() | 119 stop() |
69 } else { | 120 } else { |
70 files <- filepaths(xcmsSet) | 121 files <- filepaths(xcmsSet) |
71 } | 122 } |
72 | 123 |
73 phenoDataClass<-as.vector(levels(xcmsSet@phenoData[,1])) #sometime phenoData have more than 1 column use first as class | 124 phenoDataClass <- as.vector(levels(xcmsSet@phenoData[,"class"])) #sometime phenoData have more than 1 column use first as class |
74 | 125 |
75 classnames<-vector("list",length(phenoDataClass)) | 126 classnames <- vector("list",length(phenoDataClass)) |
76 for (i in 1:length(phenoDataClass)){ | 127 for (i in 1:length(phenoDataClass)){ |
77 classnames[[i]]<-which( xcmsSet@phenoData[,1]==phenoDataClass[i]) | 128 classnames[[i]] <- which( xcmsSet@phenoData[,"class"]==phenoDataClass[i]) |
78 } | 129 } |
79 | 130 |
80 N <- dim(phenoData(xcmsSet))[1] | 131 N <- dim(phenoData(xcmsSet))[1] |
81 | 132 |
82 TIC <- vector("list",N) | 133 TIC <- vector("list",N) |
99 | 150 |
100 | 151 |
101 | 152 |
102 pdf(pdfname,w=16,h=10) | 153 pdf(pdfname,w=16,h=10) |
103 cols <- rainbow(N) | 154 cols <- rainbow(N) |
104 lty = 1:N | 155 lty <- 1:N |
105 pch = 1:N | 156 pch <- 1:N |
106 #search for max x and max y in BPCs | 157 #search for max x and max y in BPCs |
107 xlim = range(sapply(TIC, function(x) range(x[,1]))) | 158 xlim <- range(sapply(TIC, function(x) range(x[,1]))) |
108 ylim = range(sapply(TIC, function(x) range(x[,2]))) | 159 ylim <- range(sapply(TIC, function(x) range(x[,2]))) |
109 ylim = c(-ylim[2], ylim[2]) | 160 ylim <- c(-ylim[2], ylim[2]) |
110 | 161 |
111 | 162 |
112 ##plot start | 163 ##plot start |
113 | 164 |
114 if (length(phenoDataClass)>2){ | 165 if (length(phenoDataClass)>2){ |
115 for (k in 1:(length(phenoDataClass)-1)){ | 166 for (k in 1:(length(phenoDataClass)-1)){ |
116 for (l in (k+1):length(phenoDataClass)){ | 167 for (l in (k+1):length(phenoDataClass)){ |
117 #print(paste(phenoDataClass[k],"vs",phenoDataClass[l],sep=" ")) | 168 #print(paste(phenoDataClass[k],"vs",phenoDataClass[l],sep=" ")) |
118 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",phenoDataClass[k]," vs ",phenoDataClass[l], sep=""), xlab = "Retention Time (min)", ylab = "BPC") | 169 plot(0, 0, type="n", xlim=xlim/60, ylim=ylim, main=paste("Base Peak Chromatograms \n","BPCs_",phenoDataClass[k]," vs ",phenoDataClass[l], sep=""), xlab="Retention Time (min)", ylab="BPC") |
119 colvect<-NULL | 170 colvect <- NULL |
120 for (j in 1:length(classnames[[k]])) { | 171 for (j in 1:length(classnames[[k]])) { |
121 tic <- TIC[[classnames[[k]][j]]] | 172 tic <- TIC[[classnames[[k]][j]]] |
122 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") | 173 # points(tic[,1]/60, tic[,2], col=cols[i], pch=pch[i], type="l") |
123 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") | 174 points(tic[,1]/60, tic[,2], col=cols[classnames[[k]][j]], pch=pch[classnames[[k]][j]], type="l") |
124 colvect<-append(colvect,cols[classnames[[k]][j]]) | 175 colvect <- append(colvect,cols[classnames[[k]][j]]) |
125 } | 176 } |
126 for (j in 1:length(classnames[[l]])) { | 177 for (j in 1:length(classnames[[l]])) { |
127 # i=class2names[j] | 178 # i <- class2names[j] |
128 tic <- TIC[[classnames[[l]][j]]] | 179 tic <- TIC[[classnames[[l]][j]]] |
129 points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") | 180 points(tic[,1]/60, -tic[,2], col=cols[classnames[[l]][j]], pch=pch[classnames[[l]][j]], type="l") |
130 colvect<-append(colvect,cols[classnames[[l]][j]]) | 181 colvect <- append(colvect,cols[classnames[[l]][j]]) |
131 } | 182 } |
132 legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) | 183 legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col=colvect, lty=lty, pch=pch) |
133 } | 184 } |
134 } | 185 } |
135 }#end if length >2 | 186 }#end if length >2 |
136 | 187 |
137 if (length(phenoDataClass)==2){ | 188 if (length(phenoDataClass)==2){ |
138 k=1 | 189 k <- 1 |
139 l=2 | 190 l <- 2 |
140 colvect<-NULL | 191 colvect <- NULL |
141 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",phenoDataClass[k],"vs",phenoDataClass[l], sep=""), xlab = "Retention Time (min)", ylab = "BPC") | 192 plot(0, 0, type="n", xlim=xlim/60, ylim=ylim, main=paste("Base Peak Chromatograms \n","BPCs_",phenoDataClass[k],"vs",phenoDataClass[l], sep=""), xlab="Retention Time (min)", ylab="BPC") |
142 | 193 |
143 for (j in 1:length(classnames[[k]])) { | 194 for (j in 1:length(classnames[[k]])) { |
144 | 195 |
145 tic <- TIC[[classnames[[k]][j]]] | 196 tic <- TIC[[classnames[[k]][j]]] |
146 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") | 197 # points(tic[,1]/60, tic[,2], col=cols[i], pch=pch[i], type="l") |
147 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") | 198 points(tic[,1]/60, tic[,2], col=cols[classnames[[k]][j]], pch=pch[classnames[[k]][j]], type="l") |
148 colvect<-append(colvect,cols[classnames[[k]][j]]) | 199 colvect<-append(colvect,cols[classnames[[k]][j]]) |
149 } | 200 } |
150 for (j in 1:length(classnames[[l]])) { | 201 for (j in 1:length(classnames[[l]])) { |
151 # i=class2names[j] | 202 # i <- class2names[j] |
152 tic <- TIC[[classnames[[l]][j]]] | 203 tic <- TIC[[classnames[[l]][j]]] |
153 points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") | 204 points(tic[,1]/60, -tic[,2], col=cols[classnames[[l]][j]], pch=pch[classnames[[l]][j]], type="l") |
154 colvect<-append(colvect,cols[classnames[[l]][j]]) | 205 colvect <- append(colvect,cols[classnames[[l]][j]]) |
155 } | 206 } |
156 legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) | 207 legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col=colvect, lty=lty, pch=pch) |
157 | 208 |
158 }#end length ==2 | 209 }#end length ==2 |
159 | 210 |
160 #case where only one class | 211 #case where only one class |
161 if (length(phenoDataClass)==1){ | 212 if (length(phenoDataClass)==1){ |
162 k=1 | 213 k <- 1 |
163 ylim = range(sapply(TIC, function(x) range(x[,2]))) | 214 ylim <- range(sapply(TIC, function(x) range(x[,2]))) |
164 colvect<-NULL | 215 colvect <- NULL |
165 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",phenoDataClass[k], sep=""), xlab = "Retention Time (min)", ylab = "BPC") | 216 plot(0, 0, type="n", xlim=xlim/60, ylim=ylim, main=paste("Base Peak Chromatograms \n","BPCs_",phenoDataClass[k], sep=""), xlab="Retention Time (min)", ylab="BPC") |
166 | 217 |
167 for (j in 1:length(classnames[[k]])) { | 218 for (j in 1:length(classnames[[k]])) { |
168 tic <- TIC[[classnames[[k]][j]]] | 219 tic <- TIC[[classnames[[k]][j]]] |
169 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") | 220 # points(tic[,1]/60, tic[,2], col=cols[i], pch=pch[i], type="l") |
170 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") | 221 points(tic[,1]/60, tic[,2], col=cols[classnames[[k]][j]], pch=pch[classnames[[k]][j]], type="l") |
171 colvect<-append(colvect,cols[classnames[[k]][j]]) | 222 colvect <- append(colvect,cols[classnames[[k]][j]]) |
172 } | 223 } |
173 | 224 |
174 legend("topright",paste(basename(files[c(classnames[[k]])])), col = colvect, lty = lty, pch = pch) | 225 legend("topright",paste(basename(files[c(classnames[[k]])])), col=colvect, lty=lty, pch=pch) |
175 | 226 |
176 }#end length ==1 | 227 }#end length ==1 |
177 | 228 |
178 dev.off() #pdf(pdfname,w=16,h=10) | 229 dev.off() #pdf(pdfname,w=16,h=10) |
179 | 230 |
181 } | 232 } |
182 | 233 |
183 | 234 |
184 | 235 |
185 #@author Y. Guitton | 236 #@author Y. Guitton |
186 getTIC <- function(file,rtcor=NULL) { | 237 getTIC <- function(file, rtcor=NULL) { |
187 object <- xcmsRaw(file) | 238 object <- xcmsRaw(file) |
188 cbind(if (is.null(rtcor)) object@scantime else rtcor, rawEIC(object,mzrange=range(object@env$mz))$intensity) | 239 cbind(if (is.null(rtcor)) object@scantime else rtcor, rawEIC(object, mzrange=range(object@env$mz))$intensity) |
189 } | 240 } |
190 | 241 |
191 ## | 242 #overlay TIC from all files in current folder or from xcmsSet, create pdf |
192 ## overlay TIC from all files in current folder or from xcmsSet, create pdf | |
193 ## | |
194 #@author Y. Guitton | 243 #@author Y. Guitton |
195 getTICs <- function(xcmsSet=NULL,files=NULL, pdfname="TICs.pdf",rt=c("raw","corrected")) { | 244 getTICs <- function(xcmsSet=NULL,files=NULL, pdfname="TICs.pdf", rt=c("raw","corrected")) { |
196 cat("Creating TIC pdf...\n") | 245 cat("Creating TIC pdf...\n") |
197 | 246 |
198 if (is.null(xcmsSet)) { | 247 if (is.null(xcmsSet)) { |
199 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]", "[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") | 248 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]", "[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") |
200 filepattern <- paste(paste("\\.", filepattern, "$", sep = ""), collapse = "|") | 249 filepattern <- paste(paste("\\.", filepattern, "$", sep=""), collapse="|") |
201 if (is.null(files)) | 250 if (is.null(files)) |
202 files <- getwd() | 251 files <- getwd() |
203 info <- file.info(files) | 252 info <- file.info(files) |
204 listed <- list.files(files[info$isdir], pattern = filepattern, recursive = TRUE, full.names = TRUE) | 253 listed <- list.files(files[info$isdir], pattern=filepattern, recursive=TRUE, full.names=TRUE) |
205 files <- c(files[!info$isdir], listed) | 254 files <- c(files[!info$isdir], listed) |
206 } else { | 255 } else { |
207 files <- filepaths(xcmsSet) | 256 files <- filepaths(xcmsSet) |
208 } | 257 } |
209 | 258 |
210 phenoDataClass<-as.vector(levels(xcmsSet@phenoData[,1])) #sometime phenoData have more than 1 column use first as class | 259 phenoDataClass <- as.vector(levels(xcmsSet@phenoData[,"class"])) #sometime phenoData have more than 1 column use first as class |
211 classnames<-vector("list",length(phenoDataClass)) | 260 classnames <- vector("list",length(phenoDataClass)) |
212 for (i in 1:length(phenoDataClass)){ | 261 for (i in 1:length(phenoDataClass)){ |
213 classnames[[i]]<-which( xcmsSet@phenoData[,1]==phenoDataClass[i]) | 262 classnames[[i]] <- which( xcmsSet@phenoData[,"class"]==phenoDataClass[i]) |
214 } | 263 } |
215 | 264 |
216 N <- length(files) | 265 N <- length(files) |
217 TIC <- vector("list",N) | 266 TIC <- vector("list",N) |
218 | 267 |
219 for (i in 1:N) { | 268 for (i in 1:N) { |
220 if (!is.null(xcmsSet) && rt == "corrected") | 269 if (!is.null(xcmsSet) && rt == "corrected") |
221 rtcor <- xcmsSet@rt$corrected[[i]] else | 270 rtcor <- xcmsSet@rt$corrected[[i]] else |
222 rtcor <- NULL | 271 rtcor <- NULL |
223 TIC[[i]] <- getTIC(files[i],rtcor=rtcor) | 272 TIC[[i]] <- getTIC(files[i], rtcor=rtcor) |
224 } | 273 } |
225 | 274 |
226 pdf(pdfname,w=16,h=10) | 275 pdf(pdfname, w=16, h=10) |
227 cols <- rainbow(N) | 276 cols <- rainbow(N) |
228 lty = 1:N | 277 lty <- 1:N |
229 pch = 1:N | 278 pch <- 1:N |
230 #search for max x and max y in TICs | 279 #search for max x and max y in TICs |
231 xlim = range(sapply(TIC, function(x) range(x[,1]))) | 280 xlim <- range(sapply(TIC, function(x) range(x[,1]))) |
232 ylim = range(sapply(TIC, function(x) range(x[,2]))) | 281 ylim <- range(sapply(TIC, function(x) range(x[,2]))) |
233 ylim = c(-ylim[2], ylim[2]) | 282 ylim <- c(-ylim[2], ylim[2]) |
234 | 283 |
235 | 284 |
236 ##plot start | 285 ##plot start |
237 if (length(phenoDataClass)>2){ | 286 if (length(phenoDataClass)>2){ |
238 for (k in 1:(length(phenoDataClass)-1)){ | 287 for (k in 1:(length(phenoDataClass)-1)){ |
239 for (l in (k+1):length(phenoDataClass)){ | 288 for (l in (k+1):length(phenoDataClass)){ |
240 #print(paste(phenoDataClass[k],"vs",phenoDataClass[l],sep=" ")) | 289 #print(paste(phenoDataClass[k],"vs",phenoDataClass[l],sep=" ")) |
241 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",phenoDataClass[k]," vs ",phenoDataClass[l], sep=""), xlab = "Retention Time (min)", ylab = "TIC") | 290 plot(0, 0, type="n", xlim=xlim/60, ylim=ylim, main=paste("Total Ion Chromatograms \n","TICs_",phenoDataClass[k]," vs ",phenoDataClass[l], sep=""), xlab="Retention Time (min)", ylab="TIC") |
242 colvect<-NULL | 291 colvect <- NULL |
243 for (j in 1:length(classnames[[k]])) { | 292 for (j in 1:length(classnames[[k]])) { |
244 tic <- TIC[[classnames[[k]][j]]] | 293 tic <- TIC[[classnames[[k]][j]]] |
245 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") | 294 # points(tic[,1]/60, tic[,2], col=cols[i], pch=pch[i], type="l") |
246 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") | 295 points(tic[,1]/60, tic[,2], col=cols[classnames[[k]][j]], pch=pch[classnames[[k]][j]], type="l") |
247 colvect<-append(colvect,cols[classnames[[k]][j]]) | 296 colvect <- append(colvect,cols[classnames[[k]][j]]) |
248 } | 297 } |
249 for (j in 1:length(classnames[[l]])) { | 298 for (j in 1:length(classnames[[l]])) { |
250 # i=class2names[j] | 299 # i=class2names[j] |
251 tic <- TIC[[classnames[[l]][j]]] | 300 tic <- TIC[[classnames[[l]][j]]] |
252 points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") | 301 points(tic[,1]/60, -tic[,2], col=cols[classnames[[l]][j]], pch=pch[classnames[[l]][j]], type="l") |
253 colvect<-append(colvect,cols[classnames[[l]][j]]) | 302 colvect <- append(colvect,cols[classnames[[l]][j]]) |
254 } | 303 } |
255 legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) | 304 legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col=colvect, lty=lty, pch=pch) |
256 } | 305 } |
257 } | 306 } |
258 }#end if length >2 | 307 }#end if length >2 |
259 if (length(phenoDataClass)==2){ | 308 if (length(phenoDataClass)==2){ |
260 k=1 | 309 k <- 1 |
261 l=2 | 310 l <- 2 |
262 | 311 |
263 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",phenoDataClass[k],"vs",phenoDataClass[l], sep=""), xlab = "Retention Time (min)", ylab = "TIC") | 312 plot(0, 0, type="n", xlim=xlim/60, ylim=ylim, main=paste("Total Ion Chromatograms \n","TICs_",phenoDataClass[k],"vs",phenoDataClass[l], sep=""), xlab="Retention Time (min)", ylab="TIC") |
264 colvect<-NULL | 313 colvect <- NULL |
265 for (j in 1:length(classnames[[k]])) { | 314 for (j in 1:length(classnames[[k]])) { |
266 tic <- TIC[[classnames[[k]][j]]] | 315 tic <- TIC[[classnames[[k]][j]]] |
267 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") | 316 # points(tic[,1]/60, tic[,2], col=cols[i], pch=pch[i], type="l") |
268 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") | 317 points(tic[,1]/60, tic[,2], col=cols[classnames[[k]][j]], pch=pch[classnames[[k]][j]], type="l") |
269 colvect<-append(colvect,cols[classnames[[k]][j]]) | 318 colvect <- append(colvect,cols[classnames[[k]][j]]) |
270 } | 319 } |
271 for (j in 1:length(classnames[[l]])) { | 320 for (j in 1:length(classnames[[l]])) { |
272 # i=class2names[j] | 321 # i <- class2names[j] |
273 tic <- TIC[[classnames[[l]][j]]] | 322 tic <- TIC[[classnames[[l]][j]]] |
274 points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") | 323 points(tic[,1]/60, -tic[,2], col=cols[classnames[[l]][j]], pch=pch[classnames[[l]][j]], type="l") |
275 colvect<-append(colvect,cols[classnames[[l]][j]]) | 324 colvect <- append(colvect,cols[classnames[[l]][j]]) |
276 } | 325 } |
277 legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) | 326 legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col=colvect, lty=lty, pch=pch) |
278 | 327 |
279 }#end length ==2 | 328 }#end length ==2 |
280 | 329 |
281 #case where only one class | 330 #case where only one class |
282 if (length(phenoDataClass)==1){ | 331 if (length(phenoDataClass)==1){ |
283 k=1 | 332 k <- 1 |
284 ylim = range(sapply(TIC, function(x) range(x[,2]))) | 333 ylim <- range(sapply(TIC, function(x) range(x[,2]))) |
285 | 334 |
286 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",phenoDataClass[k], sep=""), xlab = "Retention Time (min)", ylab = "TIC") | 335 plot(0, 0, type="n", xlim=xlim/60, ylim=ylim, main=paste("Total Ion Chromatograms \n","TICs_",phenoDataClass[k], sep=""), xlab="Retention Time (min)", ylab="TIC") |
287 colvect<-NULL | 336 colvect <- NULL |
288 for (j in 1:length(classnames[[k]])) { | 337 for (j in 1:length(classnames[[k]])) { |
289 tic <- TIC[[classnames[[k]][j]]] | 338 tic <- TIC[[classnames[[k]][j]]] |
290 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") | 339 # points(tic[,1]/60, tic[,2], col=cols[i], pch=pch[i], type="l") |
291 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") | 340 points(tic[,1]/60, tic[,2], col=cols[classnames[[k]][j]], pch=pch[classnames[[k]][j]], type="l") |
292 colvect<-append(colvect,cols[classnames[[k]][j]]) | 341 colvect <- append(colvect,cols[classnames[[k]][j]]) |
293 } | 342 } |
294 | 343 |
295 legend("topright",paste(basename(files[c(classnames[[k]])])), col = colvect, lty = lty, pch = pch) | 344 legend("topright",paste(basename(files[c(classnames[[k]])])), col=colvect, lty=lty, pch=pch) |
296 | 345 |
297 }#end length ==1 | 346 }#end length ==1 |
298 | 347 |
299 dev.off() #pdf(pdfname,w=16,h=10) | 348 dev.off() #pdf(pdfname,w=16,h=10) |
300 | 349 |
301 invisible(TIC) | 350 invisible(TIC) |
302 } | 351 } |
303 | 352 |
304 | 353 |
305 | 354 |
306 ## | 355 # Get the polarities from all the samples of a condition |
307 ## Get the polarities from all the samples of a condition | |
308 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM | 356 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM |
309 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM | 357 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM |
310 getSampleMetadata <- function(xcmsSet=NULL, sampleMetadataOutput="sampleMetadata.tsv") { | 358 getSampleMetadata <- function(xdata=NULL, sampleMetadataOutput="sampleMetadata.tsv") { |
311 cat("Creating the sampleMetadata file...\n") | 359 cat("Creating the sampleMetadata file...\n") |
312 | 360 |
313 #Create the sampleMetada dataframe | 361 #Create the sampleMetada dataframe |
314 sampleMetadata=xset@phenoData | 362 sampleMetadata <- xdata@phenoData@data |
315 sampleNamesOrigin=rownames(sampleMetadata) | 363 rownames(sampleMetadata) <- NULL |
316 sampleNamesMakeNames=make.names(sampleNamesOrigin) | 364 colnames(sampleMetadata) <- c("sampleMetadata", "class") |
365 | |
366 sampleNamesOrigin <- sampleMetadata$sampleMetadata | |
367 sampleNamesMakeNames <- make.names(sampleNamesOrigin) | |
317 | 368 |
318 if (any(duplicated(sampleNamesMakeNames))) { | 369 if (any(duplicated(sampleNamesMakeNames))) { |
319 write("\n\nERROR: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names().\nIn your case, at least two columns after the renaming obtain the same name, thus XCMS will collapse those columns per name.", stderr()) | 370 write("\n\nERROR: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names().\nIn your case, at least two columns after the renaming obtain the same name, thus XCMS will collapse those columns per name.", stderr()) |
320 for (sampleName in sampleNamesOrigin) { | 371 for (sampleName in sampleNamesOrigin) { |
321 write(paste(sampleName,"\t->\t",make.names(sampleName)),stderr()) | 372 write(paste(sampleName,"\t->\t",make.names(sampleName)),stderr()) |
328 for (sampleName in sampleNamesOrigin) { | 379 for (sampleName in sampleNamesOrigin) { |
329 cat(paste(sampleName,"\t->\t",make.names(sampleName),"\n")) | 380 cat(paste(sampleName,"\t->\t",make.names(sampleName),"\n")) |
330 } | 381 } |
331 } | 382 } |
332 | 383 |
333 sampleMetadata$sampleMetadata=sampleNamesMakeNames | 384 sampleMetadata$sampleMetadata <- sampleNamesMakeNames |
334 sampleMetadata=cbind(sampleMetadata["sampleMetadata"],sampleMetadata["class"]) #Reorder columns | 385 |
335 rownames(sampleMetadata)=NULL | 386 |
336 | |
337 #Create a list of files name in the current directory | |
338 list_files=xset@filepaths | |
339 #For each sample file, the following actions are done | 387 #For each sample file, the following actions are done |
340 for (file in list_files){ | 388 for (fileIdx in 1:length(fileNames(xdata))) { |
341 #Check if the file is in the CDF format | 389 #Check if the file is in the CDF format |
342 if (!mzR:::netCDFIsFile(file)){ | 390 if (!mzR:::netCDFIsFile(fileNames(xdata))) { |
343 | 391 |
344 # If the column isn't exist, with add one filled with NA | 392 # If the column isn't exist, with add one filled with NA |
345 if (is.null(sampleMetadata$polarity)) sampleMetadata$polarity=NA | 393 if (is.null(sampleMetadata$polarity)) sampleMetadata$polarity <- NA |
346 | 394 |
347 #Create a simple xcmsRaw object for each sample | |
348 xcmsRaw=xcmsRaw(file) | |
349 #Extract the polarity (a list of polarities) | 395 #Extract the polarity (a list of polarities) |
350 polarity=xcmsRaw@polarity | 396 polarity <- fData(xdata)[fData(xdata)$fileIdx == fileIdx,"polarity"] |
351 #Verify if all the scans have the same polarity | 397 #Verify if all the scans have the same polarity |
352 uniq_list=unique(polarity) | 398 uniq_list <- unique(polarity) |
353 if (length(uniq_list)>1){ | 399 if (length(uniq_list)>1){ |
354 polarity="mixed" | 400 polarity <- "mixed" |
355 } else { | 401 } else { |
356 polarity=as.character(uniq_list) | 402 polarity <- as.character(uniq_list) |
357 } | 403 } |
358 #Transforms the character to obtain only the sample name | |
359 filename=basename(file) | |
360 library(tools) | |
361 samplename=file_path_sans_ext(filename) | |
362 | 404 |
363 #Set the polarity attribute | 405 #Set the polarity attribute |
364 sampleMetadata$polarity[sampleMetadata$sampleMetadata==samplename]=polarity | 406 sampleMetadata$polarity[fileIdx] <- polarity |
365 | |
366 #Delete xcmsRaw object because it creates a bug for the fillpeaks step | |
367 rm(xcmsRaw) | |
368 } | 407 } |
369 | 408 |
370 } | 409 } |
371 | 410 |
372 write.table(sampleMetadata, sep="\t", quote=FALSE, row.names=FALSE, file=sampleMetadataOutput) | 411 write.table(sampleMetadata, sep="\t", quote=FALSE, row.names=FALSE, file=sampleMetadataOutput) |
373 | 412 |
374 return(list("sampleNamesOrigin"=sampleNamesOrigin,"sampleNamesMakeNames"=sampleNamesMakeNames)) | 413 return(list("sampleNamesOrigin"=sampleNamesOrigin, "sampleNamesMakeNames"=sampleNamesMakeNames)) |
375 | 414 |
376 } | 415 } |
377 | 416 |
378 | 417 |
379 ## | 418 # This function check if xcms will found all the files |
380 ## This function check if xcms will found all the files | |
381 ## | |
382 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM | 419 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM |
383 checkFilesCompatibilityWithXcms <- function(directory) { | 420 checkFilesCompatibilityWithXcms <- function(directory) { |
384 cat("Checking files filenames compatibilities with xmcs...\n") | 421 cat("Checking files filenames compatibilities with xmcs...\n") |
385 # WHAT XCMS WILL FIND | 422 # WHAT XCMS WILL FIND |
386 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") | 423 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") |
387 filepattern <- paste(paste("\\.", filepattern, "$", sep = ""),collapse = "|") | 424 filepattern <- paste(paste("\\.", filepattern, "$", sep=""),collapse="|") |
388 info <- file.info(directory) | 425 info <- file.info(directory) |
389 listed <- list.files(directory[info$isdir], pattern = filepattern,recursive = TRUE, full.names = TRUE) | 426 listed <- list.files(directory[info$isdir], pattern=filepattern, recursive=TRUE, full.names=TRUE) |
390 files <- c(directory[!info$isdir], listed) | 427 files <- c(directory[!info$isdir], listed) |
391 files_abs <- file.path(getwd(), files) | 428 files_abs <- file.path(getwd(), files) |
392 exists <- file.exists(files_abs) | 429 exists <- file.exists(files_abs) |
393 files[exists] <- files_abs[exists] | 430 files[exists] <- files_abs[exists] |
394 files[exists] <- sub("//","/",files[exists]) | 431 files[exists] <- sub("//","/",files[exists]) |
395 | 432 |
396 # WHAT IS ON THE FILESYSTEM | 433 # WHAT IS ON THE FILESYSTEM |
397 filesystem_filepaths=system(paste("find $PWD/",directory," -not -name '\\.*' -not -path '*conda-env*' -type f -name \"*\"", sep=""), intern=T) | 434 filesystem_filepaths <- system(paste("find $PWD/",directory," -not -name '\\.*' -not -path '*conda-env*' -type f -name \"*\"", sep=""), intern=T) |
398 filesystem_filepaths=filesystem_filepaths[grep(filepattern, filesystem_filepaths, perl=T)] | 435 filesystem_filepaths <- filesystem_filepaths[grep(filepattern, filesystem_filepaths, perl=T)] |
399 | 436 |
400 # COMPARISON | 437 # COMPARISON |
401 if (!is.na(table(filesystem_filepaths %in% files)["FALSE"])) { | 438 if (!is.na(table(filesystem_filepaths %in% files)["FALSE"])) { |
402 write("\n\nERROR: List of the files which will not be imported by xcmsSet",stderr()) | 439 write("\n\nERROR: List of the files which will not be imported by xcmsSet",stderr()) |
403 write(filesystem_filepaths[!(filesystem_filepaths %in% files)],stderr()) | 440 write(filesystem_filepaths[!(filesystem_filepaths %in% files)],stderr()) |
404 stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.") | 441 stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.") |
405 } | 442 } |
406 } | 443 } |
407 | 444 |
408 | 445 |
409 | 446 #This function list the compatible files within the directory as xcms did |
410 ## | 447 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM |
411 ## This function check if XML contains special caracters. It also checks integrity and completness. | 448 getMSFiles <- function (directory) { |
412 ## | 449 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") |
450 filepattern <- paste(paste("\\.", filepattern, "$", sep=""),collapse="|") | |
451 info <- file.info(directory) | |
452 listed <- list.files(directory[info$isdir], pattern=filepattern,recursive=TRUE, full.names=TRUE) | |
453 files <- c(directory[!info$isdir], listed) | |
454 exists <- file.exists(files) | |
455 files <- files[exists] | |
456 return(files) | |
457 } | |
458 | |
459 # This function check if XML contains special caracters. It also checks integrity and completness. | |
413 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM | 460 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM |
414 checkXmlStructure <- function (directory) { | 461 checkXmlStructure <- function (directory) { |
415 cat("Checking XML structure...\n") | 462 cat("Checking XML structure...\n") |
416 | 463 |
417 cmd=paste("IFS=$'\n'; for xml in $(find",directory,"-not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'); do if [ $(xmllint --nonet --noout \"$xml\" 2> /dev/null; echo $?) -gt 0 ]; then echo $xml;fi; done;") | 464 cmd <- paste("IFS=$'\n'; for xml in $(find",directory,"-not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'); do if [ $(xmllint --nonet --noout \"$xml\" 2> /dev/null; echo $?) -gt 0 ]; then echo $xml;fi; done;") |
418 capture=system(cmd,intern=TRUE) | 465 capture <- system(cmd, intern=TRUE) |
419 | 466 |
420 if (length(capture)>0){ | 467 if (length(capture)>0){ |
421 #message=paste("The following mzXML or mzML file is incorrect, please check these files first:",capture) | 468 #message=paste("The following mzXML or mzML file is incorrect, please check these files first:",capture) |
422 write("\n\nERROR: The following mzXML or mzML file(s) are incorrect, please check these files first:", stderr()) | 469 write("\n\nERROR: The following mzXML or mzML file(s) are incorrect, please check these files first:", stderr()) |
423 write(capture, stderr()) | 470 write(capture, stderr()) |
425 } | 472 } |
426 | 473 |
427 } | 474 } |
428 | 475 |
429 | 476 |
430 ## | 477 # This function check if XML contain special characters |
431 ## This function check if XML contain special characters | |
432 ## | |
433 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM | 478 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM |
434 deleteXmlBadCharacters<- function (directory) { | 479 deleteXmlBadCharacters<- function (directory) { |
435 cat("Checking Non ASCII characters in the XML...\n") | 480 cat("Checking Non ASCII characters in the XML...\n") |
436 | 481 |
437 processed=F | 482 processed <- F |
438 l=system( paste("find",directory, "-not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'"),intern=TRUE) | 483 l <- system( paste("find",directory, "-not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'"), intern=TRUE) |
439 for (i in l){ | 484 for (i in l){ |
440 cmd=paste("LC_ALL=C grep '[^ -~]' \"",i,"\"",sep="") | 485 cmd <- paste("LC_ALL=C grep '[^ -~]' \"", i, "\"", sep="") |
441 capture=suppressWarnings(system(cmd,intern=TRUE)) | 486 capture <- suppressWarnings(system(cmd, intern=TRUE)) |
442 if (length(capture)>0){ | 487 if (length(capture)>0){ |
443 cmd=paste("perl -i -pe 's/[^[:ascii:]]//g;'",i) | 488 cmd <- paste("perl -i -pe 's/[^[:ascii:]]//g;'",i) |
444 print( paste("WARNING: Non ASCII characters have been removed from the ",i,"file") ) | 489 print( paste("WARNING: Non ASCII characters have been removed from the ",i,"file") ) |
445 c=system(cmd,intern=TRUE) | 490 c <- system(cmd, intern=TRUE) |
446 capture="" | 491 capture <- "" |
447 processed=T | 492 processed <- T |
448 } | 493 } |
449 } | 494 } |
450 if (processed) cat("\n\n") | 495 if (processed) cat("\n\n") |
451 return(processed) | 496 return(processed) |
452 } | 497 } |
453 | 498 |
454 | 499 |
455 ## | 500 # This function will compute MD5 checksum to check the data integrity |
456 ## This function will compute MD5 checksum to check the data integrity | |
457 ## | |
458 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr | 501 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr |
459 getMd5sum <- function (directory) { | 502 getMd5sum <- function (directory) { |
460 cat("Compute md5 checksum...\n") | 503 cat("Compute md5 checksum...\n") |
461 # WHAT XCMS WILL FIND | 504 # WHAT XCMS WILL FIND |
462 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") | 505 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") |
463 filepattern <- paste(paste("\\.", filepattern, "$", sep = ""),collapse = "|") | 506 filepattern <- paste(paste("\\.", filepattern, "$", sep=""),collapse="|") |
464 info <- file.info(directory) | 507 info <- file.info(directory) |
465 listed <- list.files(directory[info$isdir], pattern = filepattern,recursive = TRUE, full.names = TRUE) | 508 listed <- list.files(directory[info$isdir], pattern=filepattern, recursive=TRUE, full.names=TRUE) |
466 files <- c(directory[!info$isdir], listed) | 509 files <- c(directory[!info$isdir], listed) |
467 exists <- file.exists(files) | 510 exists <- file.exists(files) |
468 files <- files[exists] | 511 files <- files[exists] |
469 | 512 |
470 library(tools) | 513 library(tools) |
474 return(as.matrix(md5sum(files))) | 517 return(as.matrix(md5sum(files))) |
475 } | 518 } |
476 | 519 |
477 | 520 |
478 # This function get the raw file path from the arguments | 521 # This function get the raw file path from the arguments |
479 getRawfilePathFromArguments <- function(singlefile, zipfile, listArguments) { | 522 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr |
480 if (!is.null(listArguments[["zipfile"]])) zipfile = listArguments[["zipfile"]] | 523 getRawfilePathFromArguments <- function(singlefile, zipfile, args) { |
481 if (!is.null(listArguments[["zipfilePositive"]])) zipfile = listArguments[["zipfilePositive"]] | 524 if (!is.null(args$zipfile)) zipfile <- args$zipfile |
482 if (!is.null(listArguments[["zipfileNegative"]])) zipfile = listArguments[["zipfileNegative"]] | 525 if (!is.null(args$zipfilePositive)) zipfile <- args$zipfilePositive |
483 | 526 if (!is.null(args$zipfileNegative)) zipfile <- args$zipfileNegative |
484 if (!is.null(listArguments[["singlefile_galaxyPath"]])) { | 527 |
485 singlefile_galaxyPaths = listArguments[["singlefile_galaxyPath"]]; | 528 if (!is.null(args$singlefile_galaxyPath)) { |
486 singlefile_sampleNames = listArguments[["singlefile_sampleName"]] | 529 singlefile_galaxyPaths <- args$singlefile_galaxyPath; |
487 } | 530 singlefile_sampleNames <- args$singlefile_sampleName |
488 if (!is.null(listArguments[["singlefile_galaxyPathPositive"]])) { | 531 } |
489 singlefile_galaxyPaths = listArguments[["singlefile_galaxyPathPositive"]]; | 532 if (!is.null(args$singlefile_galaxyPathPositive)) { |
490 singlefile_sampleNames = listArguments[["singlefile_sampleNamePositive"]] | 533 singlefile_galaxyPaths <- args$singlefile_galaxyPathPositive; |
491 } | 534 singlefile_sampleNames <- args$singlefile_sampleNamePositive |
492 if (!is.null(listArguments[["singlefile_galaxyPathNegative"]])) { | 535 } |
493 singlefile_galaxyPaths = listArguments[["singlefile_galaxyPathNegative"]]; | 536 if (!is.null(args$singlefile_galaxyPathNegative)) { |
494 singlefile_sampleNames = listArguments[["singlefile_sampleNameNegative"]] | 537 singlefile_galaxyPaths <- args$singlefile_galaxyPathNegative; |
538 singlefile_sampleNames <- args$singlefile_sampleNameNegative | |
495 } | 539 } |
496 if (exists("singlefile_galaxyPaths")){ | 540 if (exists("singlefile_galaxyPaths")){ |
497 singlefile_galaxyPaths = unlist(strsplit(singlefile_galaxyPaths,",")) | 541 singlefile_galaxyPaths <- unlist(strsplit(singlefile_galaxyPaths,",")) |
498 singlefile_sampleNames = unlist(strsplit(singlefile_sampleNames,",")) | 542 singlefile_sampleNames <- unlist(strsplit(singlefile_sampleNames,",")) |
499 | 543 |
500 singlefile=NULL | 544 singlefile <- NULL |
501 for (singlefile_galaxyPath_i in seq(1:length(singlefile_galaxyPaths))) { | 545 for (singlefile_galaxyPath_i in seq(1:length(singlefile_galaxyPaths))) { |
502 singlefile_galaxyPath=singlefile_galaxyPaths[singlefile_galaxyPath_i] | 546 singlefile_galaxyPath <- singlefile_galaxyPaths[singlefile_galaxyPath_i] |
503 singlefile_sampleName=singlefile_sampleNames[singlefile_galaxyPath_i] | 547 singlefile_sampleName <- singlefile_sampleNames[singlefile_galaxyPath_i] |
504 singlefile[[singlefile_sampleName]] = singlefile_galaxyPath | 548 singlefile[[singlefile_sampleName]] <- singlefile_galaxyPath |
505 } | 549 } |
506 } | 550 } |
507 for (argument in c("zipfile","zipfilePositive","zipfileNegative","singlefile_galaxyPath","singlefile_sampleName","singlefile_galaxyPathPositive","singlefile_sampleNamePositive","singlefile_galaxyPathNegative","singlefile_sampleNameNegative")) { | 551 for (argument in c("zipfile","zipfilePositive","zipfileNegative","singlefile_galaxyPath","singlefile_sampleName","singlefile_galaxyPathPositive","singlefile_sampleNamePositive","singlefile_galaxyPathNegative","singlefile_sampleNameNegative")) { |
508 listArguments[[argument]]=NULL | 552 args[[argument]] <- NULL |
509 } | 553 } |
510 return(list(zipfile=zipfile, singlefile=singlefile, listArguments=listArguments)) | 554 return(list(zipfile=zipfile, singlefile=singlefile, args=args)) |
511 } | 555 } |
512 | 556 |
513 | 557 |
514 # This function retrieve the raw file in the working directory | 558 # This function retrieve the raw file in the working directory |
515 # - if zipfile: unzip the file with its directory tree | 559 # - if zipfile: unzip the file with its directory tree |
516 # - if singlefiles: set symlink with the good filename | 560 # - if singlefiles: set symlink with the good filename |
561 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr | |
517 retrieveRawfileInTheWorkingDirectory <- function(singlefile, zipfile) { | 562 retrieveRawfileInTheWorkingDirectory <- function(singlefile, zipfile) { |
518 if(!is.null(singlefile) && (length("singlefile")>0)) { | 563 if(!is.null(singlefile) && (length("singlefile")>0)) { |
519 for (singlefile_sampleName in names(singlefile)) { | 564 for (singlefile_sampleName in names(singlefile)) { |
520 singlefile_galaxyPath = singlefile[[singlefile_sampleName]] | 565 singlefile_galaxyPath <- singlefile[[singlefile_sampleName]] |
521 if(!file.exists(singlefile_galaxyPath)){ | 566 if(!file.exists(singlefile_galaxyPath)){ |
522 error_message=paste("Cannot access the sample:",singlefile_sampleName,"located:",singlefile_galaxyPath,". Please, contact your administrator ... if you have one!") | 567 error_message <- paste("Cannot access the sample:",singlefile_sampleName,"located:",singlefile_galaxyPath,". Please, contact your administrator ... if you have one!") |
523 print(error_message); stop(error_message) | 568 print(error_message); stop(error_message) |
524 } | 569 } |
525 | 570 |
526 file.symlink(singlefile_galaxyPath,singlefile_sampleName) | 571 if (!suppressWarnings( try (file.link(singlefile_galaxyPath, singlefile_sampleName), silent=T))) |
527 } | 572 file.copy(singlefile_galaxyPath, singlefile_sampleName) |
528 directory = "." | 573 |
529 | 574 } |
530 } | 575 directory <- "." |
531 if(!is.null(zipfile) && (zipfile!="")) { | 576 |
577 } | |
578 if(!is.null(zipfile) && (zipfile != "")) { | |
532 if(!file.exists(zipfile)){ | 579 if(!file.exists(zipfile)){ |
533 error_message=paste("Cannot access the Zip file:",zipfile,". Please, contact your administrator ... if you have one!") | 580 error_message <- paste("Cannot access the Zip file:",zipfile,". Please, contact your administrator ... if you have one!") |
534 print(error_message) | 581 print(error_message) |
535 stop(error_message) | 582 stop(error_message) |
536 } | 583 } |
537 | 584 |
538 #list all file in the zip file | 585 #list all file in the zip file |
539 #zip_files=unzip(zipfile,list=T)[,"Name"] | 586 #zip_files <- unzip(zipfile,list=T)[,"Name"] |
540 | 587 |
541 #unzip | 588 #unzip |
542 suppressWarnings(unzip(zipfile, unzip="unzip")) | 589 suppressWarnings(unzip(zipfile, unzip="unzip")) |
543 | 590 |
544 #get the directory name | 591 #get the directory name |
545 filesInZip=unzip(zipfile, list=T); | 592 suppressWarnings(filesInZip <- unzip(zipfile, list=T)) |
546 directories=unique(unlist(lapply(strsplit(filesInZip$Name,"/"), function(x) x[1]))); | 593 directories <- unique(unlist(lapply(strsplit(filesInZip$Name,"/"), function(x) x[1]))) |
547 directories=directories[!(directories %in% c("__MACOSX")) & file.info(directories)$isdir] | 594 directories <- directories[!(directories %in% c("__MACOSX")) & file.info(directories)$isdir] |
548 directory = "." | 595 directory <- "." |
549 if (length(directories) == 1) directory = directories | 596 if (length(directories) == 1) directory <- directories |
550 | 597 |
551 cat("files_root_directory\t",directory,"\n") | 598 cat("files_root_directory\t",directory,"\n") |
552 | 599 |
553 } | 600 } |
554 return (directory) | 601 return (directory) |
555 } | 602 } |
603 | |
604 | |
605 # This function retrieve a xset like object | |
606 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr | |
607 getxcmsSetObject <- function(xobject) { | |
608 # XCMS 1.x | |
609 if (class(xobject) == "xcmsSet") | |
610 return (xobject) | |
611 # XCMS 3.x | |
612 if (class(xobject) == "XCMSnExp") { | |
613 # Get the legacy xcmsSet object | |
614 suppressWarnings(xset <- as(xobject, 'xcmsSet')) | |
615 sampclass(xset) <- xset@phenoData$sample_group | |
616 return (xset) | |
617 } | |
618 } | |
619 | |
620 | |
621 #@TODO: remove this function as soon as we can use xcms 3.x.x from Bioconductor 3.7 | |
622 # https://github.com/sneumann/xcms/issues/250 | |
623 groupnamesW4M <- function(xdata, mzdec = 0, rtdec = 0) { | |
624 mzfmt <- paste("%.", mzdec, "f", sep = "") | |
625 rtfmt <- paste("%.", rtdec, "f", sep = "") | |
626 | |
627 gnames <- paste("M", sprintf(mzfmt, featureDefinitions(xdata)[,"mzmed"]), "T", | |
628 sprintf(rtfmt, featureDefinitions(xdata)[,"rtmed"]), sep = "") | |
629 | |
630 if (any(dup <- duplicated(gnames))) | |
631 for (dupname in unique(gnames[dup])) { | |
632 dupidx <- which(gnames == dupname) | |
633 gnames[dupidx] <- paste(gnames[dupidx], seq(along = dupidx), sep = "_") | |
634 } | |
635 | |
636 return (gnames) | |
637 } | |
638 | |
639 #@TODO: remove this function as soon as we can use xcms 3.x.x from Bioconductor 3.7 | |
640 # https://github.com/sneumann/xcms/issues/247 | |
641 .concatenate_XCMSnExp <- function(...) { | |
642 x <- list(...) | |
643 if (length(x) == 0) | |
644 return(NULL) | |
645 if (length(x) == 1) | |
646 return(x[[1]]) | |
647 ## Check that all are XCMSnExp objects. | |
648 if (!all(unlist(lapply(x, function(z) is(z, "XCMSnExp"))))) | |
649 stop("All passed objects should be 'XCMSnExp' objects") | |
650 new_x <- as(.concatenate_OnDiskMSnExp(...), "XCMSnExp") | |
651 ## If any of the XCMSnExp has alignment results or detected features drop | |
652 ## them! | |
653 x <- lapply(x, function(z) { | |
654 if (hasAdjustedRtime(z)) { | |
655 z <- dropAdjustedRtime(z) | |
656 warning("Adjusted retention times found, had to drop them.") | |
657 } | |
658 if (hasFeatures(z)) { | |
659 z <- dropFeatureDefinitions(z) | |
660 warning("Feature definitions found, had to drop them.") | |
661 } | |
662 z | |
663 }) | |
664 ## Combine peaks | |
665 fls <- lapply(x, fileNames) | |
666 startidx <- cumsum(lengths(fls)) | |
667 pks <- lapply(x, chromPeaks) | |
668 procH <- lapply(x, processHistory) | |
669 for (i in 2:length(fls)) { | |
670 pks[[i]][, "sample"] <- pks[[i]][, "sample"] + startidx[i - 1] | |
671 procH[[i]] <- lapply(procH[[i]], function(z) { | |
672 z@fileIndex <- as.integer(z@fileIndex + startidx[i - 1]) | |
673 z | |
674 }) | |
675 } | |
676 pks <- do.call(rbind, pks) | |
677 new_x@.processHistory <- unlist(procH) | |
678 chromPeaks(new_x) <- pks | |
679 if (validObject(new_x)) | |
680 new_x | |
681 } | |
682 | |
683 #@TODO: remove this function as soon as we can use xcms 3.x.x from Bioconductor 3.7 | |
684 # https://github.com/sneumann/xcms/issues/247 | |
685 .concatenate_OnDiskMSnExp <- function(...) { | |
686 x <- list(...) | |
687 if (length(x) == 0) | |
688 return(NULL) | |
689 if (length(x) == 1) | |
690 return(x[[1]]) | |
691 ## Check that all are XCMSnExp objects. | |
692 if (!all(unlist(lapply(x, function(z) is(z, "OnDiskMSnExp"))))) | |
693 stop("All passed objects should be 'OnDiskMSnExp' objects") | |
694 ## Check processingQueue | |
695 procQ <- lapply(x, function(z) z@spectraProcessingQueue) | |
696 new_procQ <- procQ[[1]] | |
697 is_ok <- unlist(lapply(procQ, function(z) | |
698 !is.character(all.equal(new_procQ, z)) | |
699 )) | |
700 if (any(!is_ok)) { | |
701 warning("Processing queues from the submitted objects differ! ", | |
702 "Dropping the processing queue.") | |
703 new_procQ <- list() | |
704 } | |
705 ## processingData | |
706 fls <- lapply(x, function(z) z@processingData@files) | |
707 startidx <- cumsum(lengths(fls)) | |
708 ## featureData | |
709 featd <- lapply(x, fData) | |
710 ## Have to update the file index and the spectrum names. | |
711 for (i in 2:length(featd)) { | |
712 featd[[i]]$fileIdx <- featd[[i]]$fileIdx + startidx[i - 1] | |
713 rownames(featd[[i]]) <- MSnbase:::formatFileSpectrumNames( | |
714 fileIds = featd[[i]]$fileIdx, | |
715 spectrumIds = featd[[i]]$spIdx, | |
716 nSpectra = nrow(featd[[i]]), | |
717 nFiles = length(unlist(fls)) | |
718 ) | |
719 } | |
720 featd <- do.call(rbind, featd) | |
721 featd$spectrum <- 1:nrow(featd) | |
722 ## experimentData | |
723 expdata <- lapply(x, function(z) { | |
724 ed <- z@experimentData | |
725 data.frame(instrumentManufacturer = ed@instrumentManufacturer, | |
726 instrumentModel = ed@instrumentModel, | |
727 ionSource = ed@ionSource, | |
728 analyser = ed@analyser, | |
729 detectorType = ed@detectorType, | |
730 stringsAsFactors = FALSE) | |
731 }) | |
732 expdata <- do.call(rbind, expdata) | |
733 expdata <- new("MIAPE", | |
734 instrumentManufacturer = expdata$instrumentManufacturer, | |
735 instrumentModel = expdata$instrumentModel, | |
736 ionSource = expdata$ionSource, | |
737 analyser = expdata$analyser, | |
738 detectorType = expdata$detectorType) | |
739 | |
740 ## protocolData | |
741 protodata <- lapply(x, function(z) z@protocolData) | |
742 if (any(unlist(lapply(protodata, nrow)) > 0)) | |
743 warning("Found non-empty protocol data, but merging protocol data is", | |
744 " currently not supported. Skipped.") | |
745 ## phenoData | |
746 pdata <- do.call(rbind, lapply(x, pData)) | |
747 res <- new( | |
748 "OnDiskMSnExp", | |
749 phenoData = new("NAnnotatedDataFrame", data = pdata), | |
750 featureData = new("AnnotatedDataFrame", featd), | |
751 processingData = new("MSnProcess", | |
752 processing = paste0("Concatenated [", date(), "]"), | |
753 files = unlist(fls), smoothed = NA), | |
754 experimentData = expdata, | |
755 spectraProcessingQueue = new_procQ) | |
756 if (validObject(res)) | |
757 res | |
758 } | |
759 | |
760 #@TODO: remove this function as soon as we can use xcms 3.x.x from Bioconductor 3.7 | |
761 # https://github.com/sneumann/xcms/issues/247 | |
762 c.XCMSnExp <- function(...) { | |
763 .concatenate_XCMSnExp(...) | |
764 } |