Mercurial > repos > lecorguille > xcms_retcor
comparison lib.r @ 16:20a75ba4345b draft
planemo upload for repository https://github.com/workflow4metabolomics/xcms commit 22c4e92909198328fc7439ff47e4546a273eb907
author | lecorguille |
---|---|
date | Sun, 05 Feb 2017 08:57:02 -0500 |
parents | c04568596f40 |
children | 3bd1e74d4abc |
comparison
equal
deleted
inserted
replaced
15:c04568596f40 | 16:20a75ba4345b |
---|---|
34 #@author G. Le Corguille | 34 #@author G. Le Corguille |
35 # value: intensity values to be used into, maxo or intb | 35 # value: intensity values to be used into, maxo or intb |
36 getPeaklistW4M <- function(xset, intval="into",convertRTMinute=F,numDigitsMZ=4,numDigitsRT=0,variableMetadataOutput,dataMatrixOutput) { | 36 getPeaklistW4M <- function(xset, intval="into",convertRTMinute=F,numDigitsMZ=4,numDigitsRT=0,variableMetadataOutput,dataMatrixOutput) { |
37 groups <- xset@groups | 37 groups <- xset@groups |
38 values <- groupval(xset, "medret", value=intval) | 38 values <- groupval(xset, "medret", value=intval) |
39 | 39 |
40 # renamming of the column rtmed to rt to fit with camera peaklist function output | 40 # renamming of the column rtmed to rt to fit with camera peaklist function output |
41 colnames(groups)[colnames(groups)=="rtmed"] <- "rt" | 41 colnames(groups)[colnames(groups)=="rtmed"] <- "rt" |
42 colnames(groups)[colnames(groups)=="mzmed"] <- "mz" | 42 colnames(groups)[colnames(groups)=="mzmed"] <- "mz" |
43 | 43 |
44 ids <- formatIonIdentifiers(groups, numDigitsRT=numDigitsRT, numDigitsMZ=numDigitsMZ) | 44 ids <- formatIonIdentifiers(groups, numDigitsRT=numDigitsRT, numDigitsMZ=numDigitsMZ) |
45 groups = RTSecondToMinute(groups, convertRTMinute) | 45 groups = RTSecondToMinute(groups, convertRTMinute) |
46 | 46 |
47 rownames(groups) = ids | 47 rownames(groups) = ids |
48 rownames(values) = ids | 48 rownames(values) = ids |
55 write.table(values, file=dataMatrixOutput,sep="\t",quote=F,row.names = T,col.names = NA) | 55 write.table(values, file=dataMatrixOutput,sep="\t",quote=F,row.names = T,col.names = NA) |
56 } | 56 } |
57 | 57 |
58 #@author Y. Guitton | 58 #@author Y. Guitton |
59 getBPC <- function(file,rtcor=NULL, ...) { | 59 getBPC <- function(file,rtcor=NULL, ...) { |
60 object <- xcmsRaw(file) | 60 object <- xcmsRaw(file) |
61 sel <- profRange(object, ...) | 61 sel <- profRange(object, ...) |
62 cbind(if (is.null(rtcor)) object@scantime[sel$scanidx] else rtcor ,xcms:::colMax(object@env$profile[sel$massidx,sel$scanidx,drop=FALSE])) | 62 cbind(if (is.null(rtcor)) object@scantime[sel$scanidx] else rtcor ,xcms:::colMax(object@env$profile[sel$massidx,sel$scanidx,drop=FALSE])) |
63 #plotChrom(xcmsRaw(file), base=T) | 63 #plotChrom(xcmsRaw(file), base=T) |
64 } | 64 } |
65 | 65 |
66 #@author Y. Guitton | 66 #@author Y. Guitton |
67 getBPCs <- function (xcmsSet=NULL, pdfname="BPCs.pdf",rt=c("raw","corrected"), scanrange=NULL) { | 67 getBPCs <- function (xcmsSet=NULL, pdfname="BPCs.pdf",rt=c("raw","corrected"), scanrange=NULL) { |
68 cat("Creating BIC pdf...\n") | 68 cat("Creating BIC pdf...\n") |
69 | 69 |
70 if (is.null(xcmsSet)) { | 70 if (is.null(xcmsSet)) { |
71 cat("Enter an xcmsSet \n") | 71 cat("Enter an xcmsSet \n") |
72 stop() | 72 stop() |
73 } else { | 73 } else { |
74 files <- filepaths(xcmsSet) | 74 files <- filepaths(xcmsSet) |
75 } | 75 } |
76 | 76 |
77 class<-as.vector(levels(xcmsSet@phenoData[,1])) #sometime phenoData have more than 1 column use first as class | 77 phenoDataClass<-as.vector(levels(xcmsSet@phenoData[,1])) #sometime phenoData have more than 1 column use first as class |
78 | 78 |
79 classnames<-vector("list",length(class)) | 79 classnames<-vector("list",length(phenoDataClass)) |
80 for (i in 1:length(class)){ | 80 for (i in 1:length(phenoDataClass)){ |
81 classnames[[i]]<-which( xcmsSet@phenoData[,1]==class[i]) | 81 classnames[[i]]<-which( xcmsSet@phenoData[,1]==phenoDataClass[i]) |
82 } | 82 } |
83 | 83 |
84 N <- dim(phenoData(xcmsSet))[1] | 84 N <- dim(phenoData(xcmsSet))[1] |
85 | 85 |
86 TIC <- vector("list",N) | 86 TIC <- vector("list",N) |
87 | 87 |
88 | 88 |
89 for (j in 1:N) { | 89 for (j in 1:N) { |
90 | 90 |
91 TIC[[j]] <- getBPC(files[j]) | 91 TIC[[j]] <- getBPC(files[j]) |
92 #good for raw | 92 #good for raw |
93 # seems strange for corrected | 93 # seems strange for corrected |
94 #errors if scanrange used in xcmsSetgeneration | 94 #errors if scanrange used in xcmsSetgeneration |
95 if (!is.null(xcmsSet) && rt == "corrected") | 95 if (!is.null(xcmsSet) && rt == "corrected") |
96 rtcor <- xcmsSet@rt$corrected[[j]] else | 96 rtcor <- xcmsSet@rt$corrected[[j]] |
97 rtcor <- NULL | 97 else |
98 | 98 rtcor <- NULL |
99 TIC[[j]] <- getBPC(files[j],rtcor=rtcor) | 99 |
100 # TIC[[j]][,1]<-rtcor | 100 TIC[[j]] <- getBPC(files[j],rtcor=rtcor) |
101 } | 101 # TIC[[j]][,1]<-rtcor |
102 | 102 } |
103 | 103 |
104 | 104 |
105 pdf(pdfname,w=16,h=10) | 105 |
106 cols <- rainbow(N) | 106 pdf(pdfname,w=16,h=10) |
107 lty = 1:N | 107 cols <- rainbow(N) |
108 pch = 1:N | 108 lty = 1:N |
109 #search for max x and max y in BPCs | 109 pch = 1:N |
110 xlim = range(sapply(TIC, function(x) range(x[,1]))) | 110 #search for max x and max y in BPCs |
111 ylim = range(sapply(TIC, function(x) range(x[,2]))) | 111 xlim = range(sapply(TIC, function(x) range(x[,1]))) |
112 ylim = c(-ylim[2], ylim[2]) | 112 ylim = range(sapply(TIC, function(x) range(x[,2]))) |
113 | 113 ylim = c(-ylim[2], ylim[2]) |
114 | 114 |
115 ##plot start | 115 |
116 | 116 ##plot start |
117 if (length(class)>2){ | 117 |
118 for (k in 1:(length(class)-1)){ | 118 if (length(phenoDataClass)>2){ |
119 for (l in (k+1):length(class)){ | 119 for (k in 1:(length(phenoDataClass)-1)){ |
120 #print(paste(class[k],"vs",class[l],sep=" ")) | 120 for (l in (k+1):length(phenoDataClass)){ |
121 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",class[k]," vs ",class[l], sep=""), xlab = "Retention Time (min)", ylab = "BPC") | 121 #print(paste(phenoDataClass[k],"vs",phenoDataClass[l],sep=" ")) |
122 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",phenoDataClass[k]," vs ",phenoDataClass[l], sep=""), xlab = "Retention Time (min)", ylab = "BPC") | |
123 colvect<-NULL | |
124 for (j in 1:length(classnames[[k]])) { | |
125 tic <- TIC[[classnames[[k]][j]]] | |
126 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") | |
127 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") | |
128 colvect<-append(colvect,cols[classnames[[k]][j]]) | |
129 } | |
130 for (j in 1:length(classnames[[l]])) { | |
131 # i=class2names[j] | |
132 tic <- TIC[[classnames[[l]][j]]] | |
133 points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") | |
134 colvect<-append(colvect,cols[classnames[[l]][j]]) | |
135 } | |
136 legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) | |
137 } | |
138 } | |
139 }#end if length >2 | |
140 | |
141 if (length(phenoDataClass)==2){ | |
142 k=1 | |
143 l=2 | |
144 colvect<-NULL | |
145 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",phenoDataClass[k],"vs",phenoDataClass[l], sep=""), xlab = "Retention Time (min)", ylab = "BPC") | |
146 | |
147 for (j in 1:length(classnames[[k]])) { | |
148 | |
149 tic <- TIC[[classnames[[k]][j]]] | |
150 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") | |
151 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") | |
152 colvect<-append(colvect,cols[classnames[[k]][j]]) | |
153 } | |
154 for (j in 1:length(classnames[[l]])) { | |
155 # i=class2names[j] | |
156 tic <- TIC[[classnames[[l]][j]]] | |
157 points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") | |
158 colvect<-append(colvect,cols[classnames[[l]][j]]) | |
159 } | |
160 legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) | |
161 | |
162 }#end length ==2 | |
163 | |
164 #case where only one class | |
165 if (length(phenoDataClass)==1){ | |
166 k=1 | |
167 ylim = range(sapply(TIC, function(x) range(x[,2]))) | |
168 colvect<-NULL | |
169 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",phenoDataClass[k], sep=""), xlab = "Retention Time (min)", ylab = "BPC") | |
170 | |
171 for (j in 1:length(classnames[[k]])) { | |
172 tic <- TIC[[classnames[[k]][j]]] | |
173 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") | |
174 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") | |
175 colvect<-append(colvect,cols[classnames[[k]][j]]) | |
176 } | |
177 | |
178 legend("topright",paste(basename(files[c(classnames[[k]])])), col = colvect, lty = lty, pch = pch) | |
179 | |
180 }#end length ==1 | |
181 | |
182 dev.off() #pdf(pdfname,w=16,h=10) | |
183 | |
184 invisible(TIC) | |
185 } | |
186 | |
187 | |
188 | |
189 #@author Y. Guitton | |
190 getTIC <- function(file,rtcor=NULL) { | |
191 object <- xcmsRaw(file) | |
192 cbind(if (is.null(rtcor)) object@scantime else rtcor, rawEIC(object,mzrange=range(object@env$mz))$intensity) | |
193 } | |
194 | |
195 ## | |
196 ## overlay TIC from all files in current folder or from xcmsSet, create pdf | |
197 ## | |
198 #@author Y. Guitton | |
199 getTICs <- function(xcmsSet=NULL,files=NULL, pdfname="TICs.pdf",rt=c("raw","corrected")) { | |
200 cat("Creating TIC pdf...\n") | |
201 | |
202 if (is.null(xcmsSet)) { | |
203 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]", "[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") | |
204 filepattern <- paste(paste("\\.", filepattern, "$", sep = ""), collapse = "|") | |
205 if (is.null(files)) | |
206 files <- getwd() | |
207 info <- file.info(files) | |
208 listed <- list.files(files[info$isdir], pattern = filepattern, recursive = TRUE, full.names = TRUE) | |
209 files <- c(files[!info$isdir], listed) | |
210 } else { | |
211 files <- filepaths(xcmsSet) | |
212 } | |
213 | |
214 phenoDataClass<-as.vector(levels(xcmsSet@phenoData[,1])) #sometime phenoData have more than 1 column use first as class | |
215 classnames<-vector("list",length(phenoDataClass)) | |
216 for (i in 1:length(phenoDataClass)){ | |
217 classnames[[i]]<-which( xcmsSet@phenoData[,1]==phenoDataClass[i]) | |
218 } | |
219 | |
220 N <- length(files) | |
221 TIC <- vector("list",N) | |
222 | |
223 for (i in 1:N) { | |
224 if (!is.null(xcmsSet) && rt == "corrected") | |
225 rtcor <- xcmsSet@rt$corrected[[i]] else | |
226 rtcor <- NULL | |
227 TIC[[i]] <- getTIC(files[i],rtcor=rtcor) | |
228 } | |
229 | |
230 pdf(pdfname,w=16,h=10) | |
231 cols <- rainbow(N) | |
232 lty = 1:N | |
233 pch = 1:N | |
234 #search for max x and max y in TICs | |
235 xlim = range(sapply(TIC, function(x) range(x[,1]))) | |
236 ylim = range(sapply(TIC, function(x) range(x[,2]))) | |
237 ylim = c(-ylim[2], ylim[2]) | |
238 | |
239 | |
240 ##plot start | |
241 if (length(phenoDataClass)>2){ | |
242 for (k in 1:(length(phenoDataClass)-1)){ | |
243 for (l in (k+1):length(phenoDataClass)){ | |
244 #print(paste(phenoDataClass[k],"vs",phenoDataClass[l],sep=" ")) | |
245 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",phenoDataClass[k]," vs ",phenoDataClass[l], sep=""), xlab = "Retention Time (min)", ylab = "TIC") | |
246 colvect<-NULL | |
247 for (j in 1:length(classnames[[k]])) { | |
248 tic <- TIC[[classnames[[k]][j]]] | |
249 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") | |
250 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") | |
251 colvect<-append(colvect,cols[classnames[[k]][j]]) | |
252 } | |
253 for (j in 1:length(classnames[[l]])) { | |
254 # i=class2names[j] | |
255 tic <- TIC[[classnames[[l]][j]]] | |
256 points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") | |
257 colvect<-append(colvect,cols[classnames[[l]][j]]) | |
258 } | |
259 legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) | |
260 } | |
261 } | |
262 }#end if length >2 | |
263 if (length(phenoDataClass)==2){ | |
264 k=1 | |
265 l=2 | |
266 | |
267 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",phenoDataClass[k],"vs",phenoDataClass[l], sep=""), xlab = "Retention Time (min)", ylab = "TIC") | |
122 colvect<-NULL | 268 colvect<-NULL |
123 for (j in 1:length(classnames[[k]])) { | 269 for (j in 1:length(classnames[[k]])) { |
124 tic <- TIC[[classnames[[k]][j]]] | 270 tic <- TIC[[classnames[[k]][j]]] |
125 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") | 271 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") |
126 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") | 272 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") |
127 colvect<-append(colvect,cols[classnames[[k]][j]]) | 273 colvect<-append(colvect,cols[classnames[[k]][j]]) |
128 } | 274 } |
129 for (j in 1:length(classnames[[l]])) { | 275 for (j in 1:length(classnames[[l]])) { |
130 # i=class2names[j] | 276 # i=class2names[j] |
131 tic <- TIC[[classnames[[l]][j]]] | 277 tic <- TIC[[classnames[[l]][j]]] |
132 points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") | 278 points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") |
133 colvect<-append(colvect,cols[classnames[[l]][j]]) | 279 colvect<-append(colvect,cols[classnames[[l]][j]]) |
134 } | 280 } |
135 legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) | 281 legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) |
136 } | 282 |
137 } | 283 }#end length ==2 |
138 }#end if length >2 | 284 |
139 | 285 #case where only one class |
140 if (length(class)==2){ | 286 if (length(phenoDataClass)==1){ |
141 k=1 | 287 k=1 |
142 l=2 | 288 ylim = range(sapply(TIC, function(x) range(x[,2]))) |
143 colvect<-NULL | 289 |
144 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",class[k],"vs",class[l], sep=""), xlab = "Retention Time (min)", ylab = "BPC") | 290 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",phenoDataClass[k], sep=""), xlab = "Retention Time (min)", ylab = "TIC") |
145 | |
146 for (j in 1:length(classnames[[k]])) { | |
147 | |
148 tic <- TIC[[classnames[[k]][j]]] | |
149 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") | |
150 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") | |
151 colvect<-append(colvect,cols[classnames[[k]][j]]) | |
152 } | |
153 for (j in 1:length(classnames[[l]])) { | |
154 # i=class2names[j] | |
155 tic <- TIC[[classnames[[l]][j]]] | |
156 points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") | |
157 colvect<-append(colvect,cols[classnames[[l]][j]]) | |
158 } | |
159 legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) | |
160 | |
161 }#end length ==2 | |
162 | |
163 #case where only one class | |
164 if (length(class)==1){ | |
165 k=1 | |
166 ylim = range(sapply(TIC, function(x) range(x[,2]))) | |
167 colvect<-NULL | |
168 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",class[k], sep=""), xlab = "Retention Time (min)", ylab = "BPC") | |
169 | |
170 for (j in 1:length(classnames[[k]])) { | |
171 tic <- TIC[[classnames[[k]][j]]] | |
172 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") | |
173 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") | |
174 colvect<-append(colvect,cols[classnames[[k]][j]]) | |
175 } | |
176 | |
177 legend("topright",paste(basename(files[c(classnames[[k]])])), col = colvect, lty = lty, pch = pch) | |
178 | |
179 }#end length ==1 | |
180 | |
181 dev.off() #pdf(pdfname,w=16,h=10) | |
182 | |
183 invisible(TIC) | |
184 } | |
185 | |
186 | |
187 | |
188 #@author Y. Guitton | |
189 getTIC <- function(file,rtcor=NULL) { | |
190 object <- xcmsRaw(file) | |
191 cbind(if (is.null(rtcor)) object@scantime else rtcor, rawEIC(object,mzrange=range(object@env$mz))$intensity) | |
192 } | |
193 | |
194 ## | |
195 ## overlay TIC from all files in current folder or from xcmsSet, create pdf | |
196 ## | |
197 #@author Y. Guitton | |
198 getTICs <- function(xcmsSet=NULL,files=NULL, pdfname="TICs.pdf",rt=c("raw","corrected")) { | |
199 cat("Creating TIC pdf...\n") | |
200 | |
201 if (is.null(xcmsSet)) { | |
202 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]", "[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") | |
203 filepattern <- paste(paste("\\.", filepattern, "$", sep = ""), collapse = "|") | |
204 if (is.null(files)) | |
205 files <- getwd() | |
206 info <- file.info(files) | |
207 listed <- list.files(files[info$isdir], pattern = filepattern, recursive = TRUE, full.names = TRUE) | |
208 files <- c(files[!info$isdir], listed) | |
209 } else { | |
210 files <- filepaths(xcmsSet) | |
211 } | |
212 | |
213 class<-as.vector(levels(xcmsSet@phenoData[,1])) #sometime phenoData have more than 1 column use first as class | |
214 | |
215 classnames<-vector("list",length(class)) | |
216 for (i in 1:length(class)){ | |
217 classnames[[i]]<-which( xcmsSet@phenoData[,1]==class[i]) | |
218 } | |
219 | |
220 N <- length(files) | |
221 TIC <- vector("list",N) | |
222 | |
223 for (i in 1:N) { | |
224 if (!is.null(xcmsSet) && rt == "corrected") | |
225 rtcor <- xcmsSet@rt$corrected[[i]] else | |
226 rtcor <- NULL | |
227 TIC[[i]] <- getTIC(files[i],rtcor=rtcor) | |
228 } | |
229 | |
230 pdf(pdfname,w=16,h=10) | |
231 cols <- rainbow(N) | |
232 lty = 1:N | |
233 pch = 1:N | |
234 #search for max x and max y in TICs | |
235 xlim = range(sapply(TIC, function(x) range(x[,1]))) | |
236 ylim = range(sapply(TIC, function(x) range(x[,2]))) | |
237 ylim = c(-ylim[2], ylim[2]) | |
238 | |
239 | |
240 ##plot start | |
241 if (length(class)>2){ | |
242 for (k in 1:(length(class)-1)){ | |
243 for (l in (k+1):length(class)){ | |
244 #print(paste(class[k],"vs",class[l],sep=" ")) | |
245 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",class[k]," vs ",class[l], sep=""), xlab = "Retention Time (min)", ylab = "TIC") | |
246 colvect<-NULL | 291 colvect<-NULL |
247 for (j in 1:length(classnames[[k]])) { | 292 for (j in 1:length(classnames[[k]])) { |
248 | 293 tic <- TIC[[classnames[[k]][j]]] |
249 tic <- TIC[[classnames[[k]][j]]] | 294 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") |
250 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") | 295 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") |
251 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") | 296 colvect<-append(colvect,cols[classnames[[k]][j]]) |
252 colvect<-append(colvect,cols[classnames[[k]][j]]) | 297 } |
253 } | 298 |
254 for (j in 1:length(classnames[[l]])) { | 299 legend("topright",paste(basename(files[c(classnames[[k]])])), col = colvect, lty = lty, pch = pch) |
255 # i=class2names[j] | 300 |
256 tic <- TIC[[classnames[[l]][j]]] | 301 }#end length ==1 |
257 points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") | 302 |
258 colvect<-append(colvect,cols[classnames[[l]][j]]) | 303 dev.off() #pdf(pdfname,w=16,h=10) |
259 } | 304 |
260 legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) | 305 invisible(TIC) |
261 } | |
262 } | |
263 }#end if length >2 | |
264 if (length(class)==2){ | |
265 k=1 | |
266 l=2 | |
267 | |
268 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",class[k],"vs",class[l], sep=""), xlab = "Retention Time (min)", ylab = "TIC") | |
269 colvect<-NULL | |
270 for (j in 1:length(classnames[[k]])) { | |
271 tic <- TIC[[classnames[[k]][j]]] | |
272 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") | |
273 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") | |
274 colvect<-append(colvect,cols[classnames[[k]][j]]) | |
275 } | |
276 for (j in 1:length(classnames[[l]])) { | |
277 # i=class2names[j] | |
278 tic <- TIC[[classnames[[l]][j]]] | |
279 points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") | |
280 colvect<-append(colvect,cols[classnames[[l]][j]]) | |
281 } | |
282 legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) | |
283 | |
284 }#end length ==2 | |
285 | |
286 #case where only one class | |
287 if (length(class)==1){ | |
288 k=1 | |
289 ylim = range(sapply(TIC, function(x) range(x[,2]))) | |
290 | |
291 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",class[k], sep=""), xlab = "Retention Time (min)", ylab = "TIC") | |
292 colvect<-NULL | |
293 for (j in 1:length(classnames[[k]])) { | |
294 tic <- TIC[[classnames[[k]][j]]] | |
295 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") | |
296 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") | |
297 colvect<-append(colvect,cols[classnames[[k]][j]]) | |
298 } | |
299 | |
300 legend("topright",paste(basename(files[c(classnames[[k]])])), col = colvect, lty = lty, pch = pch) | |
301 | |
302 }#end length ==1 | |
303 | |
304 dev.off() #pdf(pdfname,w=16,h=10) | |
305 | |
306 invisible(TIC) | |
307 } | 306 } |
308 | 307 |
309 | 308 |
310 | 309 |
311 ## | 310 ## |
312 ## Get the polarities from all the samples of a condition | 311 ## Get the polarities from all the samples of a condition |
313 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM | 312 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM |
314 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM | 313 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM |
315 getSampleMetadata <- function(xcmsSet=NULL, sampleMetadataOutput="sampleMetadata.tsv") { | 314 getSampleMetadata <- function(xcmsSet=NULL, sampleMetadataOutput="sampleMetadata.tsv") { |
316 cat("Creating the sampleMetadata file...\n") | 315 cat("Creating the sampleMetadata file...\n") |
317 | 316 |
318 #Create the sampleMetada dataframe | 317 #Create the sampleMetada dataframe |
319 sampleMetadata=xset@phenoData | 318 sampleMetadata=xset@phenoData |
320 sampleNamesOrigin=rownames(sampleMetadata) | 319 sampleNamesOrigin=rownames(sampleMetadata) |
321 sampleNamesMakeNames=make.names(sampleNamesOrigin) | 320 sampleNamesMakeNames=make.names(sampleNamesOrigin) |
322 | 321 |
323 if (any(duplicated(sampleNamesMakeNames))) { | 322 if (any(duplicated(sampleNamesMakeNames))) { |
324 write("\n\nERROR: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names().\nIn your case, at least two columns after the renaming obtain the same name, thus XCMS will collapse those columns per name.", stderr()) | 323 write("\n\nERROR: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names().\nIn your case, at least two columns after the renaming obtain the same name, thus XCMS will collapse those columns per name.", stderr()) |
325 for (sampleName in sampleNamesOrigin) { | 324 for (sampleName in sampleNamesOrigin) { |
326 write(paste(sampleName,"\t->\t",make.names(sampleName)),stderr()) | 325 write(paste(sampleName,"\t->\t",make.names(sampleName)),stderr()) |
327 } | 326 } |
328 stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.") | 327 stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.") |
329 } | 328 } |
330 | 329 |
331 if (!all(sampleNamesOrigin == sampleNamesMakeNames)) { | 330 if (!all(sampleNamesOrigin == sampleNamesMakeNames)) { |
332 cat("\n\nWARNING: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names()\nIn your case, one or more sample names will be renamed in the sampleMetadata and dataMatrix files:\n") | 331 cat("\n\nWARNING: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names()\nIn your case, one or more sample names will be renamed in the sampleMetadata and dataMatrix files:\n") |
333 for (sampleName in sampleNamesOrigin) { | 332 for (sampleName in sampleNamesOrigin) { |
334 cat(paste(sampleName,"\t->\t",make.names(sampleName),"\n")) | 333 cat(paste(sampleName,"\t->\t",make.names(sampleName),"\n")) |
335 } | 334 } |
336 } | 335 } |
337 | 336 |
338 sampleMetadata$sampleMetadata=sampleNamesMakeNames | 337 sampleMetadata$sampleMetadata=sampleNamesMakeNames |
339 sampleMetadata=cbind(sampleMetadata["sampleMetadata"],sampleMetadata["class"]) #Reorder columns | 338 sampleMetadata=cbind(sampleMetadata["sampleMetadata"],sampleMetadata["class"]) #Reorder columns |
340 rownames(sampleMetadata)=NULL | 339 rownames(sampleMetadata)=NULL |
341 | 340 |
342 #Create a list of files name in the current directory | 341 #Create a list of files name in the current directory |
343 list_files=xset@filepaths | 342 list_files=xset@filepaths |
344 #For each sample file, the following actions are done | 343 #For each sample file, the following actions are done |
345 for (file in list_files){ | 344 for (file in list_files){ |
346 #Check if the file is in the CDF format | 345 #Check if the file is in the CDF format |
347 if (!mzR:::netCDFIsFile(file)){ | 346 if (!mzR:::netCDFIsFile(file)){ |
348 | 347 |
349 # If the column isn't exist, with add one filled with NA | 348 # If the column isn't exist, with add one filled with NA |
350 if (is.null(sampleMetadata$polarity)) sampleMetadata$polarity=NA | 349 if (is.null(sampleMetadata$polarity)) sampleMetadata$polarity=NA |
351 | 350 |
352 #Create a simple xcmsRaw object for each sample | 351 #Create a simple xcmsRaw object for each sample |
353 xcmsRaw=xcmsRaw(file) | 352 xcmsRaw=xcmsRaw(file) |
354 #Extract the polarity (a list of polarities) | 353 #Extract the polarity (a list of polarities) |
355 polarity=xcmsRaw@polarity | 354 polarity=xcmsRaw@polarity |
356 #Verify if all the scans have the same polarity | 355 #Verify if all the scans have the same polarity |
357 uniq_list=unique(polarity) | 356 uniq_list=unique(polarity) |
358 if (length(uniq_list)>1){ | 357 if (length(uniq_list)>1){ |
359 polarity="mixed" | 358 polarity="mixed" |
360 } else { | 359 } else { |
361 polarity=as.character(uniq_list) | 360 polarity=as.character(uniq_list) |
362 } | 361 } |
363 #Transforms the character to obtain only the sample name | 362 #Transforms the character to obtain only the sample name |
364 filename=basename(file) | 363 filename=basename(file) |
365 library(tools) | 364 library(tools) |
366 samplename=file_path_sans_ext(filename) | 365 samplename=file_path_sans_ext(filename) |
367 | 366 |
368 #Set the polarity attribute | 367 #Set the polarity attribute |
369 sampleMetadata$polarity[sampleMetadata$sampleMetadata==samplename]=polarity | 368 sampleMetadata$polarity[sampleMetadata$sampleMetadata==samplename]=polarity |
370 | 369 |
371 #Delete xcmsRaw object because it creates a bug for the fillpeaks step | 370 #Delete xcmsRaw object because it creates a bug for the fillpeaks step |
372 rm(xcmsRaw) | 371 rm(xcmsRaw) |
373 } | 372 } |
374 | 373 |
375 } | 374 } |
376 | 375 |
377 write.table(sampleMetadata, sep="\t", quote=FALSE, row.names=FALSE, file=sampleMetadataOutput) | 376 write.table(sampleMetadata, sep="\t", quote=FALSE, row.names=FALSE, file=sampleMetadataOutput) |
378 | 377 |
379 return(list("sampleNamesOrigin"=sampleNamesOrigin,"sampleNamesMakeNames"=sampleNamesMakeNames)) | 378 return(list("sampleNamesOrigin"=sampleNamesOrigin,"sampleNamesMakeNames"=sampleNamesMakeNames)) |
380 | 379 |
381 } | 380 } |
382 | 381 |
383 | 382 |
384 ## | 383 ## |
385 ## This function check if xcms will found all the files | 384 ## This function check if xcms will found all the files |
386 ## | 385 ## |
387 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM | 386 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM |
388 checkFilesCompatibilityWithXcms <- function(directory) { | 387 checkFilesCompatibilityWithXcms <- function(directory) { |
389 cat("Checking files filenames compatibilities with xmcs...\n") | 388 cat("Checking files filenames compatibilities with xmcs...\n") |
390 # WHAT XCMS WILL FIND | 389 # WHAT XCMS WILL FIND |
391 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") | 390 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") |
392 filepattern <- paste(paste("\\.", filepattern, "$", sep = ""),collapse = "|") | 391 filepattern <- paste(paste("\\.", filepattern, "$", sep = ""),collapse = "|") |
393 info <- file.info(directory) | 392 info <- file.info(directory) |
394 listed <- list.files(directory[info$isdir], pattern = filepattern,recursive = TRUE, full.names = TRUE) | 393 listed <- list.files(directory[info$isdir], pattern = filepattern,recursive = TRUE, full.names = TRUE) |
395 files <- c(directory[!info$isdir], listed) | 394 files <- c(directory[!info$isdir], listed) |
396 files_abs <- file.path(getwd(), files) | 395 files_abs <- file.path(getwd(), files) |
397 exists <- file.exists(files_abs) | 396 exists <- file.exists(files_abs) |
398 files[exists] <- files_abs[exists] | 397 files[exists] <- files_abs[exists] |
399 files[exists] <- sub("//","/",files[exists]) | 398 files[exists] <- sub("//","/",files[exists]) |
400 | 399 |
401 # WHAT IS ON THE FILESYSTEM | 400 # WHAT IS ON THE FILESYSTEM |
402 filesystem_filepaths=system(paste("find $PWD/",directory," -not -name '\\.*' -not -path '*conda-env*' -type f -name \"*\"", sep=""), intern=T) | 401 filesystem_filepaths=system(paste("find $PWD/",directory," -not -name '\\.*' -not -path '*conda-env*' -type f -name \"*\"", sep=""), intern=T) |
403 filesystem_filepaths=filesystem_filepaths[grep(filepattern, filesystem_filepaths, perl=T)] | 402 filesystem_filepaths=filesystem_filepaths[grep(filepattern, filesystem_filepaths, perl=T)] |
404 | 403 |
405 # COMPARISON | 404 # COMPARISON |
406 if (!is.na(table(filesystem_filepaths %in% files)["FALSE"])) { | 405 if (!is.na(table(filesystem_filepaths %in% files)["FALSE"])) { |
407 write("\n\nERROR: List of the files which will not be imported by xcmsSet",stderr()) | 406 write("\n\nERROR: List of the files which will not be imported by xcmsSet",stderr()) |
408 write(filesystem_filepaths[!(filesystem_filepaths %in% files)],stderr()) | 407 write(filesystem_filepaths[!(filesystem_filepaths %in% files)],stderr()) |
409 stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.") | 408 stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.") |
410 | 409 } |
411 } | |
412 } | 410 } |
413 | 411 |
414 | 412 |
415 | 413 |
416 ## | 414 ## |
417 ## This function check if XML contains special caracters. It also checks integrity and completness. | 415 ## This function check if XML contains special caracters. It also checks integrity and completness. |
418 ## | 416 ## |
419 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM | 417 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM |
420 checkXmlStructure <- function (directory) { | 418 checkXmlStructure <- function (directory) { |
421 cat("Checking XML structure...\n") | 419 cat("Checking XML structure...\n") |
422 | 420 |
423 cmd=paste("IFS=$'\n'; for xml in $(find",directory,"-not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'); do if [ $(xmllint --nonet --noout \"$xml\" 2> /dev/null; echo $?) -gt 0 ]; then echo $xml;fi; done;") | 421 cmd=paste("IFS=$'\n'; for xml in $(find",directory,"-not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'); do if [ $(xmllint --nonet --noout \"$xml\" 2> /dev/null; echo $?) -gt 0 ]; then echo $xml;fi; done;") |
424 capture=system(cmd,intern=TRUE) | 422 capture=system(cmd,intern=TRUE) |
425 | 423 |
426 if (length(capture)>0){ | 424 if (length(capture)>0){ |
427 #message=paste("The following mzXML or mzML file is incorrect, please check these files first:",capture) | 425 #message=paste("The following mzXML or mzML file is incorrect, please check these files first:",capture) |
428 write("\n\nERROR: The following mzXML or mzML file(s) are incorrect, please check these files first:", stderr()) | 426 write("\n\nERROR: The following mzXML or mzML file(s) are incorrect, please check these files first:", stderr()) |
429 write(capture, stderr()) | 427 write(capture, stderr()) |
430 stop("ERROR: xcmsSet cannot continue with incorrect mzXML or mzML files") | 428 stop("ERROR: xcmsSet cannot continue with incorrect mzXML or mzML files") |
431 } | 429 } |
432 | 430 |
433 } | 431 } |
434 | 432 |
435 | 433 |
436 ## | 434 ## |
437 ## This function check if XML contain special characters | 435 ## This function check if XML contain special characters |
438 ## | 436 ## |
439 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM | 437 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM |
440 deleteXmlBadCharacters<- function (directory) { | 438 deleteXmlBadCharacters<- function (directory) { |
441 cat("Checking Non ASCII characters in the XML...\n") | 439 cat("Checking Non ASCII characters in the XML...\n") |
442 | 440 |
443 processed=F | 441 processed=F |
444 l=system( paste("find",directory, "-not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'"),intern=TRUE) | 442 l=system( paste("find",directory, "-not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'"),intern=TRUE) |
445 for (i in l){ | 443 for (i in l){ |
446 cmd=paste("LC_ALL=C grep '[^ -~]' \"",i,"\"",sep="") | 444 cmd=paste("LC_ALL=C grep '[^ -~]' \"",i,"\"",sep="") |
447 capture=suppressWarnings(system(cmd,intern=TRUE)) | 445 capture=suppressWarnings(system(cmd,intern=TRUE)) |
448 if (length(capture)>0){ | 446 if (length(capture)>0){ |
449 cmd=paste("perl -i -pe 's/[^[:ascii:]]//g;'",i) | 447 cmd=paste("perl -i -pe 's/[^[:ascii:]]//g;'",i) |
450 print( paste("WARNING: Non ASCII characters have been removed from the ",i,"file") ) | 448 print( paste("WARNING: Non ASCII characters have been removed from the ",i,"file") ) |
451 c=system(cmd,intern=TRUE) | 449 c=system(cmd,intern=TRUE) |
452 capture="" | 450 capture="" |
453 processed=T | 451 processed=T |
454 } | 452 } |
455 } | 453 } |
456 if (processed) cat("\n\n") | 454 if (processed) cat("\n\n") |
457 return(processed) | 455 return(processed) |
458 } | 456 } |
459 | 457 |
460 | 458 |
461 ## | 459 ## |
462 ## This function will compute MD5 checksum to check the data integrity | 460 ## This function will compute MD5 checksum to check the data integrity |
463 ## | 461 ## |
464 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr | 462 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr |
465 getMd5sum <- function (directory) { | 463 getMd5sum <- function (directory) { |
466 cat("Compute md5 checksum...\n") | 464 cat("Compute md5 checksum...\n") |
467 # WHAT XCMS WILL FIND | 465 # WHAT XCMS WILL FIND |
468 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") | 466 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") |
469 filepattern <- paste(paste("\\.", filepattern, "$", sep = ""),collapse = "|") | 467 filepattern <- paste(paste("\\.", filepattern, "$", sep = ""),collapse = "|") |
470 info <- file.info(directory) | 468 info <- file.info(directory) |
471 listed <- list.files(directory[info$isdir], pattern = filepattern,recursive = TRUE, full.names = TRUE) | 469 listed <- list.files(directory[info$isdir], pattern = filepattern,recursive = TRUE, full.names = TRUE) |
472 files <- c(directory[!info$isdir], listed) | 470 files <- c(directory[!info$isdir], listed) |
473 exists <- file.exists(files) | 471 exists <- file.exists(files) |
474 files <- files[exists] | 472 files <- files[exists] |
475 | 473 |
476 library(tools) | 474 library(tools) |
477 | 475 |
478 #cat("\n\n") | 476 #cat("\n\n") |
479 | 477 |
480 return(as.matrix(md5sum(files))) | 478 return(as.matrix(md5sum(files))) |
481 } | 479 } |