comparison lib.r @ 16:20a75ba4345b draft

planemo upload for repository https://github.com/workflow4metabolomics/xcms commit 22c4e92909198328fc7439ff47e4546a273eb907
author lecorguille
date Sun, 05 Feb 2017 08:57:02 -0500
parents c04568596f40
children 3bd1e74d4abc
comparison
equal deleted inserted replaced
15:c04568596f40 16:20a75ba4345b
34 #@author G. Le Corguille 34 #@author G. Le Corguille
35 # value: intensity values to be used into, maxo or intb 35 # value: intensity values to be used into, maxo or intb
36 getPeaklistW4M <- function(xset, intval="into",convertRTMinute=F,numDigitsMZ=4,numDigitsRT=0,variableMetadataOutput,dataMatrixOutput) { 36 getPeaklistW4M <- function(xset, intval="into",convertRTMinute=F,numDigitsMZ=4,numDigitsRT=0,variableMetadataOutput,dataMatrixOutput) {
37 groups <- xset@groups 37 groups <- xset@groups
38 values <- groupval(xset, "medret", value=intval) 38 values <- groupval(xset, "medret", value=intval)
39 39
40 # renamming of the column rtmed to rt to fit with camera peaklist function output 40 # renamming of the column rtmed to rt to fit with camera peaklist function output
41 colnames(groups)[colnames(groups)=="rtmed"] <- "rt" 41 colnames(groups)[colnames(groups)=="rtmed"] <- "rt"
42 colnames(groups)[colnames(groups)=="mzmed"] <- "mz" 42 colnames(groups)[colnames(groups)=="mzmed"] <- "mz"
43 43
44 ids <- formatIonIdentifiers(groups, numDigitsRT=numDigitsRT, numDigitsMZ=numDigitsMZ) 44 ids <- formatIonIdentifiers(groups, numDigitsRT=numDigitsRT, numDigitsMZ=numDigitsMZ)
45 groups = RTSecondToMinute(groups, convertRTMinute) 45 groups = RTSecondToMinute(groups, convertRTMinute)
46 46
47 rownames(groups) = ids 47 rownames(groups) = ids
48 rownames(values) = ids 48 rownames(values) = ids
55 write.table(values, file=dataMatrixOutput,sep="\t",quote=F,row.names = T,col.names = NA) 55 write.table(values, file=dataMatrixOutput,sep="\t",quote=F,row.names = T,col.names = NA)
56 } 56 }
57 57
58 #@author Y. Guitton 58 #@author Y. Guitton
59 getBPC <- function(file,rtcor=NULL, ...) { 59 getBPC <- function(file,rtcor=NULL, ...) {
60 object <- xcmsRaw(file) 60 object <- xcmsRaw(file)
61 sel <- profRange(object, ...) 61 sel <- profRange(object, ...)
62 cbind(if (is.null(rtcor)) object@scantime[sel$scanidx] else rtcor ,xcms:::colMax(object@env$profile[sel$massidx,sel$scanidx,drop=FALSE])) 62 cbind(if (is.null(rtcor)) object@scantime[sel$scanidx] else rtcor ,xcms:::colMax(object@env$profile[sel$massidx,sel$scanidx,drop=FALSE]))
63 #plotChrom(xcmsRaw(file), base=T) 63 #plotChrom(xcmsRaw(file), base=T)
64 } 64 }
65 65
66 #@author Y. Guitton 66 #@author Y. Guitton
67 getBPCs <- function (xcmsSet=NULL, pdfname="BPCs.pdf",rt=c("raw","corrected"), scanrange=NULL) { 67 getBPCs <- function (xcmsSet=NULL, pdfname="BPCs.pdf",rt=c("raw","corrected"), scanrange=NULL) {
68 cat("Creating BIC pdf...\n") 68 cat("Creating BIC pdf...\n")
69 69
70 if (is.null(xcmsSet)) { 70 if (is.null(xcmsSet)) {
71 cat("Enter an xcmsSet \n") 71 cat("Enter an xcmsSet \n")
72 stop() 72 stop()
73 } else { 73 } else {
74 files <- filepaths(xcmsSet) 74 files <- filepaths(xcmsSet)
75 } 75 }
76 76
77 class<-as.vector(levels(xcmsSet@phenoData[,1])) #sometime phenoData have more than 1 column use first as class 77 phenoDataClass<-as.vector(levels(xcmsSet@phenoData[,1])) #sometime phenoData have more than 1 column use first as class
78 78
79 classnames<-vector("list",length(class)) 79 classnames<-vector("list",length(phenoDataClass))
80 for (i in 1:length(class)){ 80 for (i in 1:length(phenoDataClass)){
81 classnames[[i]]<-which( xcmsSet@phenoData[,1]==class[i]) 81 classnames[[i]]<-which( xcmsSet@phenoData[,1]==phenoDataClass[i])
82 } 82 }
83 83
84 N <- dim(phenoData(xcmsSet))[1] 84 N <- dim(phenoData(xcmsSet))[1]
85 85
86 TIC <- vector("list",N) 86 TIC <- vector("list",N)
87 87
88 88
89 for (j in 1:N) { 89 for (j in 1:N) {
90 90
91 TIC[[j]] <- getBPC(files[j]) 91 TIC[[j]] <- getBPC(files[j])
92 #good for raw 92 #good for raw
93 # seems strange for corrected 93 # seems strange for corrected
94 #errors if scanrange used in xcmsSetgeneration 94 #errors if scanrange used in xcmsSetgeneration
95 if (!is.null(xcmsSet) && rt == "corrected") 95 if (!is.null(xcmsSet) && rt == "corrected")
96 rtcor <- xcmsSet@rt$corrected[[j]] else 96 rtcor <- xcmsSet@rt$corrected[[j]]
97 rtcor <- NULL 97 else
98 98 rtcor <- NULL
99 TIC[[j]] <- getBPC(files[j],rtcor=rtcor) 99
100 # TIC[[j]][,1]<-rtcor 100 TIC[[j]] <- getBPC(files[j],rtcor=rtcor)
101 } 101 # TIC[[j]][,1]<-rtcor
102 102 }
103 103
104 104
105 pdf(pdfname,w=16,h=10) 105
106 cols <- rainbow(N) 106 pdf(pdfname,w=16,h=10)
107 lty = 1:N 107 cols <- rainbow(N)
108 pch = 1:N 108 lty = 1:N
109 #search for max x and max y in BPCs 109 pch = 1:N
110 xlim = range(sapply(TIC, function(x) range(x[,1]))) 110 #search for max x and max y in BPCs
111 ylim = range(sapply(TIC, function(x) range(x[,2]))) 111 xlim = range(sapply(TIC, function(x) range(x[,1])))
112 ylim = c(-ylim[2], ylim[2]) 112 ylim = range(sapply(TIC, function(x) range(x[,2])))
113 113 ylim = c(-ylim[2], ylim[2])
114 114
115 ##plot start 115
116 116 ##plot start
117 if (length(class)>2){ 117
118 for (k in 1:(length(class)-1)){ 118 if (length(phenoDataClass)>2){
119 for (l in (k+1):length(class)){ 119 for (k in 1:(length(phenoDataClass)-1)){
120 #print(paste(class[k],"vs",class[l],sep=" ")) 120 for (l in (k+1):length(phenoDataClass)){
121 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",class[k]," vs ",class[l], sep=""), xlab = "Retention Time (min)", ylab = "BPC") 121 #print(paste(phenoDataClass[k],"vs",phenoDataClass[l],sep=" "))
122 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",phenoDataClass[k]," vs ",phenoDataClass[l], sep=""), xlab = "Retention Time (min)", ylab = "BPC")
123 colvect<-NULL
124 for (j in 1:length(classnames[[k]])) {
125 tic <- TIC[[classnames[[k]][j]]]
126 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l")
127 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l")
128 colvect<-append(colvect,cols[classnames[[k]][j]])
129 }
130 for (j in 1:length(classnames[[l]])) {
131 # i=class2names[j]
132 tic <- TIC[[classnames[[l]][j]]]
133 points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l")
134 colvect<-append(colvect,cols[classnames[[l]][j]])
135 }
136 legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch)
137 }
138 }
139 }#end if length >2
140
141 if (length(phenoDataClass)==2){
142 k=1
143 l=2
144 colvect<-NULL
145 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",phenoDataClass[k],"vs",phenoDataClass[l], sep=""), xlab = "Retention Time (min)", ylab = "BPC")
146
147 for (j in 1:length(classnames[[k]])) {
148
149 tic <- TIC[[classnames[[k]][j]]]
150 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l")
151 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l")
152 colvect<-append(colvect,cols[classnames[[k]][j]])
153 }
154 for (j in 1:length(classnames[[l]])) {
155 # i=class2names[j]
156 tic <- TIC[[classnames[[l]][j]]]
157 points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l")
158 colvect<-append(colvect,cols[classnames[[l]][j]])
159 }
160 legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch)
161
162 }#end length ==2
163
164 #case where only one class
165 if (length(phenoDataClass)==1){
166 k=1
167 ylim = range(sapply(TIC, function(x) range(x[,2])))
168 colvect<-NULL
169 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",phenoDataClass[k], sep=""), xlab = "Retention Time (min)", ylab = "BPC")
170
171 for (j in 1:length(classnames[[k]])) {
172 tic <- TIC[[classnames[[k]][j]]]
173 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l")
174 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l")
175 colvect<-append(colvect,cols[classnames[[k]][j]])
176 }
177
178 legend("topright",paste(basename(files[c(classnames[[k]])])), col = colvect, lty = lty, pch = pch)
179
180 }#end length ==1
181
182 dev.off() #pdf(pdfname,w=16,h=10)
183
184 invisible(TIC)
185 }
186
187
188
189 #@author Y. Guitton
190 getTIC <- function(file,rtcor=NULL) {
191 object <- xcmsRaw(file)
192 cbind(if (is.null(rtcor)) object@scantime else rtcor, rawEIC(object,mzrange=range(object@env$mz))$intensity)
193 }
194
195 ##
196 ## overlay TIC from all files in current folder or from xcmsSet, create pdf
197 ##
198 #@author Y. Guitton
199 getTICs <- function(xcmsSet=NULL,files=NULL, pdfname="TICs.pdf",rt=c("raw","corrected")) {
200 cat("Creating TIC pdf...\n")
201
202 if (is.null(xcmsSet)) {
203 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]", "[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]")
204 filepattern <- paste(paste("\\.", filepattern, "$", sep = ""), collapse = "|")
205 if (is.null(files))
206 files <- getwd()
207 info <- file.info(files)
208 listed <- list.files(files[info$isdir], pattern = filepattern, recursive = TRUE, full.names = TRUE)
209 files <- c(files[!info$isdir], listed)
210 } else {
211 files <- filepaths(xcmsSet)
212 }
213
214 phenoDataClass<-as.vector(levels(xcmsSet@phenoData[,1])) #sometime phenoData have more than 1 column use first as class
215 classnames<-vector("list",length(phenoDataClass))
216 for (i in 1:length(phenoDataClass)){
217 classnames[[i]]<-which( xcmsSet@phenoData[,1]==phenoDataClass[i])
218 }
219
220 N <- length(files)
221 TIC <- vector("list",N)
222
223 for (i in 1:N) {
224 if (!is.null(xcmsSet) && rt == "corrected")
225 rtcor <- xcmsSet@rt$corrected[[i]] else
226 rtcor <- NULL
227 TIC[[i]] <- getTIC(files[i],rtcor=rtcor)
228 }
229
230 pdf(pdfname,w=16,h=10)
231 cols <- rainbow(N)
232 lty = 1:N
233 pch = 1:N
234 #search for max x and max y in TICs
235 xlim = range(sapply(TIC, function(x) range(x[,1])))
236 ylim = range(sapply(TIC, function(x) range(x[,2])))
237 ylim = c(-ylim[2], ylim[2])
238
239
240 ##plot start
241 if (length(phenoDataClass)>2){
242 for (k in 1:(length(phenoDataClass)-1)){
243 for (l in (k+1):length(phenoDataClass)){
244 #print(paste(phenoDataClass[k],"vs",phenoDataClass[l],sep=" "))
245 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",phenoDataClass[k]," vs ",phenoDataClass[l], sep=""), xlab = "Retention Time (min)", ylab = "TIC")
246 colvect<-NULL
247 for (j in 1:length(classnames[[k]])) {
248 tic <- TIC[[classnames[[k]][j]]]
249 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l")
250 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l")
251 colvect<-append(colvect,cols[classnames[[k]][j]])
252 }
253 for (j in 1:length(classnames[[l]])) {
254 # i=class2names[j]
255 tic <- TIC[[classnames[[l]][j]]]
256 points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l")
257 colvect<-append(colvect,cols[classnames[[l]][j]])
258 }
259 legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch)
260 }
261 }
262 }#end if length >2
263 if (length(phenoDataClass)==2){
264 k=1
265 l=2
266
267 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",phenoDataClass[k],"vs",phenoDataClass[l], sep=""), xlab = "Retention Time (min)", ylab = "TIC")
122 colvect<-NULL 268 colvect<-NULL
123 for (j in 1:length(classnames[[k]])) { 269 for (j in 1:length(classnames[[k]])) {
124 tic <- TIC[[classnames[[k]][j]]] 270 tic <- TIC[[classnames[[k]][j]]]
125 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") 271 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l")
126 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") 272 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l")
127 colvect<-append(colvect,cols[classnames[[k]][j]]) 273 colvect<-append(colvect,cols[classnames[[k]][j]])
128 } 274 }
129 for (j in 1:length(classnames[[l]])) { 275 for (j in 1:length(classnames[[l]])) {
130 # i=class2names[j] 276 # i=class2names[j]
131 tic <- TIC[[classnames[[l]][j]]] 277 tic <- TIC[[classnames[[l]][j]]]
132 points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") 278 points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l")
133 colvect<-append(colvect,cols[classnames[[l]][j]]) 279 colvect<-append(colvect,cols[classnames[[l]][j]])
134 } 280 }
135 legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) 281 legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch)
136 } 282
137 } 283 }#end length ==2
138 }#end if length >2 284
139 285 #case where only one class
140 if (length(class)==2){ 286 if (length(phenoDataClass)==1){
141 k=1 287 k=1
142 l=2 288 ylim = range(sapply(TIC, function(x) range(x[,2])))
143 colvect<-NULL 289
144 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",class[k],"vs",class[l], sep=""), xlab = "Retention Time (min)", ylab = "BPC") 290 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",phenoDataClass[k], sep=""), xlab = "Retention Time (min)", ylab = "TIC")
145
146 for (j in 1:length(classnames[[k]])) {
147
148 tic <- TIC[[classnames[[k]][j]]]
149 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l")
150 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l")
151 colvect<-append(colvect,cols[classnames[[k]][j]])
152 }
153 for (j in 1:length(classnames[[l]])) {
154 # i=class2names[j]
155 tic <- TIC[[classnames[[l]][j]]]
156 points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l")
157 colvect<-append(colvect,cols[classnames[[l]][j]])
158 }
159 legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch)
160
161 }#end length ==2
162
163 #case where only one class
164 if (length(class)==1){
165 k=1
166 ylim = range(sapply(TIC, function(x) range(x[,2])))
167 colvect<-NULL
168 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Base Peak Chromatograms \n","BPCs_",class[k], sep=""), xlab = "Retention Time (min)", ylab = "BPC")
169
170 for (j in 1:length(classnames[[k]])) {
171 tic <- TIC[[classnames[[k]][j]]]
172 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l")
173 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l")
174 colvect<-append(colvect,cols[classnames[[k]][j]])
175 }
176
177 legend("topright",paste(basename(files[c(classnames[[k]])])), col = colvect, lty = lty, pch = pch)
178
179 }#end length ==1
180
181 dev.off() #pdf(pdfname,w=16,h=10)
182
183 invisible(TIC)
184 }
185
186
187
188 #@author Y. Guitton
189 getTIC <- function(file,rtcor=NULL) {
190 object <- xcmsRaw(file)
191 cbind(if (is.null(rtcor)) object@scantime else rtcor, rawEIC(object,mzrange=range(object@env$mz))$intensity)
192 }
193
194 ##
195 ## overlay TIC from all files in current folder or from xcmsSet, create pdf
196 ##
197 #@author Y. Guitton
198 getTICs <- function(xcmsSet=NULL,files=NULL, pdfname="TICs.pdf",rt=c("raw","corrected")) {
199 cat("Creating TIC pdf...\n")
200
201 if (is.null(xcmsSet)) {
202 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]", "[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]")
203 filepattern <- paste(paste("\\.", filepattern, "$", sep = ""), collapse = "|")
204 if (is.null(files))
205 files <- getwd()
206 info <- file.info(files)
207 listed <- list.files(files[info$isdir], pattern = filepattern, recursive = TRUE, full.names = TRUE)
208 files <- c(files[!info$isdir], listed)
209 } else {
210 files <- filepaths(xcmsSet)
211 }
212
213 class<-as.vector(levels(xcmsSet@phenoData[,1])) #sometime phenoData have more than 1 column use first as class
214
215 classnames<-vector("list",length(class))
216 for (i in 1:length(class)){
217 classnames[[i]]<-which( xcmsSet@phenoData[,1]==class[i])
218 }
219
220 N <- length(files)
221 TIC <- vector("list",N)
222
223 for (i in 1:N) {
224 if (!is.null(xcmsSet) && rt == "corrected")
225 rtcor <- xcmsSet@rt$corrected[[i]] else
226 rtcor <- NULL
227 TIC[[i]] <- getTIC(files[i],rtcor=rtcor)
228 }
229
230 pdf(pdfname,w=16,h=10)
231 cols <- rainbow(N)
232 lty = 1:N
233 pch = 1:N
234 #search for max x and max y in TICs
235 xlim = range(sapply(TIC, function(x) range(x[,1])))
236 ylim = range(sapply(TIC, function(x) range(x[,2])))
237 ylim = c(-ylim[2], ylim[2])
238
239
240 ##plot start
241 if (length(class)>2){
242 for (k in 1:(length(class)-1)){
243 for (l in (k+1):length(class)){
244 #print(paste(class[k],"vs",class[l],sep=" "))
245 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",class[k]," vs ",class[l], sep=""), xlab = "Retention Time (min)", ylab = "TIC")
246 colvect<-NULL 291 colvect<-NULL
247 for (j in 1:length(classnames[[k]])) { 292 for (j in 1:length(classnames[[k]])) {
248 293 tic <- TIC[[classnames[[k]][j]]]
249 tic <- TIC[[classnames[[k]][j]]] 294 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l")
250 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l") 295 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l")
251 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l") 296 colvect<-append(colvect,cols[classnames[[k]][j]])
252 colvect<-append(colvect,cols[classnames[[k]][j]]) 297 }
253 } 298
254 for (j in 1:length(classnames[[l]])) { 299 legend("topright",paste(basename(files[c(classnames[[k]])])), col = colvect, lty = lty, pch = pch)
255 # i=class2names[j] 300
256 tic <- TIC[[classnames[[l]][j]]] 301 }#end length ==1
257 points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l") 302
258 colvect<-append(colvect,cols[classnames[[l]][j]]) 303 dev.off() #pdf(pdfname,w=16,h=10)
259 } 304
260 legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch) 305 invisible(TIC)
261 }
262 }
263 }#end if length >2
264 if (length(class)==2){
265 k=1
266 l=2
267
268 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",class[k],"vs",class[l], sep=""), xlab = "Retention Time (min)", ylab = "TIC")
269 colvect<-NULL
270 for (j in 1:length(classnames[[k]])) {
271 tic <- TIC[[classnames[[k]][j]]]
272 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l")
273 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l")
274 colvect<-append(colvect,cols[classnames[[k]][j]])
275 }
276 for (j in 1:length(classnames[[l]])) {
277 # i=class2names[j]
278 tic <- TIC[[classnames[[l]][j]]]
279 points(tic[,1]/60, -tic[,2], col = cols[classnames[[l]][j]], pch = pch[classnames[[l]][j]], type="l")
280 colvect<-append(colvect,cols[classnames[[l]][j]])
281 }
282 legend("topright",paste(basename(files[c(classnames[[k]],classnames[[l]])])), col = colvect, lty = lty, pch = pch)
283
284 }#end length ==2
285
286 #case where only one class
287 if (length(class)==1){
288 k=1
289 ylim = range(sapply(TIC, function(x) range(x[,2])))
290
291 plot(0, 0, type="n", xlim = xlim/60, ylim = ylim, main = paste("Total Ion Chromatograms \n","TICs_",class[k], sep=""), xlab = "Retention Time (min)", ylab = "TIC")
292 colvect<-NULL
293 for (j in 1:length(classnames[[k]])) {
294 tic <- TIC[[classnames[[k]][j]]]
295 # points(tic[,1]/60, tic[,2], col = cols[i], pch = pch[i], type="l")
296 points(tic[,1]/60, tic[,2], col = cols[classnames[[k]][j]], pch = pch[classnames[[k]][j]], type="l")
297 colvect<-append(colvect,cols[classnames[[k]][j]])
298 }
299
300 legend("topright",paste(basename(files[c(classnames[[k]])])), col = colvect, lty = lty, pch = pch)
301
302 }#end length ==1
303
304 dev.off() #pdf(pdfname,w=16,h=10)
305
306 invisible(TIC)
307 } 306 }
308 307
309 308
310 309
311 ## 310 ##
312 ## Get the polarities from all the samples of a condition 311 ## Get the polarities from all the samples of a condition
313 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM 312 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM
314 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM 313 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM
315 getSampleMetadata <- function(xcmsSet=NULL, sampleMetadataOutput="sampleMetadata.tsv") { 314 getSampleMetadata <- function(xcmsSet=NULL, sampleMetadataOutput="sampleMetadata.tsv") {
316 cat("Creating the sampleMetadata file...\n") 315 cat("Creating the sampleMetadata file...\n")
317 316
318 #Create the sampleMetada dataframe 317 #Create the sampleMetada dataframe
319 sampleMetadata=xset@phenoData 318 sampleMetadata=xset@phenoData
320 sampleNamesOrigin=rownames(sampleMetadata) 319 sampleNamesOrigin=rownames(sampleMetadata)
321 sampleNamesMakeNames=make.names(sampleNamesOrigin) 320 sampleNamesMakeNames=make.names(sampleNamesOrigin)
322 321
323 if (any(duplicated(sampleNamesMakeNames))) { 322 if (any(duplicated(sampleNamesMakeNames))) {
324 write("\n\nERROR: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names().\nIn your case, at least two columns after the renaming obtain the same name, thus XCMS will collapse those columns per name.", stderr()) 323 write("\n\nERROR: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names().\nIn your case, at least two columns after the renaming obtain the same name, thus XCMS will collapse those columns per name.", stderr())
325 for (sampleName in sampleNamesOrigin) { 324 for (sampleName in sampleNamesOrigin) {
326 write(paste(sampleName,"\t->\t",make.names(sampleName)),stderr()) 325 write(paste(sampleName,"\t->\t",make.names(sampleName)),stderr())
327 } 326 }
328 stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.") 327 stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.")
329 } 328 }
330 329
331 if (!all(sampleNamesOrigin == sampleNamesMakeNames)) { 330 if (!all(sampleNamesOrigin == sampleNamesMakeNames)) {
332 cat("\n\nWARNING: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names()\nIn your case, one or more sample names will be renamed in the sampleMetadata and dataMatrix files:\n") 331 cat("\n\nWARNING: Usually, R has trouble to deal with special characters in its column names, so it rename them using make.names()\nIn your case, one or more sample names will be renamed in the sampleMetadata and dataMatrix files:\n")
333 for (sampleName in sampleNamesOrigin) { 332 for (sampleName in sampleNamesOrigin) {
334 cat(paste(sampleName,"\t->\t",make.names(sampleName),"\n")) 333 cat(paste(sampleName,"\t->\t",make.names(sampleName),"\n"))
335 } 334 }
336 } 335 }
337 336
338 sampleMetadata$sampleMetadata=sampleNamesMakeNames 337 sampleMetadata$sampleMetadata=sampleNamesMakeNames
339 sampleMetadata=cbind(sampleMetadata["sampleMetadata"],sampleMetadata["class"]) #Reorder columns 338 sampleMetadata=cbind(sampleMetadata["sampleMetadata"],sampleMetadata["class"]) #Reorder columns
340 rownames(sampleMetadata)=NULL 339 rownames(sampleMetadata)=NULL
341 340
342 #Create a list of files name in the current directory 341 #Create a list of files name in the current directory
343 list_files=xset@filepaths 342 list_files=xset@filepaths
344 #For each sample file, the following actions are done 343 #For each sample file, the following actions are done
345 for (file in list_files){ 344 for (file in list_files){
346 #Check if the file is in the CDF format 345 #Check if the file is in the CDF format
347 if (!mzR:::netCDFIsFile(file)){ 346 if (!mzR:::netCDFIsFile(file)){
348 347
349 # If the column isn't exist, with add one filled with NA 348 # If the column isn't exist, with add one filled with NA
350 if (is.null(sampleMetadata$polarity)) sampleMetadata$polarity=NA 349 if (is.null(sampleMetadata$polarity)) sampleMetadata$polarity=NA
351 350
352 #Create a simple xcmsRaw object for each sample 351 #Create a simple xcmsRaw object for each sample
353 xcmsRaw=xcmsRaw(file) 352 xcmsRaw=xcmsRaw(file)
354 #Extract the polarity (a list of polarities) 353 #Extract the polarity (a list of polarities)
355 polarity=xcmsRaw@polarity 354 polarity=xcmsRaw@polarity
356 #Verify if all the scans have the same polarity 355 #Verify if all the scans have the same polarity
357 uniq_list=unique(polarity) 356 uniq_list=unique(polarity)
358 if (length(uniq_list)>1){ 357 if (length(uniq_list)>1){
359 polarity="mixed" 358 polarity="mixed"
360 } else { 359 } else {
361 polarity=as.character(uniq_list) 360 polarity=as.character(uniq_list)
362 } 361 }
363 #Transforms the character to obtain only the sample name 362 #Transforms the character to obtain only the sample name
364 filename=basename(file) 363 filename=basename(file)
365 library(tools) 364 library(tools)
366 samplename=file_path_sans_ext(filename) 365 samplename=file_path_sans_ext(filename)
367 366
368 #Set the polarity attribute 367 #Set the polarity attribute
369 sampleMetadata$polarity[sampleMetadata$sampleMetadata==samplename]=polarity 368 sampleMetadata$polarity[sampleMetadata$sampleMetadata==samplename]=polarity
370 369
371 #Delete xcmsRaw object because it creates a bug for the fillpeaks step 370 #Delete xcmsRaw object because it creates a bug for the fillpeaks step
372 rm(xcmsRaw) 371 rm(xcmsRaw)
373 } 372 }
374 373
375 } 374 }
376 375
377 write.table(sampleMetadata, sep="\t", quote=FALSE, row.names=FALSE, file=sampleMetadataOutput) 376 write.table(sampleMetadata, sep="\t", quote=FALSE, row.names=FALSE, file=sampleMetadataOutput)
378 377
379 return(list("sampleNamesOrigin"=sampleNamesOrigin,"sampleNamesMakeNames"=sampleNamesMakeNames)) 378 return(list("sampleNamesOrigin"=sampleNamesOrigin,"sampleNamesMakeNames"=sampleNamesMakeNames))
380 379
381 } 380 }
382 381
383 382
384 ## 383 ##
385 ## This function check if xcms will found all the files 384 ## This function check if xcms will found all the files
386 ## 385 ##
387 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM 386 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr ABiMS TEAM
388 checkFilesCompatibilityWithXcms <- function(directory) { 387 checkFilesCompatibilityWithXcms <- function(directory) {
389 cat("Checking files filenames compatibilities with xmcs...\n") 388 cat("Checking files filenames compatibilities with xmcs...\n")
390 # WHAT XCMS WILL FIND 389 # WHAT XCMS WILL FIND
391 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") 390 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]")
392 filepattern <- paste(paste("\\.", filepattern, "$", sep = ""),collapse = "|") 391 filepattern <- paste(paste("\\.", filepattern, "$", sep = ""),collapse = "|")
393 info <- file.info(directory) 392 info <- file.info(directory)
394 listed <- list.files(directory[info$isdir], pattern = filepattern,recursive = TRUE, full.names = TRUE) 393 listed <- list.files(directory[info$isdir], pattern = filepattern,recursive = TRUE, full.names = TRUE)
395 files <- c(directory[!info$isdir], listed) 394 files <- c(directory[!info$isdir], listed)
396 files_abs <- file.path(getwd(), files) 395 files_abs <- file.path(getwd(), files)
397 exists <- file.exists(files_abs) 396 exists <- file.exists(files_abs)
398 files[exists] <- files_abs[exists] 397 files[exists] <- files_abs[exists]
399 files[exists] <- sub("//","/",files[exists]) 398 files[exists] <- sub("//","/",files[exists])
400 399
401 # WHAT IS ON THE FILESYSTEM 400 # WHAT IS ON THE FILESYSTEM
402 filesystem_filepaths=system(paste("find $PWD/",directory," -not -name '\\.*' -not -path '*conda-env*' -type f -name \"*\"", sep=""), intern=T) 401 filesystem_filepaths=system(paste("find $PWD/",directory," -not -name '\\.*' -not -path '*conda-env*' -type f -name \"*\"", sep=""), intern=T)
403 filesystem_filepaths=filesystem_filepaths[grep(filepattern, filesystem_filepaths, perl=T)] 402 filesystem_filepaths=filesystem_filepaths[grep(filepattern, filesystem_filepaths, perl=T)]
404 403
405 # COMPARISON 404 # COMPARISON
406 if (!is.na(table(filesystem_filepaths %in% files)["FALSE"])) { 405 if (!is.na(table(filesystem_filepaths %in% files)["FALSE"])) {
407 write("\n\nERROR: List of the files which will not be imported by xcmsSet",stderr()) 406 write("\n\nERROR: List of the files which will not be imported by xcmsSet",stderr())
408 write(filesystem_filepaths[!(filesystem_filepaths %in% files)],stderr()) 407 write(filesystem_filepaths[!(filesystem_filepaths %in% files)],stderr())
409 stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.") 408 stop("\n\nERROR: One or more of your files will not be import by xcmsSet. It may due to bad characters in their filenames.")
410 409 }
411 }
412 } 410 }
413 411
414 412
415 413
416 ## 414 ##
417 ## This function check if XML contains special caracters. It also checks integrity and completness. 415 ## This function check if XML contains special caracters. It also checks integrity and completness.
418 ## 416 ##
419 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM 417 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM
420 checkXmlStructure <- function (directory) { 418 checkXmlStructure <- function (directory) {
421 cat("Checking XML structure...\n") 419 cat("Checking XML structure...\n")
422 420
423 cmd=paste("IFS=$'\n'; for xml in $(find",directory,"-not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'); do if [ $(xmllint --nonet --noout \"$xml\" 2> /dev/null; echo $?) -gt 0 ]; then echo $xml;fi; done;") 421 cmd=paste("IFS=$'\n'; for xml in $(find",directory,"-not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'); do if [ $(xmllint --nonet --noout \"$xml\" 2> /dev/null; echo $?) -gt 0 ]; then echo $xml;fi; done;")
424 capture=system(cmd,intern=TRUE) 422 capture=system(cmd,intern=TRUE)
425 423
426 if (length(capture)>0){ 424 if (length(capture)>0){
427 #message=paste("The following mzXML or mzML file is incorrect, please check these files first:",capture) 425 #message=paste("The following mzXML or mzML file is incorrect, please check these files first:",capture)
428 write("\n\nERROR: The following mzXML or mzML file(s) are incorrect, please check these files first:", stderr()) 426 write("\n\nERROR: The following mzXML or mzML file(s) are incorrect, please check these files first:", stderr())
429 write(capture, stderr()) 427 write(capture, stderr())
430 stop("ERROR: xcmsSet cannot continue with incorrect mzXML or mzML files") 428 stop("ERROR: xcmsSet cannot continue with incorrect mzXML or mzML files")
431 } 429 }
432 430
433 } 431 }
434 432
435 433
436 ## 434 ##
437 ## This function check if XML contain special characters 435 ## This function check if XML contain special characters
438 ## 436 ##
439 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM 437 #@author Misharl Monsoor misharl.monsoor@sb-roscoff.fr ABiMS TEAM
440 deleteXmlBadCharacters<- function (directory) { 438 deleteXmlBadCharacters<- function (directory) {
441 cat("Checking Non ASCII characters in the XML...\n") 439 cat("Checking Non ASCII characters in the XML...\n")
442 440
443 processed=F 441 processed=F
444 l=system( paste("find",directory, "-not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'"),intern=TRUE) 442 l=system( paste("find",directory, "-not -name '\\.*' -not -path '*conda-env*' -type f -iname '*.*ml*'"),intern=TRUE)
445 for (i in l){ 443 for (i in l){
446 cmd=paste("LC_ALL=C grep '[^ -~]' \"",i,"\"",sep="") 444 cmd=paste("LC_ALL=C grep '[^ -~]' \"",i,"\"",sep="")
447 capture=suppressWarnings(system(cmd,intern=TRUE)) 445 capture=suppressWarnings(system(cmd,intern=TRUE))
448 if (length(capture)>0){ 446 if (length(capture)>0){
449 cmd=paste("perl -i -pe 's/[^[:ascii:]]//g;'",i) 447 cmd=paste("perl -i -pe 's/[^[:ascii:]]//g;'",i)
450 print( paste("WARNING: Non ASCII characters have been removed from the ",i,"file") ) 448 print( paste("WARNING: Non ASCII characters have been removed from the ",i,"file") )
451 c=system(cmd,intern=TRUE) 449 c=system(cmd,intern=TRUE)
452 capture="" 450 capture=""
453 processed=T 451 processed=T
454 } 452 }
455 } 453 }
456 if (processed) cat("\n\n") 454 if (processed) cat("\n\n")
457 return(processed) 455 return(processed)
458 } 456 }
459 457
460 458
461 ## 459 ##
462 ## This function will compute MD5 checksum to check the data integrity 460 ## This function will compute MD5 checksum to check the data integrity
463 ## 461 ##
464 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr 462 #@author Gildas Le Corguille lecorguille@sb-roscoff.fr
465 getMd5sum <- function (directory) { 463 getMd5sum <- function (directory) {
466 cat("Compute md5 checksum...\n") 464 cat("Compute md5 checksum...\n")
467 # WHAT XCMS WILL FIND 465 # WHAT XCMS WILL FIND
468 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]") 466 filepattern <- c("[Cc][Dd][Ff]", "[Nn][Cc]", "([Mm][Zz])?[Xx][Mm][Ll]","[Mm][Zz][Dd][Aa][Tt][Aa]", "[Mm][Zz][Mm][Ll]")
469 filepattern <- paste(paste("\\.", filepattern, "$", sep = ""),collapse = "|") 467 filepattern <- paste(paste("\\.", filepattern, "$", sep = ""),collapse = "|")
470 info <- file.info(directory) 468 info <- file.info(directory)
471 listed <- list.files(directory[info$isdir], pattern = filepattern,recursive = TRUE, full.names = TRUE) 469 listed <- list.files(directory[info$isdir], pattern = filepattern,recursive = TRUE, full.names = TRUE)
472 files <- c(directory[!info$isdir], listed) 470 files <- c(directory[!info$isdir], listed)
473 exists <- file.exists(files) 471 exists <- file.exists(files)
474 files <- files[exists] 472 files <- files[exists]
475 473
476 library(tools) 474 library(tools)
477 475
478 #cat("\n\n") 476 #cat("\n\n")
479 477
480 return(as.matrix(md5sum(files))) 478 return(as.matrix(md5sum(files)))
481 } 479 }