comparison lib.r @ 36:b6a8143c397c draft

planemo upload commit aa538ae477bc2f901c95d916e11d70eab75b5e00
author lecorguille
date Fri, 01 Feb 2019 11:29:35 -0500
parents b02797fbead1
children 636e36a64d31
comparison
equal deleted inserted replaced
35:b02797fbead1 36:b6a8143c397c
32 system(paste0("gm convert ",filebase,"_box/*.png ",pdfBoxOutput)) 32 system(paste0("gm convert ",filebase,"_box/*.png ",pdfBoxOutput))
33 33
34 } 34 }
35 35
36 #@author G. Le Corguille 36 #@author G. Le Corguille
37 #The function create a zip archive from the different png generated by diffreport
38 diffreport_png2zip <- function() {
39 zip("eic.zip",dir(pattern="_eic"))
40 zip("box.zip",dir(pattern="_box"))
41 }
42
43 #The function create a zip archive from the different tabular generated by diffreport
44 diffreport_tabular2zip <- function() {
45 zip("tabular.zip",dir(pattern="tabular/*"))
46 }
47
48 #@author G. Le Corguille
37 #This function convert if it is required the Retention Time in minutes 49 #This function convert if it is required the Retention Time in minutes
38 RTSecondToMinute <- function(variableMetadata, convertRTMinute) { 50 RTSecondToMinute <- function(variableMetadata, convertRTMinute) {
39 if (convertRTMinute){ 51 if (convertRTMinute){
40 #converting the retention times (seconds) into minutes 52 #converting the retention times (seconds) into minutes
41 print("converting the retention times into minutes in the variableMetadata") 53 print("converting the retention times into minutes in the variableMetadata")
55 variableMetadata=cbind(name=variableMetadata$name, namecustom=namecustom, variableMetadata[,!(colnames(variableMetadata) %in% c("name"))]) 67 variableMetadata=cbind(name=variableMetadata$name, namecustom=namecustom, variableMetadata[,!(colnames(variableMetadata) %in% c("name"))])
56 return(variableMetadata) 68 return(variableMetadata)
57 } 69 }
58 70
59 #The function annotateDiffreport without the corr function which bugs 71 #The function annotateDiffreport without the corr function which bugs
60 annotatediff <- function(xset=xset, listArguments=listArguments, variableMetadataOutput="variableMetadata.tsv", dataMatrixOutput="dataMatrix.tsv") { 72 annotatediff <- function(xset=xset, listArguments=listArguments, variableMetadataOutput="variableMetadata.tsv") {
61 # Resolve the bug with x11, with the function png 73 # Resolve the bug with x11, with the function png
62 options(bitmapType='cairo') 74 options(bitmapType='cairo')
63 75
64 #Check if the fillpeaks step has been done previously, if it hasn't, there is an error message and the execution is stopped. 76 #Check if the fillpeaks step has been done previously, if it hasn't, there is an error message and the execution is stopped.
65 res=try(is.null(xset@filled)) 77 res=try(is.null(xset@filled))
105 117
106 # launch annotate 118 # launch annotate
107 xa = do.call("annotate", listArguments4annotate) 119 xa = do.call("annotate", listArguments4annotate)
108 peakList=getPeaklist(xa,intval=listArguments[["intval"]]) 120 peakList=getPeaklist(xa,intval=listArguments[["intval"]])
109 peakList=cbind(groupnames(xa@xcmsSet),peakList); colnames(peakList)[1] = c("name"); 121 peakList=cbind(groupnames(xa@xcmsSet),peakList); colnames(peakList)[1] = c("name");
110
111 # --- dataMatrix ---
112 dataMatrix = peakList[,(make.names(colnames(peakList)) %in% c("name", make.names(sampnames(xa@xcmsSet))))]
113 write.table(dataMatrix, sep="\t", quote=FALSE, row.names=FALSE, file=dataMatrixOutput)
114
115 122
116 # --- Multi condition : diffreport --- 123 # --- Multi condition : diffreport ---
117 diffrepOri=NULL 124 diffrepOri=NULL
118 if (!is.null(listArguments[["runDiffreport"]]) & nlevels(sampclass(xset))>=2) { 125 if (!is.null(listArguments[["runDiffreport"]]) & nlevels(sampclass(xset))>=2) {
119 #Check if the fillpeaks step has been done previously, if it hasn't, there is an error message and the execution is stopped. 126 #Check if the fillpeaks step has been done previously, if it hasn't, there is an error message and the execution is stopped.
147 154
148 dir.create("tabular") 155 dir.create("tabular")
149 write.table(diffrep, sep="\t", quote=FALSE, row.names=FALSE, file=paste("tabular/",filebase,"_tsv.tabular",sep="")) 156 write.table(diffrep, sep="\t", quote=FALSE, row.names=FALSE, file=paste("tabular/",filebase,"_tsv.tabular",sep=""))
150 157
151 if (listArguments[["eicmax"]] != 0) { 158 if (listArguments[["eicmax"]] != 0) {
152 diffreport_png2pdf(filebase) 159 if (listArguments[["png2"]] == "pdf")
160 diffreport_png2pdf(filebase)
153 } 161 }
154 } 162 }
155 } 163 }
156 } 164 }
165 if (listArguments[["png2"]] == "zip")
166 diffreport_png2zip()
167 if (listArguments[["tabular2"]] == "zip")
168 diffreport_tabular2zip()
157 } 169 }
158 170
159 # --- variableMetadata --- 171 # --- variableMetadata ---
160 variableMetadata=peakList[,!(make.names(colnames(peakList)) %in% c(make.names(sampnames(xa@xcmsSet))))] 172 variableMetadata=peakList[,!(make.names(colnames(peakList)) %in% c(make.names(sampnames(xa@xcmsSet))))]
161 variableMetadata = RTSecondToMinute(variableMetadata, listArguments[["convertRTMinute"]]) 173 variableMetadata = RTSecondToMinute(variableMetadata, listArguments[["convertRTMinute"]])
328 ############################################################ 340 ############################################################
329 ## getEIC 341 ## getEIC
330 getEIC <- function(object, mzrange, rtrange = 200, 342 getEIC <- function(object, mzrange, rtrange = 200,
331 groupidx, sampleidx = sampnames(object), 343 groupidx, sampleidx = sampnames(object),
332 rt = c("corrected", "raw")) { 344 rt = c("corrected", "raw")) {
333 345
334 files <- filepaths(object) 346 files <- filepaths(object)
335 grp <- groups(object) 347 grp <- groups(object)
336 samp <- sampnames(object) 348 samp <- sampnames(object)
337 prof <- profinfo(object) 349 prof <- profinfo(object)
338 350
339 rt <- match.arg(rt) 351 rt <- match.arg(rt)
340 352
341 if (is.numeric(sampleidx)) 353 if (is.numeric(sampleidx))
342 sampleidx <- sampnames(object)[sampleidx] 354 sampleidx <- sampnames(object)[sampleidx]
343 sampidx <- match(sampleidx, sampnames(object)) 355 sampidx <- match(sampleidx, sampnames(object))
344 356
345 if (!missing(groupidx)) { 357 if (!missing(groupidx)) {
346 if (is.numeric(groupidx)) 358 if (is.numeric(groupidx))
347 groupidx <- groupnames(object)[unique(as.integer(groupidx))] 359 groupidx <- groupnames(object)[unique(as.integer(groupidx))]
348 grpidx <- match(groupidx, groupnames(object, template = groupidx)) 360 grpidx <- match(groupidx, groupnames(object, template = groupidx))
349 } 361 }
350 362
351 if (missing(mzrange)) { 363 if (missing(mzrange)) {
352 if (missing(groupidx)) 364 if (missing(groupidx))
353 stop("No m/z range or groups specified") 365 stop("No m/z range or groups specified")
354 if (any(is.na(groupval(object, value = "mz")))) 366 if (any(is.na(groupval(object, value = "mz"))))
355 warning( 367 warning(
367 } else if (all(c("mzmin","mzmax") %in% colnames(mzrange))) 379 } else if (all(c("mzmin","mzmax") %in% colnames(mzrange)))
368 mzrange <- mzrange[,c("mzmin", "mzmax"),drop=FALSE] 380 mzrange <- mzrange[,c("mzmin", "mzmax"),drop=FALSE]
369 else if (is.null(dim(mzrange))) 381 else if (is.null(dim(mzrange)))
370 stop("mzrange must be a matrix") 382 stop("mzrange must be a matrix")
371 colnames(mzrange) <- c("mzmin", "mzmax") 383 colnames(mzrange) <- c("mzmin", "mzmax")
372 384
373 if (length(rtrange) == 1) { 385 if (length(rtrange) == 1) {
374 if (missing(groupidx)) 386 if (missing(groupidx))
375 rtrange <- matrix(rep(range(object@rt[[rt]][sampidx]), nrow(mzrange)), 387 rtrange <- matrix(rep(range(object@rt[[rt]][sampidx]), nrow(mzrange)),
376 ncol = 2, byrow = TRUE) 388 ncol = 2, byrow = TRUE)
377 else { 389 else {
378 rtrange <- retexp(grp[grpidx,c("rtmin","rtmax"),drop=FALSE], rtrange) 390 rtrange <- retexp(grp[grpidx,c("rtmin","rtmax"),drop=FALSE], rtrange)
379 } 391 }
380 } else if (is.null(dim(rtrange))) 392 } else if (is.null(dim(rtrange)))
381 stop("rtrange must be a matrix or single number") 393 stop("rtrange must be a matrix or single number")
382 colnames(rtrange) <- c("rtmin", "rtmax") 394 colnames(rtrange) <- c("rtmin", "rtmax")
383 395
384 ## Ensure that we've got corrected retention time if requested. 396 ## Ensure that we've got corrected retention time if requested.
385 if (is.null(object@rt[[rt]])) 397 if (is.null(object@rt[[rt]]))
386 stop(rt, " retention times not present in 'object'!") 398 stop(rt, " retention times not present in 'object'!")
387 399
388 ## Ensure that the defined retention time range is within the rtrange of the 400 ## Ensure that the defined retention time range is within the rtrange of the
389 ## object: we're using the max minimal rt of all files and the min maximal rt 401 ## object: we're using the max minimal rt of all files and the min maximal rt
390 rtrs <- lapply(object@rt[[rt]], range) 402 rtrs <- lapply(object@rt[[rt]], range)
391 rtr <- c(max(unlist(lapply(rtrs, "[", 1))), 403 rtr <- c(max(unlist(lapply(rtrs, "[", 1))),
392 min(unlist(lapply(rtrs, "[", 2)))) 404 min(unlist(lapply(rtrs, "[", 2))))
404 if (any(lower_rt_outside) | any(upper_rt_outside)) { 416 if (any(lower_rt_outside) | any(upper_rt_outside)) {
405 ## Silently fix these ranges. 417 ## Silently fix these ranges.
406 rtrange[lower_rt_outside, "rtmin"] <- rtr[1] 418 rtrange[lower_rt_outside, "rtmin"] <- rtr[1]
407 rtrange[upper_rt_outside, "rtmax"] <- rtr[2] 419 rtrange[upper_rt_outside, "rtmax"] <- rtr[2]
408 } 420 }
409 421
410 if (missing(groupidx)) 422 if (missing(groupidx))
411 gnames <- character(0) 423 gnames <- character(0)
412 else 424 else
413 gnames <- groupidx 425 gnames <- groupidx
414 426
415 eic <- vector("list", length(sampleidx)) 427 eic <- vector("list", length(sampleidx))
416 names(eic) <- sampleidx 428 names(eic) <- sampleidx
417 429
418 for (i in seq(along = sampidx)) { 430 for (i in seq(along = sampidx)) {
419 431
420 ## cat(sampleidx[i], "") 432 ## cat(sampleidx[i], "")
421 flush.console() 433 flush.console()
422 ## getXcmsRaw takes care of rt correction, susetting to scanrage and other 434 ## getXcmsRaw takes care of rt correction, susetting to scanrage and other
423 ## stuff. 435 ## stuff.
424 lcraw <- getXcmsRaw(object, sampleidx = sampidx[i], rt=rt) 436 lcraw <- getXcmsRaw(object, sampleidx = sampidx[i], rt=rt)
426 eic[[i]] <- currenteic@eic[[1]] 438 eic[[i]] <- currenteic@eic[[1]]
427 rm(lcraw) 439 rm(lcraw)
428 gc() 440 gc()
429 } 441 }
430 ## cat("\n") 442 ## cat("\n")
431 443
432 invisible(new("xcmsEIC", eic = eic, mzrange = mzrange, rtrange = rtrange, 444 invisible(new("xcmsEIC", eic = eic, mzrange = mzrange, rtrange = rtrange,
433 rt = rt, groupnames = gnames)) 445 rt = rt, groupnames = gnames))
434 } 446 }
435 447
436 #@TODO: remove this function as soon as we can use xcms 3.x.x from Bioconductor 3.7 448 #@TODO: remove this function as soon as we can use xcms 3.x.x from Bioconductor 3.7
447 classeic = c(class1,class2), 459 classeic = c(class1,class2),
448 value = c("into","maxo","intb"), 460 value = c("into","maxo","intb"),
449 metlin = FALSE, 461 metlin = FALSE,
450 h = 480, w = 640, mzdec=2, 462 h = 480, w = 640, mzdec=2,
451 missing = numeric(), ...) { 463 missing = numeric(), ...) {
452 464
453 if ( nrow(object@groups)<1 || length(object@groupidx) <1) { 465 if ( nrow(object@groups)<1 || length(object@groupidx) <1) {
454 stop("No group information. Use group().") 466 stop("No group information. Use group().")
455 } 467 }
456 468
457 if (!is.numeric(w) || !is.numeric(h)) 469 if (!is.numeric(w) || !is.numeric(h))
458 stop("'h' and 'w' have to be numeric") 470 stop("'h' and 'w' have to be numeric")
459 ## require(multtest) || stop("Couldn't load multtest") 471 ## require(multtest) || stop("Couldn't load multtest")
460 472
461 value <- match.arg(value) 473 value <- match.arg(value)
462 groupmat <- groups(object) 474 groupmat <- groups(object)
463 if (length(groupmat) == 0) 475 if (length(groupmat) == 0)
464 stop("No group information found") 476 stop("No group information found")
465 samples <- sampnames(object) 477 samples <- sampnames(object)
466 n <- length(samples) 478 n <- length(samples)
467 classlabel <- sampclass(object) 479 classlabel <- sampclass(object)
468 classlabel <- levels(classlabel)[as.vector(unclass(classlabel))] 480 classlabel <- levels(classlabel)[as.vector(unclass(classlabel))]
469 481
470 values <- groupval(object, "medret", value=value) 482 values <- groupval(object, "medret", value=value)
471 indecies <- groupval(object, "medret", value = "index") 483 indecies <- groupval(object, "medret", value = "index")
472 484
473 if (!all(c(class1,class2) %in% classlabel)) 485 if (!all(c(class1,class2) %in% classlabel))
474 stop("Incorrect Class Labels") 486 stop("Incorrect Class Labels")
475 487
476 ## c1 and c2 are column indices of class1 and class2 resp. 488 ## c1 and c2 are column indices of class1 and class2 resp.
477 c1 <- which(classlabel %in% class1) 489 c1 <- which(classlabel %in% class1)
478 c2 <- which(classlabel %in% class2) 490 c2 <- which(classlabel %in% class2)
479 ceic <- which(classlabel %in% classeic) 491 ceic <- which(classlabel %in% classeic)
480 if (length(intersect(c1, c2)) > 0) 492 if (length(intersect(c1, c2)) > 0)
481 stop("Intersecting Classes") 493 stop("Intersecting Classes")
482 494
483 ## Optionally replace NA values with the value provided with missing 495 ## Optionally replace NA values with the value provided with missing
484 if (length(missing)) { 496 if (length(missing)) {
485 if (is.numeric(missing)) { 497 if (is.numeric(missing)) {
486 ## handles NA, Inf and -Inf 498 ## handles NA, Inf and -Inf
487 values[, c(c1, c2)][!is.finite(values[, c(c1, c2)])] <- missing[1] 499 values[, c(c1, c2)][!is.finite(values[, c(c1, c2)])] <- missing[1]
491 ## Check against missing Values 503 ## Check against missing Values
492 if (any(is.na(values[, c(c1, c2)]))) 504 if (any(is.na(values[, c(c1, c2)])))
493 warning("`NA` values in xcmsSet. Use fillPeaks() on the object to fill", 505 warning("`NA` values in xcmsSet. Use fillPeaks() on the object to fill",
494 "-in missing peak values. Note however that this will also ", 506 "-in missing peak values. Note however that this will also ",
495 "insert intensities of 0 for peaks that can not be filled in.") 507 "insert intensities of 0 for peaks that can not be filled in.")
496 508
497 mean1 <- rowMeans(values[,c1,drop=FALSE], na.rm = TRUE) 509 mean1 <- rowMeans(values[,c1,drop=FALSE], na.rm = TRUE)
498 mean2 <- rowMeans(values[,c2,drop=FALSE], na.rm = TRUE) 510 mean2 <- rowMeans(values[,c2,drop=FALSE], na.rm = TRUE)
499 511
500 ## Calculate fold change. 512 ## Calculate fold change.
501 ## For foldchange <1 set fold to 1/fold 513 ## For foldchange <1 set fold to 1/fold
502 ## See tstat to check which was higher 514 ## See tstat to check which was higher
503 fold <- mean2 / mean1 515 fold <- mean2 / mean1
504 fold[!is.na(fold) & fold < 1] <- 1/fold[!is.na(fold) & fold < 1] 516 fold[!is.na(fold) & fold < 1] <- 1/fold[!is.na(fold) & fold < 1]
505 517
506 testval <- values[,c(c1,c2)] 518 testval <- values[,c(c1,c2)]
507 ## Replace eventual infinite values with NA (CAMERA issue #33) 519 ## Replace eventual infinite values with NA (CAMERA issue #33)
508 testval[is.infinite(testval)] <- NA 520 testval[is.infinite(testval)] <- NA
509 testclab <- c(rep(0,length(c1)),rep(1,length(c2))) 521 testclab <- c(rep(0,length(c1)),rep(1,length(c2)))
510 522
511 if (min(length(c1), length(c2)) >= 2) { 523 if (min(length(c1), length(c2)) >= 2) {
512 tstat <- mt.teststat(testval, testclab, ...) 524 tstat <- mt.teststat(testval, testclab, ...)
513 pvalue <- xcms:::pval(testval, testclab, tstat) 525 pvalue <- xcms:::pval(testval, testclab, tstat)
514 } else { 526 } else {
515 message("Too few samples per class, skipping t-test.") 527 message("Too few samples per class, skipping t-test.")
541 twosamp <- twosamp[tsidx,] 553 twosamp <- twosamp[tsidx,]
542 rownames(twosamp) <- 1:nrow(twosamp) 554 rownames(twosamp) <- 1:nrow(twosamp)
543 values<-values[tsidx,] 555 values<-values[tsidx,]
544 } else 556 } else
545 tsidx <- 1:nrow(values) 557 tsidx <- 1:nrow(values)
546 558
547 if (length(filebase)) 559 if (length(filebase))
548 write.table(twosamp, paste(filebase, ".tsv", sep = ""), quote = FALSE, sep = "\t", col.names = NA) 560 write.table(twosamp, paste(filebase, ".tsv", sep = ""), quote = FALSE, sep = "\t", col.names = NA)
549 561
550 if (eicmax > 0) { 562 if (eicmax > 0) {
551 if (length(unique(peaks(object)[,"rt"])) > 1) { 563 if (length(unique(peaks(object)[,"rt"])) > 1) {
552 ## This looks like "normal" LC data 564 ## This looks like "normal" LC data
553 565
554 eicmax <- min(eicmax, length(tsidx)) 566 eicmax <- min(eicmax, length(tsidx))
555 eics <- getEIC(object, rtrange = eicwidth*1.1, sampleidx = ceic, 567 eics <- getEIC(object, rtrange = eicwidth*1.1, sampleidx = ceic,
556 groupidx = tsidx[seq(length = eicmax)]) 568 groupidx = tsidx[seq(length = eicmax)])
557 569
558 if (length(filebase)) { 570 if (length(filebase)) {
559 eicdir <- paste(filebase, "_eic", sep="") 571 eicdir <- paste(filebase, "_eic", sep="")
560 boxdir <- paste(filebase, "_box", sep="") 572 boxdir <- paste(filebase, "_box", sep="")
561 dir.create(eicdir) 573 dir.create(eicdir)
562 dir.create(boxdir) 574 dir.create(boxdir)
570 pdf(file.path(eicdir, "%003d.pdf"), width = w/72, 582 pdf(file.path(eicdir, "%003d.pdf"), width = w/72,
571 height = h/72, onefile = FALSE) 583 height = h/72, onefile = FALSE)
572 } 584 }
573 } 585 }
574 plot(eics, object, rtrange = eicwidth, mzdec=mzdec) 586 plot(eics, object, rtrange = eicwidth, mzdec=mzdec)
575 587
576 if (length(filebase)) 588 if (length(filebase))
577 dev.off() 589 dev.off()
578 } else { 590 } else {
579 ## This looks like a direct-infusion single spectrum 591 ## This looks like a direct-infusion single spectrum
580 if (length(filebase)) { 592 if (length(filebase)) {
594 width=w, height=h) 606 width=w, height=h)
595 pdf(file.path(eicdir, "%003d.pdf"), width = w/72, 607 pdf(file.path(eicdir, "%003d.pdf"), width = w/72,
596 height = h/72, onefile = FALSE) 608 height = h/72, onefile = FALSE)
597 } 609 }
598 } 610 }
599 611
600 plotSpecWindow(object, gidxs = tsidx[seq(length = eicmax)], borderwidth=1) 612 plotSpecWindow(object, gidxs = tsidx[seq(length = eicmax)], borderwidth=1)
601 613
602 if (length(filebase)) 614 if (length(filebase))
603 dev.off() 615 dev.off()
604 } 616 }
605 } 617 }
606 618
607 invisible(twosamp) 619 invisible(twosamp)
608 } 620 }