Mercurial > repos > lecorguille > camera_annotate
comparison lib.r @ 36:b6a8143c397c draft
planemo upload commit aa538ae477bc2f901c95d916e11d70eab75b5e00
author | lecorguille |
---|---|
date | Fri, 01 Feb 2019 11:29:35 -0500 |
parents | b02797fbead1 |
children | 636e36a64d31 |
comparison
equal
deleted
inserted
replaced
35:b02797fbead1 | 36:b6a8143c397c |
---|---|
32 system(paste0("gm convert ",filebase,"_box/*.png ",pdfBoxOutput)) | 32 system(paste0("gm convert ",filebase,"_box/*.png ",pdfBoxOutput)) |
33 | 33 |
34 } | 34 } |
35 | 35 |
36 #@author G. Le Corguille | 36 #@author G. Le Corguille |
37 #The function create a zip archive from the different png generated by diffreport | |
38 diffreport_png2zip <- function() { | |
39 zip("eic.zip",dir(pattern="_eic")) | |
40 zip("box.zip",dir(pattern="_box")) | |
41 } | |
42 | |
43 #The function create a zip archive from the different tabular generated by diffreport | |
44 diffreport_tabular2zip <- function() { | |
45 zip("tabular.zip",dir(pattern="tabular/*")) | |
46 } | |
47 | |
48 #@author G. Le Corguille | |
37 #This function convert if it is required the Retention Time in minutes | 49 #This function convert if it is required the Retention Time in minutes |
38 RTSecondToMinute <- function(variableMetadata, convertRTMinute) { | 50 RTSecondToMinute <- function(variableMetadata, convertRTMinute) { |
39 if (convertRTMinute){ | 51 if (convertRTMinute){ |
40 #converting the retention times (seconds) into minutes | 52 #converting the retention times (seconds) into minutes |
41 print("converting the retention times into minutes in the variableMetadata") | 53 print("converting the retention times into minutes in the variableMetadata") |
55 variableMetadata=cbind(name=variableMetadata$name, namecustom=namecustom, variableMetadata[,!(colnames(variableMetadata) %in% c("name"))]) | 67 variableMetadata=cbind(name=variableMetadata$name, namecustom=namecustom, variableMetadata[,!(colnames(variableMetadata) %in% c("name"))]) |
56 return(variableMetadata) | 68 return(variableMetadata) |
57 } | 69 } |
58 | 70 |
59 #The function annotateDiffreport without the corr function which bugs | 71 #The function annotateDiffreport without the corr function which bugs |
60 annotatediff <- function(xset=xset, listArguments=listArguments, variableMetadataOutput="variableMetadata.tsv", dataMatrixOutput="dataMatrix.tsv") { | 72 annotatediff <- function(xset=xset, listArguments=listArguments, variableMetadataOutput="variableMetadata.tsv") { |
61 # Resolve the bug with x11, with the function png | 73 # Resolve the bug with x11, with the function png |
62 options(bitmapType='cairo') | 74 options(bitmapType='cairo') |
63 | 75 |
64 #Check if the fillpeaks step has been done previously, if it hasn't, there is an error message and the execution is stopped. | 76 #Check if the fillpeaks step has been done previously, if it hasn't, there is an error message and the execution is stopped. |
65 res=try(is.null(xset@filled)) | 77 res=try(is.null(xset@filled)) |
105 | 117 |
106 # launch annotate | 118 # launch annotate |
107 xa = do.call("annotate", listArguments4annotate) | 119 xa = do.call("annotate", listArguments4annotate) |
108 peakList=getPeaklist(xa,intval=listArguments[["intval"]]) | 120 peakList=getPeaklist(xa,intval=listArguments[["intval"]]) |
109 peakList=cbind(groupnames(xa@xcmsSet),peakList); colnames(peakList)[1] = c("name"); | 121 peakList=cbind(groupnames(xa@xcmsSet),peakList); colnames(peakList)[1] = c("name"); |
110 | |
111 # --- dataMatrix --- | |
112 dataMatrix = peakList[,(make.names(colnames(peakList)) %in% c("name", make.names(sampnames(xa@xcmsSet))))] | |
113 write.table(dataMatrix, sep="\t", quote=FALSE, row.names=FALSE, file=dataMatrixOutput) | |
114 | |
115 | 122 |
116 # --- Multi condition : diffreport --- | 123 # --- Multi condition : diffreport --- |
117 diffrepOri=NULL | 124 diffrepOri=NULL |
118 if (!is.null(listArguments[["runDiffreport"]]) & nlevels(sampclass(xset))>=2) { | 125 if (!is.null(listArguments[["runDiffreport"]]) & nlevels(sampclass(xset))>=2) { |
119 #Check if the fillpeaks step has been done previously, if it hasn't, there is an error message and the execution is stopped. | 126 #Check if the fillpeaks step has been done previously, if it hasn't, there is an error message and the execution is stopped. |
147 | 154 |
148 dir.create("tabular") | 155 dir.create("tabular") |
149 write.table(diffrep, sep="\t", quote=FALSE, row.names=FALSE, file=paste("tabular/",filebase,"_tsv.tabular",sep="")) | 156 write.table(diffrep, sep="\t", quote=FALSE, row.names=FALSE, file=paste("tabular/",filebase,"_tsv.tabular",sep="")) |
150 | 157 |
151 if (listArguments[["eicmax"]] != 0) { | 158 if (listArguments[["eicmax"]] != 0) { |
152 diffreport_png2pdf(filebase) | 159 if (listArguments[["png2"]] == "pdf") |
160 diffreport_png2pdf(filebase) | |
153 } | 161 } |
154 } | 162 } |
155 } | 163 } |
156 } | 164 } |
165 if (listArguments[["png2"]] == "zip") | |
166 diffreport_png2zip() | |
167 if (listArguments[["tabular2"]] == "zip") | |
168 diffreport_tabular2zip() | |
157 } | 169 } |
158 | 170 |
159 # --- variableMetadata --- | 171 # --- variableMetadata --- |
160 variableMetadata=peakList[,!(make.names(colnames(peakList)) %in% c(make.names(sampnames(xa@xcmsSet))))] | 172 variableMetadata=peakList[,!(make.names(colnames(peakList)) %in% c(make.names(sampnames(xa@xcmsSet))))] |
161 variableMetadata = RTSecondToMinute(variableMetadata, listArguments[["convertRTMinute"]]) | 173 variableMetadata = RTSecondToMinute(variableMetadata, listArguments[["convertRTMinute"]]) |
328 ############################################################ | 340 ############################################################ |
329 ## getEIC | 341 ## getEIC |
330 getEIC <- function(object, mzrange, rtrange = 200, | 342 getEIC <- function(object, mzrange, rtrange = 200, |
331 groupidx, sampleidx = sampnames(object), | 343 groupidx, sampleidx = sampnames(object), |
332 rt = c("corrected", "raw")) { | 344 rt = c("corrected", "raw")) { |
333 | 345 |
334 files <- filepaths(object) | 346 files <- filepaths(object) |
335 grp <- groups(object) | 347 grp <- groups(object) |
336 samp <- sampnames(object) | 348 samp <- sampnames(object) |
337 prof <- profinfo(object) | 349 prof <- profinfo(object) |
338 | 350 |
339 rt <- match.arg(rt) | 351 rt <- match.arg(rt) |
340 | 352 |
341 if (is.numeric(sampleidx)) | 353 if (is.numeric(sampleidx)) |
342 sampleidx <- sampnames(object)[sampleidx] | 354 sampleidx <- sampnames(object)[sampleidx] |
343 sampidx <- match(sampleidx, sampnames(object)) | 355 sampidx <- match(sampleidx, sampnames(object)) |
344 | 356 |
345 if (!missing(groupidx)) { | 357 if (!missing(groupidx)) { |
346 if (is.numeric(groupidx)) | 358 if (is.numeric(groupidx)) |
347 groupidx <- groupnames(object)[unique(as.integer(groupidx))] | 359 groupidx <- groupnames(object)[unique(as.integer(groupidx))] |
348 grpidx <- match(groupidx, groupnames(object, template = groupidx)) | 360 grpidx <- match(groupidx, groupnames(object, template = groupidx)) |
349 } | 361 } |
350 | 362 |
351 if (missing(mzrange)) { | 363 if (missing(mzrange)) { |
352 if (missing(groupidx)) | 364 if (missing(groupidx)) |
353 stop("No m/z range or groups specified") | 365 stop("No m/z range or groups specified") |
354 if (any(is.na(groupval(object, value = "mz")))) | 366 if (any(is.na(groupval(object, value = "mz")))) |
355 warning( | 367 warning( |
367 } else if (all(c("mzmin","mzmax") %in% colnames(mzrange))) | 379 } else if (all(c("mzmin","mzmax") %in% colnames(mzrange))) |
368 mzrange <- mzrange[,c("mzmin", "mzmax"),drop=FALSE] | 380 mzrange <- mzrange[,c("mzmin", "mzmax"),drop=FALSE] |
369 else if (is.null(dim(mzrange))) | 381 else if (is.null(dim(mzrange))) |
370 stop("mzrange must be a matrix") | 382 stop("mzrange must be a matrix") |
371 colnames(mzrange) <- c("mzmin", "mzmax") | 383 colnames(mzrange) <- c("mzmin", "mzmax") |
372 | 384 |
373 if (length(rtrange) == 1) { | 385 if (length(rtrange) == 1) { |
374 if (missing(groupidx)) | 386 if (missing(groupidx)) |
375 rtrange <- matrix(rep(range(object@rt[[rt]][sampidx]), nrow(mzrange)), | 387 rtrange <- matrix(rep(range(object@rt[[rt]][sampidx]), nrow(mzrange)), |
376 ncol = 2, byrow = TRUE) | 388 ncol = 2, byrow = TRUE) |
377 else { | 389 else { |
378 rtrange <- retexp(grp[grpidx,c("rtmin","rtmax"),drop=FALSE], rtrange) | 390 rtrange <- retexp(grp[grpidx,c("rtmin","rtmax"),drop=FALSE], rtrange) |
379 } | 391 } |
380 } else if (is.null(dim(rtrange))) | 392 } else if (is.null(dim(rtrange))) |
381 stop("rtrange must be a matrix or single number") | 393 stop("rtrange must be a matrix or single number") |
382 colnames(rtrange) <- c("rtmin", "rtmax") | 394 colnames(rtrange) <- c("rtmin", "rtmax") |
383 | 395 |
384 ## Ensure that we've got corrected retention time if requested. | 396 ## Ensure that we've got corrected retention time if requested. |
385 if (is.null(object@rt[[rt]])) | 397 if (is.null(object@rt[[rt]])) |
386 stop(rt, " retention times not present in 'object'!") | 398 stop(rt, " retention times not present in 'object'!") |
387 | 399 |
388 ## Ensure that the defined retention time range is within the rtrange of the | 400 ## Ensure that the defined retention time range is within the rtrange of the |
389 ## object: we're using the max minimal rt of all files and the min maximal rt | 401 ## object: we're using the max minimal rt of all files and the min maximal rt |
390 rtrs <- lapply(object@rt[[rt]], range) | 402 rtrs <- lapply(object@rt[[rt]], range) |
391 rtr <- c(max(unlist(lapply(rtrs, "[", 1))), | 403 rtr <- c(max(unlist(lapply(rtrs, "[", 1))), |
392 min(unlist(lapply(rtrs, "[", 2)))) | 404 min(unlist(lapply(rtrs, "[", 2)))) |
404 if (any(lower_rt_outside) | any(upper_rt_outside)) { | 416 if (any(lower_rt_outside) | any(upper_rt_outside)) { |
405 ## Silently fix these ranges. | 417 ## Silently fix these ranges. |
406 rtrange[lower_rt_outside, "rtmin"] <- rtr[1] | 418 rtrange[lower_rt_outside, "rtmin"] <- rtr[1] |
407 rtrange[upper_rt_outside, "rtmax"] <- rtr[2] | 419 rtrange[upper_rt_outside, "rtmax"] <- rtr[2] |
408 } | 420 } |
409 | 421 |
410 if (missing(groupidx)) | 422 if (missing(groupidx)) |
411 gnames <- character(0) | 423 gnames <- character(0) |
412 else | 424 else |
413 gnames <- groupidx | 425 gnames <- groupidx |
414 | 426 |
415 eic <- vector("list", length(sampleidx)) | 427 eic <- vector("list", length(sampleidx)) |
416 names(eic) <- sampleidx | 428 names(eic) <- sampleidx |
417 | 429 |
418 for (i in seq(along = sampidx)) { | 430 for (i in seq(along = sampidx)) { |
419 | 431 |
420 ## cat(sampleidx[i], "") | 432 ## cat(sampleidx[i], "") |
421 flush.console() | 433 flush.console() |
422 ## getXcmsRaw takes care of rt correction, susetting to scanrage and other | 434 ## getXcmsRaw takes care of rt correction, susetting to scanrage and other |
423 ## stuff. | 435 ## stuff. |
424 lcraw <- getXcmsRaw(object, sampleidx = sampidx[i], rt=rt) | 436 lcraw <- getXcmsRaw(object, sampleidx = sampidx[i], rt=rt) |
426 eic[[i]] <- currenteic@eic[[1]] | 438 eic[[i]] <- currenteic@eic[[1]] |
427 rm(lcraw) | 439 rm(lcraw) |
428 gc() | 440 gc() |
429 } | 441 } |
430 ## cat("\n") | 442 ## cat("\n") |
431 | 443 |
432 invisible(new("xcmsEIC", eic = eic, mzrange = mzrange, rtrange = rtrange, | 444 invisible(new("xcmsEIC", eic = eic, mzrange = mzrange, rtrange = rtrange, |
433 rt = rt, groupnames = gnames)) | 445 rt = rt, groupnames = gnames)) |
434 } | 446 } |
435 | 447 |
436 #@TODO: remove this function as soon as we can use xcms 3.x.x from Bioconductor 3.7 | 448 #@TODO: remove this function as soon as we can use xcms 3.x.x from Bioconductor 3.7 |
447 classeic = c(class1,class2), | 459 classeic = c(class1,class2), |
448 value = c("into","maxo","intb"), | 460 value = c("into","maxo","intb"), |
449 metlin = FALSE, | 461 metlin = FALSE, |
450 h = 480, w = 640, mzdec=2, | 462 h = 480, w = 640, mzdec=2, |
451 missing = numeric(), ...) { | 463 missing = numeric(), ...) { |
452 | 464 |
453 if ( nrow(object@groups)<1 || length(object@groupidx) <1) { | 465 if ( nrow(object@groups)<1 || length(object@groupidx) <1) { |
454 stop("No group information. Use group().") | 466 stop("No group information. Use group().") |
455 } | 467 } |
456 | 468 |
457 if (!is.numeric(w) || !is.numeric(h)) | 469 if (!is.numeric(w) || !is.numeric(h)) |
458 stop("'h' and 'w' have to be numeric") | 470 stop("'h' and 'w' have to be numeric") |
459 ## require(multtest) || stop("Couldn't load multtest") | 471 ## require(multtest) || stop("Couldn't load multtest") |
460 | 472 |
461 value <- match.arg(value) | 473 value <- match.arg(value) |
462 groupmat <- groups(object) | 474 groupmat <- groups(object) |
463 if (length(groupmat) == 0) | 475 if (length(groupmat) == 0) |
464 stop("No group information found") | 476 stop("No group information found") |
465 samples <- sampnames(object) | 477 samples <- sampnames(object) |
466 n <- length(samples) | 478 n <- length(samples) |
467 classlabel <- sampclass(object) | 479 classlabel <- sampclass(object) |
468 classlabel <- levels(classlabel)[as.vector(unclass(classlabel))] | 480 classlabel <- levels(classlabel)[as.vector(unclass(classlabel))] |
469 | 481 |
470 values <- groupval(object, "medret", value=value) | 482 values <- groupval(object, "medret", value=value) |
471 indecies <- groupval(object, "medret", value = "index") | 483 indecies <- groupval(object, "medret", value = "index") |
472 | 484 |
473 if (!all(c(class1,class2) %in% classlabel)) | 485 if (!all(c(class1,class2) %in% classlabel)) |
474 stop("Incorrect Class Labels") | 486 stop("Incorrect Class Labels") |
475 | 487 |
476 ## c1 and c2 are column indices of class1 and class2 resp. | 488 ## c1 and c2 are column indices of class1 and class2 resp. |
477 c1 <- which(classlabel %in% class1) | 489 c1 <- which(classlabel %in% class1) |
478 c2 <- which(classlabel %in% class2) | 490 c2 <- which(classlabel %in% class2) |
479 ceic <- which(classlabel %in% classeic) | 491 ceic <- which(classlabel %in% classeic) |
480 if (length(intersect(c1, c2)) > 0) | 492 if (length(intersect(c1, c2)) > 0) |
481 stop("Intersecting Classes") | 493 stop("Intersecting Classes") |
482 | 494 |
483 ## Optionally replace NA values with the value provided with missing | 495 ## Optionally replace NA values with the value provided with missing |
484 if (length(missing)) { | 496 if (length(missing)) { |
485 if (is.numeric(missing)) { | 497 if (is.numeric(missing)) { |
486 ## handles NA, Inf and -Inf | 498 ## handles NA, Inf and -Inf |
487 values[, c(c1, c2)][!is.finite(values[, c(c1, c2)])] <- missing[1] | 499 values[, c(c1, c2)][!is.finite(values[, c(c1, c2)])] <- missing[1] |
491 ## Check against missing Values | 503 ## Check against missing Values |
492 if (any(is.na(values[, c(c1, c2)]))) | 504 if (any(is.na(values[, c(c1, c2)]))) |
493 warning("`NA` values in xcmsSet. Use fillPeaks() on the object to fill", | 505 warning("`NA` values in xcmsSet. Use fillPeaks() on the object to fill", |
494 "-in missing peak values. Note however that this will also ", | 506 "-in missing peak values. Note however that this will also ", |
495 "insert intensities of 0 for peaks that can not be filled in.") | 507 "insert intensities of 0 for peaks that can not be filled in.") |
496 | 508 |
497 mean1 <- rowMeans(values[,c1,drop=FALSE], na.rm = TRUE) | 509 mean1 <- rowMeans(values[,c1,drop=FALSE], na.rm = TRUE) |
498 mean2 <- rowMeans(values[,c2,drop=FALSE], na.rm = TRUE) | 510 mean2 <- rowMeans(values[,c2,drop=FALSE], na.rm = TRUE) |
499 | 511 |
500 ## Calculate fold change. | 512 ## Calculate fold change. |
501 ## For foldchange <1 set fold to 1/fold | 513 ## For foldchange <1 set fold to 1/fold |
502 ## See tstat to check which was higher | 514 ## See tstat to check which was higher |
503 fold <- mean2 / mean1 | 515 fold <- mean2 / mean1 |
504 fold[!is.na(fold) & fold < 1] <- 1/fold[!is.na(fold) & fold < 1] | 516 fold[!is.na(fold) & fold < 1] <- 1/fold[!is.na(fold) & fold < 1] |
505 | 517 |
506 testval <- values[,c(c1,c2)] | 518 testval <- values[,c(c1,c2)] |
507 ## Replace eventual infinite values with NA (CAMERA issue #33) | 519 ## Replace eventual infinite values with NA (CAMERA issue #33) |
508 testval[is.infinite(testval)] <- NA | 520 testval[is.infinite(testval)] <- NA |
509 testclab <- c(rep(0,length(c1)),rep(1,length(c2))) | 521 testclab <- c(rep(0,length(c1)),rep(1,length(c2))) |
510 | 522 |
511 if (min(length(c1), length(c2)) >= 2) { | 523 if (min(length(c1), length(c2)) >= 2) { |
512 tstat <- mt.teststat(testval, testclab, ...) | 524 tstat <- mt.teststat(testval, testclab, ...) |
513 pvalue <- xcms:::pval(testval, testclab, tstat) | 525 pvalue <- xcms:::pval(testval, testclab, tstat) |
514 } else { | 526 } else { |
515 message("Too few samples per class, skipping t-test.") | 527 message("Too few samples per class, skipping t-test.") |
541 twosamp <- twosamp[tsidx,] | 553 twosamp <- twosamp[tsidx,] |
542 rownames(twosamp) <- 1:nrow(twosamp) | 554 rownames(twosamp) <- 1:nrow(twosamp) |
543 values<-values[tsidx,] | 555 values<-values[tsidx,] |
544 } else | 556 } else |
545 tsidx <- 1:nrow(values) | 557 tsidx <- 1:nrow(values) |
546 | 558 |
547 if (length(filebase)) | 559 if (length(filebase)) |
548 write.table(twosamp, paste(filebase, ".tsv", sep = ""), quote = FALSE, sep = "\t", col.names = NA) | 560 write.table(twosamp, paste(filebase, ".tsv", sep = ""), quote = FALSE, sep = "\t", col.names = NA) |
549 | 561 |
550 if (eicmax > 0) { | 562 if (eicmax > 0) { |
551 if (length(unique(peaks(object)[,"rt"])) > 1) { | 563 if (length(unique(peaks(object)[,"rt"])) > 1) { |
552 ## This looks like "normal" LC data | 564 ## This looks like "normal" LC data |
553 | 565 |
554 eicmax <- min(eicmax, length(tsidx)) | 566 eicmax <- min(eicmax, length(tsidx)) |
555 eics <- getEIC(object, rtrange = eicwidth*1.1, sampleidx = ceic, | 567 eics <- getEIC(object, rtrange = eicwidth*1.1, sampleidx = ceic, |
556 groupidx = tsidx[seq(length = eicmax)]) | 568 groupidx = tsidx[seq(length = eicmax)]) |
557 | 569 |
558 if (length(filebase)) { | 570 if (length(filebase)) { |
559 eicdir <- paste(filebase, "_eic", sep="") | 571 eicdir <- paste(filebase, "_eic", sep="") |
560 boxdir <- paste(filebase, "_box", sep="") | 572 boxdir <- paste(filebase, "_box", sep="") |
561 dir.create(eicdir) | 573 dir.create(eicdir) |
562 dir.create(boxdir) | 574 dir.create(boxdir) |
570 pdf(file.path(eicdir, "%003d.pdf"), width = w/72, | 582 pdf(file.path(eicdir, "%003d.pdf"), width = w/72, |
571 height = h/72, onefile = FALSE) | 583 height = h/72, onefile = FALSE) |
572 } | 584 } |
573 } | 585 } |
574 plot(eics, object, rtrange = eicwidth, mzdec=mzdec) | 586 plot(eics, object, rtrange = eicwidth, mzdec=mzdec) |
575 | 587 |
576 if (length(filebase)) | 588 if (length(filebase)) |
577 dev.off() | 589 dev.off() |
578 } else { | 590 } else { |
579 ## This looks like a direct-infusion single spectrum | 591 ## This looks like a direct-infusion single spectrum |
580 if (length(filebase)) { | 592 if (length(filebase)) { |
594 width=w, height=h) | 606 width=w, height=h) |
595 pdf(file.path(eicdir, "%003d.pdf"), width = w/72, | 607 pdf(file.path(eicdir, "%003d.pdf"), width = w/72, |
596 height = h/72, onefile = FALSE) | 608 height = h/72, onefile = FALSE) |
597 } | 609 } |
598 } | 610 } |
599 | 611 |
600 plotSpecWindow(object, gidxs = tsidx[seq(length = eicmax)], borderwidth=1) | 612 plotSpecWindow(object, gidxs = tsidx[seq(length = eicmax)], borderwidth=1) |
601 | 613 |
602 if (length(filebase)) | 614 if (length(filebase)) |
603 dev.off() | 615 dev.off() |
604 } | 616 } |
605 } | 617 } |
606 | 618 |
607 invisible(twosamp) | 619 invisible(twosamp) |
608 } | 620 } |