Mercurial > repos > shians > shrnaseq
changeset 10:d36d60a17534
- Edited tool dependencies
author | shian_su <registertonysu@gmail.com> |
---|---|
date | Wed, 19 Nov 2014 11:59:37 +1100 |
parents | cd53d419af28 (current diff) a084fa7939be (diff) |
children | b898ca3fca45 |
files | tool_dependencies.xml |
diffstat | 3 files changed, 153 insertions(+), 62 deletions(-) [+] |
line wrap: on
line diff
--- a/hairpinTool.R Wed Nov 19 11:56:02 2014 +1100 +++ b/hairpinTool.R Wed Nov 19 11:59:37 2014 +1100 @@ -42,6 +42,7 @@ # Smear Plot # Barcode Plots (If Genewise testing was selected) # Top Expression Table +# Feature Counts Table # HTML file linking to the ouputs # # Author: Shian Su - registertonysu@gmail.com - Jan 2014 @@ -56,14 +57,26 @@ library(edgeR, quietly=TRUE, warn.conflicts=FALSE) library(limma, quietly=TRUE, warn.conflicts=FALSE) -if (packageVersion("edgeR") < "3.5.23") { - message("Please update 'edgeR' to version >= 3.5.23 to run this script") +if (packageVersion("edgeR") < "3.5.27") { + stop("Please update 'edgeR' to version >= 3.5.23 to run this tool") +} + +if (packageVersion("limma")<"3.19.19") { + message("Update 'limma' to version >= 3.19.19 to see updated barcode graphs") } ################################################################################ ### Function declarations ################################################################################ +# Function to load libaries without messages +silentLibrary <- function(...) { + list <- c(...) + for (package in list){ + suppressPackageStartupMessages(library(package, character.only=TRUE)) + } +} + # Function to sanitise contrast equations so there are no whitespaces # surrounding the arithmetic operators, leading or trailing whitespace sanitiseEquation <- function(equation) { @@ -96,16 +109,16 @@ # Function has string input and generates both a pdf and png output strings imgOut <- function(filename) { assign(paste0(filename, "Png"), makeOut(paste0(filename,".png")), - envir = .GlobalEnv) + envir=.GlobalEnv) assign(paste0(filename, "Pdf"), makeOut(paste0(filename,".pdf")), - envir = .GlobalEnv) + envir=.GlobalEnv) } # Create cat function default path set, default seperator empty and appending # true by default (Ripped straight from the cat function with altered argument # defaults) -cata <- function(..., file = htmlPath, sep = "", fill = FALSE, labels = NULL, - append = TRUE) { +cata <- function(..., file=htmlPath, sep="", fill=FALSE, labels=NULL, + append=TRUE) { if (is.character(file)) if (file == "") file <- stdout() @@ -182,6 +195,7 @@ workMode <- as.character(argv[12]) htmlPath <- as.character(argv[13]) folderPath <- as.character(argv[14]) + if (workMode=="classic") { pairData <- character() pairData[2] <- as.character(argv[15]) @@ -273,7 +287,7 @@ } # Generate output folder and paths -dir.create(folderPath) +dir.create(folderPath, showWarnings=FALSE) # Generate links for outputs imgOut("barHairpin") @@ -295,17 +309,26 @@ smearPng[i] <- makeOut(paste0("smear(", contrastData[i], ").png")) smearPdf[i] <- makeOut(paste0("smear(", contrastData[i], ").pdf")) topOut[i] <- makeOut(paste0("toptag(", contrastData[i], ").tsv")) - roastOut[i] <- makeOut(paste0("roast(", contrastData[i], ").tsv")) + roastOut[i] <- makeOut(paste0("gene_level(", contrastData[i], ").tsv")) barcodePng[i] <- makeOut(paste0("barcode(", contrastData[i], ").png")) barcodePdf[i] <- makeOut(paste0("barcode(", contrastData[i], ").pdf")) } } +countsOut <- makeOut("counts.tsv") +sessionOut <- makeOut("session_info.txt") + # Initialise data for html links and images, table with the link label and # link address linkData <- data.frame(Label=character(), Link=character(), stringsAsFactors=FALSE) imageData <- data.frame(Label=character(), Link=character(), stringsAsFactors=FALSE) + +# Initialise vectors for storage of up/down/neutral regulated counts +upCount <- numeric() +downCount <- numeric() +flatCount <- numeric() + ################################################################################ ### Data Processing ################################################################################ @@ -328,7 +351,7 @@ selectVals <- as.numeric(unique(selectVals)) } else { selectVals <- gsub(" ", "", selectVals, fixed=TRUE) - selectVals <- unlist(strsplit(selectVals, " ")) + selectVals <- unlist(strsplit(selectVals, ",")) } } @@ -447,6 +470,17 @@ linkData <- rbind(linkData, newEntry) invisible(dev.off()) +# BCV Plot +png(bcvPng, width=600, height=600) +plotBCV(data, main="BCV Plot") +imageData <- rbind(imageData, c("BCV Plot", "bcv.png")) +invisible(dev.off()) + +pdf(bcvPdf) +plotBCV(data, main="BCV Plot") +newEntry <- c("BCV Plot (.pdf)", "bcv.pdf") +invisible(dev.off()) + if (workMode=="classic") { # Assess differential representation using classic exact testing methodology # in edgeR @@ -455,20 +489,30 @@ top <- topTags(testData, n=Inf) topIDs <- top$table[(top$table$FDR < fdrThresh) & (abs(top$table$logFC) > lfcThresh), 1] + write.table(top, file=topOut, row.names=FALSE, sep="\t") + linkName <- paste0("Top Tags Table(", pairData[2], "-", pairData[1], ") (.tsv)") linkAddr <- paste0("toptag(", pairData[2], "-", pairData[1], ").tsv") linkData <- rbind(linkData, c(linkName, linkAddr)) + upCount[1] <- sum(top$table$FDR < fdrThresh & top$table$logFC > lfcThresh) + downCount[1] <- sum(top$table$FDR < fdrThresh & + top$table$logFC < -lfcThresh) + flatCount[1] <- sum(top$table$FDR > fdrThresh | + abs(top$table$logFC) < lfcThresh) + + + # Select hairpins with FDR < 0.05 to highlight on plot png(smearPng, width=600, height=600) plotTitle <- gsub(".", " ", paste0("Smear Plot: ", pairData[2], "-", pairData[1]), - fixed = TRUE) + fixed=TRUE) plotSmear(testData, de.tags=topIDs, pch=20, cex=1.0, main=plotTitle) - abline(h = c(-1, 0, 1), col = c("dodgerblue", "yellow", "dodgerblue"), lty=2) + abline(h=c(-1, 0, 1), col=c("dodgerblue", "yellow", "dodgerblue"), lty=2) imgName <- paste0("Smear Plot(", pairData[2], "-", pairData[1], ")") imgAddr <- paste0("smear(", pairData[2], "-", pairData[1],").png") imageData <- rbind(imageData, c(imgName, imgAddr)) @@ -477,14 +521,15 @@ pdf(smearPdf) plotTitle <- gsub(".", " ", paste0("Smear Plot: ", pairData[2], "-", pairData[1]), - fixed = TRUE) + fixed=TRUE) plotSmear(testData, de.tags=topIDs, pch=20, cex=1.0, main=plotTitle) - abline(h = c(-1, 0, 1), col = c("dodgerblue", "yellow", "dodgerblue"), lty=2) + abline(h=c(-1, 0, 1), col=c("dodgerblue", "yellow", "dodgerblue"), lty=2) imgName <- paste0("Smear Plot(", pairData[2], "-", pairData[1], ") (.pdf)") imgAddr <- paste0("smear(", pairData[2], "-", pairData[1], ").pdf") linkData <- rbind(linkData, c(imgName, imgAddr)) invisible(dev.off()) + } else if (workMode=="glm") { # Generating design information factors <- factor(data$sample$group) @@ -494,14 +539,15 @@ # Split up contrasts seperated by comma into a vector contrastData <- unlist(strsplit(contrastData, split=",")) + for (i in 1:length(contrastData)) { # Generate contrasts information contrasts <- makeContrasts(contrasts=contrastData[i], levels=design) # Fit negative bionomial GLM - fit = glmFit(data, design) + fit <- glmFit(data, design) # Carry out Likelihood ratio test - testData = glmLRT(fit, contrast=contrasts) + testData <- glmLRT(fit, contrast=contrasts) # Select hairpins with FDR < 0.05 to highlight on plot top <- topTags(testData, n=Inf) @@ -513,6 +559,13 @@ linkAddr <- paste0("toptag(", contrastData[i], ").tsv") linkData <- rbind(linkData, c(linkName, linkAddr)) + # Collect counts for differential representation + upCount[i] <- sum(top$table$FDR < fdrThresh & top$table$logFC > lfcThresh) + downCount[i] <- sum(top$table$FDR < fdrThresh & + top$table$logFC < -lfcThresh) + flatCount[i] <- sum(top$table$FDR > fdrThresh | + abs(top$table$logFC) < lfcThresh) + # Make a plot of logFC versus logCPM png(smearPng[i], height=600, width=600) plotTitle <- paste("Smear Plot:", gsub(".", " ", contrastData[i], @@ -548,7 +601,7 @@ if (wantRoast) { # Input preparaton for roast - nrot = 9999 + nrot <- 9999 set.seed(602214129) roastData <- mroast(data, index=geneList, design=design, contrast=contrasts, nrot=nrot) @@ -556,7 +609,7 @@ write.table(roastData, file=roastOut[i], row.names=FALSE, sep="\t") linkName <- paste0("Gene Level Analysis Table(", contrastData[i], ") (.tsv)") - linkAddr <- paste0("roast(", contrastData[i], ").tsv") + linkAddr <- paste0("gene_level(", contrastData[i], ").tsv") linkData <- rbind(linkData, c(linkName, linkAddr)) if (selectOpt=="rank") { selectedGenes <- rownames(roastData)[selectVals] @@ -599,6 +652,27 @@ } } +# Generate data frame of the significant differences +sigDiff <- data.frame(Up=upCount, Flat=flatCount, Down=downCount) +if (workMode == "glm") { + row.names(sigDiff) <- contrastData +} else if (workMode == "classic") { + row.names(sigDiff) <- paste0(pairData[2], "-", pairData[1]) +} + +# Output table of summarised counts +ID <- rownames(data$counts) +outputCounts <- cbind(ID, data$counts) +write.table(outputCounts, file=countsOut, row.names=FALSE, sep="\t", + quote=FALSE) +linkName <- "Counts table (.tsv)" +linkAddr <- "counts.tsv" +linkData <- rbind(linkData, c(linkName, linkAddr)) + +# Record session info +writeLines(capture.output(sessionInfo()), sessionOut) +linkData <- rbind(linkData, c("Session Info", "session_info.txt")) + # Record ending time and calculate total run time timeEnd <- as.character(Sys.time()) timeTaken <- capture.output(round(difftime(timeEnd,timeStart), digits=3)) @@ -620,12 +694,12 @@ ListItem(hpReadout[1]) ListItem(hpReadout[2]) cata("</ul>\n") - cata(hpReadout[3], "<br/>\n") + cata(hpReadout[3], "<br />\n") cata("<ul>\n") ListItem(hpReadout[4]) ListItem(hpReadout[7]) cata("</ul>\n") - cata(hpReadout[8:11], sep="<br/>\n") + cata(hpReadout[8:11], sep="<br />\n") cata("<br />\n") cata("<b>Please check that read percentages are consistent with ") cata("expectations.</b><br >\n") @@ -642,8 +716,7 @@ commonBCV, "<br />\n") cata("<h4>Output:</h4>\n") -cata("All images displayed have PDF copy at the bottom of the page, these can ") -cata("exported in a pdf viewer to high resolution image format. <br/>\n") +cata("PDF copies of JPEGS available in 'Plots' section.<br />\n") for (i in 1:nrow(imageData)) { if (grepl("barcode", imageData$Link[i])) { if (packageVersion("limma")<"3.19.19") { @@ -657,11 +730,30 @@ HtmlImage(imageData$Link[i], imageData$Label[i]) } } -cata("<br/>\n") +cata("<br />\n") + +cata("<h4>Differential Representation Counts:</h4>\n") + +cata("<table border=\"1\" cellpadding=\"4\">\n") +cata("<tr>\n") +TableItem() +for (i in colnames(sigDiff)) { + TableHeadItem(i) +} +cata("</tr>\n") +for (i in 1:nrow(sigDiff)) { + cata("<tr>\n") + TableHeadItem(unmake.names(row.names(sigDiff)[i])) + for (j in 1:ncol(sigDiff)) { + TableItem(as.character(sigDiff[i, j])) + } + cata("</tr>\n") +} +cata("</table>") cata("<h4>Plots:</h4>\n") for (i in 1:nrow(linkData)) { - if (!grepl(".tsv", linkData$Link[i])) { + if (grepl(".pdf", linkData$Link[i])) { HtmlLink(linkData$Link[i], linkData$Label[i]) } } @@ -673,12 +765,10 @@ } } -cata("<p>alt-click any of the links to download the file, or click the name ") -cata("of this task in the galaxy history panel and click on the floppy ") -cata("disk icon to download all files in a zip archive.</p>\n") -cata("<p>.tsv files are tab seperated files that can be viewed using Excel ") -cata("or other spreadsheet programs</p>\n") -cata("<table border=\"0\">\n") +cata("<p>Alt-click links to download file.</p>\n") +cata("<p>Click floppy disc icon associated history item to download ") +cata("all files.</p>\n") +cata("<p>.tsv files can be viewed in Excel or any spreadsheet program.</p>\n") cata("<h4>Additional Information:</h4>\n") @@ -689,9 +779,9 @@ } if (cpmReq!=0 && sampleReq!=0) { - tempStr <- paste("Hairpins that do not have more than", cpmReq, - "CPM in at least", sampleReq, "samples are considered", - "insignificant and filtered out.") + tempStr <- paste("Hairpins without more than", cpmReq, + "CPM in at least", sampleReq, "samples are insignificant", + "and filtered out.") ListItem(tempStr) filterProp <- round(filteredCount/preFilterCount*100, digits=2) tempStr <- paste0(filteredCount, " of ", preFilterCount," (", filterProp, @@ -705,8 +795,6 @@ ListItem("A generalised linear model was fitted to each hairpin.") } - - cit <- character() link <-character() link[1] <- paste0("<a href=\"", @@ -742,6 +830,14 @@ ListItem(cit[4]) cata("</ol>\n") +cata("<p>Report problems to: su.s@wehi.edu.au</p>\n") + +for (i in 1:nrow(linkData)) { + if (grepl("session_info", linkData$Link[i])) { + HtmlLink(linkData$Link[i], linkData$Label[i]) + } +} + cata("<table border=\"0\">\n") cata("<tr>\n") TableItem("Task started at:"); TableItem(timeStart)
--- a/hairpinTool.xml Wed Nov 19 11:56:02 2014 +1100 +++ b/hairpinTool.xml Wed Nov 19 11:59:37 2014 +1100 @@ -1,11 +1,11 @@ -<tool id="shRNAseq" name="shRNAseq Tool" version="1.0.6"> +<tool id="shRNAseq" name="shRNAseq Tool" version="1.0.10"> <description> Analyse hairpin differential representation using edgeR </description> <requirements> - <requirement type="R-module" version="3.5.27">edgeR</requirement> - <requirement type="R-module" version="3.18.13">limma</requirement> + <requirement type="package" version="3.0.3">R</requirement> + <requirement type="package" version="1.0.10">shrnaseq</requirement> </requirements> <stdio> @@ -35,8 +35,8 @@ #end if #else: $inputOpt.counts - $inputOpt.anno - $inputOpt.factors + $inputOpt.hairpin + $inputOpt.samples 0 0 0 #end if @@ -57,7 +57,7 @@ #if $workMode.mode=="classic": "$workMode.pair1" "$workMode.pair2" - #else: + #elif $workMode.mode=="glm": "$workMode.contrast" $workMode.roast.roastOption #if $workMode.roast.roastOption=="yes": @@ -118,9 +118,9 @@ <when value="counts"> <param name="counts" type="data" format="tabular" label="Counts Table"/> - <param name="anno" type="data" format="tabular" + <param name="hairpin" type="data" format="tabular" label="Hairpin Annotation"/> - <param name="factors" type="data" format="tabular" + <param name="samples" type="data" format="tabular" label="Sample Annotation"/> </when> </conditional> @@ -232,18 +232,6 @@ <outputs> <data format="html" name="outFile" label="shRNAseq Analysis"/> </outputs> - <!-- Tests under construction - <tests> - <test> - <param name="counts" value="zuber_count.txt"/> - <param name="anno" value="zuber_anno.txt"/> - <param name="factors" value="zuber_samples.txt"/> - <param name="fdr" value="0.05"/> - <output name="output" file="shRNAseq_Analysis_html" lines_diff="50"/> - </test> - - </tests> - --> <help> .. class:: infomark @@ -254,6 +242,11 @@ reads. This tool will generate plots and tables for the analysis of differential representation. +.. class:: infomark + +A tutorial of how to use this tool is available at: +http://bioinf.wehi.edu.au/shRNAseq/galaxy.html + ----- .. class:: infomark @@ -412,6 +405,8 @@ * McCarthy DJ, Chen Y and Smyth GK (2012). Differential expression analysis of multifactor RNA-Seq experiments with respect to biological variation. Nucleic Acids Research 40, 4288-4297 + +Report problems to: su.s@wehi.edu.au .. _edgeR: http://www.bioconductor.org/packages/release/bioc/html/edgeR.html .. _limma: http://www.bioconductor.org/packages/release/bioc/html/limma.html
--- a/tool_dependencies.xml Wed Nov 19 11:56:02 2014 +1100 +++ b/tool_dependencies.xml Wed Nov 19 11:59:37 2014 +1100 @@ -1,20 +1,20 @@ <?xml version="1.0"?> <tool_dependency> - <package name="R_3_1_1" version="3.1.1"> - <repository name="package_r_3_1_1" owner="fubar" prior_installation_required="True" /> + <package name="R" version="3.0.3"> + <repository name="package_r_3_0_3" owner="iuc" prior_installation_required="True" /> </package> <package name="shrnaseq" version="1.0.10"> <install version="1.0"> <actions> <action type="setup_r_environment"> - <repository name="package_r_3_1_1" owner="fubar"> - <package name="R_3_1_1" version="3.1.1" /> + <repository name="package_r_3_0_3" owner="iuc"> + <package name="R" version="3.0.3" /> </repository> - <package>https://github.com/fubar2/galaxy_tool_source/blob/master/RELEASE_2_14/locfit_1.5-9.1.tar.gz</package> - <package>https://github.com/fubar2/galaxy_tool_source/blob/master/RELEASE_2_14/statmod_1.4.20.tar.gz</package> - <package>https://github.com/fubar2/galaxy_tool_source/blob/master/RELEASE_2_14/limma_3.20.9.tar.gz</package> - <package>https://github.com/fubar2/galaxy_tool_source/blob/master/RELEASE_2_14/edgeR_3.6.8.tar.gz</package> + <package>https://github.com/Shians/diffexp/tree/master/dependencies/locfit_1.5-9.1.tar.gz</package> + <package>https://github.com/Shians/diffexp/tree/master/dependencies/statmod_1.4.20.tar.gz</package> + <package>https://github.com/Shians/diffexp/tree/master/dependencies/limma_3.20.7.tar.gz</package> + <package>https://github.com/Shians/diffexp/tree/master/dependencies/edgeR_3.6.2.tar.gz</package> </action> </actions> </install>