changeset 10:d36d60a17534

- Edited tool dependencies
author shian_su <registertonysu@gmail.com>
date Wed, 19 Nov 2014 11:59:37 +1100
parents cd53d419af28 (current diff) a084fa7939be (diff)
children b898ca3fca45
files tool_dependencies.xml
diffstat 3 files changed, 153 insertions(+), 62 deletions(-) [+]
line wrap: on
line diff
--- a/hairpinTool.R	Wed Nov 19 11:56:02 2014 +1100
+++ b/hairpinTool.R	Wed Nov 19 11:59:37 2014 +1100
@@ -42,6 +42,7 @@
 #       Smear Plot
 #       Barcode Plots (If Genewise testing was selected)
 #       Top Expression Table
+#       Feature Counts Table
 #       HTML file linking to the ouputs
 #
 # Author: Shian Su - registertonysu@gmail.com - Jan 2014
@@ -56,14 +57,26 @@
 library(edgeR, quietly=TRUE, warn.conflicts=FALSE)
 library(limma, quietly=TRUE, warn.conflicts=FALSE)
 
-if (packageVersion("edgeR") < "3.5.23") {
-  message("Please update 'edgeR' to version >= 3.5.23 to run this script")
+if (packageVersion("edgeR") < "3.5.27") {
+  stop("Please update 'edgeR' to version >= 3.5.23 to run this tool")
+}
+
+if (packageVersion("limma")<"3.19.19") {
+  message("Update 'limma' to version >= 3.19.19 to see updated barcode graphs")
 }
 
 ################################################################################
 ### Function declarations
 ################################################################################
 
+# Function to load libaries without messages
+silentLibrary <- function(...) {
+  list <- c(...)
+  for (package in list){
+    suppressPackageStartupMessages(library(package, character.only=TRUE))
+  }
+}
+
 # Function to sanitise contrast equations so there are no whitespaces
 # surrounding the arithmetic operators, leading or trailing whitespace
 sanitiseEquation <- function(equation) {
@@ -96,16 +109,16 @@
 # Function has string input and generates both a pdf and png output strings
 imgOut <- function(filename) {
   assign(paste0(filename, "Png"), makeOut(paste0(filename,".png")), 
-         envir = .GlobalEnv)
+         envir=.GlobalEnv)
   assign(paste0(filename, "Pdf"), makeOut(paste0(filename,".pdf")),
-         envir = .GlobalEnv)
+         envir=.GlobalEnv)
 }
 
 # Create cat function default path set, default seperator empty and appending
 # true by default (Ripped straight from the cat function with altered argument
 # defaults)
-cata <- function(..., file = htmlPath, sep = "", fill = FALSE, labels = NULL, 
-                 append = TRUE) {
+cata <- function(..., file=htmlPath, sep="", fill=FALSE, labels=NULL, 
+                 append=TRUE) {
   if (is.character(file)) 
     if (file == "") 
       file <- stdout()
@@ -182,6 +195,7 @@
 workMode <- as.character(argv[12])
 htmlPath <- as.character(argv[13])
 folderPath <- as.character(argv[14])
+
 if (workMode=="classic") {
   pairData <- character()
   pairData[2] <- as.character(argv[15])
@@ -273,7 +287,7 @@
 }
 
 # Generate output folder and paths
-dir.create(folderPath)
+dir.create(folderPath, showWarnings=FALSE)
 
 # Generate links for outputs
 imgOut("barHairpin")
@@ -295,17 +309,26 @@
     smearPng[i] <- makeOut(paste0("smear(", contrastData[i], ").png"))
     smearPdf[i] <- makeOut(paste0("smear(", contrastData[i], ").pdf"))
     topOut[i] <- makeOut(paste0("toptag(", contrastData[i], ").tsv"))
-    roastOut[i] <- makeOut(paste0("roast(", contrastData[i], ").tsv"))
+    roastOut[i] <- makeOut(paste0("gene_level(", contrastData[i], ").tsv"))
     barcodePng[i] <- makeOut(paste0("barcode(", contrastData[i], ").png"))
     barcodePdf[i] <- makeOut(paste0("barcode(", contrastData[i], ").pdf"))
   }
 }
+countsOut <- makeOut("counts.tsv")
+sessionOut <- makeOut("session_info.txt")
+
 # Initialise data for html links and images, table with the link label and
 # link address
 linkData <- data.frame(Label=character(), Link=character(),
                        stringsAsFactors=FALSE)
 imageData <- data.frame(Label=character(), Link=character(),
                         stringsAsFactors=FALSE)
+                        
+# Initialise vectors for storage of up/down/neutral regulated counts
+upCount <- numeric()
+downCount <- numeric()
+flatCount <- numeric()
+
 ################################################################################
 ### Data Processing
 ################################################################################
@@ -328,7 +351,7 @@
     selectVals <- as.numeric(unique(selectVals))
   } else {
     selectVals <- gsub(" ", "", selectVals, fixed=TRUE)
-    selectVals <- unlist(strsplit(selectVals, " "))
+    selectVals <- unlist(strsplit(selectVals, ","))
   }                                                           
 }
                                                   
@@ -447,6 +470,17 @@
 linkData <- rbind(linkData, newEntry)
 invisible(dev.off())
 
+# BCV Plot
+png(bcvPng, width=600, height=600)
+plotBCV(data, main="BCV Plot")
+imageData <- rbind(imageData, c("BCV Plot", "bcv.png"))
+invisible(dev.off())
+
+pdf(bcvPdf)
+plotBCV(data, main="BCV Plot")
+newEntry <- c("BCV Plot (.pdf)", "bcv.pdf")
+invisible(dev.off())
+
 if (workMode=="classic") {
   # Assess differential representation using classic exact testing methodology 
   # in edgeR
@@ -455,20 +489,30 @@
   top <- topTags(testData, n=Inf)
   topIDs <- top$table[(top$table$FDR < fdrThresh) &
                       (abs(top$table$logFC) > lfcThresh), 1]
+                      
   write.table(top, file=topOut, row.names=FALSE, sep="\t")
+  
   linkName <- paste0("Top Tags Table(", pairData[2], "-", pairData[1], 
                      ") (.tsv)")
   linkAddr <- paste0("toptag(", pairData[2], "-", pairData[1], ").tsv")
   linkData <- rbind(linkData, c(linkName, linkAddr))
   
+  upCount[1] <- sum(top$table$FDR < fdrThresh & top$table$logFC > lfcThresh)
+  downCount[1] <- sum(top$table$FDR < fdrThresh & 
+                      top$table$logFC < -lfcThresh)
+  flatCount[1] <- sum(top$table$FDR > fdrThresh |
+                      abs(top$table$logFC) < lfcThresh)
+  
+  
+  
   # Select hairpins with FDR < 0.05 to highlight on plot
   png(smearPng, width=600, height=600)
   plotTitle <- gsub(".", " ", 
                     paste0("Smear Plot: ", pairData[2], "-", pairData[1]),
-                    fixed = TRUE)
+                    fixed=TRUE)
   plotSmear(testData, de.tags=topIDs, 
             pch=20, cex=1.0, main=plotTitle)
-  abline(h = c(-1, 0, 1), col = c("dodgerblue", "yellow", "dodgerblue"), lty=2)
+  abline(h=c(-1, 0, 1), col=c("dodgerblue", "yellow", "dodgerblue"), lty=2)
   imgName <- paste0("Smear Plot(", pairData[2], "-", pairData[1], ")")
   imgAddr <- paste0("smear(", pairData[2], "-", pairData[1],").png")
   imageData <- rbind(imageData, c(imgName, imgAddr))
@@ -477,14 +521,15 @@
   pdf(smearPdf)
   plotTitle <- gsub(".", " ", 
                     paste0("Smear Plot: ", pairData[2], "-", pairData[1]),
-                    fixed = TRUE)
+                    fixed=TRUE)
   plotSmear(testData, de.tags=topIDs, 
             pch=20, cex=1.0, main=plotTitle)
-  abline(h = c(-1, 0, 1), col = c("dodgerblue", "yellow", "dodgerblue"), lty=2)
+  abline(h=c(-1, 0, 1), col=c("dodgerblue", "yellow", "dodgerblue"), lty=2)
   imgName <- paste0("Smear Plot(", pairData[2], "-", pairData[1], ") (.pdf)")
   imgAddr <- paste0("smear(", pairData[2], "-", pairData[1], ").pdf")
   linkData <- rbind(linkData, c(imgName, imgAddr))
   invisible(dev.off())
+  
 } else if (workMode=="glm") {
   # Generating design information
   factors <- factor(data$sample$group)
@@ -494,14 +539,15 @@
   
   # Split up contrasts seperated by comma into a vector
   contrastData <- unlist(strsplit(contrastData, split=","))
+  
   for (i in 1:length(contrastData)) {
     # Generate contrasts information
     contrasts <- makeContrasts(contrasts=contrastData[i], levels=design)
     
     # Fit negative bionomial GLM
-    fit = glmFit(data, design)
+    fit <- glmFit(data, design)
     # Carry out Likelihood ratio test
-    testData = glmLRT(fit, contrast=contrasts)
+    testData <- glmLRT(fit, contrast=contrasts)
     
     # Select hairpins with FDR < 0.05 to highlight on plot
     top <- topTags(testData, n=Inf)
@@ -513,6 +559,13 @@
     linkAddr <- paste0("toptag(", contrastData[i], ").tsv")
     linkData <- rbind(linkData, c(linkName, linkAddr))
     
+    # Collect counts for differential representation
+    upCount[i] <- sum(top$table$FDR < fdrThresh & top$table$logFC > lfcThresh)
+    downCount[i] <- sum(top$table$FDR < fdrThresh & 
+                        top$table$logFC < -lfcThresh)
+    flatCount[i] <- sum(top$table$FDR > fdrThresh |
+                        abs(top$table$logFC) < lfcThresh)
+    
     # Make a plot of logFC versus logCPM
     png(smearPng[i], height=600, width=600)
     plotTitle <- paste("Smear Plot:", gsub(".", " ", contrastData[i], 
@@ -548,7 +601,7 @@
     
     if (wantRoast) {
       # Input preparaton for roast
-      nrot = 9999
+      nrot <- 9999
       set.seed(602214129)
       roastData <- mroast(data, index=geneList, design=design,
                          contrast=contrasts, nrot=nrot)
@@ -556,7 +609,7 @@
       write.table(roastData, file=roastOut[i], row.names=FALSE, sep="\t")
       linkName <- paste0("Gene Level Analysis Table(", contrastData[i], 
                          ") (.tsv)")
-      linkAddr <- paste0("roast(", contrastData[i], ").tsv")
+      linkAddr <- paste0("gene_level(", contrastData[i], ").tsv")
       linkData <- rbind(linkData, c(linkName, linkAddr))
       if (selectOpt=="rank") {
         selectedGenes <- rownames(roastData)[selectVals]
@@ -599,6 +652,27 @@
   }
 }
 
+# Generate data frame of the significant differences
+sigDiff <- data.frame(Up=upCount, Flat=flatCount, Down=downCount)
+if (workMode == "glm") {
+  row.names(sigDiff) <- contrastData
+} else if (workMode == "classic") {
+  row.names(sigDiff) <- paste0(pairData[2], "-", pairData[1])
+}
+
+# Output table of summarised counts
+ID <- rownames(data$counts)
+outputCounts <- cbind(ID, data$counts)
+write.table(outputCounts, file=countsOut, row.names=FALSE, sep="\t",
+            quote=FALSE)
+linkName <- "Counts table (.tsv)"
+linkAddr <- "counts.tsv"
+linkData <- rbind(linkData, c(linkName, linkAddr))
+
+# Record session info
+writeLines(capture.output(sessionInfo()), sessionOut)
+linkData <- rbind(linkData, c("Session Info", "session_info.txt"))
+
 # Record ending time and calculate total run time
 timeEnd <- as.character(Sys.time())
 timeTaken <- capture.output(round(difftime(timeEnd,timeStart), digits=3))
@@ -620,12 +694,12 @@
   ListItem(hpReadout[1])
   ListItem(hpReadout[2])
   cata("</ul>\n")
-  cata(hpReadout[3], "<br/>\n")
+  cata(hpReadout[3], "<br />\n")
   cata("<ul>\n")
   ListItem(hpReadout[4])
   ListItem(hpReadout[7])
   cata("</ul>\n")
-  cata(hpReadout[8:11], sep="<br/>\n")
+  cata(hpReadout[8:11], sep="<br />\n")
   cata("<br />\n")
   cata("<b>Please check that read percentages are consistent with ")
   cata("expectations.</b><br >\n")
@@ -642,8 +716,7 @@
      commonBCV, "<br />\n")
 
 cata("<h4>Output:</h4>\n")
-cata("All images displayed have PDF copy at the bottom of the page, these can ")
-cata("exported in a pdf viewer to high resolution image format. <br/>\n")
+cata("PDF copies of JPEGS available in 'Plots' section.<br />\n")
 for (i in 1:nrow(imageData)) {
   if (grepl("barcode", imageData$Link[i])) {
     if (packageVersion("limma")<"3.19.19") {
@@ -657,11 +730,30 @@
     HtmlImage(imageData$Link[i], imageData$Label[i])
   }
 }
-cata("<br/>\n")
+cata("<br />\n")
+
+cata("<h4>Differential Representation Counts:</h4>\n")
+
+cata("<table border=\"1\" cellpadding=\"4\">\n")
+cata("<tr>\n")
+TableItem()
+for (i in colnames(sigDiff)) {
+  TableHeadItem(i)
+}
+cata("</tr>\n")
+for (i in 1:nrow(sigDiff)) {
+  cata("<tr>\n")
+  TableHeadItem(unmake.names(row.names(sigDiff)[i]))
+  for (j in 1:ncol(sigDiff)) {
+    TableItem(as.character(sigDiff[i, j]))
+  }
+  cata("</tr>\n")
+}
+cata("</table>")
 
 cata("<h4>Plots:</h4>\n")
 for (i in 1:nrow(linkData)) {
-  if (!grepl(".tsv", linkData$Link[i])) {
+  if (grepl(".pdf", linkData$Link[i])) {
     HtmlLink(linkData$Link[i], linkData$Label[i])
   }
 }
@@ -673,12 +765,10 @@
   }
 }
 
-cata("<p>alt-click any of the links to download the file, or click the name ")
-cata("of this task in the galaxy history panel and click on the floppy ")
-cata("disk icon to download all files in a zip archive.</p>\n")
-cata("<p>.tsv files are tab seperated files that can be viewed using Excel ")
-cata("or other spreadsheet programs</p>\n")
-cata("<table border=\"0\">\n")
+cata("<p>Alt-click links to download file.</p>\n")
+cata("<p>Click floppy disc icon associated history item to download ")
+cata("all files.</p>\n")
+cata("<p>.tsv files can be viewed in Excel or any spreadsheet program.</p>\n")
 
 cata("<h4>Additional Information:</h4>\n")
 
@@ -689,9 +779,9 @@
 }
 
 if (cpmReq!=0 && sampleReq!=0) {
-  tempStr <- paste("Hairpins that do not have more than", cpmReq,
-                   "CPM in at least", sampleReq, "samples are considered",
-                   "insignificant and filtered out.")
+  tempStr <- paste("Hairpins without more than", cpmReq,
+                   "CPM in at least", sampleReq, "samples are insignificant",
+                   "and filtered out.")
   ListItem(tempStr)
   filterProp <- round(filteredCount/preFilterCount*100, digits=2)
   tempStr <- paste0(filteredCount, " of ", preFilterCount," (", filterProp,
@@ -705,8 +795,6 @@
   ListItem("A generalised linear model was fitted to each hairpin.")
 }
 
-
-
 cit <- character()
 link <-character()
 link[1] <- paste0("<a href=\"",
@@ -742,6 +830,14 @@
 ListItem(cit[4])
 cata("</ol>\n")
 
+cata("<p>Report problems to: su.s@wehi.edu.au</p>\n")
+
+for (i in 1:nrow(linkData)) {
+  if (grepl("session_info", linkData$Link[i])) {
+    HtmlLink(linkData$Link[i], linkData$Label[i])
+  }
+}
+
 cata("<table border=\"0\">\n")
 cata("<tr>\n")
 TableItem("Task started at:"); TableItem(timeStart)
--- a/hairpinTool.xml	Wed Nov 19 11:56:02 2014 +1100
+++ b/hairpinTool.xml	Wed Nov 19 11:59:37 2014 +1100
@@ -1,11 +1,11 @@
-<tool id="shRNAseq" name="shRNAseq Tool" version="1.0.6">
+<tool id="shRNAseq" name="shRNAseq Tool" version="1.0.10">
   <description>
     Analyse hairpin differential representation using edgeR
   </description>
     
   <requirements>
-    <requirement type="R-module" version="3.5.27">edgeR</requirement>
-    <requirement type="R-module" version="3.18.13">limma</requirement>
+    <requirement type="package" version="3.0.3">R</requirement>
+    <requirement type="package" version="1.0.10">shrnaseq</requirement>
   </requirements>
   
   <stdio>
@@ -35,8 +35,8 @@
                   #end if
                 #else:
                   $inputOpt.counts
-                  $inputOpt.anno
-                  $inputOpt.factors
+                  $inputOpt.hairpin
+                  $inputOpt.samples
                   0 0 0
                 #end if
           
@@ -57,7 +57,7 @@
                 #if $workMode.mode=="classic":
                   "$workMode.pair1"
                   "$workMode.pair2"
-                #else:
+                #elif $workMode.mode=="glm":
                   "$workMode.contrast"
                   $workMode.roast.roastOption
                   #if $workMode.roast.roastOption=="yes":
@@ -118,9 +118,9 @@
       
       <when value="counts">
         <param name="counts" type="data" format="tabular" label="Counts Table"/>
-        <param name="anno" type="data" format="tabular" 
+        <param name="hairpin" type="data" format="tabular" 
                label="Hairpin Annotation"/>
-        <param name="factors" type="data" format="tabular"
+        <param name="samples" type="data" format="tabular"
                label="Sample Annotation"/> 
       </when>
     </conditional>
@@ -232,18 +232,6 @@
   <outputs>
     <data format="html" name="outFile" label="shRNAseq Analysis"/>
   </outputs>
-  <!-- Tests under construction
-  <tests>
-    <test>
-      <param name="counts" value="zuber_count.txt"/>
-      <param name="anno" value="zuber_anno.txt"/>
-      <param name="factors" value="zuber_samples.txt"/>
-      <param name="fdr" value="0.05"/>
-      <output name="output" file="shRNAseq_Analysis_html" lines_diff="50"/>
-    </test>
-    
-  </tests>
-  -->
   <help>
 .. class:: infomark
 
@@ -254,6 +242,11 @@
 reads. This tool will generate plots and tables for the analysis of differential
 representation.
 
+.. class:: infomark
+
+A tutorial of how to use this tool is available at:
+http://bioinf.wehi.edu.au/shRNAseq/galaxy.html
+
 -----
 
 .. class:: infomark
@@ -412,6 +405,8 @@
 	* McCarthy DJ, Chen Y and Smyth GK (2012). Differential expression analysis 
 	  of multifactor RNA-Seq experiments with respect to biological variation. 
 	  Nucleic Acids Research 40, 4288-4297
+	  
+Report problems to: su.s@wehi.edu.au
 
 .. _edgeR: http://www.bioconductor.org/packages/release/bioc/html/edgeR.html
 .. _limma: http://www.bioconductor.org/packages/release/bioc/html/limma.html
--- a/tool_dependencies.xml	Wed Nov 19 11:56:02 2014 +1100
+++ b/tool_dependencies.xml	Wed Nov 19 11:59:37 2014 +1100
@@ -1,20 +1,20 @@
 <?xml version="1.0"?>
 <tool_dependency>
-        <package name="R_3_1_1" version="3.1.1">
-        <repository name="package_r_3_1_1" owner="fubar" prior_installation_required="True" />
+    <package name="R" version="3.0.3">
+        <repository name="package_r_3_0_3" owner="iuc" prior_installation_required="True" />
     </package>
     <package name="shrnaseq" version="1.0.10">
         <install version="1.0">
             <actions>
                 <action type="setup_r_environment">
 
-                    <repository name="package_r_3_1_1" owner="fubar">
-                        <package name="R_3_1_1" version="3.1.1" />
+                    <repository name="package_r_3_0_3" owner="iuc">
+                        <package name="R" version="3.0.3" />
                     </repository>
-                    <package>https://github.com/fubar2/galaxy_tool_source/blob/master/RELEASE_2_14/locfit_1.5-9.1.tar.gz</package>
-                    <package>https://github.com/fubar2/galaxy_tool_source/blob/master/RELEASE_2_14/statmod_1.4.20.tar.gz</package>
-                    <package>https://github.com/fubar2/galaxy_tool_source/blob/master/RELEASE_2_14/limma_3.20.9.tar.gz</package>
-                    <package>https://github.com/fubar2/galaxy_tool_source/blob/master/RELEASE_2_14/edgeR_3.6.8.tar.gz</package>
+                    <package>https://github.com/Shians/diffexp/tree/master/dependencies/locfit_1.5-9.1.tar.gz</package>
+                    <package>https://github.com/Shians/diffexp/tree/master/dependencies/statmod_1.4.20.tar.gz</package>
+                    <package>https://github.com/Shians/diffexp/tree/master/dependencies/limma_3.20.7.tar.gz</package>
+                    <package>https://github.com/Shians/diffexp/tree/master/dependencies/edgeR_3.6.2.tar.gz</package>
                 </action>
             </actions>
         </install>