changeset 1:b81ab8b769f0 draft

update
author mingchen0919
date Tue, 27 Feb 2018 23:55:18 -0500
parents 5bf8b4d47190
children 706ecfe0bac3
files DESeq.xml DESeq_01.Rmd DESeq_02.Rmd DESeq_03.Rmd DESeq_04.Rmd DESeq_05.Rmd DESeq_render.R DESeq_results.xml DESeq_results_index.Rmd DESeq_results_render.R DESeq_results_site.yml DESeq_site.yml
diffstat 11 files changed, 202 insertions(+), 25 deletions(-) [+]
line wrap: on
line diff
--- a/DESeq.xml	Tue Feb 27 22:54:20 2018 -0500
+++ b/DESeq.xml	Tue Feb 27 23:55:18 2018 -0500
@@ -1,6 +1,6 @@
 <tool name="DESeq2: Analysis" id='deseq2' version="2.0.1">
     <description>
-        "some description"
+        perform differential expression analysis
     </description>
     <requirements>
         <requirement type="package" version="1.15.0.6-0">pandoc</requirement>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/DESeq_01.Rmd	Tue Feb 27 23:55:18 2018 -0500
@@ -0,0 +1,30 @@
+---
+output: html_document
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+knitr::opts_chunk$set(
+  echo = as.logical(opt$X_e),
+  error = TRUE
+)
+```
+
+# `DESeqDataSet` object
+
+```{r 'DESeqDataSet object'}
+count_file_paths = strsplit(opt$X_P, ',')[[1]]
+count_file_names = strsplit(opt$X_N, ',')[[1]]
+sample_table = read.table(opt$X_S, header = TRUE)
+row.names(sample_table) = sample_table[,2]
+sample_table = sample_table[count_file_names, ]
+
+## copy count files into OUTPUT_DIR/counts
+dir.create(paste0(OUTPUT_DIR, '/counts'), recursive = TRUE)
+file_copy = file.copy(count_file_paths, paste0(OUTPUT_DIR, '/counts/', count_file_names), overwrite = TRUE)
+
+## DESeqDataSet object
+dds = DESeqDataSetFromHTSeqCount(sampleTable = sample_table,
+                                 directory = paste0(OUTPUT_DIR, '/counts'),
+                                 design = formula(opt$X_p))
+dds
+```
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/DESeq_02.Rmd	Tue Feb 27 23:55:18 2018 -0500
@@ -0,0 +1,27 @@
+---
+output: html_document
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+knitr::opts_chunk$set(
+  echo = as.logical(opt$X_e),
+  error = TRUE
+)
+```
+
+
+
+# Pre-filtering the dataset.
+
+We remove rows whose total count across all samples is 0 or 1 to reduce the object size and speed up the calculations.
+
+* Number of rows before pre-filtering
+```{r}
+nrow(dds)
+```
+
+* Number of rows after pre-filtering
+```{r}
+dds = dds[rowSums(counts(dds)) > 1, ]
+nrow(dds)
+```
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/DESeq_03.Rmd	Tue Feb 27 23:55:18 2018 -0500
@@ -0,0 +1,27 @@
+---
+output: html_document
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+knitr::opts_chunk$set(
+  echo = as.logical(opt$X_e),
+  error = TRUE
+)
+```
+
+
+# Peek at data {.tabset}
+
+## Count Data
+
+```{r 'count data'}
+datatable(head(counts(dds), 100), style="bootstrap", 
+          class="table-condensed", options = list(dom = 'tp', scrollX = TRUE))
+```
+
+## Sample Table 
+
+```{r 'sample table'}
+datatable(sample_table, style="bootstrap",
+          class="table-condensed", options = list(dom = 'tp', scrollX = TRUE))
+```
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/DESeq_04.Rmd	Tue Feb 27 23:55:18 2018 -0500
@@ -0,0 +1,29 @@
+---
+output: html_document
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+knitr::opts_chunk$set(
+  echo = as.logical(opt$X_e),
+  error = TRUE
+)
+```
+
+
+
+# Sample distance on variance stabilized data {.tabset}
+
+## `rlog` Stabilizing transformation
+
+```{r}
+rld = rlog(dds, blind = FALSE)
+datatable(head(assay(rld), 100), style="bootstrap", 
+          class="table-condensed", options = list(dom = 'tp', scrollX = TRUE))
+```
+
+## Sample distance
+
+```{r}
+sampleDists <- dist(t(assay(rld)))
+sampleDists
+```
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/DESeq_05.Rmd	Tue Feb 27 23:55:18 2018 -0500
@@ -0,0 +1,23 @@
+---
+output: html_document
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+knitr::opts_chunk$set(
+  echo = as.logical(opt$X_e),
+  error = TRUE
+)
+```
+
+
+# Differential expression analysis
+
+```{r}
+dds <- DESeq(dds)
+```
+
+```{r echo=FALSE}
+# save useful objects.
+save(dds, rld, sample_table, sampleDists, zz, file=opt$X_w)
+```
+
--- a/DESeq_render.R	Tue Feb 27 22:54:20 2018 -0500
+++ b/DESeq_render.R	Tue Feb 27 23:55:18 2018 -0500
@@ -13,23 +13,26 @@
 library(DESeq2)
 library(pheatmap)
 library(DT)
+library(ggplot2)
+library(genefilter)
+library(RColorBrewer)
 #------------------------------------------------
 
 
 #------------get arguments into R--------------------
-# getopt_specification_matrix(extract_short_flags('')) %>%
+# getopt_specification_matrix(extract_short_flags('DESeq.xml')) %>%
 #   write.table(file = 'spec.txt', sep = ',', row.names = FALSE, col.names = TRUE, quote = FALSE)
 
 
 spec_matrix = as.matrix(
   data.frame(stringsAsFactors=FALSE,
-              long_flags = c("X_e", "X_o", "X_d", "X_s", "X_t", "X_P", "X_N",
-                             "X_S", "X_p", "X_w"),
+             long_flags = c("X_e", "X_o", "X_d", "X_s", "X_t", "X_P", "X_N",
+                            "X_S", "X_p", "X_w"),
              short_flags = c("e", "o", "d", "s", "t", "P", "N", "S", "p", "w"),
-     argument_mask_flags = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L),
-         data_type_flags = c("character", "character", "character", "character",
-                             "character", "character", "character",
-                             "character", "character", "character")
+             argument_mask_flags = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L),
+             data_type_flags = c("character", "character", "character", "character",
+                                 "character", "character", "character",
+                                 "character", "character", "character")
   )
 )
 opt = getopt(spec_matrix)
@@ -42,22 +45,39 @@
 #----------------------------------------------------
 
 #---------- often used variables ----------------
+# OUTPUT_REPORT: path to galaxy output report
 # OUTPUT_DIR: path to the output associated directory, which stores all outputs
 # TOOL_DIR: path to the tool installation directory
-# RMD_NAME: name of Rmd file to be rendered
-# OUTPUT_REPORT: path to galaxy output report
 OUTPUT_DIR = opt$X_d
 TOOL_DIR =   opt$X_t
-RMD_NAME = 'DESeq.Rmd'
 OUTPUT_REPORT = opt$X_o
 
+
 # create the output associated directory to store all outputs
 dir.create(OUTPUT_DIR, recursive = TRUE)
 
-#-----------------render Rmd--------------
-render(paste0(TOOL_DIR, '/', RMD_NAME), output_file = OUTPUT_REPORT)
+#-----------------render site--------------
+# copy site generating materials into OUTPUT_DIR
+dir.create(paste0(OUTPUT_DIR, '/site_generator'), recursive = TRUE)
+command_cp = paste0('cp -r ', TOOL_DIR, '/DESeq_0*.Rmd ', OUTPUT_DIR, '/site_generator')
+system(command_cp)
+system(paste0('cp -r ', TOOL_DIR, '/DESeq_site.yml ', OUTPUT_DIR, '/site_generator/_site.yml'))
+system(paste0('cp -r ', TOOL_DIR, '/DESeq_index.Rmd ', OUTPUT_DIR, '/site_generator/index.Rmd'))
+# render site to OUTPUT_DIR/_site; the output directory is configured in the "_site.yml" file
+dir.create(paste0(OUTPUT_DIR, '/_site'))
+render_site(input = paste0(OUTPUT_DIR, '/site_generator'))
+# remove site generating materials from output associated directory
+print(unlink(paste0(OUTPUT_DIR, '/site_generator'), recursive = TRUE))
+# move _site/* into output associated directory
+move_cmd = paste0('mv ', OUTPUT_DIR, '/_site/* ', OUTPUT_DIR)
+system(move_cmd)
 #------------------------------------------
 
+#-----copy index.html to output-----
+cp_index = paste0('cp ', OUTPUT_DIR, '/index.html ', OUTPUT_REPORT)
+system(cp_index)
+#-----------------------------------
+
 #==============the end==============
 
 
--- a/DESeq_results.xml	Tue Feb 27 22:54:20 2018 -0500
+++ b/DESeq_results.xml	Tue Feb 27 23:55:18 2018 -0500
@@ -8,7 +8,7 @@
         <requirement type="package" version="1.0.8">r-pheatmap</requirement>
     </requirements>
     <description>
-        An R Markdown tool to display DESeq analysis.
+        display DESeq2 analysis results.
     </description>
     <stdio>
         <!--redirecting stderr to a file. "XXX" is used to match with nothing so that tool running won't be interrupted during testing-->
--- a/DESeq_results_render.R	Tue Feb 27 22:54:20 2018 -0500
+++ b/DESeq_results_render.R	Tue Feb 27 23:55:18 2018 -0500
@@ -63,8 +63,8 @@
 dir.create(paste0(OUTPUT_DIR, '/site_generator'), recursive = TRUE)
 command_cp = paste0('cp -r ', TOOL_DIR, '/DESeq_results_*.Rmd ', OUTPUT_DIR, '/site_generator')
 system(command_cp)
-system(paste0('cp -r ', TOOL_DIR, '/DESeq_site.yml ', OUTPUT_DIR, '/site_generator/_site.yml'))
-system(paste0('cp -r ', TOOL_DIR, '/DESeq_index.Rmd ', OUTPUT_DIR, '/site_generator/index.Rmd'))
+system(paste0('cp -r ', TOOL_DIR, '/DESeq_results_site.yml ', OUTPUT_DIR, '/site_generator/_site.yml'))
+system(paste0('cp -r ', TOOL_DIR, '/DESeq_results_index.Rmd ', OUTPUT_DIR, '/site_generator/index.Rmd'))
 # render site to OUTPUT_DIR/_site, this is configured in the "_site.yml" file
 dir.create(paste0(OUTPUT_DIR, '/_site'))
 render_site(input = paste0(OUTPUT_DIR, '/site_generator'))
@@ -76,7 +76,7 @@
 #------------------------------------------
 
 #-----link index.html to output-----
-cp_index = paste0('cp ', OUTPUT_DIR, '/_site/index.html ', OUTPUT_REPORT)
+cp_index = paste0('cp ', OUTPUT_DIR, '/index.html ', OUTPUT_REPORT)
 system(cp_index)
 #-----------------------------------
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/DESeq_results_site.yml	Tue Feb 27 23:55:18 2018 -0500
@@ -0,0 +1,21 @@
+name: "Analysis Report"
+output_dir: "../_site"
+navbar:
+    title: ""
+    type: inverse
+    left:
+        - text: "Home"
+          icon: fa-home
+          href: index.html
+        - text: "Results"
+          href: DESeq_results_01.html
+        - text: "MA-plot"
+          href: DESeq_results_02.html
+        - text: "Histogram of p values"
+          href: DESeq_results_03.html
+        - text: "Visualization"
+          href: DESeq_results_04.html
+output:
+  html_document:
+    theme: cosmo
+    highlight: textmate
\ No newline at end of file
--- a/DESeq_site.yml	Tue Feb 27 22:54:20 2018 -0500
+++ b/DESeq_site.yml	Tue Feb 27 23:55:18 2018 -0500
@@ -7,14 +7,14 @@
         - text: "Home"
           icon: fa-home
           href: index.html
-        - text: "Results"
-          href: DESeq_results_01.html
-        - text: "MA-plot"
-          href: DESeq_results_02.html
-        - text: "Histogram of p values"
-          href: DESeq_results_03.html
-        - text: "Visualization"
-          href: DESeq_results_04.html
+        - text: "DESeqDataSet"
+          href: DESeq_01.html
+        - text: "Pre-filtering"
+          href: DESeq_02.html
+        - text: "Data"
+          href: DESeq_03.html
+        - text: "Sample distance"
+          href: DESeq_04.html
 output:
   html_document:
     theme: cosmo