Mercurial > repos > mingchen0919 > aurora_deseq2_site

--- a/DESeq.Rmd	Tue Feb 27 23:55:18 2018 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,97 +0,0 @@
----
-title: 'DESeq2: Perform DESeq analysis'
-output:
-    html_document:
-      number_sections: true
-      toc: true
-      theme: cosmo
-      highlight: tango
----
-
-```{r setup, include=FALSE, warning=FALSE, message=FALSE}
-knitr::opts_chunk$set(
-  echo = as.logical(opt$X_e),
-  error = TRUE
-)
-```
-
-# `DESeqDataSet` object
-
-```{r 'DESeqDataSet object'}
-count_file_paths = strsplit(opt$X_P, ',')[[1]]
-count_file_names = strsplit(opt$X_N, ',')[[1]]
-sample_table = read.table(opt$X_S, header = TRUE)
-row.names(sample_table) = sample_table[,2]
-sample_table = sample_table[count_file_names, ]
-
-## copy count files into OUTPUT_DIR/counts
-dir.create(paste0(OUTPUT_DIR, '/counts'), recursive = TRUE)
-file_copy = file.copy(count_file_paths, paste0(OUTPUT_DIR, '/counts/', count_file_names), overwrite = TRUE)
-
-## DESeqDataSet object
-dds = DESeqDataSetFromHTSeqCount(sampleTable = sample_table,
-                                 directory = paste0(OUTPUT_DIR, '/counts'),
-                                 design = formula(opt$X_p))
-dds
-```
-
-# Pre-filtering the dataset.
-
-We can remove the rows that have 0 or 1 count to reduce object size and increase the calculation speed.
-
-* Number of rows before pre-filtering
-```{r}
-nrow(dds)
-```
-
-* Number of rows after pre-filtering
-```{r}
-dds = dds[rowSums(counts(dds)) > 1, ]
-nrow(dds)
-```
-
-# Peek at data {.tabset}
-
-## Count Data
-
-```{r 'count data'}
-datatable(head(counts(dds), 100), style="bootstrap",
-          class="table-condensed", options = list(dom = 'tp', scrollX = TRUE))
-```
-
-## Sample Table
-
-```{r 'sample table'}
-datatable(sample_table, style="bootstrap",
-          class="table-condensed", options = list(dom = 'tp', scrollX = TRUE))
-```
-
-# Sample distance on variance stabilized data {.tabset}
-
-## `rlog` Stabilizing transformation
-
-```{r}
-rld = rlog(dds, blind = FALSE)
-datatable(head(assay(rld), 100), style="bootstrap",
-          class="table-condensed", options = list(dom = 'tp', scrollX = TRUE))
-```
-
-## Sample distance
-
-```{r}
-sampleDists <- dist(t(assay(rld)))
-sampleDists
-```
-
-# Differential expression analysis
-
-```{r}
-dds <- DESeq(dds)
-```
-
-```{r echo=FALSE}
-# save objects except for opt.
-save(list=ls()[ls() != "opt"], file=opt$X_w)
-```
-
-
--- a/DESeq.xml	Tue Feb 27 23:55:18 2018 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,106 +0,0 @@
-<tool name="DESeq2: Analysis" id='deseq2' version="2.0.1">
-    <description>
-        perform differential expression analysis
-    </description>
-    <requirements>
-        <requirement type="package" version="1.15.0.6-0">pandoc</requirement>
-        <requirement type="package" version="1.20.0">r-getopt</requirement>
-        <requirement type="package" version="1.6">r-rmarkdown</requirement>
-        <requirement type="package" version="1.14.1">bioconductor-deseq2</requirement>
-        <requirement type="package" version="0.2">r-dt</requirement>
-        <requirement type="package" version="1.0.8">r-pheatmap</requirement>
-    </requirements>
-    <stdio>
-        <regex match="XXX" source="stderr" level="warning"
-               description="Check the warnings_and_errors.txt file for more details."/>
-    </stdio>
-    <command><![CDATA[
-
-
-        Rscript '${__tool_directory__}/DESeq_render.R'
-
-            -e $echo
-
-		    -o $report
-		    -d $report.files_path
-		    -s $sink_message
-		    -t '${__tool_directory__}'
-
-
-		    ##----- code chunk to get file paths and raw file names for a multiple inputs data field ----
-            #set $sep = ''
-            #set $count_file_paths = ''
-            #set $count_file_names = ''
-            #for $count_file in $count_files:
-                #set $count_file_paths += $sep + str($count_file)
-                #set $count_file_names += $sep + str($count_file.name)
-                #set $sep = ','
-            #end for
-            ##----------------- end for getting file names and file paths ------------------------------
-            -P '$count_file_paths'
-            -N '$count_file_names'
-            -S $sample_table
-            -p '$design_formula'
-            -w $deseq_workspace
-
-
-    ]]></command>
-    <inputs>
-        <param type="boolean" name="echo" truevalue="TRUE" falsevalue="FALSE" checked="false"
-               label="Display analysis code in report?"/>
-        <param type="data" name="count_files" format="txt" multiple="true" label="Count files from htseq-count"/>
-        <param type="data" name="sample_table" format="txt" multiple="false" label="sample table file"
-               help="The sample table file contains a table. The first column is the sample name, the second column is
-                    the count file name and the rest of columns are treatment columns. The file names in this table have
-                    to be in the same order as the count files uploaded in the previous step. "/>
-        <param type="text" name="design_formula" value="~ condition_1 + condition_2" label="Design formula"
-               help="The simplest design formula for differential expression would be ~ condition, where condition
-                     is a column in colData(dds) that specifies which of two (or more groups) the samples belong to">
-            <sanitizer>
-                <valid initial="default">
-                    <add preset="string.printable"/>
-                    <add value="~"/>
-                </valid>
-            </sanitizer>
-        </param>
-    </inputs>
-    <outputs>
-        <data name="report" format="html" label="DESeq Analysis on ${on_string}"/>
-        <data format="txt" name="sink_message" label="Warnings and Errors on"
-              from_work_dir="warnings_and_errors.txt"/>
-        <data name="deseq_workspace" format="rdata" label="R workspace: DESeq analysis on ${on_string}"/>
-    </outputs>
-    <citations>
-        <citation type="bibtex">
-            @article{love2014moderated,
-            title={Moderated estimation of fold change and dispersion for RNA-seq data with DESeq2},
-            author={Love, Michael I and Huber, Wolfgang and Anders, Simon},
-            journal={Genome biology},
-            volume={15},
-            number={12},
-            pages={550},
-            year={2014},
-            publisher={BioMed Central}
-            }
-        </citation>
-        <citation type="bibtex"><![CDATA[
-            @article{allaire2016rmarkdown,
-            title={rmarkdown: Dynamic Documents for R, 2016},
-            author={Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff
-            and Wickham, Hadley and Atkins, Aron and Hyndman, Rob},
-            journal={R package version 0.9},
-            volume={6},
-            year={2016}
-            }
-        ]]></citation>
-        <citation type="bibtex"><![CDATA[
-            @book{xie2015dynamic,
-            title={Dynamic Documents with R and knitr},
-            author={Xie, Yihui},
-            volume={29},
-            year={2015},
-            publisher={CRC Press}
-            }
-        ]]></citation>
-    </citations>
-</tool>
--- a/DESeq_01.Rmd	Tue Feb 27 23:55:18 2018 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,30 +0,0 @@
----
-output: html_document
----
-
-```{r setup, include=FALSE, warning=FALSE, message=FALSE}
-knitr::opts_chunk$set(
-  echo = as.logical(opt$X_e),
-  error = TRUE
-)
-```
-
-# `DESeqDataSet` object
-
-```{r 'DESeqDataSet object'}
-count_file_paths = strsplit(opt$X_P, ',')[[1]]
-count_file_names = strsplit(opt$X_N, ',')[[1]]
-sample_table = read.table(opt$X_S, header = TRUE)
-row.names(sample_table) = sample_table[,2]
-sample_table = sample_table[count_file_names, ]
-
-## copy count files into OUTPUT_DIR/counts
-dir.create(paste0(OUTPUT_DIR, '/counts'), recursive = TRUE)
-file_copy = file.copy(count_file_paths, paste0(OUTPUT_DIR, '/counts/', count_file_names), overwrite = TRUE)
-
-## DESeqDataSet object
-dds = DESeqDataSetFromHTSeqCount(sampleTable = sample_table,
-                                 directory = paste0(OUTPUT_DIR, '/counts'),
-                                 design = formula(opt$X_p))
-dds
-```
--- a/DESeq_02.Rmd	Tue Feb 27 23:55:18 2018 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,27 +0,0 @@
----
-output: html_document
----
-
-```{r setup, include=FALSE, warning=FALSE, message=FALSE}
-knitr::opts_chunk$set(
-  echo = as.logical(opt$X_e),
-  error = TRUE
-)
-```
-
-
-
-# Pre-filtering the dataset.
-
-We can remove the rows that have 0 or 1 count to reduce object size and increase the calculation speed.
-
-* Number of rows before pre-filtering
-```{r}
-nrow(dds)
-```
-
-* Number of rows after pre-filtering
-```{r}
-dds = dds[rowSums(counts(dds)) > 1, ]
-nrow(dds)
-```
--- a/DESeq_03.Rmd	Tue Feb 27 23:55:18 2018 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,27 +0,0 @@
----
-output: html_document
----
-
-```{r setup, include=FALSE, warning=FALSE, message=FALSE}
-knitr::opts_chunk$set(
-  echo = as.logical(opt$X_e),
-  error = TRUE
-)
-```
-
-
-# Peek at data {.tabset}
-
-## Count Data
-
-```{r 'count data'}
-datatable(head(counts(dds), 100), style="bootstrap",
-          class="table-condensed", options = list(dom = 'tp', scrollX = TRUE))
-```
-
-## Sample Table
-
-```{r 'sample table'}
-datatable(sample_table, style="bootstrap",
-          class="table-condensed", options = list(dom = 'tp', scrollX = TRUE))
-```
--- a/DESeq_04.Rmd	Tue Feb 27 23:55:18 2018 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,29 +0,0 @@
----
-output: html_document
----
-
-```{r setup, include=FALSE, warning=FALSE, message=FALSE}
-knitr::opts_chunk$set(
-  echo = as.logical(opt$X_e),
-  error = TRUE
-)
-```
-
-
-
-# Sample distance on variance stabilized data {.tabset}
-
-## `rlog` Stabilizing transformation
-
-```{r}
-rld = rlog(dds, blind = FALSE)
-datatable(head(assay(rld), 100), style="bootstrap",
-          class="table-condensed", options = list(dom = 'tp', scrollX = TRUE))
-```
-
-## Sample distance
-
-```{r}
-sampleDists <- dist(t(assay(rld)))
-sampleDists
-```
--- a/DESeq_05.Rmd	Tue Feb 27 23:55:18 2018 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,23 +0,0 @@
----
-output: html_document
----
-
-```{r setup, include=FALSE, warning=FALSE, message=FALSE}
-knitr::opts_chunk$set(
-  echo = as.logical(opt$X_e),
-  error = TRUE
-)
-```
-
-
-# Differential expression analysis
-
-```{r}
-dds <- DESeq(dds)
-```
-
-```{r echo=FALSE}
-# save useful objects.
-save(dds, rld, sample_table, sampleDists, zz, file=opt$X_w)
-```
-
--- a/DESeq_render.R	Tue Feb 27 23:55:18 2018 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,86 +0,0 @@
-##============ Sink warnings and errors to a file ==============
-## use the sink() function to wrap all code within it.
-##==============================================================
-zz = file('warnings_and_errors.txt')
-sink(zz)
-sink(zz, type = 'message')
-
-#------------import libraries--------------------
-options(stringsAsFactors = FALSE)
-
-library(getopt)
-library(rmarkdown)
-library(DESeq2)
-library(pheatmap)
-library(DT)
-library(ggplot2)
-library(genefilter)
-library(RColorBrewer)
-#------------------------------------------------
-
-
-#------------get arguments into R--------------------
-# getopt_specification_matrix(extract_short_flags('fastqc_report.xml')) %>%
-#   write.table(file = 'spec.txt', sep = ',', row.names = FALSE, col.names = TRUE, quote = FALSE)
-
-
-spec_matrix = as.matrix(
-  data.frame(stringsAsFactors=FALSE,
-             long_flags = c("X_e", "X_o", "X_d", "X_s", "X_t", "X_P", "X_N",
-                            "X_S", "X_p", "X_w"),
-             short_flags = c("e", "o", "d", "s", "t", "P", "N", "S", "p", "w"),
-             argument_mask_flags = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L),
-             data_type_flags = c("character", "character", "character", "character",
-                                 "character", "character", "character",
-                                 "character", "character", "character")
-  )
-)
-opt = getopt(spec_matrix)
-#----------------------------------------------------
-
-
-#-----------using passed arguments in R
-#           to define system environment variables---
-do.call(Sys.setenv, opt[-1])
-#----------------------------------------------------
-
-#---------- often used variables ----------------
-# OUTPUT_REPORT: path to galaxy output report
-# OUTPUT_DIR: path to the output associated directory, which stores all outputs
-# TOOL_DIR: path to the tool installation directory
-OUTPUT_DIR = opt$X_d
-TOOL_DIR =   opt$X_t
-OUTPUT_REPORT = opt$X_o
-
-
-# create the output associated directory to store all outputs
-dir.create(OUTPUT_DIR, recursive = TRUE)
-
-#-----------------render site--------------
-# copy site generating materials into OUTPUT_DIR
-dir.create(paste0(OUTPUT_DIR, '/site_generator'), recursive = TRUE)
-command_cp = paste0('cp -r ', TOOL_DIR, '/DESeq_0*.Rmd ', OUTPUT_DIR, '/site_generator')
-system(command_cp)
-system(paste0('cp -r ', TOOL_DIR, '/DESeq_site.yml ', OUTPUT_DIR, '/site_generator/_site.yml'))
-system(paste0('cp -r ', TOOL_DIR, '/DESeq_index.Rmd ', OUTPUT_DIR, '/site_generator/index.Rmd'))
-# render site to OUTPUT_DIR/_site, this is configured in the "_site.yml" file
-dir.create(paste0(OUTPUT_DIR, '/_site'))
-render_site(input = paste0(OUTPUT_DIR, '/site_generator'))
-# remove site generating materials from output associated directory
-print(unlink(paste0(OUTPUT_DIR, '/site_generator'), recursive = TRUE))
-# move _site/* into output associated directory
-move_cmd = paste0('mv ', OUTPUT_DIR, '/_site/* ', OUTPUT_DIR)
-system(move_cmd)
-#------------------------------------------
-
-#-----link index.html to output-----
-cp_index = paste0('cp ', OUTPUT_DIR, '/index.html ', OUTPUT_REPORT)
-system(cp_index)
-#-----------------------------------
-
-#==============the end==============
-
-
-##--------end of code rendering .Rmd templates----------------
-sink()
-##=========== End of sinking output=============================
\ No newline at end of file
--- a/DESeq_results.Rmd	Tue Feb 27 23:55:18 2018 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,109 +0,0 @@
----
-title: 'DESeq2: Results'
-output:
-    html_document:
-      number_sections: true
-      toc: true
-      theme: cosmo
-      highlight: tango
----
-
-```{r setup, include=FALSE, warning=FALSE, message=FALSE}
-knitr::opts_chunk$set(
-  echo = as.logical(opt$X_e),
-  error = TRUE
-)
-```
-
-
-```{r eval=TRUE}
-# Import workspace
-# fcp = file.copy(opt$X_W, "deseq.RData")
-load(opt$X_W)
-```
-
-# Results {.tabset}
-
-## Result table
-
-```{r}
-cat('--- View the top 100 rows of the result table ---')
-res <- results(dds, contrast = c(opt$X_C, opt$X_T, opt$X_K))
-write.csv(as.data.frame(res), file = opt$X_R)
-res_df = as.data.frame(res)[1:100, ]
-datatable(res_df, style="bootstrap", filter = 'top',
-          class="table-condensed", options = list(dom = 'tp', scrollX = TRUE))
-```
-
-## Result summary
-
-```{r}
-summary(res)
-```
-
-
-# MA-plot {.tabset}
-
-
-
-```{r}
-cat('--- Shrinked with Bayesian procedure ---')
-plotMA(res)
-```
-
-
-# Histogram of p values
-
-```{r}
-hist(res$pvalue[res$baseMean > 1], breaks = 0:20/20,
-     col = "grey50", border = "white", main = "",
-     xlab = "Mean normalized count larger than 1")
-```
-
-
-# Visualization {.tabset}
-## Gene clustering
-
-```{r}
-clustering_groups = strsplit(opt$X_M, ',')[[1]]
-
-topVarGenes <- head(order(rowVars(assay(rld)), decreasing = TRUE), 20)
-mat  <- assay(rld)[ topVarGenes, ]
-mat  <- mat - rowMeans(mat)
-annotation_col <- as.data.frame(colData(rld)[, clustering_groups])
-colnames(annotation_col) = clustering_groups
-rownames(annotation_col) = colnames(mat)
-pheatmap(mat, annotation_col = annotation_col)
-```
-
-## Sample-to-sample distance
-
-```{r}
-sampleDistMatrix <- as.matrix( sampleDists )
-colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255)
-pheatmap(sampleDistMatrix,
-         clustering_distance_cols = sampleDists,
-         col = colors)
-```
-
-## PCA plot
-
-```{r}
-plotPCA(rld, intgroup = clustering_groups)
-```
-
-## MDS plot {.tabset}
-
-### Data table
-```{r}
-mds <- as.data.frame(colData(rld))  %>%
-         cbind(cmdscale(sampleDistMatrix))
-knitr::kable(mds)
-```
-
-### Plot
-```{r}
-ggplot(mds, aes(x = `1`, y = `2`, col = time)) +
-  geom_point(size = 3) + coord_fixed()
-```
-
--- a/DESeq_results.xml	Tue Feb 27 23:55:18 2018 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,99 +0,0 @@
-<tool id="deseq_results" name="DESeq2: Results" version="2.0.1">
-    <requirements>
-        <requirement type="package" version="1.15.0.6-0">pandoc</requirement>
-        <requirement type="package" version="1.20.0">r-getopt</requirement>
-        <requirement type="package" version="1.6">r-rmarkdown</requirement>
-        <requirement type="package" version="1.14.1">bioconductor-deseq2</requirement>
-        <requirement type="package" version="0.2">r-dt</requirement>
-        <requirement type="package" version="1.0.8">r-pheatmap</requirement>
-    </requirements>
-    <description>
-        display DESeq2 analysis results.
-    </description>
-    <stdio>
-        <!--redirecting stderr to a file. "XXX" is used to match with nothing so that tool running won't be interrupted during testing-->
-        <regex match="XXX"
-               source="stderr"
-               level="warning"
-               description="Check the warnings_and_errors.txt file for more details."/>
-    </stdio>
-    <command>
-        <![CDATA[
-
-        Rscript '${__tool_directory__}/DESeq_results_render.R'
-
-            -e $echo
-		    -o $report
-		    -d $report.files_path
-		    -s $sink_message
-		    -t '${__tool_directory__}'
-
-            ## 1. input data
-
-            -W $deseq_workspace
-            -C '$contrast_factor'
-            -T '$treatment'
-            -K '$condition'
-
-            -M '$clustering_factors'
-
-            ## 2. output report and report site directory
-            -R $deseq_results
-
-
-        ]]>
-    </command>
-    <inputs>
-        <param type="boolean" name="echo" truevalue="TRUE" falsevalue="FALSE" checked="false"
-               label="Display analysis code in report?"/>
-        <param type="data" name="deseq_workspace" format="rdata" multiple="false" optional="false"
-               label="Workspace from tool DESeq2: DESeq"/>
-        <param type="text" name="contrast_factor" label="Factor" optional="false"
-               help="the name of a factor in the design formula"/>
-        <param type="text" name="treatment" label="Treatment level" optional="false"
-               help=" the name of the numerator level for the fold change"/>
-        <param type="text" name="condition" label="Condition level" optional="false"
-               help=" the name of the denominator level for the fold change"/>
-        <param type="text" name="clustering_factors" title="Gene clustering factors" optional="false"
-               label="factors of interest for clustering samples and PCA plot"
-               help="A single factor or multiple factors from the design formula. Multiple factors are separated by comma (,)."/>
-    </inputs>
-    <outputs>
-        <data format="html" name="report" label="DESeq results report on ${on_string}" />
-        <data format="txt" name="sink_message" label="Warnings and Errors" from_work_dir="warnings_and_errors.txt"/>
-        <data format="csv" name="deseq_results" label="DESeq results on ${on_string}" from_work_dir="deseq_results.csv" />
-    </outputs>
-    <citations>
-        <citation type="bibtex">
-            @article{love2014moderated,
-            title={Moderated estimation of fold change and dispersion for RNA-seq data with DESeq2},
-            author={Love, Michael I and Huber, Wolfgang and Anders, Simon},
-            journal={Genome biology},
-            volume={15},
-            number={12},
-            pages={550},
-            year={2014},
-            publisher={BioMed Central}
-            }
-        </citation>
-        <citation type="bibtex">
-            @article{allaire2016rmarkdown,
-            title={rmarkdown: Dynamic Documents for R, 2016},
-            author={Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff
-            and Wickham, Hadley and Atkins, Aron and Hyndman, Rob},
-            journal={R package version 0.9},
-            volume={6},
-            year={2016}
-            }
-        </citation>
-        <citation type="bibtex">
-            @book{xie2015dynamic,
-            title={Dynamic Documents with R and knitr},
-            author={Xie, Yihui},
-            volume={29},
-            year={2015},
-            publisher={CRC Press}
-            }
-        </citation>
-    </citations>
-</tool>
\ No newline at end of file
--- a/DESeq_results_01.Rmd	Tue Feb 27 23:55:18 2018 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,30 +0,0 @@
----
-output: html_document
----
-
-```{r setup, include=FALSE, warning=FALSE, message=FALSE}
-knitr::opts_chunk$set(
-  echo = as.logical(opt$X_e),
-  error = TRUE
-)
-```
-
-
-```{r eval=TRUE}
-# Import workspace
-# fcp = file.copy(opt$X_W, "deseq.RData")
-load(opt$X_W)
-```
-
-# Results {.tabset}
-
-## Result table
-
-```{r}
-cat('--- View the top 100 rows of the result table ---')
-res <- results(dds, contrast = c(opt$X_C, opt$X_T, opt$X_K))
-write.csv(as.data.frame(res), file = opt$X_R)
-res_df = as.data.frame(res)[1:100, ]
-datatable(res_df, style="bootstrap", filter = 'top',
-          class="table-condensed", options = list(dom = 'tp', scrollX = TRUE))
-```
--- a/DESeq_results_02.Rmd	Tue Feb 27 23:55:18 2018 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,17 +0,0 @@
----
-output: html_document
----
-
-```{r setup, include=FALSE, warning=FALSE, message=FALSE}
-knitr::opts_chunk$set(
-  echo = as.logical(opt$X_e),
-  error = TRUE
-)
-```
-
-# MA-plot {.tabset}
-
-```{r}
-cat('--- Shrinked with Bayesian procedure ---')
-plotMA(res)
-```
--- a/DESeq_results_03.Rmd	Tue Feb 27 23:55:18 2018 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,20 +0,0 @@
----
-output: html_document
----
-
-```{r setup, include=FALSE, warning=FALSE, message=FALSE}
-knitr::opts_chunk$set(
-  echo = as.logical(opt$X_e),
-  error = TRUE
-)
-```
-
-
-# Histogram of p values
-
-```{r}
-hist(res$pvalue[res$baseMean > 1], breaks = 0:20/20,
-     col = "grey50", border = "white", main = "",
-     xlab = "Mean normalized count larger than 1")
-```
-
--- a/DESeq_results_04.Rmd	Tue Feb 27 23:55:18 2018 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,57 +0,0 @@
----
-output: html_document
----
-
-```{r setup, include=FALSE, warning=FALSE, message=FALSE}
-knitr::opts_chunk$set(
-  echo = as.logical(opt$X_e),
-  error = TRUE
-)
-```
-
-
-# Visualization {.tabset}
-## Gene clustering
-
-```{r}
-clustering_groups = strsplit(opt$X_M, ',')[[1]]
-
-topVarGenes <- head(order(rowVars(assay(rld)), decreasing = TRUE), 20)
-mat  <- assay(rld)[ topVarGenes, ]
-mat  <- mat - rowMeans(mat)
-annotation_col <- as.data.frame(colData(rld)[, clustering_groups])
-colnames(annotation_col) = clustering_groups
-rownames(annotation_col) = colnames(mat)
-pheatmap(mat, annotation_col = annotation_col)
-```
-
-## Sample-to-sample distance
-
-```{r}
-sampleDistMatrix <- as.matrix( sampleDists )
-colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255)
-pheatmap(sampleDistMatrix,
-         clustering_distance_cols = sampleDists,
-         col = colors)
-```
-
-## PCA plot
-
-```{r}
-plotPCA(rld, intgroup = clustering_groups)
-```
-
-## MDS plot {.tabset}
-
-### Data table
-```{r}
-mds <- as.data.frame(colData(rld))  %>%
-         cbind(cmdscale(sampleDistMatrix))
-knitr::kable(mds)
-```
-
-### Plot
-```{r}
-ggplot(mds, aes(x = `1`, y = `2`, col = time)) +
-  geom_point(size = 3) + coord_fixed()
-```
--- a/DESeq_results_render.R	Tue Feb 27 23:55:18 2018 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,88 +0,0 @@
-##============ Sink warnings and errors to a file ==============
-## use the sink() function to wrap all code within it.
-##==============================================================
-zz = file('warnings_and_errors.txt')
-sink(zz)
-sink(zz, type = 'message')
-
-#------------import libraries--------------------
-options(stringsAsFactors = FALSE)
-
-library(getopt)
-library(rmarkdown)
-library(DESeq2)
-library(pheatmap)
-library(DT)
-library(ggplot2)
-library(genefilter)
-library(RColorBrewer)
-#------------------------------------------------
-
-
-#------------get arguments into R--------------------
-# getopt_specification_matrix(extract_short_flags('fastqc_report.xml')) %>%
-#   write.table(file = 'spec.txt', sep = ',', row.names = FALSE, col.names = TRUE, quote = FALSE)
-
-
-spec_matrix = as.matrix(
-  data.frame(stringsAsFactors=FALSE,
-             long_flags = c("X_e", "X_W", "X_C", "X_T", "X_K", "X_M", "X_o",
-                            "X_d", "X_s", "X_R", "X_t"),
-             short_flags = c("e", "W", "C", "T", "K", "M", "o", "d", "s", "R",
-                             "t"),
-             argument_mask_flags = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L),
-             data_type_flags = c("character", "character", "character", "character",
-                                 "character", "character", "character",
-                                 "character", "character", "character", "character")
-  )
-)
-opt = getopt(spec_matrix)
-opt
-#----------------------------------------------------
-
-
-#-----------using passed arguments in R
-#           to define system environment variables---
-do.call(Sys.setenv, opt[-1])
-#----------------------------------------------------
-
-#---------- often used variables ----------------
-# OUTPUT_REPORT: path to galaxy output report
-# OUTPUT_DIR: path to the output associated directory, which stores all outputs
-# TOOL_DIR: path to the tool installation directory
-OUTPUT_DIR = opt$X_d
-TOOL_DIR =   opt$X_t
-OUTPUT_REPORT = opt$X_o
-
-
-# create the output associated directory to store all outputs
-dir.create(OUTPUT_DIR, recursive = TRUE)
-
-#-----------------render site--------------
-# copy site generating materials into OUTPUT_DIR
-dir.create(paste0(OUTPUT_DIR, '/site_generator'), recursive = TRUE)
-command_cp = paste0('cp -r ', TOOL_DIR, '/DESeq_results_*.Rmd ', OUTPUT_DIR, '/site_generator')
-system(command_cp)
-system(paste0('cp -r ', TOOL_DIR, '/DESeq_results_site.yml ', OUTPUT_DIR, '/site_generator/_site.yml'))
-system(paste0('cp -r ', TOOL_DIR, '/DESeq_results_index.Rmd ', OUTPUT_DIR, '/site_generator/index.Rmd'))
-# render site to OUTPUT_DIR/_site, this is configured in the "_site.yml" file
-dir.create(paste0(OUTPUT_DIR, '/_site'))
-render_site(input = paste0(OUTPUT_DIR, '/site_generator'))
-# remove site generating materials from output associated directory
-print(unlink(paste0(OUTPUT_DIR, '/site_generator'), recursive = TRUE))
-# move _site/* into output associated directory
-move_cmd = paste0('mv ', OUTPUT_DIR, '/_site/* ', OUTPUT_DIR)
-system(move_cmd)
-#------------------------------------------
-
-#-----link index.html to output-----
-cp_index = paste0('cp ', OUTPUT_DIR, '/index.html ', OUTPUT_REPORT)
-system(cp_index)
-#-----------------------------------
-
-#==============the end==============
-
-
-##--------end of code rendering .Rmd templates----------------
-sink()
-##=========== End of sinking output=============================
\ No newline at end of file
--- a/DESeq_results_site.yml	Tue Feb 27 23:55:18 2018 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,21 +0,0 @@
-name: "Analysis Report"
-output_dir: "../_site"
-navbar:
-    title: ""
-    type: inverse
-    left:
-        - text: "Home"
-          icon: fa-home
-          href: index.html
-        - text: "Results"
-          href: DESeq_results_01.html
-        - text: "MA-plot"
-          href: DESeq_results_02.html
-        - text: "Histogram of p values"
-          href: DESeq_results_03.html
-        - text: "Visualization"
-          href: DESeq_results_04.html
-output:
-  html_document:
-    theme: cosmo
-    highlight: textmate
\ No newline at end of file
--- a/DESeq_site.yml	Tue Feb 27 23:55:18 2018 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,21 +0,0 @@
-name: "Analysis Report"
-output_dir: "../_site"
-navbar:
-    title: ""
-    type: inverse
-    left:
-        - text: "Home"
-          icon: fa-home
-          href: index.html
-        - text: "DESeqDataSet"
-          href: DESeq_01.html
-        - text: "Pre-filtering"
-          href: DESeq_02.html
-        - text: "Data"
-          href: DESeq_03.html
-        - text: "Sample distance"
-          href: DESeq_04.html
-output:
-  html_document:
-    theme: cosmo
-    highlight: textmate
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/deseq2_01_run_deseq.Rmd	Fri Mar 09 08:53:17 2018 -0500
@@ -0,0 +1,37 @@
+---
+title: 'DESeq2 Analysis'
+output: html_document
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+knitr::opts_chunk$set(
+  echo = as.logical(opt$X_e),
+  error = TRUE
+)
+```
+
+## DESeq2 analysis
+
+```{r}
+# Load the R workspace given by -A; the code below expects it to provide an
+# object named `count_data` whose column names are the sample names.
+load(opt$X_A)
+# Read the column (sample) data from the CSV given by -B, using its first
+# column as row names.
+coldata = read.csv(opt$X_B, row.names = 1, header = TRUE)
+# Fail with a clear message when a sample has no row in the column data,
+# instead of silently creating all-NA rows by indexing with unknown names.
+missing_samples = setdiff(colnames(count_data), rownames(coldata))
+if (length(missing_samples) > 0) {
+  stop('Samples missing from column data: ',
+       paste(missing_samples, collapse = ', '))
+}
+# Reorder rows to match the count matrix columns; drop = FALSE keeps a data
+# frame (with row names) even when there is only a single variable column.
+coldata = coldata[colnames(count_data), , drop = FALSE]
+dds = DESeqDataSetFromMatrix(countData = count_data,
+                             colData = coldata,
+                             design = formula(opt$X_C))
+dds = DESeq(dds, test = opt$X_G, fitType = opt$X_H)
+```
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/deseq2_02_results.Rmd	Fri Mar 09 08:53:17 2018 -0500
@@ -0,0 +1,35 @@
+---
+title: 'DESeq2 Analysis'
+output: html_document
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+knitr::opts_chunk$set(
+  echo = as.logical(opt$X_e),
+  error = TRUE
+)
+```
+
+
+## Results
+
+```{r}
+# Option values may arrive as character strings; convert the cutoff once so
+# it is passed to results() and compared with padj as a number.
+padj_cutoff = as.numeric(opt$X_I)
+# DESeq2 contrast vector: factor name, numerator level, denominator level.
+res = results(dds, contrast = c(opt$X_D, opt$X_E, opt$X_F), alpha = padj_cutoff)
+DT::datatable(as.data.frame(res))
+```
+
+```{r}
+# significant genes: adjusted p-value below the cutoff, NA values excluded
+sig_res = res[(res$padj < padj_cutoff) & !is.na(res$padj), ]
+write.csv(sig_res, file = paste0(opt$X_d, '/significant_genes.csv'), quote = FALSE)
+```
+
+```{bash echo=FALSE}
+# Assumes the render script exported the options as environment variables
+# (X_d, X_J) -- TODO confirm. Quoted so paths containing spaces work.
+cp "${X_d}/significant_genes.csv" "${X_J}"
+```
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/deseq2_03_MA_plot.Rmd	Fri Mar 09 08:53:17 2018 -0500
@@ -0,0 +1,38 @@
+---
+title: 'DESeq2 Analysis'
+output: html_document
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+knitr::opts_chunk$set(
+  echo = as.logical(opt$X_e),
+  error = TRUE
+)
+```
+
+
+## MA-plot
+
+```{r warning=FALSE, message=FALSE}
+# Option values may arrive as character strings; compare padj numerically.
+padj_cutoff = as.numeric(opt$X_I)
+df = data.frame(ID = rownames(res),
+                mean = res$baseMean,
+                lfc = res$log2FoldChange,
+                padj = res$padj,
+                stringsAsFactors = FALSE)
+# Label every gene by its adjusted p-value relative to the cutoff; genes
+# without an adjusted p-value keep the empty string and are relabelled 'NA'.
+cols = vector(mode='character', length = nrow(res))
+cols[(res$padj < padj_cutoff) & !is.na(res$padj)] = paste0('< ', opt$X_I)
+cols[(res$padj >= padj_cutoff) & !is.na(res$padj)] = paste0('>= ', opt$X_I)
+cols[cols == ''] = 'NA'
+df$col = cols
+p = ggplot(data = df) +
+  geom_point(mapping = aes(x = log(mean), y = lfc, col = cols, key = ID)) +
+  scale_x_continuous(name = 'Log(mean)') +
+  scale_y_continuous(name = 'Log fold change') +
+  scale_color_discrete(name = 'Adjusted P')+
+  theme_classic()
+ggplotly(p)
+```
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/deseq2_04_heatmap.Rmd	Fri Mar 09 08:53:17 2018 -0500
@@ -0,0 +1,28 @@
+---
+title: 'DESeq2 Analysis'
+output: html_document
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+knitr::opts_chunk$set(
+  echo = as.logical(opt$X_e),
+  error = TRUE
+)
+```
+
+
+## Heatmap of count matrix
+
+```{r}
+# Transform the normalized counts for display (normTransform defaults).
+ntd <- normTransform(dds)
+# Row indices of the (up to) 20 genes with the highest mean normalized count;
+# head() avoids NA indices when fewer than 20 genes are available.
+select <- head(order(rowMeans(counts(dds, normalized = TRUE)),
+                     decreasing = TRUE), 20)
+# Annotation: every colData column except the last one (presumably the
+# sizeFactor column added by DESeq() -- TODO confirm). drop = FALSE keeps a
+# data frame with sample row names even when a single column remains.
+df <- as.data.frame(colData(dds)[, -ncol(colData(dds)), drop = FALSE])
+pheatmap(assay(ntd)[select, ], annotation_col = df)
+```
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/deseq2_05_pca.Rmd	Fri Mar 09 08:53:17 2018 -0500
@@ -0,0 +1,21 @@
+---
+title: 'DESeq2 Analysis'
+output: html_document
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+# Chunk defaults for this page: chunk source is echoed only when the -e option
+# (opt$X_e) parses to TRUE; error = TRUE is kept so a failing chunk reports its
+# error in the page instead of aborting the render.
+knitr::opts_chunk$set(error = TRUE, echo = as.logical(opt$X_e))
+```
+
+## Principal component plot
+
+```{r}
+# PCA on variance-stabilized counts; points are grouped by the -D column name.
+vsd <- vst(dds, blind = FALSE)
+pca_plot = plotPCA(vsd, intgroup = opt$X_D)
+p = pca_plot + scale_color_discrete(name = 'Group') + theme_classic()
+ggplotly(p)
+```
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/deseq2_site.sh	Fri Mar 09 08:53:17 2018 -0500
@@ -0,0 +1,18 @@
+Rscript '${__tool_directory__}/deseq2_site_render.R'
+
+  -e '$echo'
+  -o '$report'
+  -d '$report.files_path'
+  -s '$sink_message'
+  -t '${__tool_directory__}'
+
+  -A '$count_data'
+  -B '$column_data'
+  -C '$design_formula'
+  -D '$treatment_name'
+  -E '$treated'
+  -F '$untreated'
+  -G '$test_type'
+  -H '$fit_type'
+  -I '$alpha'
+  -J '$significant_genes'
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/deseq2_site.xml	Fri Mar 09 08:53:17 2018 -0500
@@ -0,0 +1,111 @@
+<tool id="aurora_deseq2_site" name="Aurora DESeq2 site" version="2.2.0">
+    <description>Differential analysis of count data with the DESeq2 package</description>
+    <requirements> <!-- pandoc plus the R packages loaded by deseq2_site_render.R. NOTE(review): that script also loads htmltools, which is not listed here; presumably it arrives as a dependency, confirm. -->
+        <requirement type="package" version="1.15.0.6-0">pandoc</requirement>
+        <requirement type="package" version="1.20.0">r-getopt</requirement>
+        <requirement type="package" version="1.6">r-rmarkdown</requirement>
+        <requirement type="package" version="1.14.1">bioconductor-deseq2</requirement>
+        <requirement type="package" version="4.5.6">r-plotly</requirement>
+        <requirement type="package" version="2.2.1">r-ggplot2</requirement>
+        <requirement type="package" version="1.0.8">r-pheatmap</requirement>
+        <requirement type="package" version="0.2">r-dt</requirement>
+    </requirements>
+    <stdio> <!-- NOTE(review): "XXX" looks like a placeholder pattern; confirm which stderr text is meant to raise this warning. -->
+        <regex match="XXX" source="stderr" level="warning"
+               description="Check the warnings_and_errors.txt file for more details."/>
+    </stdio>
+    <command><![CDATA[Rscript '${__tool_directory__}/deseq2_site_render.R'
+
+  -e '$echo'
+  -o '$report'
+  -d '$report.files_path'
+  -s '$sink_message'
+  -t '${__tool_directory__}'
+
+  -A '$count_data'
+  -B '$column_data'
+  -C '$design_formula'
+  -D '$treatment_name'
+  -E '$treated'
+  -F '$untreated'
+  -G '$test_type'
+  -H '$fit_type'
+  -I '$alpha'
+  -J '$significant_genes']]></command> <!-- each flag is parsed as option X_<flag> by deseq2_site_render.R; values are quoted so a path with spaces stays one argument -->
+    <inputs> <!-- every parameter below is forwarded to the render script by the command section -->
+        <param type="boolean" name="echo" truevalue="TRUE" falsevalue="FALSE" checked="true"
+               label="Display analysis code in report?"/>
+        <param type="data" name="count_data" label="Count data"
+               help="an RData file that stores the count matrix data. The file is generated from the aurora_htseq tool."
+               optional="False" format="rdata"/>
+        <param type="data" name="column_data" label="Column data"
+               help="A table file that stores column data for DESeq2 analysis. The first columns are the column names from the count matrix data. The remaining columns are experimental treatments."
+               optional="False" format="csv"/>
+        <param type="text" name="design_formula" label="Design formula"
+               help="The design formula expresses the variables which will be used in modeling. The formula should be a tilde (~) followed by the variables with plus signs between them, e.g., ~ condition1 + condition2"
+               optional="False">
+            <sanitizer sanitize="true"> <!-- NOTE(review): string.printable presumably admits quotes and other shell metacharacters, and this value is placed inside single quotes on the command line; consider a narrower whitelist. -->
+                <valid initial="default">
+                    <add preset="string.printable"/>
+                    <add value="~"/>
+                </valid>
+            </sanitizer>
+        </param>
+        <param type="text" name="treatment_name" label="Treatment name"
+               help="one of the treatment column names from the column data." optional="False"/>
+        <param type="text" name="treated" label="Treated"
+               help="A level from the specified treatment column that will be used as the treated group in the DESeq2 analysis."
+               optional="False"/>
+        <param type="text" name="untreated" label="Untreated"
+               help="A level from the specified treatment column that will be used as the untreated group in the DESeq2 analysis."
+               optional="False"/>
+        <param type="select" name="test_type" label="Test type" optional="False">
+            <option value="Wald" selected="true">Wald</option>
+            <option value="LRT" selected="false">Likelihood Ratio Test (LRT)</option>
+        </param>
+        <param type="select" name="fit_type" label="Fitting of dispersions"
+               help="either &quot;parametric&quot;, &quot;local&quot;, or &quot;mean&quot; for the type of fitting of dispersions to the mean intensity"
+               optional="False">
+            <option value="parametric" selected="true">parametric</option>
+            <option value="local" selected="false">local</option>
+            <option value="mean" selected="false">mean</option>
+        </param>
+        <param type="float" name="alpha" label="Alpha"
+               help="the significance cutoff used for optimizing the independent filtering (by default 0.1). If the adjusted p-value cutoff (FDR) will be a value other than 0.1, alpha should be set to that value."
+               optional="False" value="0.1" min="0" max="1"/>
+    </inputs>
+    <outputs> <!-- report page, the captured warnings/errors log, and the significant-genes CSV -->
+        <data format="html" name="report" label="Aurora DESeq2 site"/>
+        <data format="txt" name="sink_message" label="Warnings and Errors" from_work_dir="warnings_and_errors.txt"/>
+        <data name="significant_genes" format="csv" label="significant genes from ${on_string}" hidden="false"/>
+    </outputs>
+    <citations> <!-- BibTeX entries for rmarkdown, knitr and DESeq2 -->
+        <citation type="bibtex"><![CDATA[
+            @article{allaire2016rmarkdown,
+            title={rmarkdown: Dynamic Documents for R, 2016},
+            author={Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff
+            and Wickham, Hadley and Atkins, Aron and Hyndman, Rob},
+            journal={R package version 0.9},
+            volume={6},
+            year={2016}
+            }
+        ]]></citation>
+        <citation type="bibtex"><![CDATA[
+            @book{xie2015dynamic,
+            title={Dynamic Documents with R and knitr},
+            author={Xie, Yihui},
+            volume={29},
+            year={2015},
+            publisher={CRC Press}
+            }
+        ]]></citation>
+        <citation type="bibtex"><![CDATA[@article{love2014differential,
+  title={Differential analysis of count data--the DESeq2 package},
+  author={Love, Michael and Anders, Simon and Huber, Wolfgang},
+  journal={Genome Biol},
+  volume={15},
+  pages={550},
+  year={2014}
+}]]></citation>
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/deseq2_site_index.Rmd	Fri Mar 09 08:53:17 2018 -0500
@@ -0,0 +1,16 @@
+---
+title: "Analysis Report"
+output: html_document
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+knitr::opts_chunk$set(echo = TRUE, error = TRUE)  # chunk defaults for the index page
+```
+
+## References
+
+Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff and Wickham, Hadley and Atkins, Aron and Hyndman, Rob (2016). rmarkdown: Dynamic Documents for R, 2016. In R package version 0.9, 6.
+
+Xie, Yihui (2015). Dynamic Documents with R and knitr, CRC Press, Vol.29.
+
+Love, Michael and Anders, Simon and Huber, Wolfgang (2014). Differential analysis of count data--the DESeq2 package. In Genome Biol, 15, pp. 550.
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/deseq2_site_render.R	Fri Mar 09 08:53:17 2018 -0500
@@ -0,0 +1,86 @@
+##============ Sink warnings and errors to a file ==============
+## Divert both regular output and the message stream into
+## warnings_and_errors.txt for everything that runs below.
+zz = file('warnings_and_errors.txt')
+sink(zz)
+sink(zz, type = 'message')
+
+#------------import libraries--------------------
+options(stringsAsFactors = FALSE)
+
+library(getopt)
+library(rmarkdown)
+library(ggplot2)
+library(plotly)
+library(htmltools)
+library(DESeq2)
+library(pheatmap)
+library(DT)
+#------------------------------------------------
+
+
+#------------get arguments into R--------------------
+# library(dplyr)
+# getopt_specification_matrix(extract_short_flags('deseq2.xml')) %>%
+#   write.table(file = 'spec.txt', sep = ',', row.names = FALSE, col.names = TRUE, quote = FALSE)
+
+
+# getopt specification, one row per command-line option. Columns, in order:
+# long name ("X_" + short flag), short flag, argument mask, data type.
+# Every mask is 1 and every type is "character" except -I, which is "double".
+# Built column-wise with cbind(), so every cell is a character string.
+short_flags = c("e", "o", "d", "s", "t",
+                "A", "B", "C", "D", "E", "F", "G", "H", "I", "J")
+data_type_flags = rep("character", length(short_flags))
+data_type_flags[short_flags == "I"] = "double"
+spec_matrix = cbind(
+  long_flags = paste0("X_", short_flags),
+  short_flags = short_flags,
+  argument_mask_flags = "1",
+  data_type_flags = data_type_flags
+)
+opt = getopt(spec_matrix)
+#----------------------------------------------------
+
+
+#-----------export the parsed options as environment variables---
+# The {bash} chunks read them back, e.g. ${X_d} and ${X_J}. opt[-1] skips the
+# first element of the getopt() result (presumably its ARGS entry; TODO confirm).
+do.call(Sys.setenv, opt[-1])
+
+#---------- often used variables ----------------
+# OUTPUT_DIR: path to the output associated directory, which stores all outputs
+# TOOL_DIR: path to the tool installation directory
+# OUTPUT_REPORT: path given by the -o option; the rendered index.html is copied there
+OUTPUT_DIR = opt$X_d
+TOOL_DIR =   opt$X_t
+OUTPUT_REPORT = opt$X_o
+# create the output associated directory to store all outputs
+dir.create(OUTPUT_DIR, recursive = TRUE)
+
+#-----------------render site--------------
+# Stage the page sources, _site.yml and index.Rmd in OUTPUT_DIR/site_generator.
+# Every path goes through shQuote() so spaces or shell metacharacters survive.
+site_dir = paste0(OUTPUT_DIR, '/site_generator')
+dir.create(site_dir, recursive = TRUE)
+system(paste0('cp -r ', shQuote(TOOL_DIR), '/deseq2_0*.Rmd ', shQuote(site_dir), '/'))
+system(paste('cp -r', shQuote(paste0(TOOL_DIR, '/deseq2_site_site.yml')), shQuote(paste0(site_dir, '/_site.yml'))))
+system(paste('cp -r', shQuote(paste0(TOOL_DIR, '/deseq2_site_index.Rmd')), shQuote(paste0(site_dir, '/index.Rmd'))))
+# render site to OUTPUT_DIR/_site, this is configured in the "_site.yml" file
+render_site(input = site_dir)
+# remove the staging directory, then move _site/* up into OUTPUT_DIR
+unlink(site_dir, recursive = TRUE)
+move_cmd = paste0('mv ', shQuote(OUTPUT_DIR), '/_site/* ', shQuote(OUTPUT_DIR))
+system(move_cmd)
+#------------------------------------------
+
+#-----copy index.html to the report output-----
+cp_index = paste('cp', shQuote(paste0(OUTPUT_DIR, '/index.html')), shQuote(OUTPUT_REPORT))
+system(cp_index)
+
+#==============the end==============
+
+
+##--------end of code rendering .Rmd templates: undo both diversions--------
+sink(type = 'message')
+sink()
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/deseq2_site_site.yml	Fri Mar 09 08:53:17 2018 -0500
@@ -0,0 +1,26 @@
+# Site layout: each navbar entry links to the page rendered from the
+# matching deseq2_0*.Rmd source.
+name: "Analysis Report"
+output_dir: "../_site"
+navbar:
+    title: ""
+    type: inverse
+    left:
+        - text: "Home"
+          icon: fa-home
+          href: index.html
+        - text: "Differential Expression Analysis"
+          menu:
+            - text: "DESeq analysis"
+              href: deseq2_01_run_deseq.html
+            - text: "Results"
+              href: deseq2_02_results.html
+            - text: "MA-plot"
+              href: deseq2_03_MA_plot.html
+            - text: "Heatmap"
+              href: deseq2_04_heatmap.html
+            - text: "PCA plot"
+              href: deseq2_05_pca.html
+output:
+  html_document:
+    theme: cosmo
+    highlight: textmate
\ No newline at end of file