# HG changeset patch
# User mingchen0919
# Date 1519754652 18000
# Node ID 6b12f3dc358aac48eef55edb7bc2ffd2e096f51d
planemo upload
diff -r 000000000000 -r 6b12f3dc358a DESeq.Rmd
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/DESeq.Rmd Tue Feb 27 13:04:12 2018 -0500
@@ -0,0 +1,96 @@
+---
+title: 'DESeq2: Perform DESeq analysis'
+output:
+ html_document:
+ number_sections: true
+ toc: true
+ theme: cosmo
+ highlight: tango
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+knitr::opts_chunk$set(
+ echo = as.logical(opt$X_e),
+ error = TRUE
+)
+```
+
+# `DESeqDataSet` object
+
+```{r 'DESeqDataSet object'}
+count_file_paths = strsplit(opt$X_P, ',')[[1]]
+count_file_names = strsplit(opt$X_N, ',')[[1]]
+sample_table = read.table(opt$X_S, header = TRUE)
+row.names(sample_table) = sample_table[,2]
+sample_table = sample_table[count_file_names, ]
+
+## copy count files into working directory
+file_copy = file.copy(count_file_paths, count_file_names, overwrite = TRUE)
+
+## DESeqDataSet object
+dds = DESeqDataSetFromHTSeqCount(sampleTable = sample_table,
+ directory = './',
+ design = formula(opt$X_p))
+dds
+```
+
+# Pre-filtering the dataset.
+
+We can remove the rows that have 0 or 1 count to reduce object size and increase the calculation speed.
+
+* Number of rows before pre-filtering
+```{r}
+nrow(dds)
+```
+
+* Number of rows after pre-filtering
+```{r}
+dds = dds[rowSums(counts(dds)) > 1, ]
+nrow(dds)
+```
+
+# Peek at data {.tabset}
+
+## Count Data
+
+```{r 'count data'}
+datatable(head(counts(dds), 100), style="bootstrap",
+ class="table-condensed", options = list(dom = 'tp', scrollX = TRUE))
+```
+
+## Sample Table
+
+```{r 'sample table'}
+datatable(sample_table, style="bootstrap",
+ class="table-condensed", options = list(dom = 'tp', scrollX = TRUE))
+```
+
+# Sample distance on variance stabilized data {.tabset}
+
+## `rlog` Stabilizing transformation
+
+```{r}
+rld = rlog(dds, blind = FALSE)
+datatable(head(assay(rld), 100), style="bootstrap",
+ class="table-condensed", options = list(dom = 'tp', scrollX = TRUE))
+```
+
+## Sample distance
+
+```{r}
+sampleDists <- dist(t(assay(rld)))
+sampleDists
+```
+
+# Differential expression analysis
+
+```{r}
+dds <- DESeq(dds)
+```
+
+```{r}
+rm("opt")
+save(list=ls(all.names = TRUE), file=opt$X_w)
+```
+
+
diff -r 000000000000 -r 6b12f3dc358a DESeq.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/DESeq.xml Tue Feb 27 13:04:12 2018 -0500
@@ -0,0 +1,90 @@
+
+
+ "some description"
+
+
+ pandoc
+ r-getopt
+ r-rmarkdown
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r 000000000000 -r 6b12f3dc358a DESeq_render.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/DESeq_render.R Tue Feb 27 13:04:12 2018 -0500
@@ -0,0 +1,59 @@
+##============ Sink warnings and errors to a file ==============
+## use the sink() function to wrap all code within it.
+##==============================================================
+zz = file('warnings_and_errors.txt')
+sink(zz)
+sink(zz, type = 'message')
+
+#------------import libraries--------------------
+options(stringsAsFactors = FALSE)
+
+library(getopt)
+library(rmarkdown)
+library(htmltools)
+library(dplyr)
+library(stringi)
+library(DESeq2)
+library(pheatmap)
+library(RColorBrewer)
+library(DT)
+#------------------------------------------------
+
+
+#------------get arguments into R--------------------
+# getopt_specification_matrix(extract_short_flags('fastqc_report.xml')) %>%
+# write.table(file = 'spec.txt', sep = ',', row.names = FALSE, col.names = TRUE, quote = FALSE)
+
+
+spec_matrix = as.matrix()
+opt = getopt(spec_matrix)
+#----------------------------------------------------
+
+
+#-----------using passed arguments in R
+# to define system environment variables---
+do.call(Sys.setenv, opt[-1])
+#----------------------------------------------------
+
+#---------- often used variables ----------------
+# OUTPUT_DIR: path to the output associated directory, which stores all outputs
+# TOOL_DIR: path to the tool installation directory
+# RMD_NAME: name of Rmd file to be rendered
+OUTPUT_DIR = opt$X_d
+TOOL_DIR = opt$X_t
+RMD_NAME = 'DESeq.Rmd'
+OUTPUT_REPORT = opt$X_o
+
+# create the output associated directory to store all outputs
+dir.create(OUT_DIR, recursive = TRUE)
+
+#-----------------render Rmd--------------
+render(paste0(TOOL_DIR, RMD_NAME, sep = '/'), output_file = OUTPUT_REPORT)
+#------------------------------------------
+
+#==============the end==============
+
+
+##--------end of code rendering .Rmd templates----------------
+sink()
+##=========== End of sinking output=============================
\ No newline at end of file
diff -r 000000000000 -r 6b12f3dc358a DESeq_results.Rmd
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/DESeq_results.Rmd Tue Feb 27 13:04:12 2018 -0500
@@ -0,0 +1,109 @@
+---
+title: 'DESeq2: Results'
+output:
+ html_document:
+ number_sections: true
+ toc: true
+ theme: cosmo
+ highlight: tango
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+knitr::opts_chunk$set(
+ echo = ECHO,
+ error = TRUE
+)
+```
+
+
+```{r eval=TRUE}
+# Import workspace
+fcp = file.copy("DESEQ_WORKSPACE", "deseq.RData")
+load("deseq.RData")
+```
+
+# Results {.tabset}
+
+## Result table
+
+```{r}
+cat('--- View the top 100 rows of the result table ---')
+res <- results(dds, contrast = c('CONTRAST_FACTOR', 'TREATMENT_LEVEL', 'CONDITION_LEVEL'))
+write.csv(as.data.frame(res), file = 'deseq_results.csv')
+res_df = as.data.frame(res)[1:100, ]
+datatable(res_df, style="bootstrap", filter = 'top',
+ class="table-condensed", options = list(dom = 'tp', scrollX = TRUE))
+```
+
+## Result summary
+
+```{r}
+summary(res)
+```
+
+
+# MA-plot {.tabset}
+
+
+
+```{r}
+cat('--- Shrinked with Bayesian procedure ---')
+plotMA(res)
+```
+
+
+# Histogram of p values
+
+```{r}
+hist(res$pvalue[res$baseMean > 1], breaks = 0:20/20,
+ col = "grey50", border = "white", main = "",
+ xlab = "Mean normalized count larger than 1")
+```
+
+
+# Visualization {.tabset}
+## Gene clustering
+
+```{r}
+clustering_groups = strsplit("CLUSTERING_FACTORS", ',')[[1]]
+
+topVarGenes <- head(order(rowVars(assay(rld)), decreasing = TRUE), 20)
+mat <- assay(rld)[ topVarGenes, ]
+mat <- mat - rowMeans(mat)
+annotation_col <- as.data.frame(colData(rld)[, clustering_groups])
+colnames(annotation_col) = clustering_groups
+rownames(annotation_col) = colnames(mat)
+pheatmap(mat, annotation_col = annotation_col)
+```
+
+## Sample-to-sample distance
+
+```{r}
+sampleDistMatrix <- as.matrix( sampleDists )
+colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255)
+pheatmap(sampleDistMatrix,
+ clustering_distance_cols = sampleDists,
+ col = colors)
+```
+
+## PCA plot
+
+```{r}
+plotPCA(rld, intgroup = clustering_groups)
+```
+
+## MDS plot {.tabset}
+
+### Data table
+```{r}
+mds <- as.data.frame(colData(rld)) %>%
+ cbind(cmdscale(sampleDistMatrix))
+knitr::kable(mds)
+```
+
+### Plot
+```{r}
+ggplot(mds, aes(x = `1`, y = `2`, col = time)) +
+ geom_point(size = 3) + coord_fixed()
+```
+
diff -r 000000000000 -r 6b12f3dc358a DESeq_results.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/DESeq_results.xml Tue Feb 27 13:04:12 2018 -0500
@@ -0,0 +1,107 @@
+
+
+ pandoc
+ r-getopt
+ r-rmarkdown
+ r-plyr
+ bioconductor-deseq2
+ r-stringr
+ r-highcharter
+ r-dt
+ r-reshape2
+ r-plotly
+ r-formattable
+ r-htmltools
+ r-pheatmap
+
+
+ An R Markdown tool to display DESeq analysis.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ @article{love2014moderated,
+ title={Moderated estimation of fold change and dispersion for RNA-seq data with DESeq2},
+ author={Love, Michael I and Huber, Wolfgang and Anders, Simon},
+ journal={Genome biology},
+ volume={15},
+ number={12},
+ pages={550},
+ year={2014},
+ publisher={BioMed Central}
+ }
+
+
+ @article{allaire2016rmarkdown,
+ title={rmarkdown: Dynamic Documents for R, 2016},
+ author={Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff
+ and Wickham, Hadley and Atkins, Aron and Hyndman, Rob},
+ journal={R package version 0.9},
+ volume={6},
+ year={2016}
+ }
+
+
+ @book{xie2015dynamic,
+ title={Dynamic Documents with R and knitr},
+ author={Xie, Yihui},
+ volume={29},
+ year={2015},
+ publisher={CRC Press}
+ }
+
+
+
\ No newline at end of file
diff -r 000000000000 -r 6b12f3dc358a DESeq_results_render.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/DESeq_results_render.R Tue Feb 27 13:04:12 2018 -0500
@@ -0,0 +1,108 @@
+library(getopt)
+library(rmarkdown)
+library(htmltools)
+library(dplyr)
+library(DESeq2)
+library(pheatmap)
+library(genefilter)
+library(DT)
+library(stringi)
+library(RColorBrewer)
+library(ggplot2)
+
+##============ Sink warnings and errors to a file ==============
+## use the sink() function to wrap all code within it.
+##==============================================================
+zz = file('warnings_and_errors.txt')
+sink(zz)
+sink(zz, type = 'message')
+ ##---------below is the code for rendering .Rmd templates-----
+
+ ##=============STEP 1: handle command line arguments==========
+ ##
+ ##============================================================
+ # column 1: the long flag name
+ # column 2: the short flag alias. A SINGLE character string
+ # column 3: argument mask
+ # 0: no argument
+ # 1: argument required
+ # 2: argument is optional
+ # column 4: date type to which the flag's argument shall be cast.
+ # possible values: logical, integer, double, complex, character.
+ #-------------------------------------------------------------
+ #++++++++++++++++++++ Best practice ++++++++++++++++++++++++++
+ # 1. short flag alias should match the flag in the command section in the XML file.
+ # 2. long flag name can be any legal R variable names
+ # 3. two names in args_list can have common string but one name should not be a part of another name.
+ # for example, one name is "ECHO", if another name is "ECHO_XXX", it will cause problems.
+ #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+ args_list=list()
+ ##------- 1. input data ---------------------
+ args_list$ECHO = c('echo', 'e', '1', 'character')
+ args_list$DESEQ_WORKSPACE = c('deseq_workspace', 'W', '1', 'character')
+ args_list$CONTRAST_FACTOR = c('contrast_factor', 'C', '1', 'character')
+ args_list$TREATMENT_LEVEL = c('treatment_level', 'T', '1', 'character')
+ args_list$CONDITION_LEVEL = c('condition_level', 'K', '1', 'character')
+ args_list$CLUSTERING_FACTORS = c('clustering_factors', 'M', '1', 'character')
+ ##--------2. output report and outputs --------------
+ args_list$REPORT_HTML = c('report_html', 'r', '1', 'character')
+ args_list$REPORT_DIR = c('report_dir', 'd', '1', 'character')
+ args_list$SINK_MESSAGE = c('sink_message', 's', '1', 'character')
+ args_list$DESEQ_RESULTS = c('deseq_results', 'R', '1', 'character')
+ ##--------3. .Rmd templates in the tool directory ----------
+ args_list$deseq_results_RMD = c('deseq_results_rmd', 't', '1', 'character')
+ ##-----------------------------------------------------------
+ opt = getopt(t(as.data.frame(args_list)))
+
+
+
+ ##=======STEP 2: create report directory (optional)==========
+ ##
+ ##===========================================================
+ dir.create(opt$report_dir)
+
+ ##=STEP 3: replace placeholders in .Rmd with argument values=
+ ##
+ ##===========================================================
+ #++ need to replace placeholders with args values one by one+
+ readLines(opt$deseq_results_rmd) %>%
+ (function(x) {
+ gsub('ECHO', opt$echo, x)
+ }) %>%
+ (function(x) {
+ gsub('DESEQ_WORKSPACE', opt$deseq_workspace, x)
+ }) %>%
+ (function(x) {
+ gsub('CONTRAST_FACTOR', opt$contrast_factor, x)
+ }) %>%
+ (function(x) {
+ gsub('TREATMENT_LEVEL', opt$treatment_level, x)
+ }) %>%
+ (function(x) {
+ gsub('CONDITION_LEVEL', opt$condition_level, x)
+ }) %>%
+ (function(x) {
+ gsub('CLUSTERING_FACTORS', opt$clustering_factors, x)
+ }) %>%
+ (function(x) {
+ gsub('REPORT_DIR', opt$report_dir, x)
+ }) %>%
+ (function(x) {
+ gsub('DESEQ_RESULTS', opt$deseq_results, x)
+ }) %>%
+ (function(x) {
+ fileConn = file('deseq_results.Rmd')
+ writeLines(x, con=fileConn)
+ close(fileConn)
+ })
+
+
+ ##=============STEP 4: render .Rmd templates=================
+ ##
+ ##===========================================================
+ render('deseq_results.Rmd', output_file = opt$report_html)
+
+
+ ##--------end of code rendering .Rmd templates----------------
+sink()
+##=========== End of sinking output=============================
\ No newline at end of file
diff -r 000000000000 -r 6b12f3dc358a o-DESeq.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/o-DESeq.xml Tue Feb 27 13:04:12 2018 -0500
@@ -0,0 +1,120 @@
+
+
+ pandoc
+ r-getopt
+ r-rmarkdown
+ r-plyr
+ bioconductor-deseq2
+ r-stringr
+ r-highcharter
+ r-dt
+ r-reshape2
+ r-plotly
+ r-formattable
+ r-htmltools
+ r-pheatmap
+
+
+ An R Markdown tool to perform DESeq analysis.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ @article{love2014moderated,
+ title={Moderated estimation of fold change and dispersion for RNA-seq data with DESeq2},
+ author={Love, Michael I and Huber, Wolfgang and Anders, Simon},
+ journal={Genome biology},
+ volume={15},
+ number={12},
+ pages={550},
+ year={2014},
+ publisher={BioMed Central}
+ }
+
+
+ @article{allaire2016rmarkdown,
+ title={rmarkdown: Dynamic Documents for R, 2016},
+ author={Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff
+ and Wickham, Hadley and Atkins, Aron and Hyndman, Rob},
+ journal={R package version 0.9},
+ volume={6},
+ year={2016}
+ }
+
+
+ @book{xie2015dynamic,
+ title={Dynamic Documents with R and knitr},
+ author={Xie, Yihui},
+ volume={29},
+ year={2015},
+ publisher={CRC Press}
+ }
+
+
+
\ No newline at end of file
diff -r 000000000000 -r 6b12f3dc358a o-DESeq_render.R
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/o-DESeq_render.R Tue Feb 27 13:04:12 2018 -0500
@@ -0,0 +1,93 @@
+library(getopt)
+library(rmarkdown)
+library(htmltools)
+library(dplyr)
+library(stringi)
+library(DESeq2)
+library(pheatmap)
+library(RColorBrewer)
+library(DT)
+
+##============ Sink warnings and errors to a file ==============
+## use the sink() function to wrap all code within it.
+##==============================================================
+zz = file('warnings_and_errors.txt')
+sink(zz)
+sink(zz, type = 'message')
+ ##---------below is the code for rendering .Rmd templates-----
+
+ ##=============STEP 1: handle command line arguments==========
+ ##
+ ##============================================================
+ # column 1: the long flag name
+ # column 2: the short flag alias. A SINGLE character string
+ # column 3: argument mask
+ # 0: no argument
+ # 1: argument required
+ # 2: argument is optional
+ # column 4: date type to which the flag's argument shall be cast.
+ # possible values: logical, integer, double, complex, character.
+ #-------------------------------------------------------------
+ #++++++++++++++++++++ Best practice ++++++++++++++++++++++++++
+ # 1. short flag alias should match the flag in the command section in the XML file.
+ # 2. long flag name can be any legal R variable names
+ # 3. two names in args_list can have common string but one name should not be a part of another name.
+ # for example, one name is "ECHO", if another name is "ECHO_XXX", it will cause problems.
+ #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+ args_list=list()
+ ##------- 1. input data ---------------------
+ args_list$ECHO = c('echo', 'e', '1', 'character')
+ args_list$COUNT_FILE_PATHS = c('count_file_paths', 'P', '1', 'character')
+ args_list$COUNT_FILE_NAMES = c('count_file_names', 'N', '1', 'character')
+ args_list$SAMPLE_TABLE = c('sample_table', 'S', '1', 'character')
+ args_list$DESIGN_FORMULA = c('design_formula', 'p', '1', 'character')
+ ##--------2. output report and outputs --------------
+ args_list$REPORT_HTML = c('report_html', 'r', '1', 'character')
+ args_list$REPORT_DIR = c('report_dir', 'd', '1', 'character')
+ args_list$SINK_MESSAGE = c('sink_message', 's', '1', 'character')
+ args_list$WORKSPACE = c('deseq_workspace', 'w', '1', 'character')
+ ##--------3. .Rmd templates in the tool directory ----------
+ args_list$deseq_RMD = c('deseq_rmd', 't', '1', 'character')
+ ##-----------------------------------------------------------
+ opt = getopt(t(as.data.frame(args_list)))
+
+
+
+ ##=======STEP 2: create report directory (optional)==========
+ ##
+ ##===========================================================
+ dir.create(opt$report_dir)
+
+ ##=STEP 3: replace placeholders in .Rmd with argument values=
+ ##
+ ##===========================================================
+ #++ need to replace placeholders with args values one by one+
+ readLines(opt$deseq_rmd) %>%
+ (function(x) {
+ gsub('ECHO', opt$echo, x)
+ }) %>%
+ (function(x) {
+ gsub('DESEQ_WORKSPACE', opt$deseq_workspace, x)
+ }) %>%
+ (function(x) {
+ gsub('DESIGN_FORMULA', opt$design_formula, x)
+ }) %>%
+ (function(x) {
+ gsub('REPORT_DIR', opt$report_dir, x)
+ }) %>%
+ (function(x) {
+ fileConn = file('deseq.Rmd')
+ writeLines(x, con=fileConn)
+ close(fileConn)
+ })
+
+
+ ##=============STEP 4: render .Rmd templates=================
+ ##
+ ##===========================================================
+ render('deseq.Rmd', output_file = opt$report_html)
+
+
+ ##--------end of code rendering .Rmd templates----------------
+sink()
+##=========== End of sinking output=============================
\ No newline at end of file