Mercurial > repos > mingchen0919 > aurora_deseq2_site
changeset 2:706ecfe0bac3 draft
initial 2.2.0
author | mingchen0919 |
---|---|
date | Fri, 09 Mar 2018 08:53:17 -0500 |
parents | b81ab8b769f0 |
children | 1faa864c0839 |
files | DESeq.Rmd DESeq.xml DESeq_01.Rmd DESeq_02.Rmd DESeq_03.Rmd DESeq_04.Rmd DESeq_05.Rmd DESeq_index.Rmd DESeq_render.R DESeq_results.Rmd DESeq_results.xml DESeq_results_01.Rmd DESeq_results_02.Rmd DESeq_results_03.Rmd DESeq_results_04.Rmd DESeq_results_index.Rmd DESeq_results_render.R DESeq_results_site.yml DESeq_site.yml deseq2_01_run_deseq.Rmd deseq2_02_results.Rmd deseq2_03_MA_plot.Rmd deseq2_04_heatmap.Rmd deseq2_05_pca.Rmd deseq2_site.sh deseq2_site.xml deseq2_site_index.Rmd deseq2_site_render.R deseq2_site_site.yml |
diffstat | 27 files changed, 386 insertions(+), 887 deletions(-) [+] |
line wrap: on
line diff
--- a/DESeq.Rmd Tue Feb 27 23:55:18 2018 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,97 +0,0 @@ ---- -title: 'DESeq2: Perform DESeq analysis' -output: - html_document: - number_sections: true - toc: true - theme: cosmo - highlight: tango ---- - -```{r setup, include=FALSE, warning=FALSE, message=FALSE} -knitr::opts_chunk$set( - echo = as.logical(opt$X_e), - error = TRUE -) -``` - -# `DESeqDataSet` object - -```{r 'DESeqDataSet object'} -count_file_paths = strsplit(opt$X_P, ',')[[1]] -count_file_names = strsplit(opt$X_N, ',')[[1]] -sample_table = read.table(opt$X_S, header = TRUE) -row.names(sample_table) = sample_table[,2] -sample_table = sample_table[count_file_names, ] - -## copy count files into OUTPUT_DIR/counts -dir.create(paste0(OUTPUT_DIR, '/counts'), recursive = TRUE) -file_copy = file.copy(count_file_paths, paste0(OUTPUT_DIR, '/counts/', count_file_names), overwrite = TRUE) - -## DESeqDataSet object -dds = DESeqDataSetFromHTSeqCount(sampleTable = sample_table, - directory = paste0(OUTPUT_DIR, '/counts'), - design = formula(opt$X_p)) -dds -``` - -# Pre-filtering the dataset. - -We can remove the rows that have 0 or 1 count to reduce object size and increase the calculation speed. 
- -* Number of rows before pre-filtering -```{r} -nrow(dds) -``` - -* Number of rows after pre-filtering -```{r} -dds = dds[rowSums(counts(dds)) > 1, ] -nrow(dds) -``` - -# Peek at data {.tabset} - -## Count Data - -```{r 'count data'} -datatable(head(counts(dds), 100), style="bootstrap", - class="table-condensed", options = list(dom = 'tp', scrollX = TRUE)) -``` - -## Sample Table - -```{r 'sample table'} -datatable(sample_table, style="bootstrap", - class="table-condensed", options = list(dom = 'tp', scrollX = TRUE)) -``` - -# Sample distance on variance stabilized data {.tabset} - -## `rlog` Stabilizing transformation - -```{r} -rld = rlog(dds, blind = FALSE) -datatable(head(assay(rld), 100), style="bootstrap", - class="table-condensed", options = list(dom = 'tp', scrollX = TRUE)) -``` - -## Sample distance - -```{r} -sampleDists <- dist(t(assay(rld))) -sampleDists -``` - -# Differential expression analysis - -```{r} -dds <- DESeq(dds) -``` - -```{r echo=FALSE} -# save objects except for opt. -save(list=ls()[ls() != "opt"], file=opt$X_w) -``` - -
--- a/DESeq.xml Tue Feb 27 23:55:18 2018 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,106 +0,0 @@ -<tool name="DESeq2: Analysis" id='deseq2' version="2.0.1"> - <description> - perform differential expression analysis - </description> - <requirements> - <requirement type="package" version="1.15.0.6-0">pandoc</requirement> - <requirement type="package" version="1.20.0">r-getopt</requirement> - <requirement type="package" version="1.6">r-rmarkdown</requirement> - <requirement type="package" version="1.14.1">bioconductor-deseq2</requirement> - <requirement type="package" version="0.2">r-dt</requirement> - <requirement type="package" version="1.0.8">r-pheatmap</requirement> - </requirements> - <stdio> - <regex match="XXX" source="stderr" level="warning" - description="Check the warnings_and_errors.txt file for more details."/> - </stdio> - <command><![CDATA[ - - - Rscript '${__tool_directory__}/DESeq_render.R' - - -e $echo - - -o $report - -d $report.files_path - -s $sink_message - -t '${__tool_directory__}' - - - ##----- code chunk to get file paths and raw file names for a multiple inputs data field ---- - #set $sep = '' - #set $count_file_paths = '' - #set $count_file_names = '' - #for $count_file in $count_files: - #set $count_file_paths += $sep + str($count_file) - #set $count_file_names += $sep + str($count_file.name) - #set $sep = ',' - #end for - ##----------------- end for getting file names and file paths ------------------------------ - -P '$count_file_paths' - -N '$count_file_names' - -S $sample_table - -p '$design_formula' - -w $deseq_workspace - - - ]]></command> - <inputs> - <param type="boolean" name="echo" truevalue="TRUE" falsevalue="FALSE" checked="false" - label="Display analysis code in report?"/> - <param type="data" name="count_files" format="txt" multiple="true" label="Count files from htseq-count"/> - <param type="data" name="sample_table" format="txt" multiple="false" label="sample table file" - help="The sample table file contains 
a table. The first column is the sample name, the second column is - the count file name and the rest of columns are treatment columns. The file names in this table have - to be in the same order as the count files uploaded in the previous step. "/> - <param type="text" name="design_formula" value="~ condition_1 + condition_2" label="Design formula" - help="The simplest design formula for differential expression would be ~ condition, where condition - is a column in colData(dds) that specifies which of two (or more groups) the samples belong to"> - <sanitizer> - <valid initial="default"> - <add preset="string.printable"/> - <add value="~"/> - </valid> - </sanitizer> - </param> - </inputs> - <outputs> - <data name="report" format="html" label="DESeq Analysis on ${on_string}"/> - <data format="txt" name="sink_message" label="Warnings and Errors on" - from_work_dir="warnings_and_errors.txt"/> - <data name="deseq_workspace" format="rdata" label="R workspace: DESeq analysis on ${on_string}"/> - </outputs> - <citations> - <citation type="bibtex"> - @article{love2014moderated, - title={Moderated estimation of fold change and dispersion for RNA-seq data with DESeq2}, - author={Love, Michael I and Huber, Wolfgang and Anders, Simon}, - journal={Genome biology}, - volume={15}, - number={12}, - pages={550}, - year={2014}, - publisher={BioMed Central} - } - </citation> - <citation type="bibtex"><![CDATA[ - @article{allaire2016rmarkdown, - title={rmarkdown: Dynamic Documents for R, 2016}, - author={Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff - and Wickham, Hadley and Atkins, Aron and Hyndman, Rob}, - journal={R package version 0.9}, - volume={6}, - year={2016} - } - ]]></citation> - <citation type="bibtex"><![CDATA[ - @book{xie2015dynamic, - title={Dynamic Documents with R and knitr}, - author={Xie, Yihui}, - volume={29}, - year={2015}, - publisher={CRC Press} - } - ]]></citation> - </citations> -</tool>
--- a/DESeq_01.Rmd Tue Feb 27 23:55:18 2018 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,30 +0,0 @@ ---- -output: html_document ---- - -```{r setup, include=FALSE, warning=FALSE, message=FALSE} -knitr::opts_chunk$set( - echo = as.logical(opt$X_e), - error = TRUE -) -``` - -# `DESeqDataSet` object - -```{r 'DESeqDataSet object'} -count_file_paths = strsplit(opt$X_P, ',')[[1]] -count_file_names = strsplit(opt$X_N, ',')[[1]] -sample_table = read.table(opt$X_S, header = TRUE) -row.names(sample_table) = sample_table[,2] -sample_table = sample_table[count_file_names, ] - -## copy count files into OUTPUT_DIR/counts -dir.create(paste0(OUTPUT_DIR, '/counts'), recursive = TRUE) -file_copy = file.copy(count_file_paths, paste0(OUTPUT_DIR, '/counts/', count_file_names), overwrite = TRUE) - -## DESeqDataSet object -dds = DESeqDataSetFromHTSeqCount(sampleTable = sample_table, - directory = paste0(OUTPUT_DIR, '/counts'), - design = formula(opt$X_p)) -dds -```
--- a/DESeq_02.Rmd Tue Feb 27 23:55:18 2018 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,27 +0,0 @@ ---- -output: html_document ---- - -```{r setup, include=FALSE, warning=FALSE, message=FALSE} -knitr::opts_chunk$set( - echo = as.logical(opt$X_e), - error = TRUE -) -``` - - - -# Pre-filtering the dataset. - -We can remove the rows that have 0 or 1 count to reduce object size and increase the calculation speed. - -* Number of rows before pre-filtering -```{r} -nrow(dds) -``` - -* Number of rows after pre-filtering -```{r} -dds = dds[rowSums(counts(dds)) > 1, ] -nrow(dds) -```
--- a/DESeq_03.Rmd Tue Feb 27 23:55:18 2018 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,27 +0,0 @@ ---- -output: html_document ---- - -```{r setup, include=FALSE, warning=FALSE, message=FALSE} -knitr::opts_chunk$set( - echo = as.logical(opt$X_e), - error = TRUE -) -``` - - -# Peek at data {.tabset} - -## Count Data - -```{r 'count data'} -datatable(head(counts(dds), 100), style="bootstrap", - class="table-condensed", options = list(dom = 'tp', scrollX = TRUE)) -``` - -## Sample Table - -```{r 'sample table'} -datatable(sample_table, style="bootstrap", - class="table-condensed", options = list(dom = 'tp', scrollX = TRUE)) -```
--- a/DESeq_04.Rmd Tue Feb 27 23:55:18 2018 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,29 +0,0 @@ ---- -output: html_document ---- - -```{r setup, include=FALSE, warning=FALSE, message=FALSE} -knitr::opts_chunk$set( - echo = as.logical(opt$X_e), - error = TRUE -) -``` - - - -# Sample distance on variance stabilized data {.tabset} - -## `rlog` Stabilizing transformation - -```{r} -rld = rlog(dds, blind = FALSE) -datatable(head(assay(rld), 100), style="bootstrap", - class="table-condensed", options = list(dom = 'tp', scrollX = TRUE)) -``` - -## Sample distance - -```{r} -sampleDists <- dist(t(assay(rld))) -sampleDists -```
--- a/DESeq_05.Rmd Tue Feb 27 23:55:18 2018 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,23 +0,0 @@ ---- -output: html_document ---- - -```{r setup, include=FALSE, warning=FALSE, message=FALSE} -knitr::opts_chunk$set( - echo = as.logical(opt$X_e), - error = TRUE -) -``` - - -# Differential expression analysis - -```{r} -dds <- DESeq(dds) -``` - -```{r echo=FALSE} -# save useful objects. -save(dds, rld, sample_table, sampleDists, zz, file=opt$X_w) -``` -
--- a/DESeq_render.R Tue Feb 27 23:55:18 2018 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,86 +0,0 @@ -##============ Sink warnings and errors to a file ============== -## use the sink() function to wrap all code within it. -##============================================================== -zz = file('warnings_and_errors.txt') -sink(zz) -sink(zz, type = 'message') - -#------------import libraries-------------------- -options(stringsAsFactors = FALSE) - -library(getopt) -library(rmarkdown) -library(DESeq2) -library(pheatmap) -library(DT) -library(ggplot2) -library(genefilter) -library(RColorBrewer) -#------------------------------------------------ - - -#------------get arguments into R-------------------- -# getopt_specification_matrix(extract_short_flags('fastqc_report.xml')) %>% -# write.table(file = 'spec.txt', sep = ',', row.names = FALSE, col.names = TRUE, quote = FALSE) - - -spec_matrix = as.matrix( - data.frame(stringsAsFactors=FALSE, - long_flags = c("X_e", "X_o", "X_d", "X_s", "X_t", "X_P", "X_N", - "X_S", "X_p", "X_w"), - short_flags = c("e", "o", "d", "s", "t", "P", "N", "S", "p", "w"), - argument_mask_flags = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), - data_type_flags = c("character", "character", "character", "character", - "character", "character", "character", - "character", "character", "character") - ) -) -opt = getopt(spec_matrix) -#---------------------------------------------------- - - -#-----------using passed arguments in R -# to define system environment variables--- -do.call(Sys.setenv, opt[-1]) -#---------------------------------------------------- - -#---------- often used variables ---------------- -# OUTPUT_REPORT: path to galaxy output report -# OUTPUT_DIR: path to the output associated directory, which stores all outputs -# TOOL_DIR: path to the tool installation directory -OUTPUT_DIR = opt$X_d -TOOL_DIR = opt$X_t -OUTPUT_REPORT = opt$X_o - - -# create the output associated directory to store all outputs 
-dir.create(OUTPUT_DIR, recursive = TRUE) - -#-----------------render site-------------- -# copy site generating materials into OUTPUT_DIR -dir.create(paste0(OUTPUT_DIR, '/site_generator'), recursive = TRUE) -command_cp = paste0('cp -r ', TOOL_DIR, '/DESeq_0*.Rmd ', OUTPUT_DIR, '/site_generator') -system(command_cp) -system(paste0('cp -r ', TOOL_DIR, '/DESeq_site.yml ', OUTPUT_DIR, '/site_generator/_site.yml')) -system(paste0('cp -r ', TOOL_DIR, '/DESeq_index.Rmd ', OUTPUT_DIR, '/site_generator/index.Rmd')) -# render site to OUTPUT_DIR/_site, this is configured in the "_site.yml" file -dir.create(paste0(OUTPUT_DIR, '/_site')) -render_site(input = paste0(OUTPUT_DIR, '/site_generator')) -# remove site generating materials from output associated directory -print(unlink(paste0(OUTPUT_DIR, '/site_generator'), recursive = TRUE)) -# move _site/* into output associated directory -move_cmd = paste0('mv ', OUTPUT_DIR, '/_site/* ', OUTPUT_DIR) -system(move_cmd) -#------------------------------------------ - -#-----link index.html to output----- -cp_index = paste0('cp ', OUTPUT_DIR, '/index.html ', OUTPUT_REPORT) -system(cp_index) -#----------------------------------- - -#==============the end============== - - -##--------end of code rendering .Rmd templates---------------- -sink() -##=========== End of sinking output============================= \ No newline at end of file
--- a/DESeq_results.Rmd Tue Feb 27 23:55:18 2018 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,109 +0,0 @@ ---- -title: 'DESeq2: Results' -output: - html_document: - number_sections: true - toc: true - theme: cosmo - highlight: tango ---- - -```{r setup, include=FALSE, warning=FALSE, message=FALSE} -knitr::opts_chunk$set( - echo = as.logical(opt$X_e), - error = TRUE -) -``` - - -```{r eval=TRUE} -# Import workspace -# fcp = file.copy(opt$X_W, "deseq.RData") -load(opt$X_W) -``` - -# Results {.tabset} - -## Result table - -```{r} -cat('--- View the top 100 rows of the result table ---') -res <- results(dds, contrast = c(opt$X_C, opt$X_T, opt$X_K)) -write.csv(as.data.frame(res), file = opt$X_R) -res_df = as.data.frame(res)[1:100, ] -datatable(res_df, style="bootstrap", filter = 'top', - class="table-condensed", options = list(dom = 'tp', scrollX = TRUE)) -``` - -## Result summary - -```{r} -summary(res) -``` - - -# MA-plot {.tabset} - - - -```{r} -cat('--- Shrinked with Bayesian procedure ---') -plotMA(res) -``` - - -# Histogram of p values - -```{r} -hist(res$pvalue[res$baseMean > 1], breaks = 0:20/20, - col = "grey50", border = "white", main = "", - xlab = "Mean normalized count larger than 1") -``` - - -# Visualization {.tabset} -## Gene clustering - -```{r} -clustering_groups = strsplit(opt$X_M, ',')[[1]] - -topVarGenes <- head(order(rowVars(assay(rld)), decreasing = TRUE), 20) -mat <- assay(rld)[ topVarGenes, ] -mat <- mat - rowMeans(mat) -annotation_col <- as.data.frame(colData(rld)[, clustering_groups]) -colnames(annotation_col) = clustering_groups -rownames(annotation_col) = colnames(mat) -pheatmap(mat, annotation_col = annotation_col) -``` - -## Sample-to-sample distance - -```{r} -sampleDistMatrix <- as.matrix( sampleDists ) -colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255) -pheatmap(sampleDistMatrix, - clustering_distance_cols = sampleDists, - col = colors) -``` - -## PCA plot - -```{r} -plotPCA(rld, intgroup = clustering_groups) 
-``` - -## MDS plot {.tabset} - -### Data table -```{r} -mds <- as.data.frame(colData(rld)) %>% - cbind(cmdscale(sampleDistMatrix)) -knitr::kable(mds) -``` - -### Plot -```{r} -ggplot(mds, aes(x = `1`, y = `2`, col = time)) + - geom_point(size = 3) + coord_fixed() -``` -
--- a/DESeq_results.xml Tue Feb 27 23:55:18 2018 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,99 +0,0 @@ -<tool id="deseq_results" name="DESeq2: Results" version="2.0.1"> - <requirements> - <requirement type="package" version="1.15.0.6-0">pandoc</requirement> - <requirement type="package" version="1.20.0">r-getopt</requirement> - <requirement type="package" version="1.6">r-rmarkdown</requirement> - <requirement type="package" version="1.14.1">bioconductor-deseq2</requirement> - <requirement type="package" version="0.2">r-dt</requirement> - <requirement type="package" version="1.0.8">r-pheatmap</requirement> - </requirements> - <description> - display DESeq2 analysis results. - </description> - <stdio> - <!--redirecting stderr to a file. "XXX" is used to match with nothing so that tool running won't be interrupted during testing--> - <regex match="XXX" - source="stderr" - level="warning" - description="Check the warnings_and_errors.txt file for more details."/> - </stdio> - <command> - <![CDATA[ - - Rscript '${__tool_directory__}/DESeq_results_render.R' - - -e $echo - -o $report - -d $report.files_path - -s $sink_message - -t '${__tool_directory__}' - - ## 1. input data - - -W $deseq_workspace - -C '$contrast_factor' - -T '$treatment' - -K '$condition' - - -M '$clustering_factors' - - ## 2. 
output report and report site directory - -R $deseq_results - - - ]]> - </command> - <inputs> - <param type="boolean" name="echo" truevalue="TRUE" falsevalue="FALSE" checked="false" - label="Display analysis code in report?"/> - <param type="data" name="deseq_workspace" format="rdata" multiple="false" optional="false" - label="Workspace from tool DESeq2: DESeq"/> - <param type="text" name="contrast_factor" label="Factor" optional="false" - help="the name of a factor in the design formula"/> - <param type="text" name="treatment" label="Treatment level" optional="false" - help=" the name of the numerator level for the fold change"/> - <param type="text" name="condition" label="Condition level" optional="false" - help=" the name of the denominator level for the fold change"/> - <param type="text" name="clustering_factors" title="Gene clustering factors" optional="false" - label="factors of interest for clustering samples and PCA plot" - help="A single factor or multiple factors from the design formula. 
Multiple factors are separated by comma (,)."/> - </inputs> - <outputs> - <data format="html" name="report" label="DESeq results report on ${on_string}" /> - <data format="txt" name="sink_message" label="Warnings and Errors" from_work_dir="warnings_and_errors.txt"/> - <data format="csv" name="deseq_results" label="DESeq results on ${on_string}" from_work_dir="deseq_results.csv" /> - </outputs> - <citations> - <citation type="bibtex"> - @article{love2014moderated, - title={Moderated estimation of fold change and dispersion for RNA-seq data with DESeq2}, - author={Love, Michael I and Huber, Wolfgang and Anders, Simon}, - journal={Genome biology}, - volume={15}, - number={12}, - pages={550}, - year={2014}, - publisher={BioMed Central} - } - </citation> - <citation type="bibtex"> - @article{allaire2016rmarkdown, - title={rmarkdown: Dynamic Documents for R, 2016}, - author={Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff - and Wickham, Hadley and Atkins, Aron and Hyndman, Rob}, - journal={R package version 0.9}, - volume={6}, - year={2016} - } - </citation> - <citation type="bibtex"> - @book{xie2015dynamic, - title={Dynamic Documents with R and knitr}, - author={Xie, Yihui}, - volume={29}, - year={2015}, - publisher={CRC Press} - } - </citation> - </citations> -</tool> \ No newline at end of file
--- a/DESeq_results_01.Rmd Tue Feb 27 23:55:18 2018 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,30 +0,0 @@ ---- -output: html_document ---- - -```{r setup, include=FALSE, warning=FALSE, message=FALSE} -knitr::opts_chunk$set( - echo = as.logical(opt$X_e), - error = TRUE -) -``` - - -```{r eval=TRUE} -# Import workspace -# fcp = file.copy(opt$X_W, "deseq.RData") -load(opt$X_W) -``` - -# Results {.tabset} - -## Result table - -```{r} -cat('--- View the top 100 rows of the result table ---') -res <- results(dds, contrast = c(opt$X_C, opt$X_T, opt$X_K)) -write.csv(as.data.frame(res), file = opt$X_R) -res_df = as.data.frame(res)[1:100, ] -datatable(res_df, style="bootstrap", filter = 'top', - class="table-condensed", options = list(dom = 'tp', scrollX = TRUE)) -```
--- a/DESeq_results_02.Rmd Tue Feb 27 23:55:18 2018 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,17 +0,0 @@ ---- -output: html_document ---- - -```{r setup, include=FALSE, warning=FALSE, message=FALSE} -knitr::opts_chunk$set( - echo = as.logical(opt$X_e), - error = TRUE -) -``` - -# MA-plot {.tabset} - -```{r} -cat('--- Shrinked with Bayesian procedure ---') -plotMA(res) -```
--- a/DESeq_results_03.Rmd Tue Feb 27 23:55:18 2018 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,20 +0,0 @@ ---- -output: html_document ---- - -```{r setup, include=FALSE, warning=FALSE, message=FALSE} -knitr::opts_chunk$set( - echo = as.logical(opt$X_e), - error = TRUE -) -``` - - -# Histogram of p values - -```{r} -hist(res$pvalue[res$baseMean > 1], breaks = 0:20/20, - col = "grey50", border = "white", main = "", - xlab = "Mean normalized count larger than 1") -``` -
--- a/DESeq_results_04.Rmd Tue Feb 27 23:55:18 2018 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,57 +0,0 @@ ---- -output: html_document ---- - -```{r setup, include=FALSE, warning=FALSE, message=FALSE} -knitr::opts_chunk$set( - echo = as.logical(opt$X_e), - error = TRUE -) -``` - - -# Visualization {.tabset} -## Gene clustering - -```{r} -clustering_groups = strsplit(opt$X_M, ',')[[1]] - -topVarGenes <- head(order(rowVars(assay(rld)), decreasing = TRUE), 20) -mat <- assay(rld)[ topVarGenes, ] -mat <- mat - rowMeans(mat) -annotation_col <- as.data.frame(colData(rld)[, clustering_groups]) -colnames(annotation_col) = clustering_groups -rownames(annotation_col) = colnames(mat) -pheatmap(mat, annotation_col = annotation_col) -``` - -## Sample-to-sample distance - -```{r} -sampleDistMatrix <- as.matrix( sampleDists ) -colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255) -pheatmap(sampleDistMatrix, - clustering_distance_cols = sampleDists, - col = colors) -``` - -## PCA plot - -```{r} -plotPCA(rld, intgroup = clustering_groups) -``` - -## MDS plot {.tabset} - -### Data table -```{r} -mds <- as.data.frame(colData(rld)) %>% - cbind(cmdscale(sampleDistMatrix)) -knitr::kable(mds) -``` - -### Plot -```{r} -ggplot(mds, aes(x = `1`, y = `2`, col = time)) + - geom_point(size = 3) + coord_fixed() -```
--- a/DESeq_results_render.R Tue Feb 27 23:55:18 2018 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,88 +0,0 @@ -##============ Sink warnings and errors to a file ============== -## use the sink() function to wrap all code within it. -##============================================================== -zz = file('warnings_and_errors.txt') -sink(zz) -sink(zz, type = 'message') - -#------------import libraries-------------------- -options(stringsAsFactors = FALSE) - -library(getopt) -library(rmarkdown) -library(DESeq2) -library(pheatmap) -library(DT) -library(ggplot2) -library(genefilter) -library(RColorBrewer) -#------------------------------------------------ - - -#------------get arguments into R-------------------- -# getopt_specification_matrix(extract_short_flags('fastqc_report.xml')) %>% -# write.table(file = 'spec.txt', sep = ',', row.names = FALSE, col.names = TRUE, quote = FALSE) - - -spec_matrix = as.matrix( - data.frame(stringsAsFactors=FALSE, - long_flags = c("X_e", "X_W", "X_C", "X_T", "X_K", "X_M", "X_o", - "X_d", "X_s", "X_R", "X_t"), - short_flags = c("e", "W", "C", "T", "K", "M", "o", "d", "s", "R", - "t"), - argument_mask_flags = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), - data_type_flags = c("character", "character", "character", "character", - "character", "character", "character", - "character", "character", "character", "character") - ) -) -opt = getopt(spec_matrix) -opt -#---------------------------------------------------- - - -#-----------using passed arguments in R -# to define system environment variables--- -do.call(Sys.setenv, opt[-1]) -#---------------------------------------------------- - -#---------- often used variables ---------------- -# OUTPUT_REPORT: path to galaxy output report -# OUTPUT_DIR: path to the output associated directory, which stores all outputs -# TOOL_DIR: path to the tool installation directory -OUTPUT_DIR = opt$X_d -TOOL_DIR = opt$X_t -OUTPUT_REPORT = opt$X_o - - -# create the output associated 
directory to store all outputs -dir.create(OUTPUT_DIR, recursive = TRUE) - -#-----------------render site-------------- -# copy site generating materials into OUTPUT_DIR -dir.create(paste0(OUTPUT_DIR, '/site_generator'), recursive = TRUE) -command_cp = paste0('cp -r ', TOOL_DIR, '/DESeq_results_*.Rmd ', OUTPUT_DIR, '/site_generator') -system(command_cp) -system(paste0('cp -r ', TOOL_DIR, '/DESeq_results_site.yml ', OUTPUT_DIR, '/site_generator/_site.yml')) -system(paste0('cp -r ', TOOL_DIR, '/DESeq_results_index.Rmd ', OUTPUT_DIR, '/site_generator/index.Rmd')) -# render site to OUTPUT_DIR/_site, this is configured in the "_site.yml" file -dir.create(paste0(OUTPUT_DIR, '/_site')) -render_site(input = paste0(OUTPUT_DIR, '/site_generator')) -# remove site generating materials from output associated directory -print(unlink(paste0(OUTPUT_DIR, '/site_generator'), recursive = TRUE)) -# move _site/* into output associated directory -move_cmd = paste0('mv ', OUTPUT_DIR, '/_site/* ', OUTPUT_DIR) -system(move_cmd) -#------------------------------------------ - -#-----link index.html to output----- -cp_index = paste0('cp ', OUTPUT_DIR, '/index.html ', OUTPUT_REPORT) -system(cp_index) -#----------------------------------- - -#==============the end============== - - -##--------end of code rendering .Rmd templates---------------- -sink() -##=========== End of sinking output============================= \ No newline at end of file
--- a/DESeq_results_site.yml Tue Feb 27 23:55:18 2018 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,21 +0,0 @@ -name: "Analysis Report" -output_dir: "../_site" -navbar: - title: "" - type: inverse - left: - - text: "Home" - icon: fa-home - href: index.html - - text: "Results" - href: DESeq_results_01.html - - text: "MA-plot" - href: DESeq_results_02.html - - text: "Histogram of p values" - href: DESeq_results_03.html - - text: "Visualization" - href: DESeq_results_04.html -output: - html_document: - theme: cosmo - highlight: textmate \ No newline at end of file
--- a/DESeq_site.yml Tue Feb 27 23:55:18 2018 -0500 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,21 +0,0 @@ -name: "Analysis Report" -output_dir: "../_site" -navbar: - title: "" - type: inverse - left: - - text: "Home" - icon: fa-home - href: index.html - - text: "DESeqDataSet" - href: DESeq_01.html - - text: "Pre-filtering" - href: DESeq_02.html - - text: "Data" - href: DESeq_03.html - - text: "Sample distance" - href: DESeq_04.html -output: - html_document: - theme: cosmo - highlight: textmate \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/deseq2_01_run_deseq.Rmd Fri Mar 09 08:53:17 2018 -0500 @@ -0,0 +1,25 @@ +--- +title: 'DESeq2 Analysis' +output: html_document +--- + +```{r setup, include=FALSE, warning=FALSE, message=FALSE} +knitr::opts_chunk$set( + echo = as.logical(opt$X_e), + error = TRUE +) +``` + +## DESeq2 analysis + +```{r} +# load count data +load(opt$X_A) +# load column data +coldata = read.csv(opt$X_B, row.names = 1, header = TRUE)[colnames(count_data), ] +dds = DESeqDataSetFromMatrix(countData = count_data, + colData = coldata, + design = formula(opt$X_C)) +dds = DESeq(dds, test = opt$X_G, fitType = opt$X_H) +``` +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/deseq2_02_results.Rmd Fri Mar 09 08:53:17 2018 -0500 @@ -0,0 +1,29 @@ +--- +title: 'DESeq2 Analysis' +output: html_document +--- + +```{r setup, include=FALSE, warning=FALSE, message=FALSE} +knitr::opts_chunk$set( + echo = as.logical(opt$X_e), + error = TRUE +) +``` + + +## Results + +```{r} +res = results(dds, contrast = c(opt$X_D, opt$X_E, opt$X_F), alpha = opt$X_I) +DT::datatable(as.data.frame(res)) +``` + +```{r} +# significant genes +sig_res = res[(res$padj < opt$X_I) & !is.na(res$padj), ] +write.csv(sig_res, file = paste0(opt$X_d, '/significant_genes.csv'), quote = FALSE) +``` + +```{bash echo=FALSE} +cp ${X_d}/significant_genes.csv ${X_J} +``` \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/deseq2_03_MA_plot.Rmd Fri Mar 09 08:53:17 2018 -0500 @@ -0,0 +1,34 @@ +--- +title: 'DESeq2 Analysis' +output: html_document +--- + +```{r setup, include=FALSE, warning=FALSE, message=FALSE} +knitr::opts_chunk$set( + echo = as.logical(opt$X_e), + error = TRUE +) +``` + + +## MA-plot + +```{r warning=FALSE, message=FALSE} +df = data.frame(ID = rownames(res), + mean = res$baseMean, + lfc = res$log2FoldChange, + padj = res$padj, + stringsAsFactors = FALSE) +cols = vector(mode='character', length = nrow(res)) +cols[(res$padj < opt$X_I) & !is.na(res$padj)] = paste0('< ', opt$X_I) +cols[(res$padj >= opt$X_I) & !is.na(res$padj)] = paste0('>= ', opt$X_I) +cols[cols == ''] = 'NA' +df$col = cols +p = ggplot(data = df) + + geom_point(mapping = aes(x = log(mean), y = lfc, col = cols, key = ID)) + + scale_x_continuous(name = 'Log(mean)') + + scale_y_continuous(name = 'Log fold change') + + scale_color_discrete(name = 'Adjusted P')+ + theme_classic() +ggplotly(p) +```
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/deseq2_04_heatmap.Rmd Fri Mar 09 08:53:17 2018 -0500 @@ -0,0 +1,22 @@ +--- +title: 'DESeq2 Analysis' +output: html_document +--- + +```{r setup, include=FALSE, warning=FALSE, message=FALSE} +knitr::opts_chunk$set( + echo = as.logical(opt$X_e), + error = TRUE +) +``` + + +## Heatmap of count matrix + +```{r} +ntd <- normTransform(dds) +select <- order(rowMeans(counts(dds,normalized=TRUE)), + decreasing=TRUE)[1:20] +df <- as.data.frame(colData(dds)[, -ncol(colData(dds))]) +pheatmap(assay(ntd)[select,], annotation_col=df) +```
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/deseq2_05_pca.Rmd Fri Mar 09 08:53:17 2018 -0500 @@ -0,0 +1,21 @@ +--- +title: 'DESeq2 Analysis' +output: html_document +--- + +```{r setup, include=FALSE, warning=FALSE, message=FALSE} +knitr::opts_chunk$set( + echo = as.logical(opt$X_e), + error = TRUE +) +``` + +## Principal component plot + +```{r} +vsd <- vst(dds, blind=FALSE) +p = plotPCA(vsd, intgroup=c(opt$X_D)) + + scale_color_discrete(name = 'Group') + + theme_classic() +ggplotly(p) +```
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/deseq2_site.sh Fri Mar 09 08:53:17 2018 -0500 @@ -0,0 +1,18 @@ +Rscript '${__tool_directory__}/deseq2_render.R' + + -e $echo + -o $report + -d $report.files_path + -s $sink_message + -t '${__tool_directory__}' + + -A '$count_data' + -B '$column_data' + -C '$design_formula' + -D '$treatment_name' + -E '$treated' + -F '$untreated' + -G '$test_type' + -H '$fit_type' + -I '$alpha' + -J '$significant_genes' \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/deseq2_site.xml Fri Mar 09 08:53:17 2018 -0500 @@ -0,0 +1,111 @@ +<tool id="aurora_deseq2_site" name="Aurora DESeq2 site" version="2.2.0"> + <description>Differential analysis of count data with the DESeq2 package</description> + <requirements> + <requirement type="package" version="1.15.0.6-0">pandoc</requirement> + <requirement type="package" version="1.20.0">r-getopt</requirement> + <requirement type="package" version="1.6">r-rmarkdown</requirement> + <requirement type="package" version="1.14.1">bioconductor-deseq2</requirement> + <requirement type="package" version="4.5.6">r-plotly</requirement> + <requirement type="package" version="2.2.1">r-ggplot2</requirement> + <requirement type="package" version="1.0.8">r-pheatmap</requirement> + <requirement type="package" version="0.2">r-dt</requirement> + </requirements> + <stdio> + <regex match="XXX" source="stderr" level="warning" + description="Check the warnings_and_errors.txt file for more details."/> + </stdio> + <command><![CDATA[Rscript '${__tool_directory__}/deseq2_render.R' + + -e $echo + -o $report + -d $report.files_path + -s $sink_message + -t '${__tool_directory__}' + + -A '$count_data' + -B '$column_data' + -C '$design_formula' + -D '$treatment_name' + -E '$treated' + -F '$untreated' + -G '$test_type' + -H '$fit_type' + -I '$alpha' + -J '$significant_genes']]></command> + <inputs> + <param type="boolean" name="echo" truevalue="TRUE" falsevalue="FALSE" checked="true" + label="Display analysis code in report?"/> + <param type="data" name="count_data" label="Count data" + help="an RData file that stores the count matrix data. The file is generated from the aurora_htseq tool." + optional="False" format="rdata"/> + <param type="data" name="column_data" label="Column data" + help="A table file that stores column data for DESeq2 analysis. The first column contains the column names from the count matrix data. The remaining columns are experimental treatments." 
+ optional="False" format="csv"/> + <param type="text" name="design_formula" label="Design formula" + help="The design formula expresses the variables which will be used in modeling. The formula should be a tilde (~) followed by the variables with plus signs between them, e.g., ~ condition1 + condition2" + optional="False"> + <sanitizer sanitize="true"> + <valid initial="default"> + <add preset="string.printable"/> + <add value="~"/> + </valid> + </sanitizer> + </param> + <param type="text" name="treatment_name" label="Treatment name" + help="one of the treatment column names from the column data." optional="False"/> + <param type="text" name="treated" label="Treated" + help="A level from the specified treatment column that will be used as the treated group in the DESeq2 analysis." + optional="False"/> + <param type="text" name="untreated" label="Untreated" + help="A level from the specified treatment column that will be used as the untreated group in the DESeq2 analysis." + optional="False"/> + <param type="select" name="test_type" label="Test type" optional="False"> + <option value="Wald" selected="true">Wald</option> + <option value="LRT" selected="false">Likelihood Ratio Test (LRT)</option> + </param> + <param type="select" name="fit_type" label="Fitting of dispersions" + help="either "parametric", "local", or "mean" for the type of fitting of dispersions to the mean intensity" + optional="False"> + <option value="parametric" selected="true">parametric</option> + <option value="local" selected="false">local</option> + <option value="mean" selected="false">mean</option> + </param> + <param type="float" name="alpha" label="Alpha" + help="the significance cutoff used for optimizing the independent filtering (by default 0.1). If the adjusted p-value cutoff (FDR) will be a value other than 0.1, alpha should be set to that value." 
+ optional="False" value="0.1" min="0" max="1"/> + </inputs> + <outputs> + <data format="html" name="report" label="Aurora DESeq2 site"/> + <data format="txt" name="sink_message" label="Warnings and Errors" from_work_dir="warnings_and_errors.txt"/> + <data name="significant_genes" format="csv" label="signficant genes from ${on_string} " hidden="false"/> + </outputs> + <citations> + <citation type="bibtex"><![CDATA[ + @article{allaire2016rmarkdown, + title={rmarkdown: Dynamic Documents for R, 2016}, + author={Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff + and Wickham, Hadley and Atkins, Aron and Hyndman, Rob}, + journal={R package version 0.9}, + volume={6}, + year={2016} + } + ]]></citation> + <citation type="bibtex"><![CDATA[ + @book{xie2015dynamic, + title={Dynamic Documents with R and knitr}, + author={Xie, Yihui}, + volume={29}, + year={2015}, + publisher={CRC Press} + } + ]]></citation> + <citation type="bibtex"><![CDATA[@article{love2014differential, + title={Differential analysis of count data--the DESeq2 package}, + author={Love, Michael and Anders, Simon and Huber, Wolfgang}, + journal={Genome Biol}, + volume={15}, + pages={550}, + year={2014} +}]]></citation> + </citations> +</tool>
---
# deseq2_site_index.Rmd: landing page of the generated report site.
# The render script copies this file to site_generator/index.Rmd before the
# site is built.
title: "Analysis Report"
output: html_document
---

```{r setup, include=FALSE, warning=FALSE, message=FALSE}
# Chunk defaults for this page: show the code of every chunk and keep
# rendering when a chunk raises an error.
knitr::opts_chunk$set(
  echo = TRUE,
  error = TRUE
)
```

## References

Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff and Wickham, Hadley and Atkins, Aron and Hyndman, Rob (2016). rmarkdown: Dynamic Documents for R, 2016. In R package version 0.9, 6.

Xie, Yihui (2015). Dynamic Documents with R and knitr, CRC Press, Vol.29.

Love, Michael and Anders, Simon and Huber, Wolfgang (2014). Differential analysis of count data--the DESeq2 package. In Genome Biol, 15, pp. 550.
##============ Sink warnings and errors to a file ==============
## deseq2_site_render.R: entry script of the "Aurora DESeq2 site" tool.
##
## Everything between here and the end of the script runs with both the
## output stream and the message stream (warnings, errors) diverted to
## warnings_and_errors.txt in the working directory.
##
## The connection is opened explicitly so that it stays open until BOTH
## diversions have been removed at the bottom of the script; it is then
## closed by hand.
##==============================================================
zz <- file("warnings_and_errors.txt", open = "wt")
sink(zz)
sink(zz, type = "message")

#------------import libraries--------------------
options(stringsAsFactors = FALSE)

library(getopt)
library(rmarkdown)
library(ggplot2)
library(plotly)
library(htmltools)
library(DESeq2)
library(pheatmap)
library(DT)
#------------------------------------------------


#------------get arguments into R--------------------
# The specification below was generated once with the (commented-out)
# helper call and then pasted in:
# library(dplyr)
# getopt_specification_matrix(extract_short_flags('deseq2.xml')) %>%
#   write.table(file = 'spec.txt', sep = ',', row.names = FALSE, col.names = TRUE, quote = FALSE)
#
# One row per command-line flag. Flags as passed by the tool command:
#   -e echo code in report     -o HTML report path
#   -d report files directory  -s warnings/errors output path
#   -t tool directory
#   -A count data    -B column data   -C design formula
#   -D treatment name  -E treated level  -F untreated level
#   -G test type     -H fit type      -I alpha (the only "double")
#   -J significant genes output
# Every flag takes a required argument (mask 1).
short_flags <- c("e", "o", "d", "s", "t",
                 "A", "B", "C", "D", "E", "F", "G", "H", "I", "J")

spec_matrix <- as.matrix(
  data.frame(
    stringsAsFactors = FALSE,
    long_flags = paste0("X_", short_flags),
    short_flags = short_flags,
    argument_mask_flags = rep(1L, length(short_flags)),
    data_type_flags = ifelse(short_flags == "I", "double", "character")
  )
)
opt <- getopt(spec_matrix)
#----------------------------------------------------


#-----------using passed arguments in R
# to define system environment variables---
# Each parsed option becomes an environment variable named after its long
# flag (X_e, X_o, ...). The first element of `opt` is skipped.
# NOTE(review): presumably that first element is getopt's own bookkeeping
# entry rather than a user flag -- confirm against the getopt documentation.
do.call(Sys.setenv, opt[-1])
#----------------------------------------------------

#---------- often used variables ----------------
# OUTPUT_DIR: path to the output associated directory, which stores all outputs
# TOOL_DIR: path to the tool installation directory
# OUTPUT_REPORT: path of the HTML report output file
OUTPUT_DIR <- opt$X_d
TOOL_DIR <- opt$X_t
OUTPUT_REPORT <- opt$X_o

# create the output associated directory to store all outputs
dir.create(OUTPUT_DIR, recursive = TRUE, showWarnings = FALSE)

#-----------------render site--------------
# Stage the site sources in OUTPUT_DIR/site_generator. Base R file functions
# are used instead of pasted `cp`/`mv` shell commands so that paths with
# spaces or shell metacharacters are handled correctly.
site_dir <- file.path(OUTPUT_DIR, "site_generator")
dir.create(site_dir, recursive = TRUE, showWarnings = FALSE)

# analysis pages: every deseq2_0*.Rmd template shipped with the tool
page_templates <- list.files(
  TOOL_DIR,
  pattern = "^deseq2_0.*\\.Rmd$",
  full.names = TRUE
)
staged <- c(
  file.copy(page_templates, site_dir, overwrite = TRUE),
  # site configuration and landing page get the names render_site() looks for
  file.copy(file.path(TOOL_DIR, "deseq2_site_site.yml"),
            file.path(site_dir, "_site.yml"), overwrite = TRUE),
  file.copy(file.path(TOOL_DIR, "deseq2_site_index.Rmd"),
            file.path(site_dir, "index.Rmd"), overwrite = TRUE)
)
if (length(page_templates) == 0L || !all(staged)) {
  # Keep going (as the shell-based version did) but leave a trace in the log.
  warning("Some site sources could not be copied from ", TOOL_DIR,
          " to ", site_dir, call. = FALSE)
}

# render site to OUTPUT_DIR/_site, this is configured in the "_site.yml" file
render_site(input = site_dir)

# remove site generating materials from output associated directory
unlink(site_dir, recursive = TRUE)

# move _site/* into output associated directory
# (list.files() skips dot-files by default, like the shell glob `*`)
built_items <- list.files(file.path(OUTPUT_DIR, "_site"), full.names = TRUE)
if (length(built_items) > 0L) {
  moved <- file.rename(built_items,
                       file.path(OUTPUT_DIR, basename(built_items)))
  if (!all(moved)) {
    warning("Could not move into ", OUTPUT_DIR, ": ",
            paste(built_items[!moved], collapse = ", "), call. = FALSE)
  }
}
#------------------------------------------

#-----link index.html to output-----
# overwrite = TRUE matches `cp`, which replaces an existing destination file.
report_copied <- file.copy(file.path(OUTPUT_DIR, "index.html"),
                           OUTPUT_REPORT, overwrite = TRUE)
if (!report_copied) {
  warning("Could not copy index.html to the report output: ",
          OUTPUT_REPORT, call. = FALSE)
}
#-----------------------------------

#==============the end==============


##--------end of code rendering .Rmd templates----------------
# Remove the message diversion first, then the output diversion, and only
# then close the shared connection.
sink(type = "message")
sink()
close(zz)
##=========== End of sinking output=============================
# deseq2_site_site.yml: site configuration for the generated report.
# The render script copies this file to site_generator/_site.yml, so the
# relative output_dir below resolves to the "_site" directory next to
# site_generator.
name: "Analysis Report"
output_dir: "../_site"
navbar:
  title: ""
  type: inverse
  left:
    - text: "Home"
      icon: fa-home
      href: index.html
    # One menu entry per deseq2_0*.Rmd analysis page; each href is the page's
    # file name with an .html extension.
    - text: "Differential Expression Analysis"
      menu:
        - text: "DESeq analysis"
          href: deseq2_01_run_deseq.html
        - text: "Results"
          href: deseq2_02_results.html
        - text: "MA-plot"
          href: deseq2_03_MA_plot.html
        - text: "Heatmap"
          href: deseq2_04_heatmap.html
        # This entry previously had no href, so the menu item led nowhere.
        - text: "PCA plot"
          href: deseq2_05_pca.html
output:
  html_document:
    theme: cosmo
    highlight: textmate