Mercurial > repos > mingchen0919 > rmarkdown_deseq2_count_matrix
changeset 1:4a7131658ca6 draft
update tool
| author | mingchen0919 |
|---|---|
| date | Sat, 30 Dec 2017 12:35:31 -0500 |
| parents | 867a77a7a380 |
| children | 18e3fc69da76 |
| files | rmarkdown_deseq2_count_matrix.Rmd rmarkdown_deseq2_count_matrix.xml rmarkdown_deseq2_count_matrix_render.R |
| diffstat | 3 files changed, 123 insertions(+), 59 deletions(-) [+] |
line wrap: on
line diff
--- a/rmarkdown_deseq2_count_matrix.Rmd Fri Dec 29 23:37:30 2017 -0500 +++ b/rmarkdown_deseq2_count_matrix.Rmd Sat Dec 30 12:35:31 2017 -0500 @@ -16,15 +16,41 @@ ``` -## Count Matrix +# User input -```{r 'count matrix'} -count_matrix = read.table(opt$count_matrix) -count_matrix -``` - -```{r 'ste[ 2'} - +```{r 'user input'} +df = data.frame(name = names(opt)[-1], + value = unlist(opt)) +datatable(df, rownames = FALSE) ``` +# Count Matrix + +Display the first 100 rows of the count data matrix. + +```{r 'count matrix'} +count_data = read.table(opt$count_data) +col_names = trimws(strsplit(opt$count_matrix_column_names, ',')[[1]])[1:ncol(count_data)] +col_names = col_names[!is.na(col_names)] +colnames(count_data)[1:length(col_names)] = col_names +datatable(head(count_data, 100)) +``` + +# Column Data + +```{r 'column data'} +col_data = read.table(opt$col_data, + stringsAsFactors = FALSE, sep=',', header = TRUE, row.names = 1) +datatable(col_data) +``` + +# Match sample names + +The goal of this step is to rearrange the rows of the column data matrix so that the sample rows in the column data matrix and the sample columns in the count data matrix are in the same order. + +```{r 'match sample names'} +col_data = col_data[col_names, ] +datatable(col_data) +``` +
--- a/rmarkdown_deseq2_count_matrix.xml Fri Dec 29 23:37:30 2017 -0500 +++ b/rmarkdown_deseq2_count_matrix.xml Sat Dec 30 12:35:31 2017 -0500 @@ -1,54 +1,83 @@ <tool id="rmarkdown_deseq2_count_matrix" name="DESeq2 (count matrix)" version="1.0.0"> - <requirements> - <requirement type="package" version="1.15.0.6-0">pandoc</requirement> - <requirement type="package" version="1.20.0">r-getopt</requirement> - <requirement type="package" version="1.3">r-rmarkdown</requirement> - <requirement type="package" version="0.3.5">r-htmltools</requirement> - <requirement type="package" version="0.5.0">r-dplyr</requirement> - <requirement type="package" version="1.14.1">bioconductor-deseq2</requirement> - <requirement type="package" version="0.2">r-dt</requirement> - </requirements> - <description>deseq2 analysis given a count matrix input</description> - <stdio> - <regex source="stderr" match="XXX" level="warning" description="Check the warnings_and_errors.txt file for more details."/> - </stdio> - <command><![CDATA[ -Rscript '${__tool_directory__}/rmarkdown_deseq2_count_matrx_render.R' - -e $echo - -c $count_matrix - -r $report - -d $report.files_path - -s $sink_message - -t '${__tool_directory__}/rmarkdown_deseq2_count_matrx.Rmd' + <requirements> + <requirement type="package" version="1.15.0.6-0">pandoc</requirement> + <requirement type="package" version="1.20.0">r-getopt</requirement> + <requirement type="package" version="1.3">r-rmarkdown</requirement> + <requirement type="package" version="0.3.5">r-htmltools</requirement> + <requirement type="package" version="0.5.0">r-dplyr</requirement> + <requirement type="package" version="1.14.1">bioconductor-deseq2</requirement> + <requirement type="package" version="0.2">r-dt</requirement> + </requirements> + <description>deseq2 analysis given a count matrix input</description> + <stdio> + <regex source="stderr" match="XXX" level="warning" + description="Check the warnings_and_errors.txt file for more details."/> + </stdio> + 
<command><![CDATA[ +Rscript '${__tool_directory__}/rmarkdown_deseq2_count_matrix_render.R' + -e $echo + -c $count_data + -n '$count_matrix_column_names' + -C $col_data + -D '$design_formula' + -k $contrast_condition + -T $treatment + -K $control + -r $report + -d $report.files_path + -s $sink_message + -t '${__tool_directory__}/rmarkdown_deseq2_count_matrix.Rmd' ]]></command> - <inputs> - <param type="boolean" name="echo" label="Display analysis code in report?" optional="False" checked="False" truevalue="TRUE" falsevalue="FALSE"/> - <param type="data" name="count_matrix" label="count matrix" optional="False" format="txt,csv" multiple="False"/> - </inputs> - <outputs> - <data name="report" format="html" label="DESeq2 report" hidden="false"/> - <data name="differential_genes" format="csv" label="differential genes" from_work_dir="differential_genes.csv" hidden="false"/> - <data name="sink_message" format="txt" label="Warnings and Errors" from_work_dir="warnings_and_errors.txt" hidden="false"/> - </outputs> - <help><![CDATA[See http://bioconductor.org/packages/devel/bioc/vignettes/DESeq2/inst/doc/DESeq2.html]]></help> - <citations> - <citation type="bibtex"><![CDATA[ @article{love2014moderated, - title={Moderated estimation of fold change and dispersion for RNA-seq data with DESeq2}, - author={Love, Michael I and Huber, Wolfgang and Anders, Simon}, - journal={Genome biology}, - volume={15}, - number={12}, - pages={550}, - year={2014}, - publisher={BioMed Central} + <inputs> + <param type="boolean" name="echo" label="Display analysis code in report?" optional="False" checked="False" + truevalue="TRUE" falsevalue="FALSE"/> + <param type="data" name="count_data" label="count data" optional="False" format="txt,csv" multiple="False"/> + <param type="text" name="count_matrix_column_names" label="column names" + help="This field is used to update the column names of count data matrix. Column names are separated by comma (,). 
For example, 'column_1, column_2, column_3, column_4'" + optional="True" area="True" size="5x25"></param> + <param type="data" name="col_data" label="column data (from file)" optional="False" format="txt,csv" multiple="False"/> + <param type="text" name="design_formula" label="design formula" + help="The simplest design formula for differential expression would be ~ condition, where condition is a column in colData(dds) that specifies which of two (or more groups) the samples belong to" + optional="False" value="~ condition_1 + condition_2"> + <sanitizer> + <valid> + <add value="~"/> + </valid> + </sanitizer> + </param> + <param type="text" name="contrast_condition" label="condition" + help="A condition name specified in the design formula" optional="False" value="condition_1"/> + <param type="text" name="treatment" label="treatment" help="the name of the numerator level for the fold change" + optional="False" value="treated"/> + <param type="text" name="control" label="control" help="the name of the denominator level for the fold change" + optional="False" value="untreated"/> + </inputs> + <outputs> + <data name="report" format="html" label="DESeq2 report" hidden="false"/> + <data name="differential_genes" format="csv" label="differential genes" from_work_dir="differential_genes.csv" + hidden="false"/> + <data name="sink_message" format="txt" label="Warnings and Errors" from_work_dir="warnings_and_errors.txt" + hidden="false"/> + </outputs> + <help><![CDATA[See http://bioconductor.org/packages/devel/bioc/vignettes/DESeq2/inst/doc/DESeq2.html]]></help> + <citations> + <citation type="bibtex"><![CDATA[ @article{love2014moderated, + title={Moderated estimation of fold change and dispersion for RNA-seq data with DESeq2}, + author={Love, Michael I and Huber, Wolfgang and Anders, Simon}, + journal={Genome biology}, + volume={15}, + number={12}, + pages={550}, + year={2014}, + publisher={BioMed Central} }]]></citation> - <citation type="bibtex"><![CDATA[ 
@article{allaire2016rmarkdown, - title={rmarkdown: Dynamic Documents for R, 2016}, - author={Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff - and Wickham, Hadley and Atkins, Aron and Hyndman, Rob}, - journal={R package version 0.9}, - volume={6}, - year={2016} + <citation type="bibtex"><![CDATA[ @article{allaire2016rmarkdown, + title={rmarkdown: Dynamic Documents for R, 2016}, + author={Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff + and Wickham, Hadley and Atkins, Aron and Hyndman, Rob}, + journal={R package version 0.9}, + volume={6}, + year={2016} }]]></citation> - </citations> + </citations> </tool>
--- a/rmarkdown_deseq2_count_matrix_render.R Fri Dec 29 23:37:30 2017 -0500 +++ b/rmarkdown_deseq2_count_matrix_render.R Sat Dec 30 12:35:31 2017 -0500 @@ -2,6 +2,8 @@ library(rmarkdown) library(htmltools) library(dplyr) +library(DT) +library(DESeq2) ##============ Sink warnings and errors to a file ============== ## use the sink() function to wrap all code within it. @@ -31,8 +33,15 @@ #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ args_list=list() ##------- 1. input data --------------------- - args_list$ECHO = c('echo', 'e', '1', 'character') - args_list$c = c('count_matrix', 'c', '1', 'character') + args_list$ECHO = c('echo', 'e', '1', 'logical') + args_list$c = c('count_data', 'c', '1', 'character') + args_list$n = c('count_matrix_column_names', 'n', '1', 'character') + args_list$n = c('count_matrix_column_names', 'n', '1', 'character') + args_list$C = c('col_data', 'C', '1', 'character') + args_list$D = c('design_formula', 'D', '1', 'character') + args_list$k = c('contrast_condition', 'k', '1', 'character') + args_list$T = c('treatment', 'T', '1', 'character') + args_list$K = c('control', 'K', '1', 'character') ##--------2. output report and outputs -------------- args_list$REPORT_HTML = c('report_html', 'r', '1', 'character') args_list$REPORT_DIR = c('report_dir', 'd', '1', 'character')
