Mercurial > repos > mingchen0919 > rmarkdown_deseq2
changeset 3:a520a6c5e111 draft default tip
update
| author | mingchen0919 |
|---|---|
| date | Tue, 27 Feb 2018 15:30:28 -0500 |
| parents | 754a36851c6b |
| children | |
| files | DESeq.Rmd DESeq.xml DESeq_render.R DESeq_results.Rmd DESeq_results.xml DESeq_results_render.R |
| diffstat | 6 files changed, 106 insertions(+), 118 deletions(-) [+] |
line wrap: on
line diff
--- a/DESeq.Rmd Tue Feb 27 14:07:10 2018 -0500 +++ b/DESeq.Rmd Tue Feb 27 15:30:28 2018 -0500 @@ -24,12 +24,13 @@ row.names(sample_table) = sample_table[,2] sample_table = sample_table[count_file_names, ] -## copy count files into working directory -file_copy = file.copy(count_file_paths, count_file_names, overwrite = TRUE) +## copy count files into OUTPUT_DIR/counts +dir.create(paste0(OUTPUT_DIR, '/counts'), recursive = TRUE) +file_copy = file.copy(count_file_paths, paste0(OUTPUT_DIR, '/counts/', count_file_names), overwrite = TRUE) ## DESeqDataSet object dds = DESeqDataSetFromHTSeqCount(sampleTable = sample_table, - directory = './', + directory = paste0(OUTPUT_DIR, '/counts'), design = formula(opt$X_p)) dds ``` @@ -88,9 +89,9 @@ dds <- DESeq(dds) ``` -```{r} -rm("opt") -save(list=ls(all.names = TRUE), file=opt$X_w) +```{r echo=FALSE} +# save objects except for opt. +save(list=ls()[ls() != "opt"], file=opt$X_w) ```
--- a/DESeq.xml Tue Feb 27 14:07:10 2018 -0500 +++ b/DESeq.xml Tue Feb 27 15:30:28 2018 -0500 @@ -1,4 +1,4 @@ -<tool name="DESeq Analysis" id='deseq2' version="2.0.1"> +<tool name="DESeq2: Analysis" id='deseq2' version="2.0.1"> <description> "some description" </description> @@ -71,6 +71,18 @@ <data name="deseq_workspace" format="rdata" label="R workspace: DESeq analysis on ${on_string}"/> </outputs> <citations> + <citation type="bibtex"> + @article{love2014moderated, + title={Moderated estimation of fold change and dispersion for RNA-seq data with DESeq2}, + author={Love, Michael I and Huber, Wolfgang and Anders, Simon}, + journal={Genome biology}, + volume={15}, + number={12}, + pages={550}, + year={2014}, + publisher={BioMed Central} + } + </citation> <citation type="bibtex"><![CDATA[ @article{allaire2016rmarkdown, title={rmarkdown: Dynamic Documents for R, 2016},
--- a/DESeq_render.R Tue Feb 27 14:07:10 2018 -0500 +++ b/DESeq_render.R Tue Feb 27 15:30:28 2018 -0500 @@ -17,11 +17,21 @@ #------------get arguments into R-------------------- -# getopt_specification_matrix(extract_short_flags('fastqc_report.xml')) %>% +# getopt_specification_matrix(extract_short_flags('')) %>% # write.table(file = 'spec.txt', sep = ',', row.names = FALSE, col.names = TRUE, quote = FALSE) -spec_matrix = as.matrix() +spec_matrix = as.matrix( + data.frame(stringsAsFactors=FALSE, + long_flags = c("X_e", "X_o", "X_d", "X_s", "X_t", "X_P", "X_N", + "X_S", "X_p", "X_w"), + short_flags = c("e", "o", "d", "s", "t", "P", "N", "S", "p", "w"), + argument_mask_flags = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), + data_type_flags = c("character", "character", "character", "character", + "character", "character", "character", + "character", "character", "character") + ) +) opt = getopt(spec_matrix) #---------------------------------------------------- @@ -35,16 +45,17 @@ # OUTPUT_DIR: path to the output associated directory, which stores all outputs # TOOL_DIR: path to the tool installation directory # RMD_NAME: name of Rmd file to be rendered +# OUTPUT_REPORT: path to galaxy output report OUTPUT_DIR = opt$X_d TOOL_DIR = opt$X_t RMD_NAME = 'DESeq.Rmd' OUTPUT_REPORT = opt$X_o # create the output associated directory to store all outputs -dir.create(OUT_DIR, recursive = TRUE) +dir.create(OUTPUT_DIR, recursive = TRUE) #-----------------render Rmd-------------- -render(paste0(TOOL_DIR, RMD_NAME, sep = '/'), output_file = OUTPUT_REPORT) +render(paste0(TOOL_DIR, '/', RMD_NAME), output_file = OUTPUT_REPORT) #------------------------------------------ #==============the end==============
--- a/DESeq_results.Rmd Tue Feb 27 14:07:10 2018 -0500 +++ b/DESeq_results.Rmd Tue Feb 27 15:30:28 2018 -0500 @@ -10,7 +10,7 @@ ```{r setup, include=FALSE, warning=FALSE, message=FALSE} knitr::opts_chunk$set( - echo = ECHO, + echo = as.logical(opt$X_e), error = TRUE ) ``` @@ -18,8 +18,9 @@ ```{r eval=TRUE} # Import workspace -fcp = file.copy("DESEQ_WORKSPACE", "deseq.RData") -load("deseq.RData") +# fcp = file.copy(opt$X_W, "deseq.RData") +load(opt$X_W) +opt ``` # Results {.tabset} @@ -28,7 +29,7 @@ ```{r} cat('--- View the top 100 rows of the result table ---') -res <- results(dds, contrast = c('CONTRAST_FACTOR', 'TREATMENT_LEVEL', 'CONDITION_LEVEL')) +res <- results(dds, contrast = c(opt$X_C, opt$X_T, opt$X_K)) write.csv(as.data.frame(res), file = 'deseq_results.csv') res_df = as.data.frame(res)[1:100, ] datatable(res_df, style="bootstrap", filter = 'top', @@ -65,7 +66,7 @@ ## Gene clustering ```{r} -clustering_groups = strsplit("CLUSTERING_FACTORS", ',')[[1]] +clustering_groups = strsplit(opt$X_M, ',')[[1]] topVarGenes <- head(order(rowVars(assay(rld)), decreasing = TRUE), 20) mat <- assay(rld)[ topVarGenes, ]
--- a/DESeq_results.xml Tue Feb 27 14:07:10 2018 -0500 +++ b/DESeq_results.xml Tue Feb 27 15:30:28 2018 -0500 @@ -22,8 +22,14 @@ Rscript '${__tool_directory__}/DESeq_results_render.R' + -e $echo + -o $report + -d $report.files_path + -s $sink_message + -t '${__tool_directory__}' + ## 1. input data - -e $echo + -W $deseq_workspace -C '$contrast_factor' -T '$treatment' @@ -32,15 +38,8 @@ -M '$clustering_factors' ## 2. output report and report site directory - -r $report - -d $report.files_path - -s $sink_message -R $deseq_results - ## 3. Rmd templates sitting in the tool directory - -t '${__tool_directory__}/DESeq_results.Rmd' - - ]]> </command> @@ -61,7 +60,7 @@ </inputs> <outputs> <data format="html" name="report" label="DESeq results report on ${on_string}" /> - <data format="txt" name="sink_message" label="Warnings and Errors on ${on_string}" from_work_dir="warnings_and_errors.txt"/> + <data format="txt" name="sink_message" label="Warnings and Errors" from_work_dir="warnings_and_errors.txt"/> <data format="csv" name="deseq_results" label="DESeq results on ${on_string}" from_work_dir="deseq_results.csv" /> </outputs> <citations>
--- a/DESeq_results_render.R Tue Feb 27 14:07:10 2018 -0500 +++ b/DESeq_results_render.R Tue Feb 27 15:30:28 2018 -0500 @@ -1,105 +1,69 @@ -library(getopt) -library(rmarkdown) -library(htmltools) -library(dplyr) -library(DESeq2) -library(pheatmap) -library(DT) -library(ggplot2) - ##============ Sink warnings and errors to a file ============== ## use the sink() function to wrap all code within it. ##============================================================== zz = file('warnings_and_errors.txt') sink(zz) sink(zz, type = 'message') - ##---------below is the code for rendering .Rmd templates----- - - ##=============STEP 1: handle command line arguments========== - ## - ##============================================================ - # column 1: the long flag name - # column 2: the short flag alias. A SINGLE character string - # column 3: argument mask - # 0: no argument - # 1: argument required - # 2: argument is optional - # column 4: date type to which the flag's argument shall be cast. - # possible values: logical, integer, double, complex, character. - #------------------------------------------------------------- - #++++++++++++++++++++ Best practice ++++++++++++++++++++++++++ - # 1. short flag alias should match the flag in the command section in the XML file. - # 2. long flag name can be any legal R variable names - # 3. two names in args_list can have common string but one name should not be a part of another name. - # for example, one name is "ECHO", if another name is "ECHO_XXX", it will cause problems. - #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - args_list=list() - ##------- 1. input data --------------------- - args_list$ECHO = c('echo', 'e', '1', 'character') - args_list$DESEQ_WORKSPACE = c('deseq_workspace', 'W', '1', 'character') - args_list$CONTRAST_FACTOR = c('contrast_factor', 'C', '1', 'character') - args_list$TREATMENT_LEVEL = c('treatment_level', 'T', '1', 'character') - args_list$CONDITION_LEVEL = c('condition_level', 'K', '1', 'character') - args_list$CLUSTERING_FACTORS = c('clustering_factors', 'M', '1', 'character') - ##--------2. output report and outputs -------------- - args_list$REPORT_HTML = c('report_html', 'r', '1', 'character') - args_list$REPORT_DIR = c('report_dir', 'd', '1', 'character') - args_list$SINK_MESSAGE = c('sink_message', 's', '1', 'character') - args_list$DESEQ_RESULTS = c('deseq_results', 'R', '1', 'character') - ##--------3. .Rmd templates in the tool directory ---------- - args_list$deseq_results_RMD = c('deseq_results_rmd', 't', '1', 'character') - ##----------------------------------------------------------- - opt = getopt(t(as.data.frame(args_list))) + +#------------import libraries-------------------- +options(stringsAsFactors = FALSE) + +library(getopt) +library(rmarkdown) +library(DESeq2) +library(pheatmap) +library(DT) +library(ggplot2) +#------------------------------------------------ + + +#------------get arguments into R-------------------- +# getopt_specification_matrix(extract_short_flags('DESeq_results.xml')) %>% +# write.table(file = 'spec.txt', sep = ',', row.names = FALSE, col.names = TRUE, quote = FALSE) - - ##=======STEP 2: create report directory (optional)========== - ## - ##=========================================================== - dir.create(opt$report_dir) - - ##=STEP 3: replace placeholders in .Rmd with argument values= - ## - ##=========================================================== - #++ need to replace placeholders with args values one by one+ - readLines(opt$deseq_results_rmd) %>% - (function(x) { - gsub('ECHO', opt$echo, x) - }) %>% - (function(x) { - gsub('DESEQ_WORKSPACE', opt$deseq_workspace, x) - }) %>% - (function(x) { - gsub('CONTRAST_FACTOR', opt$contrast_factor, x) - }) %>% - (function(x) { - gsub('TREATMENT_LEVEL', opt$treatment_level, x) - }) %>% - (function(x) { - gsub('CONDITION_LEVEL', opt$condition_level, x) - }) %>% - (function(x) { - gsub('CLUSTERING_FACTORS', opt$clustering_factors, x) - }) %>% - (function(x) { - gsub('REPORT_DIR', opt$report_dir, x) - }) %>% - (function(x) { - gsub('DESEQ_RESULTS', opt$deseq_results, x) - }) %>% - (function(x) { - fileConn = file('deseq_results.Rmd') - writeLines(x, con=fileConn) - close(fileConn) - }) - - - ##=============STEP 4: render .Rmd templates================= - ## - ##=========================================================== - render('deseq_results.Rmd', output_file = opt$report_html) +spec_matrix = as.matrix( + data.frame(stringsAsFactors=FALSE, + long_flags = c("X_e", "X_W", "X_C", "X_T", "X_K", "X_M", "X_o", + "X_d", "X_s", "X_R", "X_t"), + short_flags = c("e", "W", "C", "T", "K", "M", "o", "d", "s", "R", + "t"), + argument_mask_flags = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), + data_type_flags = c("character", "character", "character", "character", + "character", "character", "character", + "character", "character", "character", "character") + ) +) +opt = getopt(spec_matrix) +opt +#---------------------------------------------------- - ##--------end of code rendering .Rmd templates---------------- +#-----------using passed arguments in R +# to define system environment variables--- +do.call(Sys.setenv, opt[-1]) +#---------------------------------------------------- + +#---------- often used variables ---------------- +# OUTPUT_DIR: path to the output associated directory, which stores all outputs +# TOOL_DIR: path to the tool installation directory +# RMD_NAME: name of Rmd file to be rendered +# OUTPUT_REPORT: path to galaxy output report +OUTPUT_DIR = opt$X_d +TOOL_DIR = opt$X_t +RMD_NAME = 'DESeq_results.Rmd' +OUTPUT_REPORT = opt$X_o + +# create the output associated directory to store all outputs +dir.create(OUTPUT_DIR, recursive = TRUE) + +#-----------------render Rmd-------------- +render(paste0(TOOL_DIR, '/', RMD_NAME), output_file = OUTPUT_REPORT) +#------------------------------------------ + +#==============the end============== + + +##--------end of code rendering .Rmd templates---------------- sink() ##=========== End of sinking output============================= \ No newline at end of file
