# HG changeset patch # User mingchen0919 # Date 1519763428 18000 # Node ID a520a6c5e111d82f810f59263b300df4f842d814 # Parent 754a36851c6b2287aee10a6fa718c5b489b27c9c update diff -r 754a36851c6b -r a520a6c5e111 DESeq.Rmd --- a/DESeq.Rmd Tue Feb 27 14:07:10 2018 -0500 +++ b/DESeq.Rmd Tue Feb 27 15:30:28 2018 -0500 @@ -24,12 +24,13 @@ row.names(sample_table) = sample_table[,2] sample_table = sample_table[count_file_names, ] -## copy count files into working directory -file_copy = file.copy(count_file_paths, count_file_names, overwrite = TRUE) +## copy count files into OUTPUT_DIR/counts +dir.create(paste0(OUTPUT_DIR, '/counts'), recursive = TRUE) +file_copy = file.copy(count_file_paths, paste0(OUTPUT_DIR, '/counts/', count_file_names), overwrite = TRUE) ## DESeqDataSet object dds = DESeqDataSetFromHTSeqCount(sampleTable = sample_table, - directory = './', + directory = paste0(OUTPUT_DIR, '/counts'), design = formula(opt$X_p)) dds ``` @@ -88,9 +89,9 @@ dds <- DESeq(dds) ``` -```{r} -rm("opt") -save(list=ls(all.names = TRUE), file=opt$X_w) +```{r echo=FALSE} +# save objects except for opt. +save(list=ls()[ls() != "opt"], file=opt$X_w) ``` diff -r 754a36851c6b -r a520a6c5e111 DESeq.xml --- a/DESeq.xml Tue Feb 27 14:07:10 2018 -0500 +++ b/DESeq.xml Tue Feb 27 15:30:28 2018 -0500 @@ -1,4 +1,4 @@ - + "some description" @@ -71,6 +71,18 @@ + + @article{love2014moderated, + title={Moderated estimation of fold change and dispersion for RNA-seq data with DESeq2}, + author={Love, Michael I and Huber, Wolfgang and Anders, Simon}, + journal={Genome biology}, + volume={15}, + number={12}, + pages={550}, + year={2014}, + publisher={BioMed Central} + } + % +# getopt_specification_matrix(extract_short_flags('')) %>% # write.table(file = 'spec.txt', sep = ',', row.names = FALSE, col.names = TRUE, quote = FALSE) -spec_matrix = as.matrix() +spec_matrix = as.matrix( + data.frame(stringsAsFactors=FALSE, + long_flags = c("X_e", "X_o", "X_d", "X_s", "X_t", "X_P", "X_N", + "X_S", "X_p", "X_w"), + short_flags = c("e", "o", "d", "s", "t", "P", "N", "S", "p", "w"), + argument_mask_flags = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), + data_type_flags = c("character", "character", "character", "character", + "character", "character", "character", + "character", "character", "character") + ) +) opt = getopt(spec_matrix) #---------------------------------------------------- @@ -35,16 +45,17 @@ # OUTPUT_DIR: path to the output associated directory, which stores all outputs # TOOL_DIR: path to the tool installation directory # RMD_NAME: name of Rmd file to be rendered +# OUTPUT_REPORT: path to galaxy output report OUTPUT_DIR = opt$X_d TOOL_DIR = opt$X_t RMD_NAME = 'DESeq.Rmd' OUTPUT_REPORT = opt$X_o # create the output associated directory to store all outputs -dir.create(OUT_DIR, recursive = TRUE) +dir.create(OUTPUT_DIR, recursive = TRUE) #-----------------render Rmd-------------- -render(paste0(TOOL_DIR, RMD_NAME, sep = '/'), output_file = OUTPUT_REPORT) +render(paste0(TOOL_DIR, '/', RMD_NAME), output_file = OUTPUT_REPORT) #------------------------------------------ #==============the end============== diff -r 754a36851c6b -r a520a6c5e111 DESeq_results.Rmd --- a/DESeq_results.Rmd Tue Feb 27 14:07:10 2018 -0500 +++ b/DESeq_results.Rmd Tue Feb 27 15:30:28 2018 -0500 @@ -10,7 +10,7 @@ ```{r setup, include=FALSE, warning=FALSE, message=FALSE} knitr::opts_chunk$set( - echo = ECHO, + echo = as.logical(opt$X_e), error = TRUE ) ``` @@ -18,8 +18,9 @@ ```{r eval=TRUE} # Import workspace -fcp = file.copy("DESEQ_WORKSPACE", "deseq.RData") -load("deseq.RData") +# fcp = file.copy(opt$X_W, "deseq.RData") +load(opt$X_W) +opt ``` # Results {.tabset} @@ -28,7 +29,7 @@ ```{r} cat('--- View the top 100 rows of the result table ---') -res <- results(dds, contrast = c('CONTRAST_FACTOR', 'TREATMENT_LEVEL', 'CONDITION_LEVEL')) +res <- results(dds, contrast = c(opt$X_C, opt$X_T, opt$X_K)) write.csv(as.data.frame(res), file = 'deseq_results.csv') res_df = as.data.frame(res)[1:100, ] datatable(res_df, style="bootstrap", filter = 'top', @@ -65,7 +66,7 @@ ## Gene clustering ```{r} -clustering_groups = strsplit("CLUSTERING_FACTORS", ',')[[1]] +clustering_groups = strsplit(opt$X_M, ',')[[1]] topVarGenes <- head(order(rowVars(assay(rld)), decreasing = TRUE), 20) mat <- assay(rld)[ topVarGenes, ] diff -r 754a36851c6b -r a520a6c5e111 DESeq_results.xml --- a/DESeq_results.xml Tue Feb 27 14:07:10 2018 -0500 +++ b/DESeq_results.xml Tue Feb 27 15:30:28 2018 -0500 @@ -22,8 +22,14 @@ Rscript '${__tool_directory__}/DESeq_results_render.R' + -e $echo + -o $report + -d $report.files_path + -s $sink_message + -t '${__tool_directory__}' + ## 1. input data - -e $echo + -W $deseq_workspace -C '$contrast_factor' -T '$treatment' @@ -32,15 +38,8 @@ -M '$clustering_factors' ## 2. output report and report site directory - -r $report - -d $report.files_path - -s $sink_message -R $deseq_results - ## 3. Rmd templates sitting in the tool directory - -t '${__tool_directory__}/DESeq_results.Rmd' - - ]]> @@ -61,7 +60,7 @@ - + diff -r 754a36851c6b -r a520a6c5e111 DESeq_results_render.R --- a/DESeq_results_render.R Tue Feb 27 14:07:10 2018 -0500 +++ b/DESeq_results_render.R Tue Feb 27 15:30:28 2018 -0500 @@ -1,105 +1,69 @@ -library(getopt) -library(rmarkdown) -library(htmltools) -library(dplyr) -library(DESeq2) -library(pheatmap) -library(DT) -library(ggplot2) - ##============ Sink warnings and errors to a file ============== ## use the sink() function to wrap all code within it. ##============================================================== zz = file('warnings_and_errors.txt') sink(zz) sink(zz, type = 'message') - ##---------below is the code for rendering .Rmd templates----- - - ##=============STEP 1: handle command line arguments========== - ## - ##============================================================ - # column 1: the long flag name - # column 2: the short flag alias. A SINGLE character string - # column 3: argument mask - # 0: no argument - # 1: argument required - # 2: argument is optional - # column 4: date type to which the flag's argument shall be cast. - # possible values: logical, integer, double, complex, character. - #------------------------------------------------------------- - #++++++++++++++++++++ Best practice ++++++++++++++++++++++++++ - # 1. short flag alias should match the flag in the command section in the XML file. - # 2. long flag name can be any legal R variable names - # 3. two names in args_list can have common string but one name should not be a part of another name. - # for example, one name is "ECHO", if another name is "ECHO_XXX", it will cause problems. - #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - args_list=list() - ##------- 1. input data --------------------- - args_list$ECHO = c('echo', 'e', '1', 'character') - args_list$DESEQ_WORKSPACE = c('deseq_workspace', 'W', '1', 'character') - args_list$CONTRAST_FACTOR = c('contrast_factor', 'C', '1', 'character') - args_list$TREATMENT_LEVEL = c('treatment_level', 'T', '1', 'character') - args_list$CONDITION_LEVEL = c('condition_level', 'K', '1', 'character') - args_list$CLUSTERING_FACTORS = c('clustering_factors', 'M', '1', 'character') - ##--------2. output report and outputs -------------- - args_list$REPORT_HTML = c('report_html', 'r', '1', 'character') - args_list$REPORT_DIR = c('report_dir', 'd', '1', 'character') - args_list$SINK_MESSAGE = c('sink_message', 's', '1', 'character') - args_list$DESEQ_RESULTS = c('deseq_results', 'R', '1', 'character') - ##--------3. .Rmd templates in the tool directory ---------- - args_list$deseq_results_RMD = c('deseq_results_rmd', 't', '1', 'character') - ##----------------------------------------------------------- - opt = getopt(t(as.data.frame(args_list))) + +#------------import libraries-------------------- +options(stringsAsFactors = FALSE) + +library(getopt) +library(rmarkdown) +library(DESeq2) +library(pheatmap) +library(DT) +library(ggplot2) +#------------------------------------------------ + + +#------------get arguments into R-------------------- +# getopt_specification_matrix(extract_short_flags('DESeq_results.xml')) %>% +# write.table(file = 'spec.txt', sep = ',', row.names = FALSE, col.names = TRUE, quote = FALSE) - - ##=======STEP 2: create report directory (optional)========== - ## - ##=========================================================== - dir.create(opt$report_dir) - - ##=STEP 3: replace placeholders in .Rmd with argument values= - ## - ##=========================================================== - #++ need to replace placeholders with args values one by one+ - readLines(opt$deseq_results_rmd) %>% - (function(x) { - gsub('ECHO', opt$echo, x) - }) %>% - (function(x) { - gsub('DESEQ_WORKSPACE', opt$deseq_workspace, x) - }) %>% - (function(x) { - gsub('CONTRAST_FACTOR', opt$contrast_factor, x) - }) %>% - (function(x) { - gsub('TREATMENT_LEVEL', opt$treatment_level, x) - }) %>% - (function(x) { - gsub('CONDITION_LEVEL', opt$condition_level, x) - }) %>% - (function(x) { - gsub('CLUSTERING_FACTORS', opt$clustering_factors, x) - }) %>% - (function(x) { - gsub('REPORT_DIR', opt$report_dir, x) - }) %>% - (function(x) { - gsub('DESEQ_RESULTS', opt$deseq_results, x) - }) %>% - (function(x) { - fileConn = file('deseq_results.Rmd') - writeLines(x, con=fileConn) - close(fileConn) - }) - - - ##=============STEP 4: render .Rmd templates================= - ## - ##=========================================================== - render('deseq_results.Rmd', output_file = opt$report_html) +spec_matrix = as.matrix( + data.frame(stringsAsFactors=FALSE, + long_flags = c("X_e", "X_W", "X_C", "X_T", "X_K", "X_M", "X_o", + "X_d", "X_s", "X_R", "X_t"), + short_flags = c("e", "W", "C", "T", "K", "M", "o", "d", "s", "R", + "t"), + argument_mask_flags = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), + data_type_flags = c("character", "character", "character", "character", + "character", "character", "character", + "character", "character", "character", "character") + ) +) +opt = getopt(spec_matrix) +opt +#---------------------------------------------------- - ##--------end of code rendering .Rmd templates---------------- +#-----------using passed arguments in R +# to define system environment variables--- +do.call(Sys.setenv, opt[-1]) +#---------------------------------------------------- + +#---------- often used variables ---------------- +# OUTPUT_DIR: path to the output associated directory, which stores all outputs +# TOOL_DIR: path to the tool installation directory +# RMD_NAME: name of Rmd file to be rendered +# OUTPUT_REPORT: path to galaxy output report +OUTPUT_DIR = opt$X_d +TOOL_DIR = opt$X_t +RMD_NAME = 'DESeq_results.Rmd' +OUTPUT_REPORT = opt$X_o + +# create the output associated directory to store all outputs +dir.create(OUTPUT_DIR, recursive = TRUE) + +#-----------------render Rmd-------------- +render(paste0(TOOL_DIR, '/', RMD_NAME), output_file = OUTPUT_REPORT) +#------------------------------------------ + +#==============the end============== + + +##--------end of code rendering .Rmd templates---------------- sink() ##=========== End of sinking output============================= \ No newline at end of file