changeset 3:a520a6c5e111 draft default tip

update
author mingchen0919
date Tue, 27 Feb 2018 15:30:28 -0500
parents 754a36851c6b
children
files DESeq.Rmd DESeq.xml DESeq_render.R DESeq_results.Rmd DESeq_results.xml DESeq_results_render.R
diffstat 6 files changed, 106 insertions(+), 118 deletions(-) [+]
line wrap: on
line diff
--- a/DESeq.Rmd	Tue Feb 27 14:07:10 2018 -0500
+++ b/DESeq.Rmd	Tue Feb 27 15:30:28 2018 -0500
@@ -24,12 +24,13 @@
 row.names(sample_table) = sample_table[,2]
 sample_table = sample_table[count_file_names, ]
 
-## copy count files into working directory
-file_copy = file.copy(count_file_paths, count_file_names, overwrite = TRUE)
+## copy count files into OUTPUT_DIR/counts
+dir.create(paste0(OUTPUT_DIR, '/counts'), recursive = TRUE)
+file_copy = file.copy(count_file_paths, paste0(OUTPUT_DIR, '/counts/', count_file_names), overwrite = TRUE)
 
 ## DESeqDataSet object
 dds = DESeqDataSetFromHTSeqCount(sampleTable = sample_table,
-                                 directory = './',
+                                 directory = paste0(OUTPUT_DIR, '/counts'),
                                  design = formula(opt$X_p))
 dds
 ```
@@ -88,9 +89,9 @@
 dds <- DESeq(dds)
 ```
 
-```{r}
-rm("opt")
-save(list=ls(all.names = TRUE), file=opt$X_w)
+```{r echo=FALSE}
+# save objects except for opt.
+save(list=ls()[ls() != "opt"], file=opt$X_w)
 ```
 
 
--- a/DESeq.xml	Tue Feb 27 14:07:10 2018 -0500
+++ b/DESeq.xml	Tue Feb 27 15:30:28 2018 -0500
@@ -1,4 +1,4 @@
-<tool name="DESeq Analysis" id='deseq2' version="2.0.1">
+<tool name="DESeq2: Analysis" id='deseq2' version="2.0.1">
     <description>
         "some description"
     </description>
@@ -71,6 +71,18 @@
         <data name="deseq_workspace" format="rdata" label="R workspace: DESeq analysis on ${on_string}"/>
     </outputs>
     <citations>
+        <citation type="bibtex">
+            @article{love2014moderated,
+            title={Moderated estimation of fold change and dispersion for RNA-seq data with DESeq2},
+            author={Love, Michael I and Huber, Wolfgang and Anders, Simon},
+            journal={Genome biology},
+            volume={15},
+            number={12},
+            pages={550},
+            year={2014},
+            publisher={BioMed Central}
+            }
+        </citation>
         <citation type="bibtex"><![CDATA[
             @article{allaire2016rmarkdown,
             title={rmarkdown: Dynamic Documents for R, 2016},
--- a/DESeq_render.R	Tue Feb 27 14:07:10 2018 -0500
+++ b/DESeq_render.R	Tue Feb 27 15:30:28 2018 -0500
@@ -17,11 +17,21 @@
 
 
 #------------get arguments into R--------------------
-# getopt_specification_matrix(extract_short_flags('fastqc_report.xml')) %>%
+# getopt_specification_matrix(extract_short_flags('')) %>%
 #   write.table(file = 'spec.txt', sep = ',', row.names = FALSE, col.names = TRUE, quote = FALSE)
 
 
-spec_matrix = as.matrix()
+spec_matrix = as.matrix(
+  data.frame(stringsAsFactors=FALSE,
+              long_flags = c("X_e", "X_o", "X_d", "X_s", "X_t", "X_P", "X_N",
+                             "X_S", "X_p", "X_w"),
+             short_flags = c("e", "o", "d", "s", "t", "P", "N", "S", "p", "w"),
+     argument_mask_flags = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L),
+         data_type_flags = c("character", "character", "character", "character",
+                             "character", "character", "character",
+                             "character", "character", "character")
+  )
+)
 opt = getopt(spec_matrix)
 #----------------------------------------------------
 
@@ -35,16 +45,17 @@
 # OUTPUT_DIR: path to the output associated directory, which stores all outputs
 # TOOL_DIR: path to the tool installation directory
 # RMD_NAME: name of Rmd file to be rendered
+# OUTPUT_REPORT: path to galaxy output report
 OUTPUT_DIR = opt$X_d
 TOOL_DIR =   opt$X_t
 RMD_NAME = 'DESeq.Rmd'
 OUTPUT_REPORT = opt$X_o
 
 # create the output associated directory to store all outputs
-dir.create(OUT_DIR, recursive = TRUE)
+dir.create(OUTPUT_DIR, recursive = TRUE)
 
 #-----------------render Rmd--------------
-render(paste0(TOOL_DIR, RMD_NAME, sep = '/'), output_file = OUTPUT_REPORT)
+render(paste0(TOOL_DIR, '/', RMD_NAME), output_file = OUTPUT_REPORT)
 #------------------------------------------
 
 #==============the end==============
--- a/DESeq_results.Rmd	Tue Feb 27 14:07:10 2018 -0500
+++ b/DESeq_results.Rmd	Tue Feb 27 15:30:28 2018 -0500
@@ -10,7 +10,7 @@
 
 ```{r setup, include=FALSE, warning=FALSE, message=FALSE}
 knitr::opts_chunk$set(
-  echo = ECHO,
+  echo = as.logical(opt$X_e),
   error = TRUE
 )
 ```
@@ -18,8 +18,9 @@
 
 ```{r eval=TRUE}
 # Import workspace
-fcp = file.copy("DESEQ_WORKSPACE", "deseq.RData")
-load("deseq.RData")
+# fcp = file.copy(opt$X_W, "deseq.RData")
+load(opt$X_W)
+opt
 ```
 
 # Results {.tabset}
@@ -28,7 +29,7 @@
 
 ```{r}
 cat('--- View the top 100 rows of the result table ---')
-res <- results(dds, contrast = c('CONTRAST_FACTOR', 'TREATMENT_LEVEL', 'CONDITION_LEVEL'))
+res <- results(dds, contrast = c(opt$X_C, opt$X_T, opt$X_K))
 write.csv(as.data.frame(res), file = 'deseq_results.csv')
 res_df = as.data.frame(res)[1:100, ]
 datatable(res_df, style="bootstrap", filter = 'top',
@@ -65,7 +66,7 @@
 ## Gene clustering
 
 ```{r}
-clustering_groups = strsplit("CLUSTERING_FACTORS", ',')[[1]]
+clustering_groups = strsplit(opt$X_M, ',')[[1]]
 
 topVarGenes <- head(order(rowVars(assay(rld)), decreasing = TRUE), 20)
 mat  <- assay(rld)[ topVarGenes, ]
--- a/DESeq_results.xml	Tue Feb 27 14:07:10 2018 -0500
+++ b/DESeq_results.xml	Tue Feb 27 15:30:28 2018 -0500
@@ -22,8 +22,14 @@
 
         Rscript '${__tool_directory__}/DESeq_results_render.R'
 
+            -e $echo
+		    -o $report
+		    -d $report.files_path
+		    -s $sink_message
+		    -t '${__tool_directory__}'
+
             ## 1. input data
-            -e $echo
+
             -W $deseq_workspace
             -C '$contrast_factor'
             -T '$treatment'
@@ -32,15 +38,8 @@
             -M '$clustering_factors'
 
             ## 2. output report and report site directory
-		    -r $report
-            -d $report.files_path
-            -s $sink_message
             -R $deseq_results
 
-		    ## 3. Rmd templates sitting in the tool directory
-            -t '${__tool_directory__}/DESeq_results.Rmd'
-
-
 
         ]]>
     </command>
@@ -61,7 +60,7 @@
     </inputs>
     <outputs>
         <data format="html" name="report" label="DESeq results report on ${on_string}" />
-        <data format="txt" name="sink_message" label="Warnings and Errors on ${on_string}" from_work_dir="warnings_and_errors.txt"/>
+        <data format="txt" name="sink_message" label="Warnings and Errors" from_work_dir="warnings_and_errors.txt"/>
         <data format="csv" name="deseq_results" label="DESeq results on ${on_string}" from_work_dir="deseq_results.csv" />
     </outputs>
     <citations>
--- a/DESeq_results_render.R	Tue Feb 27 14:07:10 2018 -0500
+++ b/DESeq_results_render.R	Tue Feb 27 15:30:28 2018 -0500
@@ -1,105 +1,69 @@
-library(getopt)
-library(rmarkdown)
-library(htmltools)
-library(dplyr)
-library(DESeq2)
-library(pheatmap)
-library(DT)
-library(ggplot2)
-
 ##============ Sink warnings and errors to a file ==============
 ## use the sink() function to wrap all code within it.
 ##==============================================================
 zz = file('warnings_and_errors.txt')
 sink(zz)
 sink(zz, type = 'message')
-  ##---------below is the code for rendering .Rmd templates-----
-  
-  ##=============STEP 1: handle command line arguments==========
-  ##
-  ##============================================================
-  # column 1: the long flag name
-  # column 2: the short flag alias. A SINGLE character string
-  # column 3: argument mask
-  #           0: no argument
-  #           1: argument required
-  #           2: argument is optional
-  # column 4: date type to which the flag's argument shall be cast.
-  #           possible values: logical, integer, double, complex, character.
-  #-------------------------------------------------------------
-  #++++++++++++++++++++ Best practice ++++++++++++++++++++++++++
-  # 1. short flag alias should match the flag in the command section in the XML file.
-  # 2. long flag name can be any legal R variable names
-  # 3. two names in args_list can have common string but one name should not be a part of another name.
-  #    for example, one name is "ECHO", if another name is "ECHO_XXX", it will cause problems.
-  #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
-  args_list=list()
-  ##------- 1. input data ---------------------
-  args_list$ECHO = c('echo', 'e', '1', 'character')
-  args_list$DESEQ_WORKSPACE = c('deseq_workspace', 'W', '1', 'character')
-  args_list$CONTRAST_FACTOR = c('contrast_factor', 'C', '1', 'character')
-  args_list$TREATMENT_LEVEL = c('treatment_level', 'T', '1', 'character')
-  args_list$CONDITION_LEVEL = c('condition_level', 'K', '1', 'character')
-  args_list$CLUSTERING_FACTORS = c('clustering_factors', 'M', '1', 'character')
-  ##--------2. output report and outputs --------------
-  args_list$REPORT_HTML = c('report_html', 'r', '1', 'character')
-  args_list$REPORT_DIR = c('report_dir', 'd', '1', 'character')
-  args_list$SINK_MESSAGE = c('sink_message', 's', '1', 'character')
-  args_list$DESEQ_RESULTS = c('deseq_results', 'R', '1', 'character')
-  ##--------3. .Rmd templates in the tool directory ----------
-  args_list$deseq_results_RMD = c('deseq_results_rmd', 't', '1', 'character')
-  ##-----------------------------------------------------------
-  opt = getopt(t(as.data.frame(args_list)))
+
+#------------import libraries--------------------
+options(stringsAsFactors = FALSE)
+
+library(getopt)
+library(rmarkdown)
+library(DESeq2)
+library(pheatmap)
+library(DT)
+library(ggplot2)
+#------------------------------------------------
+
+
+#------------get arguments into R--------------------
+# getopt_specification_matrix(extract_short_flags('DESeq_results.xml')) %>%
+#   write.table(file = 'spec.txt', sep = ',', row.names = FALSE, col.names = TRUE, quote = FALSE)
 
 
-  
-  ##=======STEP 2: create report directory (optional)==========
-  ##
-  ##===========================================================
-  dir.create(opt$report_dir)
-  
-  ##=STEP 3: replace placeholders in .Rmd with argument values=
-  ##
-  ##===========================================================
-  #++ need to replace placeholders with args values one by one+
-  readLines(opt$deseq_results_rmd) %>%
-    (function(x) {
-      gsub('ECHO', opt$echo, x)
-    }) %>%
-    (function(x) {
-      gsub('DESEQ_WORKSPACE', opt$deseq_workspace, x)
-    }) %>%
-    (function(x) {
-      gsub('CONTRAST_FACTOR', opt$contrast_factor, x)
-    }) %>%
-    (function(x) {
-      gsub('TREATMENT_LEVEL', opt$treatment_level, x)
-    }) %>%
-    (function(x) {
-      gsub('CONDITION_LEVEL', opt$condition_level, x)
-    }) %>%
-    (function(x) {
-      gsub('CLUSTERING_FACTORS', opt$clustering_factors, x)
-    }) %>%
-    (function(x) {
-      gsub('REPORT_DIR', opt$report_dir, x)
-    }) %>%
-    (function(x) {
-      gsub('DESEQ_RESULTS', opt$deseq_results, x)
-    }) %>%
-    (function(x) {
-      fileConn = file('deseq_results.Rmd')
-      writeLines(x, con=fileConn)
-      close(fileConn)
-    })
-  
-
-  ##=============STEP 4: render .Rmd templates=================
-  ##
-  ##===========================================================
-  render('deseq_results.Rmd', output_file = opt$report_html)
+spec_matrix = as.matrix(
+  data.frame(stringsAsFactors=FALSE,
+              long_flags = c("X_e", "X_W", "X_C", "X_T", "X_K", "X_M", "X_o",
+                             "X_d", "X_s", "X_R", "X_t"),
+             short_flags = c("e", "W", "C", "T", "K", "M", "o", "d", "s", "R",
+                             "t"),
+     argument_mask_flags = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L),
+         data_type_flags = c("character", "character", "character", "character",
+                             "character", "character", "character",
+                             "character", "character", "character", "character")
+  )
+)
+opt = getopt(spec_matrix)
+opt
+#----------------------------------------------------
 
 
-  ##--------end of code rendering .Rmd templates----------------
+#-----------using passed arguments in R 
+#           to define system environment variables---
+do.call(Sys.setenv, opt[-1])
+#----------------------------------------------------
+
+#---------- often used variables ----------------
+# OUTPUT_DIR: path to the output associated directory, which stores all outputs
+# TOOL_DIR: path to the tool installation directory
+# RMD_NAME: name of Rmd file to be rendered
+# OUTPUT_REPORT: path to galaxy output report
+OUTPUT_DIR = opt$X_d
+TOOL_DIR =   opt$X_t
+RMD_NAME = 'DESeq_results.Rmd'
+OUTPUT_REPORT = opt$X_o
+
+# create the output associated directory to store all outputs
+dir.create(OUTPUT_DIR, recursive = TRUE)
+
+#-----------------render Rmd--------------
+render(paste0(TOOL_DIR, '/', RMD_NAME), output_file = OUTPUT_REPORT)
+#------------------------------------------
+
+#==============the end==============
+
+
+##--------end of code rendering .Rmd templates----------------
 sink()
 ##=========== End of sinking output=============================
\ No newline at end of file