changeset 0:81c6ecc7b245 draft

planemo upload
author mingchen0919
date Wed, 04 Apr 2018 11:06:18 -0400
parents
children 55b0c9de7057
files fastq_dump.Rmd fastq_dump.sh fastq_dump.xml fastq_dump_render.R getopt_specification.csv helper.R
diffstat 6 files changed, 170 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fastq_dump.Rmd	Wed Apr 04 11:06:18 2018 -0400
@@ -0,0 +1,37 @@
+---
+title: 'HTML report title'
+output:
+    html_document:
+      number_sections: true
+      toc: true
+      theme: cosmo
+      highlight: tango
+      code_folding: hide
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+# Report-wide chunk defaults: echo the source of every chunk and keep
+# rendering when a chunk raises an error.
+knitr::opts_chunk$set(echo = TRUE, error = TRUE)
+```
+
+
+# Code for computational analysis
+
+```{r, 'display output directory contents', results='asis', echo=FALSE}
+## After the job is done, list every file found in the output directory as a
+## markdown link whose path is relative to that directory.
+
+## REPORT_FILES_PATH is the variable this template reads. When it is unset,
+## fall back to X_d, the variable fastq_dump_render.R exports for the `-d`
+## option (the report's files directory).
+report_dir <- Sys.getenv('REPORT_FILES_PATH')
+if (!nzchar(report_dir)) {
+  report_dir <- Sys.getenv('X_d')
+}
+
+## a space after the hashes is required for pandoc to emit a heading
+cat('## All output files')
+cat('\n\n')
+all_files <- list.files(path = report_dir,
+                        full.names = TRUE,
+                        recursive = TRUE)
+
+## fixed = TRUE: the directory is a literal prefix, not a regular expression
+relative_files <- sub(report_dir, '.', all_files, fixed = TRUE)
+for (f in relative_files) {
+  ## sep = '' keeps stray spaces out of the link text and the link target
+  cat('* [', f, '](', f, ')\n', sep = '')
+}
+cat('\n')
+```
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fastq_dump.sh	Wed Apr 04 11:06:18 2018 -0400
@@ -0,0 +1,9 @@
+export TOOL_DIR='${__tool_directory__}' &&
+Rscript '${__tool_directory__}/'fastq_dump_render.R
+    -o '$report'
+    -d '$report.files_path'
+    -s '$sink_message'
+    -A '$accessions_single_end'
+    -B '$accessions_paired_end'
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fastq_dump.xml	Wed Apr 04 11:06:18 2018 -0400
@@ -0,0 +1,40 @@
+<tool id="fastq_dump" name="aurora fastq-dump" version="1.0.0">
+  <description> Download reads from NCBI SRA by accessions with fastq-dump from the SRA toolkit</description>
+  <requirements>
+    <requirement type="package" version="1.15.0.6-0">pandoc</requirement>
+    <requirement type="package" version="1.20.0">r-getopt</requirement>
+    <requirement type="package" version="1.6">r-rmarkdown</requirement>
+    <requirement type="package" version="0.6.2">parallel-fastq-dump</requirement>
+  </requirements>
+  <stdio>
+    <!-- NOTE(review): "XXX" looks like a placeholder pattern; confirm which stderr text should raise this warning. -->
+    <regex match="XXX" source="stderr" level="warning" description="Check the warnings_and_errors.txt file for more details."/>
+  </stdio>
+  <!-- The R script receives the report path (-o), its files directory (-d), the log dataset (-s) and the two accession lists (-A, -B). -->
+  <command><![CDATA[export TOOL_DIR='${__tool_directory__}' &&
+
+Rscript '${__tool_directory__}/'fastq_dump_render.R
+
+	-o '$report'
+	-d '$report.files_path'
+	-s '$sink_message'
+	-A '$accessions_single_end'
+	-B '$accessions_paired_end'
+]]></command>
+  <inputs>
+    <param type="text" name="accessions_single_end" label="SRR/DRR/ERR accessions (single end reads)" help="A list of SRR/DRR/ERR accessions separated by space (e.g. SRR6077558 ERR343809)" optional="True"/>
+    <param type="text" name="accessions_paired_end" label="SRR/DRR/ERR accessions (paired end reads)" help="A list of SRR/DRR/ERR accessions separated by comma or space (e.g. SRR6077558 ERR343809)" optional="True"/>
+  </inputs>
+  <outputs>
+    <data format="html" name="report" label="tool report"/>
+    <!-- collected from the file the R script writes into the job working directory -->
+    <data format="txt" name="sink_message" label="Warnings and Errors" from_work_dir="warnings_and_errors.txt"/>
+  </outputs>
+  <citations>
+        <citation type="bibtex"><![CDATA[
+            @article{allaire2016rmarkdown,
+            title={rmarkdown: Dynamic Documents for R, 2016},
+            author={Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff
+            and Wickham, Hadley and Atkins, Aron and Hyndman, Rob},
+            journal={R package version 0.9},
+            volume={6},
+            year={2016}
+            }
+        ]]></citation>
+        <citation type="bibtex"><![CDATA[
+            @book{xie2015dynamic,
+            title={Dynamic Documents with R and knitr},
+            author={Xie, Yihui},
+            volume={29},
+            year={2015},
+            publisher={CRC Press}
+            }
+        ]]></citation>
+  </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fastq_dump_render.R	Wed Apr 04 11:06:18 2018 -0400
@@ -0,0 +1,50 @@
+##============ Sink warnings and errors to a file ==============
+## Everything printed or signalled by the code below is redirected into
+## warnings_and_errors.txt in the current working directory.
+##==============================================================
+# Open the connection explicitly: a message sink needs a connection that is
+# already open, and a connection opened here stays open until close() is
+# called at the end of the script.
+zz <- file('warnings_and_errors.txt', open = 'wt')
+sink(zz)
+sink(zz, type = 'message')
+
+#------------import libraries--------------------
+options(stringsAsFactors = FALSE)
+
+library(getopt)
+library(rmarkdown)
+#------------------------------------------------
+
+
+#------------get arguments into R--------------------
+# TOOL_DIR: path to the tool installation directory (exported by the command)
+TOOL_DIR <- Sys.getenv('TOOL_DIR')
+# load helper function getopt_specification_matrix()
+source(file.path(TOOL_DIR, 'helper.R'))
+# parse the command line using the specification matrix stored as a csv file;
+# each option is available as opt$X_<short flag>
+opt <- getopt(getopt_specification_matrix('getopt_specification.csv'))
+opt$X_t <- TOOL_DIR
+#----------------------------------------------------
+
+
+#-----------using passed arguments in R
+#           to define system environment variables---
+# The first element of the getopt() result is skipped (presumably its ARGS
+# entry -- confirm); every remaining option becomes an environment variable
+# named X_<short flag>, which the Rmd template can read with Sys.getenv().
+do.call(Sys.setenv, opt[-1])
+#----------------------------------------------------
+
+#---------- often used variables ----------------
+# OUTPUT_DIR: path to the output associated directory, which stores all outputs
+# OUTPUT_REPORT: path of the HTML report to write
+# RMD_NAME: file name of the Rmd template inside TOOL_DIR
+OUTPUT_DIR <- opt$X_d
+OUTPUT_REPORT <- opt$X_o
+# An empty name made render() receive the tool directory itself; the template
+# shipped with this tool is fastq_dump.Rmd.
+RMD_NAME <- 'fastq_dump.Rmd'
+
+# create the output associated directory to store all outputs; an already
+# existing directory is not worth a warning in the log
+dir.create(OUTPUT_DIR, recursive = TRUE, showWarnings = FALSE)
+
+#-----------------render Rmd--------------
+render(file.path(TOOL_DIR, RMD_NAME), output_file = OUTPUT_REPORT)
+#------------------------------------------
+
+#==============the end==============
+
+
+##--------end of code rendering .Rmd templates----------------
+# Undo both diversions (messages first, then regular output) and release the
+# log file.
+sink(type = 'message')
+sink()
+close(zz)
+##=========== End of sinking output=============================
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/getopt_specification.csv	Wed Apr 04 11:06:18 2018 -0400
@@ -0,0 +1,6 @@
+short flag,argument mask,data type,variable name
+o,1,character,report
+d,1,character,report.files_path
+s,1,character,sink_message
+A,1,character,accessions_single_end
+B,1,character,accessions_paired_end
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/helper.R	Wed Apr 04 11:06:18 2018 -0400
@@ -0,0 +1,28 @@
+#' Build a getopt specification matrix from a csv file.
+#'
+#' \code{getopt_specification_matrix} reads a specification table from
+#' \code{tool_dir} and returns it as a matrix whose columns are, in order,
+#' long flag, short flag, argument mask and data type.
+#'
+#' @param specification_file name of a csv file (with a header row) located in
+#'   \code{tool_dir}. The first column holds short flags, the second column
+#'   argument masks and the third column data types; these three columns are
+#'   required. A fourth column (the variable names used in the tool XML) may be
+#'   present but is not read by this function.
+#' @param gtg_name the name of a running GTG. Not used by this function; kept
+#'   so that existing calls remain valid.
+#' @param tool_dir directory that contains \code{specification_file}. Defaults
+#'   to the value of the \code{TOOL_DIR} environment variable.
+#' @return a matrix with one row per option and the columns \code{long_flags},
+#'   \code{short_flags}, \code{argument_mask} and \code{data_type}. Each long
+#'   flag is the short flag prefixed with \code{X_}.
+getopt_specification_matrix <- function(specification_file, gtg_name = 'gtg', tool_dir = Sys.getenv('TOOL_DIR')) {
+  spec <- read.csv(file.path(tool_dir, specification_file),
+                   header = TRUE, stringsAsFactors = FALSE)
+
+  # fail with a clear message instead of an "undefined columns" error below
+  if (ncol(spec) < 3) {
+    stop('The specification file needs at least three columns: ',
+         'short flag, argument mask, data type.', call. = FALSE)
+  }
+
+  # short flags must be unique, otherwise the derived long flags would clash
+  short_flags <- spec[[1]]
+  repeated <- duplicated(short_flags)
+  if (any(repeated)) {
+    cat('----Duplicated short flags found ----\n')
+    cat('short flags: ', short_flags[repeated], '\n')
+    stop('Duplicated short flags are not allowed.')
+  }
+
+  # long flags are generated from the short flags
+  spec_table <- data.frame(long_flags = paste0('X_', short_flags),
+                           short_flags = short_flags,
+                           argument_mask = spec[[2]],
+                           data_type = spec[[3]],
+                           stringsAsFactors = FALSE)
+
+  as.matrix(spec_table)
+}
\ No newline at end of file