changeset 1:55b0c9de7057 draft

planemo upload
author mingchen0919
date Wed, 04 Apr 2018 12:55:55 -0400
parents 81c6ecc7b245
children 8e913d0be3e6
files fastq_dump.Rmd fastq_dump.xml fastq_dump_pe.Rmd fastq_dump_pe.xml fastq_dump_pe_render.R fastq_dump_render.R fastq_dump_se.Rmd fastq_dump_se.xml fastq_dump_se_render.R getopt_specification.csv
diffstat 10 files changed, 364 insertions(+), 129 deletions(-) [+]
line wrap: on
line diff
--- a/fastq_dump.Rmd	Wed Apr 04 11:06:18 2018 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,37 +0,0 @@
----
-title: 'HTML report title'
-output:
-    html_document:
-      number_sections: true
-      toc: true
-      theme: cosmo
-      highlight: tango
-      code_folding: hide
----
-
-```{r setup, include=FALSE, warning=FALSE, message=FALSE}
-knitr::opts_chunk$set(
-  echo = TRUE,
-  error = TRUE
-)
-```
-
-
-# Code for computational analysis
-
-```{r, 'display output directory contents', results='asis', echo=FALSE}
-## after the job is done, we list all files from the output directory.
-## full relative path to the output directory needs to be displayed.
-
-cat('##All output files')
-cat('\n\n')
-all_files = list.files(path = Sys.getenv('REPORT_FILES_PATH'), 
-                       full.names = TRUE, 
-                       recursive = TRUE)
-
-for (f in sub(Sys.getenv('REPORT_FILES_PATH'), '.', all_files) ) {
-  cat('* [', f, '](', f, ')\n')
-}
-cat('\n')
-```
-
--- a/fastq_dump.xml	Wed Apr 04 11:06:18 2018 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,40 +0,0 @@
-<tool id="fastq_dump" name="aurora fastq-dump" version="1.0.0">
-  <description> Download reads from NCBI SRA by accessions with fastq-dump from the SRA toolkit</description>
-  <requirements>
-        <requirement type="package" version="1.15.0.6-0">pandoc</requirement><requirement type="package" version="1.20.0">r-getopt</requirement><requirement type="package" version="1.6">r-rmarkdown</requirement><requirement type="package" version="0.6.2">parallel-fastq-dump</requirement></requirements>
-  <stdio>
-        <regex match="XXX" source="stderr" level="warning" description="Check the warnings_and_errors.txt file for more details."/></stdio>
-  <command><![CDATA[export TOOL_DIR='${__tool_directory__}' &&
-
-Rscript '${__tool_directory__}/'fastq_dump_render.R
-
-	-o '$report'
-	-d '$report.files_path'
-	-s '$sink_message'
-	-A '$accessions_single_end'
-	-B '$accessions_paired_end'
-]]></command>
-  <inputs>
-    <param type="text" name="accessions_single_end" label="SRR/DRR/ERR accessions (single end reads)" help="A list of SRR/DRR/ERR accessions separated space (e.g. SRR6077558 ERR343809)" optional="True"/><param type="text" name="accessions_paired_end" label="SRR/DRR/ERR accessions (paired end reads)" help="A list of SRR/DRR/ERR accessions separated by comma or space (e.g. SRR6077558 ERR343809)" optional="True"/></inputs>
-  <outputs>
-        <data format="html" name="report" label="tool report"/><data format="txt" name="sink_message" label="Warnings and Errors" from_work_dir="warnings_and_errors.txt"/></outputs>
-  <citations>
-        <citation type="bibtex"><![CDATA[
-            @article{allaire2016rmarkdown,
-            title={rmarkdown: Dynamic Documents for R, 2016},
-            author={Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff
-            and Wickham, Hadley and Atkins, Aron and Hyndman, Rob},
-            journal={R package version 0.9},
-            volume={6},
-            year={2016}
-            }
-        ]]></citation><citation type="bibtex"><![CDATA[
-            @book{xie2015dynamic,
-            title={Dynamic Documents with R and knitr},
-            author={Xie, Yihui},
-            volume={29},
-            year={2015},
-            publisher={CRC Press}
-            }
-        ]]></citation></citations>
-</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fastq_dump_pe.Rmd	Wed Apr 04 12:55:55 2018 -0400
@@ -0,0 +1,75 @@
+---
+title: 'Download reads from NCBI SRA'
+output: 
+  html_document:
+    highlight: pygments
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+knitr::opts_chunk$set(error = TRUE, echo = FALSE)
+```
+
+
+```{bash echo=FALSE}
+# create the reads directory inside the job working directory; the tool XML discovers the list:paired collection from "paired-end-reads"
+mkdir -p ${WORKING_DIR}/paired-end-reads
+```
+
+
+```{bash}
+# write paired-end.sh; the heredoc delimiter is unquoted, so every ${...} is expanded now and the script holds literal values
+mkdir -p ${X_d}
+cat <<EOF >${X_d}/paired-end.sh
+if [ "${X_A}" ];then
+  fastq-dump \\
+    --split-files \\
+    ${X_A} \\
+    -O ${WORKING_DIR}/paired-end-reads > ${X_d}/paired-end-log.txt 2>&1
+else
+  echo "No accessions are provided" > ${X_d}/paired-end-log.txt 2>&1
+fi
+EOF
+```
+
+
+```{bash, 'run jobs', echo=FALSE}
+# run job script, always use absolute path. 
+# we want to run all jobs within the working path.
+sh ${X_d}/paired-end.sh
+```
+
+```{r}
+# rename BASENAME_1.EXT / BASENAME_2.EXT to BASENAME_forward.EXT / BASENAME_reverse.EXT; patterns are anchored to the file name so directory parts are never rewritten
+from_files = list.files(paste0(working_dir, '/paired-end-reads'), full.names = TRUE)
+to_files = sub('_2(\\.[^/]*)$', '_reverse\\1', sub('_1(\\.[^/]*)$', '_forward\\1', from_files))
+file.rename(from = from_files, to = to_files)
+```
+
+
+
+```{bash, 'display script', results='asis', echo=FALSE}
+echo '### paired-end reads'
+echo ''
+echo ''
+echo '```bash'
+cat ${X_d}/paired-end.sh
+echo '```'
+```
+
+
+```{r, 'display output directory contents', results='asis', echo=FALSE}
+## after the job is done, we list all files from the output directory.
+## paths are shown relative to the output directory (its prefix is replaced by '.').
+
+cat('## All output files')
+cat('\n\n')
+all_files = list.files(path = opt$X_d,
+                       full.names = TRUE,
+                       recursive = TRUE)
+
+for (f in sub(opt$X_d, '.', all_files, fixed = TRUE) ) {
+  cat('* [', f, '](', f, ')\n', sep = '')
+}
+cat('\n')
+```
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fastq_dump_pe.xml	Wed Apr 04 12:55:55 2018 -0400
@@ -0,0 +1,59 @@
+<tool id="fastq_dump_pe" name="aurora fastq-dump (paired end)" version="1.0.0">
+    <description>Download reads from NCBI SRA by accessions with fastq-dump from the SRA toolkit</description>
+    <requirements>
+        <requirement type="package" version="1.15.0.6-0">pandoc</requirement>
+        <requirement type="package" version="1.20.0">r-getopt</requirement>
+        <requirement type="package" version="1.6">r-rmarkdown</requirement>
+        <requirement type="package" version="0.6.2">parallel-fastq-dump</requirement>
+    </requirements>
+    <stdio>
+        <regex match="XXX" source="stderr" level="warning"
+               description="Check the warnings_and_errors.txt file for more details."/>
+    </stdio>
+    <command><![CDATA[export TOOL_DIR='${__tool_directory__}' &&
+
+Rscript '${__tool_directory__}/'fastq_dump_pe_render.R
+
+	-o '$report'
+	-d '$report.files_path'
+	-s '$sink_message'
+	-A '$accessions'
+
+]]></command>
+    <inputs>
+        <param type="text" name="accessions" label="SRR/DRR/ERR accessions (paired end reads)"
+               help="A list of SRR/DRR/ERR accessions separated by space (e.g. SRR6077558 ERR343809)"
+               optional="True"/>
+    </inputs>
+    <outputs>
+        <data name="report" format="html" label="${tool.name}" hidden="false"/>
+        <data name="sink_message" format="txt" label="${tool.name} warnings and errors"
+              from_work_dir="warnings_and_errors.txt" hidden="false"/>
+        <collection name="list_paired_collection" type="list:paired" label="${tool.name} list:paired collection">
+            <discover_datasets
+                    pattern="(?P&lt;identifier_0&gt;[^_]+)_(?P&lt;identifier_1&gt;[^_]+)\.(?P&lt;ext&gt;[^\._]+)?"
+                    directory="paired-end-reads" visible="false"/> <!-- matches ACCESSION_forward.EXT / ACCESSION_reverse.EXT -->
+        </collection>
+    </outputs>
+    <citations>
+        <citation type="bibtex"><![CDATA[
+            @article{allaire2016rmarkdown,
+            title={rmarkdown: Dynamic Documents for R, 2016},
+            author={Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff
+            and Wickham, Hadley and Atkins, Aron and Hyndman, Rob},
+            journal={R package version 0.9},
+            volume={6},
+            year={2016}
+            }
+        ]]></citation>
+        <citation type="bibtex"><![CDATA[
+            @book{xie2015dynamic,
+            title={Dynamic Documents with R and knitr},
+            author={Xie, Yihui},
+            volume={29},
+            year={2015},
+            publisher={CRC Press}
+            }
+        ]]></citation>
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fastq_dump_pe_render.R	Wed Apr 04 12:55:55 2018 -0400
@@ -0,0 +1,54 @@
+##============ Sink warnings and errors to a file ==============
+## both stdout and the message stream are diverted to warnings_and_errors.txt
+##==============================================================
+zz = file('warnings_and_errors.txt')
+sink(zz)
+sink(zz, type = 'message')
+
+#------------import libraries--------------------
+options(stringsAsFactors = FALSE)
+
+library(getopt)
+library(rmarkdown)
+library(stringr)
+#------------------------------------------------
+
+
+#------------get arguments into R--------------------
+# load helper function (expected to provide getopt_specification_matrix)
+source(paste0(Sys.getenv('TOOL_DIR'), '/helper.R'))
+# import getopt specification matrix from a csv file
+opt = getopt(getopt_specification_matrix('getopt_specification.csv'))
+opt$X_t = Sys.getenv('TOOL_DIR')
+working_dir = getwd()
+Sys.setenv(WORKING_DIR = working_dir)
+#----------------------------------------------------
+
+
+#-----------export every parsed option (all but the first element of opt)
+#           as an environment variable so the bash chunks can read ${X_*}---
+do.call(Sys.setenv, opt[-1])
+#----------------------------------------------------
+
+#---------- often used variables ----------------
+# OUTPUT_DIR: path to the output associated directory, which stores all outputs
+# TOOL_DIR: path to the tool installation directory
+OUTPUT_DIR = opt$X_d
+TOOL_DIR =   opt$X_t
+OUTPUT_REPORT = opt$X_o
+RMD_NAME = 'fastq_dump_pe.Rmd'
+
+
+# create the output associated directory to store all outputs (no warning if it already exists)
+dir.create(OUTPUT_DIR, recursive = TRUE, showWarnings = FALSE)
+
+#-----------------render Rmd--------------
+render(file.path(TOOL_DIR, RMD_NAME), output_file = OUTPUT_REPORT)
+#------------------------------------------
+
+#==============the end==============
+
+##--------end of code rendering .Rmd templates; restore the message stream first, then stdout----
+sink(type = 'message')
+sink()
+##=========== End of sinking output=============================
\ No newline at end of file
--- a/fastq_dump_render.R	Wed Apr 04 11:06:18 2018 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,50 +0,0 @@
-##============ Sink warnings and errors to a file ==============
-## use the sink() function to wrap all code within it.
-##==============================================================
-zz = file('warnings_and_errors.txt')
-sink(zz)
-sink(zz, type = 'message')
-
-#------------import libraries--------------------
-options(stringsAsFactors = FALSE)
-
-library(getopt)
-library(rmarkdown)
-#------------------------------------------------
-
-
-#------------get arguments into R--------------------
-# load helper function
-source(paste0(Sys.getenv('TOOL_DIR'), '/helper.R'))
-# import getopt specification matrix from a csv file
-opt = getopt(getopt_specification_matrix('getopt_specification.csv'))
-opt$X_t = Sys.getenv('TOOL_DIR')
-#----------------------------------------------------
-
-
-#-----------using passed arguments in R 
-#           to define system environment variables---
-do.call(Sys.setenv, opt[-1])
-#----------------------------------------------------
-
-#---------- often used variables ----------------
-# OUTPUT_DIR: path to the output associated directory, which stores all outputs
-# TOOL_DIR: path to the tool installation directory
-OUTPUT_DIR = opt$X_d
-TOOL_DIR =   opt$X_t
-OUTPUT_REPORT = opt$X_o
-RMD_NAME = ''
-
-# create the output associated directory to store all outputs
-dir.create(OUTPUT_DIR, recursive = TRUE)
-
-#-----------------render Rmd--------------
-render(paste0(TOOL_DIR, '/', RMD_NAME), output_file = OUTPUT_REPORT)
-#------------------------------------------
-
-#==============the end==============
-
-
-##--------end of code rendering .Rmd templates----------------
-sink()
-##=========== End of sinking output=============================
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fastq_dump_se.Rmd	Wed Apr 04 12:55:55 2018 -0400
@@ -0,0 +1,66 @@
+---
+title: 'Download reads from NCBI SRA'
+output: 
+  html_document:
+    highlight: pygments
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+knitr::opts_chunk$set(error = TRUE, echo = FALSE)
+```
+
+
+```{bash echo=FALSE}
+# create the reads directory inside the job working directory; the tool XML discovers the list collection from "single-end-reads"
+mkdir -p ${WORKING_DIR}/single-end-reads
+```
+
+```{bash}
+# write single-end.sh; the heredoc delimiter is unquoted, so every ${...} is expanded now and the script holds literal values
+cat <<EOF >${X_d}/single-end.sh
+if [ "${X_A}" ];then
+  fastq-dump \\
+    ${X_A} \\
+    -O ${WORKING_DIR}/single-end-reads > ${X_d}/single-end-log.txt 2>&1
+else
+  echo "No accessions are provided" > ${X_d}/single-end-log.txt 2>&1
+fi
+EOF
+```
+
+
+```{bash, 'run jobs', echo=FALSE}
+# run job script, always use absolute path. 
+# we want to run all jobs within the working path.
+sh ${X_d}/single-end.sh
+```
+
+
+```{bash, 'display script', results='asis', echo=FALSE}
+echo '## Job scripts'
+echo ''
+echo '### single-end reads'
+echo ''
+echo ''
+echo '```bash'
+cat ${X_d}/single-end.sh
+echo '```'
+```
+
+
+```{r, 'display output directory contents', results='asis', echo=FALSE}
+## after the job is done, we list all files from the output directory.
+## paths are shown relative to the output directory (its prefix is replaced by '.').
+
+cat('## All output files')
+cat('\n\n')
+all_files = list.files(path = opt$X_d,
+                       full.names = TRUE,
+                       recursive = TRUE)
+
+for (f in sub(opt$X_d, '.', all_files, fixed = TRUE) ) {
+  cat('* [', f, '](', f, ')\n', sep = '')
+}
+cat('\n')
+```
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fastq_dump_se.xml	Wed Apr 04 12:55:55 2018 -0400
@@ -0,0 +1,55 @@
+<tool id="fastq_dump_se" name="aurora fastq-dump (single end)" version="1.0.0">
+    <description>Download reads from NCBI SRA by accessions with fastq-dump from the SRA toolkit</description>
+    <requirements>
+        <requirement type="package" version="1.15.0.6-0">pandoc</requirement>
+        <requirement type="package" version="1.20.0">r-getopt</requirement>
+        <requirement type="package" version="1.6">r-rmarkdown</requirement>
+        <requirement type="package" version="0.6.2">parallel-fastq-dump</requirement>
+    </requirements>
+    <stdio>
+        <regex match="XXX" source="stderr" level="warning"
+               description="Check the warnings_and_errors.txt file for more details."/>
+    </stdio>
+    <command><![CDATA[export TOOL_DIR='${__tool_directory__}' &&
+
+Rscript '${__tool_directory__}/'fastq_dump_se_render.R
+
+	-o '$report'
+	-d '$report.files_path'
+	-s '$sink_message'
+	-A '$accessions'
+]]></command>
+    <inputs>
+        <param type="text" name="accessions" label="SRR/DRR/ERR accessions (single end reads)"
+               help="A list of SRR/DRR/ERR accessions separated by space (e.g. SRR6077558 ERR343809)" optional="True"/>
+    </inputs>
+    <outputs>
+        <data name="report" format="html" label="${tool.name}" hidden="false"/>
+        <data name="sink_message" format="txt" label="${tool.name} warnings and errors"
+              from_work_dir="warnings_and_errors.txt" hidden="false"/>
+        <collection name="list_collection" type="list" label="${tool.name} list collection">
+            <discover_datasets pattern="__name_and_ext__" directory="single-end-reads" visible="false"/>
+        </collection>
+    </outputs>
+    <citations>
+        <citation type="bibtex"><![CDATA[
+            @article{allaire2016rmarkdown,
+            title={rmarkdown: Dynamic Documents for R, 2016},
+            author={Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff
+            and Wickham, Hadley and Atkins, Aron and Hyndman, Rob},
+            journal={R package version 0.9},
+            volume={6},
+            year={2016}
+            }
+        ]]></citation>
+        <citation type="bibtex"><![CDATA[
+            @book{xie2015dynamic,
+            title={Dynamic Documents with R and knitr},
+            author={Xie, Yihui},
+            volume={29},
+            year={2015},
+            publisher={CRC Press}
+            }
+        ]]></citation>
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fastq_dump_se_render.R	Wed Apr 04 12:55:55 2018 -0400
@@ -0,0 +1,54 @@
+##============ Sink warnings and errors to a file ==============
+## both stdout and the message stream are diverted to warnings_and_errors.txt
+##==============================================================
+zz = file('warnings_and_errors.txt')
+sink(zz)
+sink(zz, type = 'message')
+
+#------------import libraries--------------------
+options(stringsAsFactors = FALSE)
+
+library(getopt)
+library(rmarkdown)
+library(stringr)
+#------------------------------------------------
+
+
+#------------get arguments into R--------------------
+# load helper function (expected to provide getopt_specification_matrix)
+source(paste0(Sys.getenv('TOOL_DIR'), '/helper.R'))
+# import getopt specification matrix from a csv file
+opt = getopt(getopt_specification_matrix('getopt_specification.csv'))
+opt$X_t = Sys.getenv('TOOL_DIR')
+working_dir = getwd()
+Sys.setenv(WORKING_DIR = working_dir)
+#----------------------------------------------------
+
+
+#-----------export every parsed option (all but the first element of opt)
+#           as an environment variable so the bash chunks can read ${X_*}---
+do.call(Sys.setenv, opt[-1])
+#----------------------------------------------------
+
+#---------- often used variables ----------------
+# OUTPUT_DIR: path to the output associated directory, which stores all outputs
+# TOOL_DIR: path to the tool installation directory
+OUTPUT_DIR = opt$X_d
+TOOL_DIR =   opt$X_t
+OUTPUT_REPORT = opt$X_o
+RMD_NAME = 'fastq_dump_se.Rmd'
+
+
+# create the output associated directory to store all outputs (no warning if it already exists)
+dir.create(OUTPUT_DIR, recursive = TRUE, showWarnings = FALSE)
+
+#-----------------render Rmd--------------
+render(file.path(TOOL_DIR, RMD_NAME), output_file = OUTPUT_REPORT)
+#------------------------------------------
+
+#==============the end==============
+
+##--------end of code rendering .Rmd templates; restore the message stream first, then stdout----
+sink(type = 'message')
+sink()
+##=========== End of sinking output=============================
\ No newline at end of file
--- a/getopt_specification.csv	Wed Apr 04 11:06:18 2018 -0400
+++ b/getopt_specification.csv	Wed Apr 04 12:55:55 2018 -0400
@@ -2,5 +2,4 @@
 o,1,character,report
 d,1,character,report.files_path
 s,1,character,sink_message
-A,1,character,accessions_single_end
-B,1,character,accessions_paired_end
+A,1,character,accessions