Mercurial > repos > mingchen0919 > aurora_fastq_dump
changeset 1:55b0c9de7057 draft
planemo upload
author | mingchen0919 |
---|---|
date | Wed, 04 Apr 2018 12:55:55 -0400 |
parents | 81c6ecc7b245 |
children | 8e913d0be3e6 |
files | fastq_dump.Rmd fastq_dump.xml fastq_dump_pe.Rmd fastq_dump_pe.xml fastq_dump_pe_render.R fastq_dump_render.R fastq_dump_se.Rmd fastq_dump_se.xml fastq_dump_se_render.R getopt_specification.csv |
diffstat | 10 files changed, 364 insertions(+), 129 deletions(-) [+] |
line wrap: on
line diff
--- a/fastq_dump.Rmd Wed Apr 04 11:06:18 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,37 +0,0 @@ ---- -title: 'HTML report title' -output: - html_document: - number_sections: true - toc: true - theme: cosmo - highlight: tango - code_folding: hide ---- - -```{r setup, include=FALSE, warning=FALSE, message=FALSE} -knitr::opts_chunk$set( - echo = TRUE, - error = TRUE -) -``` - - -# Code for computational analysis - -```{r, 'display output directory contents', results='asis', echo=FALSE} -## after the job is done, we list all files from the output directory. -## full relative path to the output directory needs to be displayed. - -cat('##All output files') -cat('\n\n') -all_files = list.files(path = Sys.getenv('REPORT_FILES_PATH'), - full.names = TRUE, - recursive = TRUE) - -for (f in sub(Sys.getenv('REPORT_FILES_PATH'), '.', all_files) ) { - cat('* [', f, '](', f, ')\n') -} -cat('\n') -``` -
--- a/fastq_dump.xml Wed Apr 04 11:06:18 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,40 +0,0 @@ -<tool id="fastq_dump" name="aurora fastq-dump" version="1.0.0"> - <description> Download reads from NCBI SRA by accessions with fastq-dump from the SRA toolkit</description> - <requirements> - <requirement type="package" version="1.15.0.6-0">pandoc</requirement><requirement type="package" version="1.20.0">r-getopt</requirement><requirement type="package" version="1.6">r-rmarkdown</requirement><requirement type="package" version="0.6.2">parallel-fastq-dump</requirement></requirements> - <stdio> - <regex match="XXX" source="stderr" level="warning" description="Check the warnings_and_errors.txt file for more details."/></stdio> - <command><![CDATA[export TOOL_DIR='${__tool_directory__}' && - -Rscript '${__tool_directory__}/'fastq_dump_render.R - - -o '$report' - -d '$report.files_path' - -s '$sink_message' - -A '$accessions_single_end' - -B '$accessions_paired_end' -]]></command> - <inputs> - <param type="text" name="accessions_single_end" label="SRR/DRR/ERR accessions (single end reads)" help="A list of SRR/DRR/ERR accessions separated space (e.g. SRR6077558 ERR343809)" optional="True"/><param type="text" name="accessions_paired_end" label="SRR/DRR/ERR accessions (paired end reads)" help="A list of SRR/DRR/ERR accessions separated by comma or space (e.g. 
SRR6077558 ERR343809)" optional="True"/></inputs> - <outputs> - <data format="html" name="report" label="tool report"/><data format="txt" name="sink_message" label="Warnings and Errors" from_work_dir="warnings_and_errors.txt"/></outputs> - <citations> - <citation type="bibtex"><![CDATA[ - @article{allaire2016rmarkdown, - title={rmarkdown: Dynamic Documents for R, 2016}, - author={Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff - and Wickham, Hadley and Atkins, Aron and Hyndman, Rob}, - journal={R package version 0.9}, - volume={6}, - year={2016} - } - ]]></citation><citation type="bibtex"><![CDATA[ - @book{xie2015dynamic, - title={Dynamic Documents with R and knitr}, - author={Xie, Yihui}, - volume={29}, - year={2015}, - publisher={CRC Press} - } - ]]></citation></citations> -</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fastq_dump_pe.Rmd Wed Apr 04 12:55:55 2018 -0400 @@ -0,0 +1,75 @@ +--- +title: 'Download reads from NCBI SRA' +output: + html_document: + highlight: pygments +--- + +```{r setup, include=FALSE, warning=FALSE, message=FALSE} +knitr::opts_chunk$set(error = TRUE, echo = FALSE) +``` + + +```{bash echo=FALSE} +# build job-script +mkdir -p ${WORKING_DIR}/paired-end-reads +``` + + +```{bash} +# paired-end.sh +mkdir -p ${X_d}/paired-end-reads +cat <<EOF >${X_d}/paired-end.sh +if [ "${X_B}" ];then + fastq-dump \\ + --split-files \\ + ${X_B} \\ + -O ${X_d}/paired-end-reads > ${X_d}/paired-end-log.txt 2>&1 +else + echo "No accessions are provided" > ${X_d}/paired-end-log.txt 2>&1 +fi +EOF +``` + + +```{bash, 'run jobs', echo=FALSE} +# run job script, always use absolute path. +# we want to run all jobs within the working path. +sh ${X_d}/paired-end.sh +``` + +```{r} +# rename paired-end reads files so that all files have the name pattern BASENAME_forward.EXT and BASENAME_reverse.EXT +from_files = list.files(paste0(working_dir, '/paired-end-reads'), full.names = TRUE) +to_files = str_replace_all(from_files, c('_1', '_2'), c('_forward', '_reverse')) +file.rename(from = from_files, to = to_files) +``` + + + +```{bash, 'display script', results='asis', echo=FALSE} +echo '### paired-end reads' +echo '' +echo '' +echo '```bash' +cat ${X_d}/paired-end.sh +echo '```' +``` + + +```{r, 'display output directory contents', results='asis', echo=FALSE} +## after the job is done, we list all files from the output directory. +## full relative path to the output directory needs to be displayed. + +cat('##All output files') +cat('\n\n') +all_files = list.files(path = opt$X_d, + full.names = TRUE, + recursive = TRUE) + +for (f in sub(Sys.getenv('X_d'), '.', all_files) ) { + cat('* [', f, '](', f, ')\n') +} +cat('\n') +``` +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fastq_dump_pe.xml Wed Apr 04 12:55:55 2018 -0400 @@ -0,0 +1,59 @@ +<tool id="fastq_dump_pe" name="aurora fastq-dump (paired end)" version="1.0.0"> + <description>Download reads from NCBI SRA by accessions with fastq-dump from the SRA toolkit</description> + <requirements> + <requirement type="package" version="1.15.0.6-0">pandoc</requirement> + <requirement type="package" version="1.20.0">r-getopt</requirement> + <requirement type="package" version="1.6">r-rmarkdown</requirement> + <requirement type="package" version="0.6.2">parallel-fastq-dump</requirement> + </requirements> + <stdio> + <regex match="XXX" source="stderr" level="warning" + description="Check the warnings_and_errors.txt file for more details."/> + </stdio> + <command><![CDATA[export TOOL_DIR='${__tool_directory__}' && + +Rscript '${__tool_directory__}/'fastq_dump_pe_render.R + + -o '$report' + -d '$report.files_path' + -s '$sink_message' + -A '$accessions' + +]]></command> + <inputs> + <param type="text" name="accessions" label="SRR/DRR/ERR accessions (paired end reads)" + help="A list of SRR/DRR/ERR accessions separated by comma or space (e.g. SRR6077558 ERR343809)" + optional="True"/> + </inputs> + <outputs> + <data name="report" format="html" label="${tool.name}" hidden="false"/> + <data name="sink_message" format="txt" label="${tool.name} warnings and errors" + from_work_dir="warnings_and_errors.txt" hidden="false"/> + <collection name="list_paired_collection" type="list:paired" label="${tool.name} list:paired collection"> + <discover_datasets + pattern="(?P&lt;identifier_0&gt;[^_]+)_(?P&lt;identifier_1&gt;[^_]+)\.(?P&lt;ext&gt;[^\._]+)?" 
+ directory="paired-end-reads" visible="false"/> + </collection> + </outputs> + <citations> + <citation type="bibtex"><![CDATA[ + @article{allaire2016rmarkdown, + title={rmarkdown: Dynamic Documents for R, 2016}, + author={Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff + and Wickham, Hadley and Atkins, Aron and Hyndman, Rob}, + journal={R package version 0.9}, + volume={6}, + year={2016} + } + ]]></citation> + <citation type="bibtex"><![CDATA[ + @book{xie2015dynamic, + title={Dynamic Documents with R and knitr}, + author={Xie, Yihui}, + volume={29}, + year={2015}, + publisher={CRC Press} + } + ]]></citation> + </citations> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/fastq_dump_pe_render.R Wed Apr 04 12:55:55 2018 -0400
@@ -0,0 +1,54 @@
+##============ Sink warnings and errors to a file ==============
+## use the sink() function to wrap all code within it.
+##==============================================================
+zz = file('warnings_and_errors.txt')
+sink(zz)
+sink(zz, type = 'message')
+
+#------------import libraries--------------------
+options(stringsAsFactors = FALSE)
+
+library(getopt)
+library(rmarkdown)
+library(stringr)
+#------------------------------------------------
+
+
+#------------get arguments into R--------------------
+# load helper function
+source(paste0(Sys.getenv('TOOL_DIR'), '/helper.R'))
+# import getopt specification matrix from a csv file
+opt = getopt(getopt_specification_matrix('getopt_specification.csv'))
+opt$X_t = Sys.getenv('TOOL_DIR')
+working_dir = getwd()
+Sys.setenv(WORKING_DIR = working_dir)
+#----------------------------------------------------
+
+
+#-----------using passed arguments in R
+# to define system environment variables---
+do.call(Sys.setenv, opt[-1])
+#----------------------------------------------------
+
+#---------- often used variables ----------------
+# OUTPUT_DIR: path to the output associated directory, which stores all outputs
+# TOOL_DIR: path to the tool installation directory
+OUTPUT_DIR = opt$X_d
+TOOL_DIR = opt$X_t
+OUTPUT_REPORT = opt$X_o
+RMD_NAME = 'fastq_dump.Rmd'
+
+
+# create the output associated directory to store all outputs
+dir.create(OUTPUT_DIR, recursive = TRUE)
+
+#-----------------render Rmd--------------
+render(paste0(TOOL_DIR, '/', RMD_NAME), output_file = OUTPUT_REPORT)
+#------------------------------------------
+
+#==============the end==============
+
+
+##--------end of code rendering .Rmd templates----------------
+sink()
+##=========== End of sinking output=============================
\ No newline at end of file
--- a/fastq_dump_render.R Wed Apr 04 11:06:18 2018 -0400
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,50 +0,0 @@
-##============ Sink warnings and errors to a file ==============
-## use the sink() function to wrap all code within it.
-##==============================================================
-zz = file('warnings_and_errors.txt')
-sink(zz)
-sink(zz, type = 'message')
-
-#------------import libraries--------------------
-options(stringsAsFactors = FALSE)
-
-library(getopt)
-library(rmarkdown)
-#------------------------------------------------
-
-
-#------------get arguments into R--------------------
-# load helper function
-source(paste0(Sys.getenv('TOOL_DIR'), '/helper.R'))
-# import getopt specification matrix from a csv file
-opt = getopt(getopt_specification_matrix('getopt_specification.csv'))
-opt$X_t = Sys.getenv('TOOL_DIR')
-#----------------------------------------------------
-
-
-#-----------using passed arguments in R
-# to define system environment variables---
-do.call(Sys.setenv, opt[-1])
-#----------------------------------------------------
-
-#---------- often used variables ----------------
-# OUTPUT_DIR: path to the output associated directory, which stores all outputs
-# TOOL_DIR: path to the tool installation directory
-OUTPUT_DIR = opt$X_d
-TOOL_DIR = opt$X_t
-OUTPUT_REPORT = opt$X_o
-RMD_NAME = ''
-
-# create the output associated directory to store all outputs
-dir.create(OUTPUT_DIR, recursive = TRUE)
-
-#-----------------render Rmd--------------
-render(paste0(TOOL_DIR, '/', RMD_NAME), output_file = OUTPUT_REPORT)
-#------------------------------------------
-
-#==============the end==============
-
-
-##--------end of code rendering .Rmd templates----------------
-sink()
-##=========== End of sinking output=============================
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fastq_dump_se.Rmd Wed Apr 04 12:55:55 2018 -0400 @@ -0,0 +1,66 @@ +--- +title: 'Download reads from NCBI SRA' +output: + html_document: + highlight: pygments +--- + +```{r setup, include=FALSE, warning=FALSE, message=FALSE} +knitr::opts_chunk$set(error = TRUE, echo = FALSE) +``` + + +```{bash echo=FALSE} +# build job-script +mkdir -p ${WORKING_DIR}/single-end-reads +``` + +```{bash} +# single-end.sh +cat <<EOF >${X_d}/single-end.sh +if [ "${X_A}" ];then + fastq-dump \\ + ${X_A} \\ + -O ${WORKING_DIR}/single-end-reads > ${X_d}/single-end-log.txt 2>&1 +else + echo "No accessions are provided" > ${X_d}/single-end-log.txt 2>&1 +fi +EOF +``` + + +```{bash, 'run jobs', echo=FALSE} +# run job script, always use absolute path. +# we want to run all jobs within the working path. +sh ${X_d}/single-end.sh +``` + + +```{bash, 'display script', results='asis', echo=FALSE} +echo '## Job scripts' +echo '' +echo '### single-end reads' +echo '' +echo '' +echo '```bash' +cat ${X_d}/single-end.sh +echo '```' +``` + + +```{r, 'display output directory contents', results='asis', echo=FALSE} +## after the job is done, we list all files from the output directory. +## full relative path to the output directory needs to be displayed. + +cat('##All output files') +cat('\n\n') +all_files = list.files(path = opt$X_d, + full.names = TRUE, + recursive = TRUE) + +for (f in sub(Sys.getenv('X_d'), '.', all_files) ) { + cat('* [', f, '](', f, ')\n') +} +cat('\n') +``` +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fastq_dump_se.xml Wed Apr 04 12:55:55 2018 -0400 @@ -0,0 +1,55 @@ +<tool id="fastq_dump_se" name="aurora fastq-dump (single end)" version="1.0.0"> + <description>Download reads from NCBI SRA by accessions with fastq-dump from the SRA toolkit</description> + <requirements> + <requirement type="package" version="1.15.0.6-0">pandoc</requirement> + <requirement type="package" version="1.20.0">r-getopt</requirement> + <requirement type="package" version="1.6">r-rmarkdown</requirement> + <requirement type="package" version="0.6.2">parallel-fastq-dump</requirement> + </requirements> + <stdio> + <regex match="XXX" source="stderr" level="warning" + description="Check the warnings_and_errors.txt file for more details."/> + </stdio> + <command><![CDATA[export TOOL_DIR='${__tool_directory__}' && + +Rscript '${__tool_directory__}/'fastq_dump_se_render.R + + -o '$report' + -d '$report.files_path' + -s '$sink_message' + -A '$accessions' +]]></command> + <inputs> + <param type="text" name="accessions" label="SRR/DRR/ERR accessions (single end reads)" + help="A list of SRR/DRR/ERR accessions separated space (e.g. 
SRR6077558 ERR343809)" optional="True"/> + </inputs> + <outputs> + <data name="report" format="html" label="${tool.name}" hidden="false"/> + <data name="sink_message" format="txt" label="${tool.name} warnings and errors" + from_work_dir="warnings_and_errors.txt" hidden="false"/> + <collection name="list_collection" type="list" label="${tool.name} list collection"> + <discover_datasets pattern="__name_and_ext__" directory="single-end-reads" visible="false"/> + </collection> + </outputs> + <citations> + <citation type="bibtex"><![CDATA[ + @article{allaire2016rmarkdown, + title={rmarkdown: Dynamic Documents for R, 2016}, + author={Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff + and Wickham, Hadley and Atkins, Aron and Hyndman, Rob}, + journal={R package version 0.9}, + volume={6}, + year={2016} + } + ]]></citation> + <citation type="bibtex"><![CDATA[ + @book{xie2015dynamic, + title={Dynamic Documents with R and knitr}, + author={Xie, Yihui}, + volume={29}, + year={2015}, + publisher={CRC Press} + } + ]]></citation> + </citations> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/fastq_dump_se_render.R Wed Apr 04 12:55:55 2018 -0400
@@ -0,0 +1,54 @@
+##============ Sink warnings and errors to a file ==============
+## use the sink() function to wrap all code within it.
+##==============================================================
+zz = file('warnings_and_errors.txt')
+sink(zz)
+sink(zz, type = 'message')
+
+#------------import libraries--------------------
+options(stringsAsFactors = FALSE)
+
+library(getopt)
+library(rmarkdown)
+library(stringr)
+#------------------------------------------------
+
+
+#------------get arguments into R--------------------
+# load helper function
+source(paste0(Sys.getenv('TOOL_DIR'), '/helper.R'))
+# import getopt specification matrix from a csv file
+opt = getopt(getopt_specification_matrix('getopt_specification.csv'))
+opt$X_t = Sys.getenv('TOOL_DIR')
+working_dir = getwd()
+Sys.setenv(WORKING_DIR = working_dir)
+#----------------------------------------------------
+
+
+#-----------using passed arguments in R
+# to define system environment variables---
+do.call(Sys.setenv, opt[-1])
+#----------------------------------------------------
+
+#---------- often used variables ----------------
+# OUTPUT_DIR: path to the output associated directory, which stores all outputs
+# TOOL_DIR: path to the tool installation directory
+OUTPUT_DIR = opt$X_d
+TOOL_DIR = opt$X_t
+OUTPUT_REPORT = opt$X_o
+RMD_NAME = 'fastq_dump.Rmd'
+
+
+# create the output associated directory to store all outputs
+dir.create(OUTPUT_DIR, recursive = TRUE)
+
+#-----------------render Rmd--------------
+render(paste0(TOOL_DIR, '/', RMD_NAME), output_file = OUTPUT_REPORT)
+#------------------------------------------
+
+#==============the end==============
+
+
+##--------end of code rendering .Rmd templates----------------
+sink()
+##=========== End of sinking output=============================
\ No newline at end of file
--- a/getopt_specification.csv Wed Apr 04 11:06:18 2018 -0400
+++ b/getopt_specification.csv Wed Apr 04 12:55:55 2018 -0400
@@ -2,5 +2,4 @@
 o,1,character,report
 d,1,character,report.files_path
 s,1,character,sink_message
-A,1,character,accessions_single_end
-B,1,character,accessions_paired_end
+A,1,character,accessions