Mercurial > repos > mingchen0919 > aurora_fastq_dump
changeset 0:81c6ecc7b245 draft
planemo upload
author | mingchen0919 |
---|---|
date | Wed, 04 Apr 2018 11:06:18 -0400 |
parents | |
children | 55b0c9de7057 |
files | fastq_dump.Rmd fastq_dump.sh fastq_dump.xml fastq_dump_render.R getopt_specification.csv helper.R |
diffstat | 6 files changed, 170 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
---
title: 'HTML report title'
output:
  html_document:
    number_sections: true
    toc: true
    theme: cosmo
    highlight: tango
    code_folding: hide
---

```{r setup, include=FALSE, warning=FALSE, message=FALSE}
# fastq_dump.Rmd: report template rendered by fastq_dump_render.R.
# Show code by default and keep rendering when a chunk raises an error.
knitr::opts_chunk$set(
  echo = TRUE,
  error = TRUE
)
```


# Code for computational analysis

```{r, 'display output directory contents', results='asis', echo=FALSE}
## After the job is done, list every file found in the output directory as a
## Markdown link whose path is relative to that directory.

## Directory to list. REPORT_FILES_PATH is used when it is set; otherwise fall
## back to X_d, the variable under which fastq_dump_render.R exports the value
## of the `-d` option (it exports every parsed option as X_<short flag>).
files_dir = Sys.getenv('REPORT_FILES_PATH')
if (!nzchar(files_dir)) {
  files_dir = Sys.getenv('X_d')
}

## A space is required after '##' for pandoc to treat the line as a heading.
cat('## All output files')
cat('\n\n')
all_files = list.files(path = files_dir,
                       full.names = TRUE,
                       recursive = TRUE)

## fixed = TRUE: the directory is a literal path, not a regular expression.
## sep = '': keep stray spaces out of the link label and link target.
for (f in sub(files_dir, '.', all_files, fixed = TRUE)) {
  cat('* [', f, '](', f, ')\n', sep = '')
}
cat('\n')
```
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/fastq_dump.sh Wed Apr 04 11:06:18 2018 -0400 @@ -0,0 +1,9 @@ +export TOOL_DIR='${__tool_directory__}' && + +Rscript '${__tool_directory__}/'fastq_dump_render.R + + -o '$report' + -d '$report.files_path' + -s '$sink_message' + -A '$accessions_single_end' + -B '$accessions_paired_end'
<!-- fastq_dump.xml: tool wrapper that runs fastq_dump_render.R and collects
     the HTML report plus the warnings_and_errors.txt log. -->
<tool id="fastq_dump" name="aurora fastq-dump" version="1.0.0">
    <description> Download reads from NCBI SRA by accessions with fastq-dump from the SRA toolkit</description>
    <requirements>
        <requirement type="package" version="1.15.0.6-0">pandoc</requirement>
        <requirement type="package" version="1.20.0">r-getopt</requirement>
        <requirement type="package" version="1.6">r-rmarkdown</requirement>
        <requirement type="package" version="0.6.2">parallel-fastq-dump</requirement>
    </requirements>
    <stdio>
        <regex match="XXX" source="stderr" level="warning" description="Check the warnings_and_errors.txt file for more details."/>
    </stdio>
    <command><![CDATA[export TOOL_DIR='${__tool_directory__}' &&

Rscript '${__tool_directory__}/'fastq_dump_render.R

    -o '$report'
    -d '$report.files_path'
    -s '$sink_message'
    -A '$accessions_single_end'
    -B '$accessions_paired_end'
]]></command>
    <inputs>
        <param type="text" name="accessions_single_end" label="SRR/DRR/ERR accessions (single end reads)" help="A list of SRR/DRR/ERR accessions separated by space (e.g. SRR6077558 ERR343809)" optional="True"/>
        <param type="text" name="accessions_paired_end" label="SRR/DRR/ERR accessions (paired end reads)" help="A list of SRR/DRR/ERR accessions separated by comma or space (e.g. SRR6077558 ERR343809)" optional="True"/>
    </inputs>
    <outputs>
        <data format="html" name="report" label="tool report"/>
        <data format="txt" name="sink_message" label="Warnings and Errors" from_work_dir="warnings_and_errors.txt"/>
    </outputs>
    <citations>
        <citation type="bibtex"><![CDATA[
            @article{allaire2016rmarkdown,
            title={rmarkdown: Dynamic Documents for R, 2016},
            author={Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff
            and Wickham, Hadley and Atkins, Aron and Hyndman, Rob},
            journal={R package version 0.9},
            volume={6},
            year={2016}
            }
        ]]></citation>
        <citation type="bibtex"><![CDATA[
            @book{xie2015dynamic,
            title={Dynamic Documents with R and knitr},
            author={Xie, Yihui},
            volume={29},
            year={2015},
            publisher={CRC Press}
            }
        ]]></citation>
    </citations>
</tool>
# fastq_dump_render.R
# Parses the command-line options, exports them as environment variables and
# renders the R Markdown report. Output and messages produced along the way are
# diverted to 'warnings_and_errors.txt' in the working directory.

##============ Sink warnings and errors to a file ==============
## use the sink() function to wrap all code within it.
##==============================================================
zz = file('warnings_and_errors.txt')
# The output sink opens the connection; the message sink then reuses it.
sink(zz)
sink(zz, type = 'message')

#------------import libraries--------------------
options(stringsAsFactors = FALSE)

library(getopt)
library(rmarkdown)
#------------------------------------------------


#------------get arguments into R--------------------
# load helper function
source(file.path(Sys.getenv('TOOL_DIR'), 'helper.R'))
# import getopt specification matrix from a csv file
opt = getopt(getopt_specification_matrix('getopt_specification.csv'))
# expose the tool directory alongside the parsed options
opt$X_t = Sys.getenv('TOOL_DIR')
#----------------------------------------------------


#-----------using passed arguments in R
#           to define system environment variables---
# Every remaining option becomes an environment variable named after its long
# flag (X_<short flag>), so the Rmd template can read it with Sys.getenv().
# NOTE(review): opt[-1] drops the first element of the getopt result,
# presumably getopt's own bookkeeping entry rather than a user option - confirm.
do.call(Sys.setenv, opt[-1])
#----------------------------------------------------

#---------- often used variables ----------------
# OUTPUT_DIR: path to the output associated directory, which stores all outputs
# TOOL_DIR: path to the tool installation directory
# OUTPUT_REPORT: path of the rendered report
# RMD_NAME: file name of the Rmd template inside TOOL_DIR
OUTPUT_DIR = opt$X_d
TOOL_DIR = opt$X_t
OUTPUT_REPORT = opt$X_o
# Was '' in the original, which made render() target the tool directory itself.
RMD_NAME = 'fastq_dump.Rmd'

# create the output associated directory to store all outputs
dir.create(OUTPUT_DIR, recursive = TRUE)

#-----------------render Rmd--------------
render(file.path(TOOL_DIR, RMD_NAME), output_file = OUTPUT_REPORT)
#------------------------------------------

#==============the end==============


##--------end of code rendering .Rmd templates----------------
# Restore the message stream first: it has no stack and would otherwise stay
# pointed at the connection that the next sink() call releases.
sink(type = 'message')
sink()
##=========== End of sinking output=============================
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/getopt_specification.csv Wed Apr 04 11:06:18 2018 -0400 @@ -0,0 +1,6 @@ +short flag,argument mask,data type,variable name +o,1,character,report +d,1,character,report.files_path +s,1,character,sink_message +A,1,character,accessions_single_end +B,1,character,accessions_paired_end
# helper.R

#' Build a getopt specification matrix from a CSV file.
#'
#' Reads \code{specification_file} from \code{tool_dir} and converts it into a
#' matrix with the columns \code{long_flags}, \code{short_flags},
#' \code{argument_mask} and \code{data_type}. Long flags are derived from the
#' short flags by prefixing them with \code{X_}.
#'
#' @param specification_file name of a csv file inside \code{tool_dir} that
#'   stores the getopt specification data. Columns are read by position: the
#'   first column holds short flags, the second argument masks and the third
#'   data types. Any further column (e.g. the variable names used in the tool
#'   XML) is not read by this function.
#' @param gtg_name the name of a running GTG. Not used by this function.
#' @param tool_dir directory that contains \code{specification_file}; defaults
#'   to the value of the \code{TOOL_DIR} environment variable.
#' @return the specification as a matrix, one row per short flag.
#'   Stops with an error when a short flag occurs more than once.
getopt_specification_matrix = function(specification_file, gtg_name = 'gtg', tool_dir = Sys.getenv('TOOL_DIR')) {
  spec_path = paste0(tool_dir, '/', specification_file)
  spec = read.csv(spec_path, header = TRUE, stringsAsFactors = FALSE)

  # short flags must be unique; report the offending ones before failing
  flags = spec[, 1]
  is_repeat = duplicated(flags)
  if (any(is_repeat)) {
    cat('----Duplicated short flags found ----\n')
    cat('short flags: ', flags[is_repeat], '\n')
    stop('Duplicated short flags are not allowed.')
  }

  # assemble the specification; long flags are the short flags prefixed by 'X_'
  as.matrix(data.frame(long_flags = paste0('X_', flags),
                       short_flags = flags,
                       argument_mask = spec[, 2],
                       data_type = spec[, 3]))
}