Mercurial > repos > mingchen0919 > rmarkdown_fastq_dump
changeset 11:d4a9c31d0046 draft
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_fastq_dump commit affb362adeb095389646b0c51738d24c2de3838d-dirty
author | mingchen0919 |
---|---|
date | Wed, 27 Sep 2017 16:43:52 -0400 |
parents | 057a6b742899 |
children | d330ce2d4663 |
files | fastq_dump_pe.Rmd fastq_dump_pe.xml fastq_dump_pe_render.R fastq_dump_se.Rmd fastq_dump_se.xml |
diffstat | 5 files changed, 171 insertions(+), 25 deletions(-) [+] |
line wrap: on
line diff
---
title: 'Fastq-dump: download and extract paired end reads into FASTQ/FASTA file'
output:
  html_document:
    number_sections: true
    toc: true
    theme: cosmo
    highlight: tango
---

```{r setup, include=FALSE, warning=FALSE, message=FALSE}
knitr::opts_chunk$set(
  echo = ECHO
)
```

# Command line arguments

```{r 'command line arguments'}
str(opt)
```

# Download and extract reads

```{r 'download and extract reads'}
# NOTE: the quoted upper-case tokens in this chunk are template placeholders;
# the render script replaces them with the user's values before knitting.

# create a directory to store read files (no warning if it already exists)
dir.create('read_files_directory', showWarnings = FALSE)

# accessions may be separated by commas and/or whitespace
sra_accessions <- strsplit('SRA_ACCESSION', '[,[:space:]]+')[[1]]
sra_accessions <- sra_accessions[sra_accessions != '']

# options shared by every download; they do not depend on the accession,
# so they are assembled once outside the loop
dump_options <- c('-O', 'read_files_directory')
if ('FORMAT' == 'fasta') {
  dump_options <- c('--fasta', dump_options)
}
# compare against the string 'TRUE' rather than relying on logical coercion
if ('SPLIT_FILES' == 'TRUE') {
  # fastq-dump spells this flag with a hyphen
  dump_options <- c(dump_options, '--split-files')
}

# loop through SRA accessions to download and extract reads.
for (id in sra_accessions) {
  # shQuote() keeps a malformed accession from being interpreted by the shell
  command <- paste(c('fastq-dump', dump_options, shQuote(id)), collapse = ' ')
  print(command)
  status <- system(command = command)
  if (status != 0) {
    # report the failure but keep going with the remaining accessions
    message('fastq-dump exited with status ', status, ' for accession ', id)
  }
}
```
--- a/fastq_dump_pe.xml Wed Sep 27 16:17:40 2017 -0400 +++ b/fastq_dump_pe.xml Wed Sep 27 16:43:52 2017 -0400 @@ -12,46 +12,43 @@ collection. </description> <stdio> - <regex match="Execution halted" - source="both" - level="fatal" - description="Execution halted." /> - <regex match="Error in" - source="both" - level="fatal" - description="An undefined error occured, please check your intput carefully and contact your administrator." /> - <regex match="Fatal error" - source="both" - level="fatal" - description="An undefined error occured, please check your intput carefully and contact your administrator." /> + <!--All stderr are redirected to a file. "XXX" is used to match with nothing--> + <regex match="XXX" + source="stderr" + level="warning" + description="Check the warnings_and_errors.txt file for more details."/> </stdio> <command> <![CDATA[ - Rscript '${__tool_directory__}/fastq_dump_se_render.R' - -i $sra_accession + Rscript '${__tool_directory__}/fastq_dump_pe_render.R' + -i '$sra_accession' -e $echo -f $format + -p $split_files -r $report -d $report.files_path + -s $sink_message - -t '${__tool_directory__}/fastq_dump_se.Rmd' + -t '${__tool_directory__}/fastq_dump_pe.Rmd' ]]> </command> <inputs> <param type="text" name="sra_accession" label="SRR/DRR/ERR accessions" optional="false" help="A list of SRR/DRR/ERR accessions separated by comma or space. e.g. SRR6077558,ERR343809"/> - <param type="boolean" name="format" truevalue="fastq" falsevalue="fasta" checked="false" label="output files in fastq (true) or fasta (false)?"/> + <param type="boolean" name="format" truevalue="fastq" falsevalue="fasta" checked="false" + label="output files in fastq (true) or fasta (false)?"/> <param type="boolean" name="split_files" truevalue="TRUE" falsevalue="FALSE" checked="false" label="Split files" - help="Dump each read into separate file.Files - will receive suffix corresponding to read - number"/> + help="Dump each read into separate file. 
Files will receive suffix corresponding to read number"/> - <param type="boolean" name="echo" truevalue="TRUE" falsevalue="FALSE" checked="false" label="Display analysis code in report?" /> + <param type="boolean" name="echo" truevalue="TRUE" falsevalue="FALSE" checked="false" + label="Display analysis code in report?"/> </inputs> <outputs> - <collection type="list" name="list_collection"> - <discover_datasets pattern="__name_and_ext__" directory="read_files_directory" /> + <data format="html" name="report" label="Fastq-dump report" /> + <collection type="list" name="list_collection" label="Fastq-dump (paired end reads)"> + <discover_datasets pattern="(?P<identifier_0>[^_]+)_(?P<identifier_1>[^_]+)\.(?P<ext>[^\._]+)?" directory="read_files_directory"/> </collection> + <data format="txt" name="sink_message" label="Warnings and Errors" from_work_dir="warnings_and_errors.txt" /> </outputs> </tool> \ No newline at end of file
##======= fastq_dump_pe_render.R ====================
# Command line driver for the paired end fastq-dump report. It parses the
# command line options, substitutes them for the placeholders in the Rmd
# template, writes the filled template into the working directory and renders
# it to the requested HTML report.

##======= Handle arguments from command line ========
# setup R error handling to go to stderr
options(show.error.messages = FALSE,
        error = function() {
          cat(geterrmessage(), file = stderr())
          quit("no", 1, FALSE)
        })

# we need that to not crash galaxy with an UTF8 error on German LC settings.
loc <- Sys.setlocale("LC_MESSAGES", "en_US.UTF-8")

# suppress warning
options(warn = -1)

options(stringsAsFactors = FALSE, useFancyQuotes = FALSE)

suppressPackageStartupMessages({
  library(getopt)
  library(tools)
})

#/////////////////////// SINK WARNINGS AND ERRORS TO A FILE FOR DEBUGGING ///////////
# The connection is opened explicitly so that this script owns it and can
# close it itself once both diversions have been removed at the end.
zz <- file('warnings_and_errors.txt', open = 'wt')
sink(zz)
sink(zz, type = 'message')

# column 1: the long flag name
# column 2: the short flag alias. A SINGLE character string
# column 3: argument mask
#           0: no argument
#           1: argument required
#           2: argument is optional
# column 4: data type to which the flag's argument shall be cast.
#           possible values: logical, integer, double, complex, character.
spec_list <- list()
##------- 1. input data ---------------------
spec_list$SRA_ACCESSION <- c('sra_accession', 'i', '1', 'character')
spec_list$FORMAT <- c('format', 'f', '1', 'character')
spec_list$ECHO <- c('echo', 'e', '1', 'character')
spec_list$SPLIT_FILES <- c('split_files', 'p', '1', 'character')
##--------2. output report and outputs --------------
spec_list$REPORT_HTML <- c('report_html', 'r', '1', 'character')
spec_list$OUTPUT_DIR <- c('output_dir', 'd', '1', 'character')
spec_list$SINK_OUTPUT <- c('sink_output', 's', '1', 'character')
##--------3. Rmd templates in the tool directory ----------
spec_list$FASTQ_DUMP_PE_RMD <- c('fastq_dump_pe_rmd', 't', '1', 'character')

# one row per option, four columns as described above
spec <- do.call(rbind, spec_list)
# `opt` must stay a top-level variable: the Rmd template prints it with
# str(opt) -- presumably render() evaluates the document in this environment.
opt <- getopt(spec)

# Fail early with a readable message instead of an obscure error further down
# (every option listed here is dereferenced below).
required_opts <- c('sra_accession', 'format', 'echo', 'split_files',
                   'output_dir', 'fastq_dump_pe_rmd')
is_missing <- vapply(required_opts,
                     function(name) is.null(opt[[name]]),
                     logical(1))
if (any(is_missing)) {
  stop('missing required option(s): ',
       paste(required_opts[is_missing], collapse = ', '))
}

#------ Load libraries ---------
library(rmarkdown)
library(htmltools)
library(dplyr)

#----- 1. create the report directory ------------------------
# dir.create() avoids a shell call, so paths with spaces or shell
# metacharacters are handled correctly.
dir.create(opt$output_dir, recursive = TRUE, showWarnings = FALSE)

#----- 2. generate Rmd files with Rmd templates --------------
#   a. templates without placeholder variables:
#         copy templates from tool directory to the working directory.
#   b. templates with placeholder variables:
#         substitute variables with user input values and place them in the working directory.

#----- 01 fastq_dump_pe.Rmd -----------------------
# Placeholder -> value pairs, applied in this order (the same order as before).
substitutions <- list(
  SRA_ACCESSION = opt$sra_accession,
  FORMAT        = opt$format,
  ECHO          = opt$echo,
  SPLIT_FILES   = opt$split_files,
  OUTPUT_DIR    = opt$output_dir
)
rmd_lines <- readLines(opt$fastq_dump_pe_rmd)
for (placeholder in names(substitutions)) {
  # fixed = TRUE: both the placeholder and the user value are literal text,
  # so backslashes in a value are inserted verbatim.
  rmd_lines <- gsub(placeholder, substitutions[[placeholder]], rmd_lines,
                    fixed = TRUE)
}
writeLines(rmd_lines, con = 'fastq_dump_pe.Rmd')

#------ 3. render all Rmd files --------
render('fastq_dump_pe.Rmd', output_file = opt$report_html)


#-------4. manipulate outputs -----------------------------


# Remove the message diversion first, then the output diversion, and only
# then close the connection both of them were writing to.
sink(type = 'message')
sink()
close(zz)
#/////////// END OF SINK OUTPUT ///////////////////////////
--- a/fastq_dump_se.Rmd Wed Sep 27 16:17:40 2017 -0400 +++ b/fastq_dump_se.Rmd Wed Sep 27 16:43:52 2017 -0400 @@ -36,7 +36,7 @@ command = paste0('fastq-dump ', '-O read_files_directory ', id) } print(command) - #system(command = command) + system(command = command) } ```
--- a/fastq_dump_se.xml Wed Sep 27 16:17:40 2017 -0400 +++ b/fastq_dump_se.xml Wed Sep 27 16:43:52 2017 -0400 @@ -12,6 +12,7 @@ collection. </description> <stdio> + <!--All stderr are redirected to a file. "XXX" is used to match with nothing--> <regex match="XXX" source="stderr" level="warning" @@ -38,8 +39,8 @@ <param type="boolean" name="echo" truevalue="TRUE" falsevalue="FALSE" checked="false" label="Display analysis code in report?" /> </inputs> <outputs> - <data format="html" name="report" label="Fastq-dump (single end reads)" /> - <collection type="list" name="list_collection"> + <data format="html" name="report" label="Fastq-dump report" /> + <collection type="list" name="list_collection" label="Fastq-dump (single end reads)"> <discover_datasets pattern="__name_and_ext__" directory="read_files_directory" /> </collection> <data format="txt" name="sink_message" label="Warnings and Errors" from_work_dir="warnings_and_errors.txt" />