Mercurial > repos > mingchen0919 > rmarkdown_bdss_client
changeset 21:a709a705ce09 draft
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_bdss_client commit 813dcaa22f297814dd6d6a8c4c5ff01664942aa6-dirty
author | mingchen0919 |
---|---|
date | Sat, 14 Oct 2017 19:54:57 -0400 |
parents | 531c00a2acbf |
children | 89cc5b026494 |
files | bdss_client.Rmd bdss_client.xml bdss_client_sra.Rmd bdss_client_sra.xml bdss_client_sra_pe.Rmd bdss_client_sra_pe.xml bdss_client_sra_pe_render.R bdss_client_sra_render.R bdss_client_sra_se.Rmd bdss_client_sra_se.xml bdss_client_sra_se_render.R |
diffstat | 11 files changed, 243 insertions(+), 488 deletions(-) [+] |
line wrap: on
line diff
--- a/bdss_client.Rmd Sat Oct 14 17:15:39 2017 -0400 +++ b/bdss_client.Rmd Sat Oct 14 19:54:57 2017 -0400 @@ -10,7 +10,8 @@ ```{r setup, include=FALSE, warning=FALSE, message=FALSE} knitr::opts_chunk$set( - echo = ECHO + echo = ECHO, + error=TRUE ) ``` @@ -39,9 +40,13 @@ urls = urls[urls != ''] # loop through SRA accessions to download and extract reads. for(url in urls) { + print(url) bdss_command = paste0('/tool_deps/_conda/bin/bdss transfer --destination read_files_directory -u ', url) + print(bdss_command) print(system(bdss_command, intern = TRUE)) } +# all files that need to be saved should be moved to REPORT_DIR directory +# print(system('mv read_files_directory REPORT_DIR', intern = TRUE)) ```
--- a/bdss_client.xml Sat Oct 14 17:15:39 2017 -0400 +++ b/bdss_client.xml Sat Oct 14 19:54:57 2017 -0400 @@ -1,4 +1,4 @@ -<tool id="rmarkdown_bdss_client" name="BDSS client" version="1.0.1"> +<tool id="rmarkdown_bdss_client" name="BDSS client" version="1.0.2"> <requirements> <requirement type="package" version="1.15.0.6-0">pandoc</requirement> <requirement type="package" version="3.3.2">r-base</requirement> @@ -38,9 +38,9 @@ label="Display analysis code in report?"/> </inputs> <outputs> - <data format="html" name="report" label="BDSS report"/> - <data name="output"> - <discover_datasets pattern="__name_and_ext__" directory="read_files_directory"/> + <data format="html" name="report" label="BDSS client report"/> + <data name="output" label="BDSS downloaded data"> + <discover_datasets pattern="__name_and_ext__" directory="read_files_directory" visible="true"/> </data> <data format="txt" name="sink_message" label="Warnings and Errors" from_work_dir="warnings_and_errors.txt"/> </outputs>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bdss_client_sra.Rmd Sat Oct 14 19:54:57 2017 -0400 @@ -0,0 +1,86 @@ +--- +title: 'Download and extract single end fastq/fasta data with BDSS client from SRA accessions' +output: + html_document: + number_sections: true + toc: true + theme: cosmo + highlight: tango +--- + +```{r setup, include=FALSE, warning=FALSE, message=FALSE} +knitr::opts_chunk$set( + echo = ECHO, + error=TRUE +) +``` + +# Command line arguments + +```{r 'command line arguments'} +str(opt) +``` + +# BDSS configuration file + +First, we create a bdss configuration file `bdss.cfg` in the current directory. + +```{r} +system('echo "[metadata_repository]" > bdss.cfg') +system('echo url=http://bdss.bioinfo.wsu.edu/ >> bdss.cfg') +``` + +# Download and extract reads + +```{r 'download and extract reads'} +# create two directories, one for single end and the other for paired end SRA reads. +dir.create('se_read_files_directory') +dir.create('pe_read_files_directory') +# download and extract reads (single end) +sra_ids_se = strsplit(gsub(',', ' ', 'SRA_IDS_SE'), ' ')[[1]] +sra_ids_se = sra_ids[sra_ids != ''] +# loop through SRA accessions to download and extract reads. +for(id in sra_ids) { + # build URL from SRA id + url = paste0('ftp://ftp.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/', + substr(id, 1, 3), '/', + substr(id, 1, 6), '/', id, '/', id, '.sra') + # download sra file with bdss + bdss_command = paste0('/tool_deps/_conda/bin/bdss transfer -u ', url) + system(bdss_command, intern = TRUE) + # convert .sra to .fastq/.fasta + if('FORMAT' == 'fasta') { + command = paste0('fastq-dump --fasta -O read_files_directory ', id) + } else { + command = paste0('fastq-dump -O read_files_directory ', id) + } +} + +# download and extract reads (paired end) +sra_ids_pe = strsplit(gsub(',', ' ', 'SRA_IDS_PE'), ' ')[[1]] +sra_ids_pe = sra_ids[sra_ids != ''] +# loop through SRA accessions to download and extract reads. +for(id in sra_ids) { + # build URL from SRA id + url = paste0('ftp://ftp.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/', + substr(id, 1, 3), '/', + substr(id, 1, 6), '/', id, '/', id, '.sra') + # download sra file with bdss + bdss_command = paste0('/tool_deps/_conda/bin/bdss transfer -u ', url) + system(bdss_command, intern = TRUE) + # convert .sra to .fastq/.fasta + if('FORMAT' == 'fasta') { + command = paste0('fastq-dump --fasta --split-files -O pe_read_files_directory ', id) + } else { + command = paste0('fastq-dump --split-files -O pe_read_files_directory ', id) + } +} + +# rename files for paired end reads +old_files = paste0('./read_files_directory/', list.files('./read_files_directory')) +new_files = gsub('_1', '_forward', old_files) +new_files = gsub('_2', '_reverse', new_files) +file.rename(old_files, new_files) +``` + +
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bdss_client_sra.xml Sat Oct 14 19:54:57 2017 -0400 @@ -0,0 +1,58 @@ +<tool id="rmarkdown_bdss_client_sra" name="BDSS client SRA" version="1.0.2"> + <requirements> + <requirement type="package" version="1.15.0.6-0">pandoc</requirement> + <requirement type="package" version="3.3.2">r-base</requirement> + <requirement type="package" version="1.20.0">r-getopt</requirement> + <requirement type="package" version="1.2">r-rmarkdown</requirement> + <requirement type="package" version="0.3.5">r-htmltools</requirement> + <requirement type="package" version="0.5.0">r-dplyr</requirement> + <requirement type="package" version="0.5.4">parallel-fastq-dump</requirement> + <requirement type="package" version="1.95_4.8">r-rcurl</requirement> + </requirements> + <description> + Download data with BDSS client and generate list (single end SRA data) and list:paired dataset collection (paired end SRA data). + </description> + <stdio> + <!--All stderr are redirected to a file. "XXX" is used to match with nothing--> + <regex match="XXX" + source="stderr" + level="warning" + description="Check the warnings_and_errors.txt file for more details."/> + </stdio> + <command> + <![CDATA[ + Rscript '${__tool_directory__}/bdss_client_sra_render.R' + -i '$sra_ids_se' + -p '$sra_ids_pe' + -e $echo + + -r $report + -d $report.files_path + -s $sink_message + + -t '${__tool_directory__}/bdss_client_sra.Rmd' + ]]> + </command> + <inputs> + <param type="text" name="sra_ids_se" area="true" size="5x25" label="SRR/DRR/ERR accessions of single end SRA" optional="false" + help="A list of SRR/DRR/ERR accessions separated by comma or space. e.g. SRR6077558,ERR343809"/> + <param type="text" name="sra_ids_pe" area="true" size="5x25" label="SRR/DRR/ERR accessions of paired end SRA" optional="false" + help="A list of SRR/DRR/ERR accessions separated by comma or space. e.g. ERR2105526"/> + <param type="boolean" name="format" truevalue="fastq" falsevalue="fasta" checked="true" + label="output files in fastq (Yes) or fasta (No)?"/> + <param type="boolean" name="echo" truevalue="TRUE" falsevalue="FALSE" checked="false" + label="Display analysis code in report?"/> + </inputs> + <outputs> + <data format="html" name="report" label="BDSS client report"/> + <!--list dataset collection for single end SRA data--> + <collection type="list" name="list_collection" label="Fastq-dump (single end reads)"> + <discover_datasets pattern="__name_and_ext__" directory="read_files_directory" /> + </collection> + <!--list:paired dataset collection for paired end SRA data--> + <collection type="list:paired" name="list_collection" label="Fastq-dump (paired end reads)"> + <discover_datasets pattern="(?P<identifier_0>[^_]+)_(?P<identifier_1>[^_]+)\.(?P<ext>[^\._]+)?" directory="read_files_directory"/> + </collection> + <data format="txt" name="sink_message" label="Warnings and Errors" from_work_dir="warnings_and_errors.txt"/> + </outputs> +</tool> \ No newline at end of file
--- a/bdss_client_sra_pe.Rmd Sat Oct 14 17:15:39 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,92 +0,0 @@ ---- -title: 'Download and extract paired end fastq/fasta data with BDSS client from SRA accessions' -output: - html_document: - number_sections: true - toc: true - theme: cosmo - highlight: tango ---- - -```{r setup, include=FALSE, warning=FALSE, message=FALSE} -knitr::opts_chunk$set( - echo = ECHO -) -``` - -# Command line arguments - -```{r 'command line arguments'} -str(opt) -``` - -# Download and extract reads - -```{r 'download and extract reads'} -# create a directory to store read files -dir.create('read_files_directory') -# download and extract reads -sra_accessions = strsplit(gsub(',', ' ', 'SRA_ACCESSION'), ' ')[[1]] -sra_accessions = sra_accessions[sra_accessions != ''] -# loop through SRA accessions to download and extract reads. -for(id in sra_accessions) { - if (SRA_BOOLEAN) { - - # build URL from SRA accession - url = paste0('ftp://ftp.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/', - substr(id, 1, 3), '/', - substr(id, 1, 6), '/', id, '/', id, '.sra') - # download sra file with bdss - bdss_command = paste0('/tool_deps/_conda/bin/bdss transfer -u ', url) - system(bdss_command, intern = TRUE) - # convert .sra to .fastq/.fasta - if('FORMAT' == 'fasta') { - command = paste0('fastq-dump --fasta --split-files ', '-O read_files_directory ', id) - } else { - command = paste0('fastq-dump --split-files ', '-O read_files_directory ', id) - } - # command line stdout - system(command = command, intern = TRUE) - } else { - # if SRA_BOOLEAN is FALSE, download fastq directly - # build URL for downloading fastq - url_base = paste0('ftp://ftp.sra.ebi.ac.uk/vol1/fastq/', - substr(id, 1, 6), '/', id) - # list all fastq files - all_fastq_files = strsplit(getURL(url_base, dirlistonly = TRUE), split = '\n')[[1]] - if (length(all_fastq_files) == 1) { - # this is a single end SRA file. build url to download fastq - print(paste0('only one fastq file found for this SRA accession: ', id)) - print(paste0('Downloading skipped for ', id)) - # url = paste0(url_base, '/', all_fastq_files) - # bdss_command = paste0('/tool_deps/_conda/bin/bdss transfer -u ', url) - # run_bdss = system(bdss_command, intern = TRUE) - # print(run_bdss) - } else { - print(paste0('Two fastq files found for this SRA accession: ', id)) - url_1 = paste0(url_base, '/', all_fastq_files[1]) - bdss_command_1 = paste0('/tool_deps/_conda/bin/bdss transfer -u ', url_1) - run_bdss_1 = system(bdss_command_1, intern = TRUE) - print(run_bdss_1) - url_2 = paste0(url_base, '/', all_fastq_files[1]) - bdss_command_2 = paste0('/tool_deps/_conda/bin/bdss transfer -u ', url_2) - run_bdss_2 = system(bdss_command_2, intern = TRUE) - print(run_bdss_2) - } - } - -} -# remove all .sra file -system('rm *.sra', intern = TRUE) -``` - - -# Rename files - -```{r} -old_files = paste0('./read_files_directory/', list.files('./read_files_directory')) -new_files = gsub('_1', '_forward', old_files) -new_files = gsub('_2', '_reverse', new_files) -file.rename(old_files, new_files) -``` -
--- a/bdss_client_sra_pe.xml Sat Oct 14 17:15:39 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,58 +0,0 @@ -<tool id="rmarkdown_bdss_client_sra_pe" name="BDSS client SRA PE" version="1.0.1"> - <requirements> - <requirement type="package" version="1.15.0.6-0">pandoc</requirement> - <requirement type="package" version="3.3.2">r-base</requirement> - <requirement type="package" version="1.20.0">r-getopt</requirement> - <requirement type="package" version="1.2">r-rmarkdown</requirement> - <requirement type="package" version="0.3.5">r-htmltools</requirement> - <requirement type="package" version="0.5.0">r-dplyr</requirement> - <requirement type="package" version="0.5.4">parallel-fastq-dump</requirement> - <requirement type="package" version="1.95_4.8">r-rcurl</requirement> - </requirements> - <description> - Download and extract paired end reads in fastq or fasta format from NCBI SRA. The output is a list of paired - datasets - collection. - </description> - <stdio> - <!--All stderr are redirected to a file. "XXX" is used to match with nothing--> - <regex match="XXX" - source="stderr" - level="warning" - description="Check the warnings_and_errors.txt file for more details."/> - </stdio> - <command> - <![CDATA[ - Rscript '${__tool_directory__}/bdss_client_sra_pe_render.R' - -i '$sra_accession' - -e $echo - -f $format - -S $sra - - -r $report - -d $report.files_path - -s $sink_message - - -t '${__tool_directory__}/bdss_client_sra_pe.Rmd' - ]]> - </command> - <inputs> - <param type="text" name="sra_accession" label="SRR/DRR/ERR accessions" optional="false" - help="A list of SRR/DRR/ERR accessions separated by comma or space. e.g. SRR6077558,ERR343809"/> - <param type="boolean" name="format" truevalue="fastq" falsevalue="fasta" checked="true" - label="output files in fastq (Yes) or fasta (No)?"/> - <param type="boolean" name="sra" truevalue="TRUE" falsevalue="FALSE" checked="true" - label="Download SRA or Fastq"/> - <param type="boolean" name="echo" truevalue="TRUE" falsevalue="FALSE" checked="false" - label="Display analysis code in report?"/> - </inputs> - <outputs> - <data format="html" name="report" label="BDSS report"/> - <collection type="list:paired" name="list_collection" label="BDSS fastq/fasta (paired end reads)"> - <discover_datasets - pattern="(?P<identifier_0>[^_]+)_(?P<identifier_1>[^_]+)\.(?P<ext>[^\._]+)?" - directory="read_files_directory"/> - </collection> - <data format="txt" name="sink_message" label="Warnings and Errors" from_work_dir="warnings_and_errors.txt"/> - </outputs> -</tool> \ No newline at end of file
--- a/bdss_client_sra_pe_render.R Sat Oct 14 17:15:39 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,90 +0,0 @@ -library(getopt) -library(rmarkdown) -library(htmltools) -library(dplyr) -library(RCurl) - - -##============ Sink warnings and errors to a file ============== -## use the sink() function to wrap all code within it. -##============================================================== -zz = file('warnings_and_errors.txt') -sink(zz) -sink(zz, type = 'message') -##---------below is the code for rendering .Rmd templates----- - - ##=============STEP 1: handle command line arguments========== - ## - ##============================================================ - # column 1: the long flag name - # column 2: the short flag alias. A SINGLE character string - # column 3: argument mask - # 0: no argument - # 1: argument required - # 2: argument is optional - # column 4: date type to which the flag's argument shall be cast. - # possible values: logical, integer, double, complex, character. - #------------------------------------------------------------- - #++++++++++++++++++++ Best practice ++++++++++++++++++++++++++ - # 1. short flag alias should match the flag in the command section in the XML file. - # 2. long flag name can be any legal R variable names - # 3. two names in args_list can have common string but one name should not be a part of another name. - # for example, one name is "ECHO", if another name is "ECHO_XXX", it will cause problems. - #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - ##------- 1. input data --------------------- - args_list=list() - args_list$SRA_ACCESSION = c('sra_accession', 'i', '1', 'character') - args_list$FORMAT = c('format', 'f', '1', 'character') - args_list$SRA_BOOLEAN = c('sra_boolean', 'S', '1', 'character') - args_list$ECHO = c('echo', 'e', '1', 'character') - ##--------2. output report and outputs -------------- - args_list$REPORT_HTML = c('report_html', 'r', '1', 'character') - args_list$REPORT_DIR = c('report_dir', 'd', '1', 'character') - args_list$SINK_OUTPUT = c('sink_message', 's', '1', 'character') - ##--------3. Rmd templates in the tool directory ---------- - args_list$bdss_client_sra_se_RMD = c('bdss_client_sra_se_rmd', 't', '1', 'character') - - opt = getopt(t(as.data.frame(args_list))) - - - ##=======STEP 2: create report directory (optional)========== - ## - ##=========================================================== - dir.create(opt$report_dir) - - ##=STEP 3: replace placeholders in .Rmd with argument values= - ## - ##=========================================================== - #++ need to replace placeholders with args values one by one+ - #----- 01 bdss_client_sra_se.Rmd ----------------------- - readLines(opt$bdss_client_sra_se_rmd) %>% - (function(x) { - gsub('SRA_ACCESSION', opt$sra_accession, x) - }) %>% - (function(x) { - gsub('FORMAT', opt$format, x) - }) %>% - (function(x) { - gsub('SRA_BOOLEAN', opt$sra_boolean, x) - }) %>% - (function(x) { - gsub('ECHO', opt$echo, x) - }) %>% - (function(x) { - gsub('REPORT_DIR', opt$report_dir, x) - }) %>% - (function(x) { - fileConn = file('bdss_client_sra_se.Rmd') - writeLines(x, con=fileConn) - close(fileConn) - }) - - ##=============STEP 4: render .Rmd templates================= - ## - ##=========================================================== - render('bdss_client_sra_se.Rmd', output_file = opt$report_html) - - -##--------end of code rendering .Rmd templates---------------- -sink() -##=========== End of sinking output============================= \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/bdss_client_sra_render.R Sat Oct 14 19:54:57 2017 -0400 @@ -0,0 +1,89 @@ +library(getopt) +library(rmarkdown) +library(htmltools) +library(dplyr) +library(RCurl) + + +##============ Sink warnings and errors to a file ============== +## use the sink() function to wrap all code within it. +##============================================================== +zz = file('warnings_and_errors.txt') +sink(zz) +sink(zz, type = 'message') +##---------below is the code for rendering .Rmd templates----- + +##=============STEP 1: handle command line arguments========== +## +##============================================================ +# column 1: the long flag name +# column 2: the short flag alias. A SINGLE character string +# column 3: argument mask +# 0: no argument +# 1: argument required +# 2: argument is optional +# column 4: date type to which the flag's argument shall be cast. +# possible values: logical, integer, double, complex, character. +#------------------------------------------------------------- +#++++++++++++++++++++ Best practice ++++++++++++++++++++++++++ +# 1. short flag alias should match the flag in the command section in the XML file. +# 2. long flag name can be any legal R variable names +# 3. two names in args_list can have common string but one name should not be a part of another name. +# for example, one name is "ECHO", if another name is "ECHO_XXX", it will cause problems. +#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +##------- 1. input data --------------------- +args_list=list() +args_list$SRA_IDS_SE = c('sra_ids_se', 'i', '1', 'character') +args_list$SRA_IDS_PE = c('sra_ids_pe', 'i', '1', 'character') +args_list$ECHO = c('echo', 'e', '1', 'character') +##--------2. output report and outputs -------------- +args_list$REPORT_HTML = c('report_html', 'r', '1', 'character') +args_list$REPORT_DIR = c('report_dir', 'd', '1', 'character') +args_list$SINK_OUTPUT = c('sink_message', 's', '1', 'character') +##--------3. Rmd templates in the tool directory ---------- +args_list$BDSS_CLIENT_RMD = c('bdss_client_rmd', 't', '1', 'character') + +opt = getopt(t(as.data.frame(args_list))) + + +##=======STEP 2: create report directory (optional)========== +## +##=========================================================== +dir.create(opt$report_dir) + +##=STEP 3: replace placeholders in .Rmd with argument values= +## +##=========================================================== +#++ need to replace placeholders with args values one by one+ +#----- 01 bdss_client.Rmd ----------------------- +readLines(opt$bdss_client_rmd) %>% + (function(x) { + gsub('SRA_IDS_SE', opt$sra_ids_se, x) + }) %>% + (function(x) { + gsub('SRA_IDS_PE', opt$sra_ids_pe, x) + }) %>% + (function(x) { + gsub('FORMAT', opt$format, x) + }) %>% + (function(x) { + gsub('ECHO', opt$echo, x) + }) %>% + (function(x) { + gsub('REPORT_DIR', opt$report_dir, x) + }) %>% + (function(x) { + fileConn = file('bdss_client.Rmd') + writeLines(x, con=fileConn) + close(fileConn) + }) + +##=============STEP 4: render .Rmd templates================= +## +##=========================================================== +render('bdss_client.Rmd', output_file = opt$report_html) + + +##--------end of code rendering .Rmd templates---------------- +sink() +##=========== End of sinking output============================= \ No newline at end of file
--- a/bdss_client_sra_se.Rmd Sat Oct 14 17:15:39 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,98 +0,0 @@ ---- -title: 'Download and extract single end fastq/fasta data with BDSS client from SRA accessions' -output: - html_document: - number_sections: true - toc: true - theme: cosmo - highlight: tango ---- - -```{r setup, include=FALSE, warning=FALSE, message=FALSE} -knitr::opts_chunk$set( - echo = ECHO -) -``` - -# Command line arguments - -```{r 'command line arguments'} -str(opt) -``` - -# BDSS configuration file - -First, we create a bdss configuration file `bdss.cfg` in the current directory. - -```{r} -system('echo "[metadata_repository]" > bdss.cfg') -system('echo url=http://bdss.bioinfo.wsu.edu/ >> bdss.cfg') -``` - -# Download and extract reads - -```{r 'download and extract reads'} -# create a directory to store read files -dir.create('read_files_directory') -# download and extract reads -sra_accessions = strsplit(gsub(',', ' ', 'SRA_ACCESSION'), ' ')[[1]] -sra_accessions = sra_accessions[sra_accessions != ''] -# loop through SRA accessions to download and extract reads. -for(id in sra_accessions) { - ## if SRA_BOOLEAN is TRUE, download .sra file and then convert it to fastq/fasta - if (SRA_BOOLEAN) { - # build URL from SRA accession - url = paste0('ftp://ftp.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/', - substr(id, 1, 3), '/', - substr(id, 1, 6), '/', id, '/', id, '.sra') - # download sra file with bdss - bdss_command = paste0('/tool_deps/_conda/bin/bdss transfer -u ', url) - system(bdss_command, intern = TRUE) - # convert .sra to .fastq/.fasta - if('FORMAT' == 'fasta') { - command = paste0('fastq-dump --fasta ', '-O read_files_directory ', id) - } else { - command = paste0('fastq-dump ', '-O read_files_directory ', id) - } - } else { - # if SRA_BOOLEAN is FALSE, download fastq directly - # build URL for downloading fastq - url_base = paste0('ftp://ftp.sra.ebi.ac.uk/vol1/fastq/', - substr(id, 1, 6), '/', id) - # list all fastq files - all_fastq_files = strsplit(getURL(url_base, dirlistonly = TRUE), split = '\n')[[1]] - if (length(all_fastq_files) == 1) { - # this is a single end SRA file. build url to download fastq - url = paste0(url_base, '/', all_fastq_files) - bdss_command = paste0('/tool_deps/_conda/bin/bdss transfer -u ', url) - run_bdss = system(bdss_command, intern = TRUE) - print(run_bdss) - } else { - print(paste0('Two fastq files found for this SRA accession: ', id)) - print('Downloading skipped for ', id) - # url_1 = paste0(url_base, '/', all_fastq_files[1]) - # bdss_command_1 = paste0('/tool_deps/_conda/bin/bdss transfer -u ', url_1) - # run_bdss_1 = system(bdss_command, intern = TRUE) - # url_2 = paste0(url_base, '/', all_fastq_files[1]) - # bdss_command_2 = paste0('/tool_deps/_conda/bin/bdss transfer -u ', url_2) - # run_bdss_2 = system(bdss_command, intern = TRUE) - } - } - -} - -# remove all .sra file -system('rm *.sra', intern = TRUE) -``` - -* `fastq-dump` command -```{r} -print(command) -``` - -* `command line stdout` - -```{r} -system(command = command, intern = TRUE) -``` -
--- a/bdss_client_sra_se.xml Sat Oct 14 17:15:39 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,55 +0,0 @@ -<tool id="rmarkdown_bdss_client_sra_se" name="BDSS client SRA SE" version="1.0.1"> - <requirements> - <requirement type="package" version="1.15.0.6-0">pandoc</requirement> - <requirement type="package" version="3.3.2">r-base</requirement> - <requirement type="package" version="1.20.0">r-getopt</requirement> - <requirement type="package" version="1.2">r-rmarkdown</requirement> - <requirement type="package" version="0.3.5">r-htmltools</requirement> - <requirement type="package" version="0.5.0">r-dplyr</requirement> - <requirement type="package" version="0.5.4">parallel-fastq-dump</requirement> - <requirement type="package" version="1.95_4.8">r-rcurl</requirement> - </requirements> - <description> - Download and extract single end reads in fastq or fasta format from NCBI SRA. The output is a list of datasets - collection. - </description> - <stdio> - <!--All stderr are redirected to a file. "XXX" is used to match with nothing--> - <regex match="XXX" - source="stderr" - level="warning" - description="Check the warnings_and_errors.txt file for more details."/> - </stdio> - <command> - <![CDATA[ - Rscript '${__tool_directory__}/bdss_client_sra_se_render.R' - -i '$sra_accession' - -e $echo - -f $format - -S $sra - - -r $report - -d $report.files_path - -s $sink_message - - -t '${__tool_directory__}/bdss_client_sra_se.Rmd' - ]]> - </command> - <inputs> - <param type="text" name="sra_accession" label="SRR/DRR/ERR accessions" - help="A list of SRR/DRR/ERR accessions separated by comma or space. e.g. SRR6077558,ERR343809"/> - <param type="boolean" name="format" truevalue="fastq" falsevalue="fasta" checked="true" - label="output files in fastq (Yes) or fasta (No)?"/> - <param type="boolean" name="sra" truevalue="TRUE" falsevalue="FALSE" checked="true" - label="Download SRA or Fastq"/> - <param type="boolean" name="echo" truevalue="TRUE" falsevalue="FALSE" checked="false" - label="Display analysis code in report?"/> - </inputs> - <outputs> - <data format="html" name="report" label="BDSS report"/> - <collection type="list" name="list_collection" label="BDSS fastq/fasta (single end reads)"> - <discover_datasets pattern="__name_and_ext__" directory="read_files_directory"/> - </collection> - <data format="txt" name="sink_message" label="Warnings and Errors" from_work_dir="warnings_and_errors.txt"/> - </outputs> -</tool> \ No newline at end of file
--- a/bdss_client_sra_se_render.R Sat Oct 14 17:15:39 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,90 +0,0 @@ -library(getopt) -library(rmarkdown) -library(htmltools) -library(dplyr) -library(RCurl) - - -##============ Sink warnings and errors to a file ============== -## use the sink() function to wrap all code within it. -##============================================================== -zz = file('warnings_and_errors.txt') -sink(zz) -sink(zz, type = 'message') -##---------below is the code for rendering .Rmd templates----- - - ##=============STEP 1: handle command line arguments========== - ## - ##============================================================ - # column 1: the long flag name - # column 2: the short flag alias. A SINGLE character string - # column 3: argument mask - # 0: no argument - # 1: argument required - # 2: argument is optional - # column 4: date type to which the flag's argument shall be cast. - # possible values: logical, integer, double, complex, character. - #------------------------------------------------------------- - #++++++++++++++++++++ Best practice ++++++++++++++++++++++++++ - # 1. short flag alias should match the flag in the command section in the XML file. - # 2. long flag name can be any legal R variable names - # 3. two names in args_list can have common string but one name should not be a part of another name. - # for example, one name is "ECHO", if another name is "ECHO_XXX", it will cause problems. - #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - ##------- 1. input data --------------------- - args_list=list() - args_list$SRA_ACCESSION = c('sra_accession', 'i', '1', 'character') - args_list$FORMAT = c('format', 'f', '1', 'character') - args_list$SRA_BOOLEAN = c('sra_boolean', 'S', '1', 'character') - args_list$ECHO = c('echo', 'e', '1', 'character') - ##--------2. output report and outputs -------------- - args_list$REPORT_HTML = c('report_html', 'r', '1', 'character') - args_list$REPORT_DIR = c('report_dir', 'd', '1', 'character') - args_list$SINK_OUTPUT = c('sink_message', 's', '1', 'character') - ##--------3. Rmd templates in the tool directory ---------- - args_list$bdss_client_sra_se_RMD = c('bdss_client_sra_se_rmd', 't', '1', 'character') - - opt = getopt(t(as.data.frame(args_list))) - - - ##=======STEP 2: create report directory (optional)========== - ## - ##=========================================================== - dir.create(opt$report_dir) - - ##=STEP 3: replace placeholders in .Rmd with argument values= - ## - ##=========================================================== - #++ need to replace placeholders with args values one by one+ - #----- 01 bdss_client_sra_se.Rmd ----------------------- - readLines(opt$bdss_client_sra_se_rmd) %>% - (function(x) { - gsub('SRA_ACCESSION', opt$sra_accession, x) - }) %>% - (function(x) { - gsub('FORMAT', opt$format, x) - }) %>% - (function(x) { - gsub('SRA_BOOLEAN', opt$sra_boolean, x) - }) %>% - (function(x) { - gsub('ECHO', opt$echo, x) - }) %>% - (function(x) { - gsub('REPORT_DIR', opt$report_dir, x) - }) %>% - (function(x) { - fileConn = file('bdss_client_sra_se.Rmd') - writeLines(x, con=fileConn) - close(fileConn) - }) - - ##=============STEP 4: render .Rmd templates================= - ## - ##=========================================================== - render('bdss_client_sra_se.Rmd', output_file = opt$report_html) - - -##--------end of code rendering .Rmd templates---------------- -sink() -##=========== End of sinking output============================= \ No newline at end of file