Mercurial > repos > mingchen0919 > rmarkdown_bdss_client
changeset 3:c707a4178832 draft
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_bdss_client commit ea6df97960569d247d64be1549aa90768c9786f4-dirty
author | mingchen0919 |
---|---|
date | Wed, 11 Oct 2017 11:23:04 -0400 |
parents | 979136fc0650 |
children | 206c59e498e7 |
files | bdss_client_sra_pe.Rmd bdss_client_sra_pe.xml bdss_client_sra_pe_render.R bdss_client_sra_se.Rmd bdss_client_sra_se.xml bdss_client_sra_se_render.R |
diffstat | 6 files changed, 111 insertions(+), 33 deletions(-) [+] |
line wrap: on
line diff
--- a/bdss_client_sra_pe.Rmd Tue Oct 10 20:22:55 2017 -0400
+++ b/bdss_client_sra_pe.Rmd Wed Oct 11 11:23:04 2017 -0400
@@ -30,20 +30,50 @@
 sra_accessions = sra_accessions[sra_accessions != '']
 # loop through SRA accessions to download and extract reads.
 for(id in sra_accessions) {
-  # build URL from SRA accession
-  url = paste0('ftp://ftp.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/SRR/',
-               substr(id, 1, 6), '/', id, '/', id, '.sra')
-  # download sra file with bdss
-  bdss_command = paste0('/tool_deps/_conda/bin/bdss transfer -u ', url)
-  system(bdss_command, intern = TRUE)
-  # convert .sra to .fastq/.fasta
-  if('FORMAT' == 'fasta') {
-    command = paste0('fastq-dump --fasta --split-files ', '-O read_files_directory ', id)
-  } else {
-    command = paste0('fastq-dump --split-files ', '-O read_files_directory ', id)
+  if (SRA_BOOLEAN) {
+    # SRA mode: build the NCBI URL of the .sra file from the accession
+    url = paste0('ftp://ftp.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/SRR/',
+                 substr(id, 1, 6), '/', id, '/', id, '.sra')
+    # download sra file with bdss
+    bdss_command = paste0('/tool_deps/_conda/bin/bdss transfer -u ', url)
+    system(bdss_command, intern = TRUE)
+    # convert .sra to .fastq/.fasta, one output file per mate (--split-files)
+    if('FORMAT' == 'fasta') {
+      command = paste0('fastq-dump --fasta --split-files ', '-O read_files_directory ', id)
+    } else {
+      command = paste0('fastq-dump --split-files ', '-O read_files_directory ', id)
+    }
+    # run fastq-dump and capture its stdout
+    system(command = command, intern = TRUE)
+  } else {
+    # if SRA_BOOLEAN is FALSE, download fastq directly
+    # build the EBI directory URL that holds the fastq files of this accession
+    url_base = paste0('ftp://ftp.sra.ebi.ac.uk/vol1/fastq/',
+                      substr(id, 1, 6), '/', id)
+    # list all fastq files (one file name per line of the directory listing)
+    all_fastq_files = strsplit(getURL(url_base, dirlistonly = TRUE), split = '\n')[[1]]
+    if (length(all_fastq_files) == 1) {
+      # only one fastq file: single end run, so this paired end tool skips it
+      print(paste0('only one fastq file found for this SRA accession: ', id))
+      print(paste0('Downloading skipped for ', id))
+      # url = paste0(url_base, '/', all_fastq_files)
+      # bdss_command = paste0('/tool_deps/_conda/bin/bdss transfer -u ', url)
+      # run_bdss = system(bdss_command, intern = TRUE)
+      # print(run_bdss)
+    } else {
+      print(paste0('Two fastq files found for this SRA accession: ', id))
+      print(paste0('Downloading both fastq files for ', id))
+      url_1 = paste0(url_base, '/', all_fastq_files[1])
+      bdss_command_1 = paste0('/tool_deps/_conda/bin/bdss transfer -u ', url_1)
+      run_bdss_1 = system(bdss_command_1, intern = TRUE)
+      print(run_bdss_1)
+      url_2 = paste0(url_base, '/', all_fastq_files[2])
+      bdss_command_2 = paste0('/tool_deps/_conda/bin/bdss transfer -u ', url_2)
+      run_bdss_2 = system(bdss_command_2, intern = TRUE)
+      print(run_bdss_2)
+    }
   }
-  # command line stdout
-  system(command = command, intern = TRUE)
+
 }
 # remove all .sra file
 system('rm *.sra', intern = TRUE)
--- a/bdss_client_sra_pe.xml Tue Oct 10 20:22:55 2017 -0400 +++ b/bdss_client_sra_pe.xml Wed Oct 11 11:23:04 2017 -0400 @@ -6,9 +6,11 @@ <requirement type="package" version="0.3.5">r-htmltools</requirement> <requirement type="package" version="0.5.0">r-dplyr</requirement> <requirement type="package" version="0.5.4">parallel-fastq-dump</requirement> + <requirement type="package" version="1.95_4.8">r-rcurl</requirement> </requirements> <description> - Download and extract paired end reads in fastq or fasta format from NCBI SRA. The output is a list of paired datasets + Download and extract paired end reads in fastq or fasta format from NCBI SRA. The output is a list of paired + datasets collection. </description> <stdio> @@ -24,6 +26,7 @@ -i '$sra_accession' -e $echo -f $format + -S $sra -r $report -d $report.files_path @@ -37,14 +40,18 @@ help="A list of SRR/DRR/ERR accessions separated by comma or space. e.g. SRR6077558,ERR343809"/> <param type="boolean" name="format" truevalue="fastq" falsevalue="fasta" checked="true" label="output files in fastq (Yes) or fasta (No)?"/> + <param type="boolean" name="sra" truevalue="TRUE" falsevalue="FALSE" checked="false" + label="Download SRA or Fastq"/> <param type="boolean" name="echo" truevalue="TRUE" falsevalue="FALSE" checked="false" label="Display analysis code in report?"/> </inputs> <outputs> - <data format="html" name="report" label="BDSS report" /> + <data format="html" name="report" label="BDSS report"/> <collection type="list:paired" name="list_collection" label="BDSS fastq/fasta (paired end reads)"> - <discover_datasets pattern="(?P<identifier_0>[^_]+)_(?P<identifier_1>[^_]+)\.(?P<ext>[^\._]+)?" directory="read_files_directory"/> + <discover_datasets + pattern="(?P<identifier_0>[^_]+)_(?P<identifier_1>[^_]+)\.(?P<ext>[^\._]+)?" 
+ directory="read_files_directory"/> </collection> - <data format="txt" name="sink_message" label="Warnings and Errors" from_work_dir="warnings_and_errors.txt" /> + <data format="txt" name="sink_message" label="Warnings and Errors" from_work_dir="warnings_and_errors.txt"/> </outputs> </tool> \ No newline at end of file
--- a/bdss_client_sra_pe_render.R Tue Oct 10 20:22:55 2017 -0400 +++ b/bdss_client_sra_pe_render.R Wed Oct 11 11:23:04 2017 -0400 @@ -37,6 +37,7 @@ spec_list=list() spec_list$SRA_ACCESSION = c('sra_accession', 'i', '1', 'character') spec_list$FORMAT = c('format', 'f', '1', 'character') +spec_list$SRA_BOOLEAN = c('sra_boolean', 'S', '1', 'character') spec_list$ECHO = c('echo', 'e', '1', 'character') ##--------2. output report and outputs -------------- spec_list$REPORT_HTML = c('report_html', 'r', '1', 'character') @@ -71,6 +72,9 @@ gsub('FORMAT', opt$format, x) }) %>% (function(x) { + gsub('SRA_BOOLEAN', opt$sra_boolean, x) + }) %>% + (function(x) { gsub('ECHO', opt$echo, x) }) %>% (function(x) {
--- a/bdss_client_sra_se.Rmd Tue Oct 10 20:22:55 2017 -0400
+++ b/bdss_client_sra_se.Rmd Wed Oct 11 11:23:04 2017 -0400
@@ -39,18 +39,45 @@
 sra_accessions = sra_accessions[sra_accessions != '']
 # loop through SRA accessions to download and extract reads.
 for(id in sra_accessions) {
-  # build URL from SRA accession
-  url = paste0('ftp://ftp.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/SRR/',
-               substr(id, 1, 6), '/', id, '/', id, '.sra')
-  # download sra file with bdss
-  bdss_command = paste0('/tool_deps/_conda/bin/bdss transfer -u ', url)
-  system(bdss_command, intern = TRUE)
-  # convert .sra to .fastq/.fasta
-  if('FORMAT' == 'fasta') {
-    command = paste0('fastq-dump --fasta ', '-O read_files_directory ', id)
+  ## if SRA_BOOLEAN is TRUE, download .sra file and then convert it to fastq/fasta
+  if (SRA_BOOLEAN) {
+    # build the NCBI URL of the .sra file from the accession
+    url = paste0('ftp://ftp.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/SRR/',
+                 substr(id, 1, 6), '/', id, '/', id, '.sra')
+    # download sra file with bdss
+    bdss_command = paste0('/tool_deps/_conda/bin/bdss transfer -u ', url)
+    system(bdss_command, intern = TRUE)
+    # build the fastq-dump command (NOTE(review): no call running 'command' is visible in this hunk; confirm)
+    if('FORMAT' == 'fasta') {
+      command = paste0('fastq-dump --fasta ', '-O read_files_directory ', id)
+    } else {
+      command = paste0('fastq-dump ', '-O read_files_directory ', id)
+    }
   } else {
-    command = paste0('fastq-dump ', '-O read_files_directory ', id)
+    # if SRA_BOOLEAN is FALSE, download fastq directly
+    # build the EBI directory URL that holds the fastq files of this accession
+    url_base = paste0('ftp://ftp.sra.ebi.ac.uk/vol1/fastq/',
+                      substr(id, 1, 6), '/', id)
+    # list all fastq files (one file name per line of the directory listing)
+    all_fastq_files = strsplit(getURL(url_base, dirlistonly = TRUE), split = '\n')[[1]]
+    if (length(all_fastq_files) == 1) {
+      # this is a single end SRA file. build url to download fastq
+      url = paste0(url_base, '/', all_fastq_files)
+      bdss_command = paste0('/tool_deps/_conda/bin/bdss transfer -u ', url)
+      run_bdss = system(bdss_command, intern = TRUE)
+      print(run_bdss)
+    } else {
+      print(paste0('Two fastq files found for this SRA accession: ', id))
+      print(paste0('Downloading skipped for ', id))
+      # url_1 = paste0(url_base, '/', all_fastq_files[1])
+      # bdss_command_1 = paste0('/tool_deps/_conda/bin/bdss transfer -u ', url_1)
+      # run_bdss_1 = system(bdss_command_1, intern = TRUE)
+      # url_2 = paste0(url_base, '/', all_fastq_files[2])
+      # bdss_command_2 = paste0('/tool_deps/_conda/bin/bdss transfer -u ', url_2)
+      # run_bdss_2 = system(bdss_command_2, intern = TRUE)
+    }
   }
+
 }
 
 # remove all .sra file
--- a/bdss_client_sra_se.xml Tue Oct 10 20:22:55 2017 -0400 +++ b/bdss_client_sra_se.xml Wed Oct 11 11:23:04 2017 -0400 @@ -6,6 +6,7 @@ <requirement type="package" version="0.3.5">r-htmltools</requirement> <requirement type="package" version="0.5.0">r-dplyr</requirement> <requirement type="package" version="0.5.4">parallel-fastq-dump</requirement> + <requirement type="package" version="1.95_4.8">r-rcurl</requirement> </requirements> <description> Download and extract single end reads in fastq or fasta format from NCBI SRA. The output is a list of datasets @@ -16,7 +17,7 @@ <regex match="XXX" source="stderr" level="warning" - description="Check the warnings_and_errors.txt file for more details." /> + description="Check the warnings_and_errors.txt file for more details."/> </stdio> <command> <![CDATA[ @@ -24,6 +25,7 @@ -i '$sra_accession' -e $echo -f $format + -S $sra -r $report -d $report.files_path @@ -35,14 +37,18 @@ <inputs> <param type="text" name="sra_accession" label="SRR/DRR/ERR accessions" help="A list of SRR/DRR/ERR accessions separated by comma or space. e.g. SRR6077558,ERR343809"/> - <param type="boolean" name="format" truevalue="fastq" falsevalue="fasta" checked="true" label="output files in fastq (Yes) or fasta (No)?"/> - <param type="boolean" name="echo" truevalue="TRUE" falsevalue="FALSE" checked="false" label="Display analysis code in report?" 
/> + <param type="boolean" name="format" truevalue="fastq" falsevalue="fasta" checked="true" + label="output files in fastq (Yes) or fasta (No)?"/> + <param type="boolean" name="sra" truevalue="TRUE" falsevalue="FALSE" checked="false" + label="Download SRA or Fastq"/> + <param type="boolean" name="echo" truevalue="TRUE" falsevalue="FALSE" checked="false" + label="Display analysis code in report?"/> </inputs> <outputs> - <data format="html" name="report" label="BDSS report" /> + <data format="html" name="report" label="BDSS report"/> <collection type="list" name="list_collection" label="BDSS fastq/fasta (single end reads)"> - <discover_datasets pattern="__name_and_ext__" directory="read_files_directory" /> + <discover_datasets pattern="__name_and_ext__" directory="read_files_directory"/> </collection> - <data format="txt" name="sink_message" label="Warnings and Errors" from_work_dir="warnings_and_errors.txt" /> + <data format="txt" name="sink_message" label="Warnings and Errors" from_work_dir="warnings_and_errors.txt"/> </outputs> </tool> \ No newline at end of file
--- a/bdss_client_sra_se_render.R Tue Oct 10 20:22:55 2017 -0400 +++ b/bdss_client_sra_se_render.R Wed Oct 11 11:23:04 2017 -0400 @@ -37,6 +37,7 @@ spec_list=list() spec_list$SRA_ACCESSION = c('sra_accession', 'i', '1', 'character') spec_list$FORMAT = c('format', 'f', '1', 'character') +spec_list$SRA_BOOLEAN = c('sra_boolean', 'S', '1', 'character') spec_list$ECHO = c('echo', 'e', '1', 'character') ##--------2. output report and outputs -------------- spec_list$REPORT_HTML = c('report_html', 'r', '1', 'character') @@ -71,6 +72,9 @@ gsub('FORMAT', opt$format, x) }) %>% (function(x) { + gsub('SRA_BOOLEAN', opt$sra_boolean, x) + }) %>% + (function(x) { gsub('ECHO', opt$echo, x) }) %>% (function(x) {