# HG changeset patch
# User mingchen0919
# Date 1507735384 14400
# Node ID c707a417883252fa025aec9fe8e78b6c7dc67586
# Parent 979136fc065016977b8b5a76d4ad7137266f6a74
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_bdss_client commit ea6df97960569d247d64be1549aa90768c9786f4-dirty
diff -r 979136fc0650 -r c707a4178832 bdss_client_sra_pe.Rmd
--- a/bdss_client_sra_pe.Rmd Tue Oct 10 20:22:55 2017 -0400
+++ b/bdss_client_sra_pe.Rmd Wed Oct 11 11:23:04 2017 -0400
@@ -30,20 +30,50 @@
sra_accessions = sra_accessions[sra_accessions != '']
# loop through SRA accessions to download and extract reads.
for(id in sra_accessions) {
- # build URL from SRA accession
- url = paste0('ftp://ftp.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/SRR/',
- substr(id, 1, 6), '/', id, '/', id, '.sra')
- # download sra file with bdss
- bdss_command = paste0('/tool_deps/_conda/bin/bdss transfer -u ', url)
- system(bdss_command, intern = TRUE)
- # convert .sra to .fastq/.fasta
- if('FORMAT' == 'fasta') {
- command = paste0('fastq-dump --fasta --split-files ', '-O read_files_directory ', id)
- } else {
- command = paste0('fastq-dump --split-files ', '-O read_files_directory ', id)
+ if (SRA_BOOLEAN) {
+ # build URL from SRA accession; the SRR/ERR/DRR prefix selects the top-level directory
+ url = paste0('ftp://ftp.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/', substr(id, 1, 3), '/',
+ substr(id, 1, 6), '/', id, '/', id, '.sra')
+ # download sra file with bdss
+ bdss_command = paste0('/tool_deps/_conda/bin/bdss transfer -u ', url)
+ system(bdss_command, intern = TRUE)
+ # build the fastq-dump command; FORMAT is a placeholder replaced by the render script
+ if('FORMAT' == 'fasta') {
+ command = paste0('fastq-dump --fasta --split-files ', '-O read_files_directory ', id)
+ } else {
+ command = paste0('fastq-dump --split-files ', '-O read_files_directory ', id)
+ }
+ # run fastq-dump and capture its stdout
+ system(command = command, intern = TRUE)
+ } else {
+ # if SRA_BOOLEAN is FALSE, download fastq directly
+ # build URL for downloading fastq
+ url_base = paste0('ftp://ftp.sra.ebi.ac.uk/vol1/fastq/',
+ substr(id, 1, 6), '/', id)
+ # list all fastq files (an FTP listing URL must end in '/'; entries may end in CRLF)
+ all_fastq_files = strsplit(getURL(paste0(url_base, '/'), dirlistonly = TRUE), split = '\r?\n')[[1]]
+ if (length(all_fastq_files) == 1) {
+ # a single fastq file means single end reads; this paired end tool skips it
+ print(paste0('only one fastq file found for this SRA accession: ', id))
+ print(paste0('Downloading skipped for ', id))
+ # url = paste0(url_base, '/', all_fastq_files)
+ # bdss_command = paste0('/tool_deps/_conda/bin/bdss transfer -u ', url)
+ # run_bdss = system(bdss_command, intern = TRUE)
+ # print(run_bdss)
+ } else {
+ print(paste0('Two fastq files found for this SRA accession: ', id))
+ print(paste0('Downloading both fastq files for ', id))
+ url_1 = paste0(url_base, '/', all_fastq_files[1])
+ bdss_command_1 = paste0('/tool_deps/_conda/bin/bdss transfer -u ', url_1)
+ run_bdss_1 = system(bdss_command_1, intern = TRUE)
+ print(run_bdss_1)
+ url_2 = paste0(url_base, '/', all_fastq_files[2])
+ bdss_command_2 = paste0('/tool_deps/_conda/bin/bdss transfer -u ', url_2)
+ run_bdss_2 = system(bdss_command_2, intern = TRUE)
+ print(run_bdss_2)
+ }
}
- # command line stdout
- system(command = command, intern = TRUE)
+
}
# remove all .sra file
system('rm *.sra', intern = TRUE)
diff -r 979136fc0650 -r c707a4178832 bdss_client_sra_pe.xml
--- a/bdss_client_sra_pe.xml Tue Oct 10 20:22:55 2017 -0400
+++ b/bdss_client_sra_pe.xml Wed Oct 11 11:23:04 2017 -0400
@@ -6,9 +6,11 @@
r-htmltools
r-dplyr
parallel-fastq-dump
+ r-rcurl
- Download and extract paired end reads in fastq or fasta format from NCBI SRA. The output is a list of paired datasets
+ Download and extract paired end reads in fastq or fasta format from NCBI SRA. The output is a list of paired
+ datasets
collection.
@@ -24,6 +26,7 @@
-i '$sra_accession'
-e $echo
-f $format
+ -S $sra
-r $report
-d $report.files_path
@@ -37,14 +40,18 @@
help="A list of SRR/DRR/ERR accessions separated by comma or space. e.g. SRR6077558,ERR343809"/>
+
-
+
-
+
-
+
\ No newline at end of file
diff -r 979136fc0650 -r c707a4178832 bdss_client_sra_pe_render.R
--- a/bdss_client_sra_pe_render.R Tue Oct 10 20:22:55 2017 -0400
+++ b/bdss_client_sra_pe_render.R Wed Oct 11 11:23:04 2017 -0400
@@ -37,6 +37,7 @@
spec_list=list()
spec_list$SRA_ACCESSION = c('sra_accession', 'i', '1', 'character')
spec_list$FORMAT = c('format', 'f', '1', 'character')
+spec_list$SRA_BOOLEAN = c('sra_boolean', 'S', '1', 'character')
spec_list$ECHO = c('echo', 'e', '1', 'character')
##--------2. output report and outputs --------------
spec_list$REPORT_HTML = c('report_html', 'r', '1', 'character')
@@ -71,6 +72,9 @@
gsub('FORMAT', opt$format, x)
}) %>%
(function(x) {
+ gsub('SRA_BOOLEAN', opt$sra_boolean, x)
+ }) %>%
+ (function(x) {
gsub('ECHO', opt$echo, x)
}) %>%
(function(x) {
diff -r 979136fc0650 -r c707a4178832 bdss_client_sra_se.Rmd
--- a/bdss_client_sra_se.Rmd Tue Oct 10 20:22:55 2017 -0400
+++ b/bdss_client_sra_se.Rmd Wed Oct 11 11:23:04 2017 -0400
@@ -39,18 +39,45 @@
sra_accessions = sra_accessions[sra_accessions != '']
# loop through SRA accessions to download and extract reads.
for(id in sra_accessions) {
- # build URL from SRA accession
- url = paste0('ftp://ftp.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/SRR/',
- substr(id, 1, 6), '/', id, '/', id, '.sra')
- # download sra file with bdss
- bdss_command = paste0('/tool_deps/_conda/bin/bdss transfer -u ', url)
- system(bdss_command, intern = TRUE)
- # convert .sra to .fastq/.fasta
- if('FORMAT' == 'fasta') {
- command = paste0('fastq-dump --fasta ', '-O read_files_directory ', id)
+ ## if SRA_BOOLEAN is TRUE, download .sra file and then convert it to fastq/fasta
+ if (SRA_BOOLEAN) {
+ # build URL from SRA accession; the SRR/ERR/DRR prefix selects the top-level directory
+ url = paste0('ftp://ftp.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/', substr(id, 1, 3), '/',
+ substr(id, 1, 6), '/', id, '/', id, '.sra')
+ # download sra file with bdss
+ bdss_command = paste0('/tool_deps/_conda/bin/bdss transfer -u ', url)
+ system(bdss_command, intern = TRUE)
+ # build the fastq-dump command; FORMAT is a placeholder replaced by the render script
+ if('FORMAT' == 'fasta') {
+ command = paste0('fastq-dump --fasta ', '-O read_files_directory ', id)
+ } else {
+ command = paste0('fastq-dump ', '-O read_files_directory ', id)
+ }
} else {
- command = paste0('fastq-dump ', '-O read_files_directory ', id)
+ # if SRA_BOOLEAN is FALSE, download fastq directly
+ # build URL for downloading fastq
+ url_base = paste0('ftp://ftp.sra.ebi.ac.uk/vol1/fastq/',
+ substr(id, 1, 6), '/', id)
+ # list all fastq files (an FTP listing URL must end in '/'; entries may end in CRLF)
+ all_fastq_files = strsplit(getURL(paste0(url_base, '/'), dirlistonly = TRUE), split = '\r?\n')[[1]]
+ if (length(all_fastq_files) == 1) {
+ # this is a single end SRA file. build url to download fastq
+ url = paste0(url_base, '/', all_fastq_files)
+ bdss_command = paste0('/tool_deps/_conda/bin/bdss transfer -u ', url)
+ run_bdss = system(bdss_command, intern = TRUE)
+ print(run_bdss)
+ } else {
+ print(paste0('Two fastq files found for this SRA accession: ', id))
+ print(paste0('Downloading skipped for ', id))
+ # url_1 = paste0(url_base, '/', all_fastq_files[1])
+ # bdss_command_1 = paste0('/tool_deps/_conda/bin/bdss transfer -u ', url_1)
+ # run_bdss_1 = system(bdss_command_1, intern = TRUE)
+ # url_2 = paste0(url_base, '/', all_fastq_files[2])
+ # bdss_command_2 = paste0('/tool_deps/_conda/bin/bdss transfer -u ', url_2)
+ # run_bdss_2 = system(bdss_command_2, intern = TRUE)
+ }
}
+
}
# remove all .sra file
diff -r 979136fc0650 -r c707a4178832 bdss_client_sra_se.xml
--- a/bdss_client_sra_se.xml Tue Oct 10 20:22:55 2017 -0400
+++ b/bdss_client_sra_se.xml Wed Oct 11 11:23:04 2017 -0400
@@ -6,6 +6,7 @@
r-htmltools
r-dplyr
parallel-fastq-dump
+ r-rcurl
Download and extract single end reads in fastq or fasta format from NCBI SRA. The output is a list of datasets
@@ -16,7 +17,7 @@
+ description="Check the warnings_and_errors.txt file for more details."/>
-
-
+
+
+
-
+
-
+
-
+
\ No newline at end of file
diff -r 979136fc0650 -r c707a4178832 bdss_client_sra_se_render.R
--- a/bdss_client_sra_se_render.R Tue Oct 10 20:22:55 2017 -0400
+++ b/bdss_client_sra_se_render.R Wed Oct 11 11:23:04 2017 -0400
@@ -37,6 +37,7 @@
spec_list=list()
spec_list$SRA_ACCESSION = c('sra_accession', 'i', '1', 'character')
spec_list$FORMAT = c('format', 'f', '1', 'character')
+spec_list$SRA_BOOLEAN = c('sra_boolean', 'S', '1', 'character')
spec_list$ECHO = c('echo', 'e', '1', 'character')
##--------2. output report and outputs --------------
spec_list$REPORT_HTML = c('report_html', 'r', '1', 'character')
@@ -71,6 +72,9 @@
gsub('FORMAT', opt$format, x)
}) %>%
(function(x) {
+ gsub('SRA_BOOLEAN', opt$sra_boolean, x)
+ }) %>%
+ (function(x) {
gsub('ECHO', opt$echo, x)
}) %>%
(function(x) {