# HG changeset patch # User mingchen0919 # Date 1507735384 14400 # Node ID c707a417883252fa025aec9fe8e78b6c7dc67586 # Parent 979136fc065016977b8b5a76d4ad7137266f6a74 planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_bdss_client commit ea6df97960569d247d64be1549aa90768c9786f4-dirty diff -r 979136fc0650 -r c707a4178832 bdss_client_sra_pe.Rmd --- a/bdss_client_sra_pe.Rmd Tue Oct 10 20:22:55 2017 -0400 +++ b/bdss_client_sra_pe.Rmd Wed Oct 11 11:23:04 2017 -0400 @@ -30,20 +30,50 @@ sra_accessions = sra_accessions[sra_accessions != ''] # loop through SRA accessions to download and extract reads. for(id in sra_accessions) { - # build URL from SRA accession - url = paste0('ftp://ftp.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/SRR/', - substr(id, 1, 6), '/', id, '/', id, '.sra') - # download sra file with bdss - bdss_command = paste0('/tool_deps/_conda/bin/bdss transfer -u ', url) - system(bdss_command, intern = TRUE) - # convert .sra to .fastq/.fasta - if('FORMAT' == 'fasta') { - command = paste0('fastq-dump --fasta --split-files ', '-O read_files_directory ', id) - } else { - command = paste0('fastq-dump --split-files ', '-O read_files_directory ', id) + if (SRA_BOOLEAN) { + # build URL from SRA accession + url = paste0('ftp://ftp.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/SRR/', + substr(id, 1, 6), '/', id, '/', id, '.sra') + # download sra file with bdss + bdss_command = paste0('/tool_deps/_conda/bin/bdss transfer -u ', url) + system(bdss_command, intern = TRUE) + # convert .sra to .fastq/.fasta + if('FORMAT' == 'fasta') { + command = paste0('fastq-dump --fasta --split-files ', '-O read_files_directory ', id) + } else { + command = paste0('fastq-dump --split-files ', '-O read_files_directory ', id) + } + # command line stdout + system(command = command, intern = TRUE) + } else { + # if SRA_BOOLEAN is FALSE, download fastq directly + # build URL for downloading fastq + url_base = paste0('ftp://ftp.sra.ebi.ac.uk/vol1/fastq/', + substr(id, 1, 6), '/', id) + # list all fastq files + all_fastq_files = strsplit(getURL(url_base, dirlistonly = TRUE), split = '\n')[[1]] + if (length(all_fastq_files) == 1) { + # this is a single end SRA file. build url to download fastq + print(paste0('only one fastq file found for this SRA accession: ', id)) + print('Downloading skipped for ', id) + # url = paste0(url_base, '/', all_fastq_files) + # bdss_command = paste0('/tool_deps/_conda/bin/bdss transfer -u ', url) + # run_bdss = system(bdss_command, intern = TRUE) + # print(run_bdss) + } else { + print(paste0('Two fastq files found for this SRA accession: ', id)) + print('Downloading skipped for ', id) + url_1 = paste0(url_base, '/', all_fastq_files[1]) + bdss_command_1 = paste0('/tool_deps/_conda/bin/bdss transfer -u ', url_1) + run_bdss_1 = system(bdss_command, intern = TRUE) + print(run_bdss_1) + url_2 = paste0(url_base, '/', all_fastq_files[1]) + bdss_command_2 = paste0('/tool_deps/_conda/bin/bdss transfer -u ', url_2) + run_bdss_2 = system(bdss_command, intern = TRUE) + print(run_bdss_2) + } } - # command line stdout - system(command = command, intern = TRUE) + } # remove all .sra file system('rm *.sra', intern = TRUE) diff -r 979136fc0650 -r c707a4178832 bdss_client_sra_pe.xml --- a/bdss_client_sra_pe.xml Tue Oct 10 20:22:55 2017 -0400 +++ b/bdss_client_sra_pe.xml Wed Oct 11 11:23:04 2017 -0400 @@ -6,9 +6,11 @@ r-htmltools r-dplyr parallel-fastq-dump + r-rcurl - Download and extract paired end reads in fastq or fasta format from NCBI SRA. The output is a list of paired datasets + Download and extract paired end reads in fastq or fasta format from NCBI SRA. The output is a list of paired + datasets collection. @@ -24,6 +26,7 @@ -i '$sra_accession' -e $echo -f $format + -S $sra -r $report -d $report.files_path @@ -37,14 +40,18 @@ help="A list of SRR/DRR/ERR accessions separated by comma or space. e.g. SRR6077558,ERR343809"/> + - + - + - + \ No newline at end of file diff -r 979136fc0650 -r c707a4178832 bdss_client_sra_pe_render.R --- a/bdss_client_sra_pe_render.R Tue Oct 10 20:22:55 2017 -0400 +++ b/bdss_client_sra_pe_render.R Wed Oct 11 11:23:04 2017 -0400 @@ -37,6 +37,7 @@ spec_list=list() spec_list$SRA_ACCESSION = c('sra_accession', 'i', '1', 'character') spec_list$FORMAT = c('format', 'f', '1', 'character') +spec_list$SRA_BOOLEAN = c('sra_boolean', 'S', '1', 'character') spec_list$ECHO = c('echo', 'e', '1', 'character') ##--------2. output report and outputs -------------- spec_list$REPORT_HTML = c('report_html', 'r', '1', 'character') @@ -71,6 +72,9 @@ gsub('FORMAT', opt$format, x) }) %>% (function(x) { + gsub('SRA_BOOLEAN', opt$sra_boolean, x) + }) %>% + (function(x) { gsub('ECHO', opt$echo, x) }) %>% (function(x) { diff -r 979136fc0650 -r c707a4178832 bdss_client_sra_se.Rmd --- a/bdss_client_sra_se.Rmd Tue Oct 10 20:22:55 2017 -0400 +++ b/bdss_client_sra_se.Rmd Wed Oct 11 11:23:04 2017 -0400 @@ -39,18 +39,45 @@ sra_accessions = sra_accessions[sra_accessions != ''] # loop through SRA accessions to download and extract reads. for(id in sra_accessions) { - # build URL from SRA accession - url = paste0('ftp://ftp.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/SRR/', - substr(id, 1, 6), '/', id, '/', id, '.sra') - # download sra file with bdss - bdss_command = paste0('/tool_deps/_conda/bin/bdss transfer -u ', url) - system(bdss_command, intern = TRUE) - # convert .sra to .fastq/.fasta - if('FORMAT' == 'fasta') { - command = paste0('fastq-dump --fasta ', '-O read_files_directory ', id) + ## if SRA_BOOLEAN is TRUE, download .sra file and then convert it to fastq/fasta + if (SRA_BOOLEAN) { + # build URL from SRA accession + url = paste0('ftp://ftp.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/SRR/', + substr(id, 1, 6), '/', id, '/', id, '.sra') + # download sra file with bdss + bdss_command = paste0('/tool_deps/_conda/bin/bdss transfer -u ', url) + system(bdss_command, intern = TRUE) + # convert .sra to .fastq/.fasta + if('FORMAT' == 'fasta') { + command = paste0('fastq-dump --fasta ', '-O read_files_directory ', id) + } else { + command = paste0('fastq-dump ', '-O read_files_directory ', id) + } } else { - command = paste0('fastq-dump ', '-O read_files_directory ', id) + # if SRA_BOOLEAN is FALSE, download fastq directly + # build URL for downloading fastq + url_base = paste0('ftp://ftp.sra.ebi.ac.uk/vol1/fastq/', + substr(id, 1, 6), '/', id) + # list all fastq files + all_fastq_files = strsplit(getURL(url_base, dirlistonly = TRUE), split = '\n')[[1]] + if (length(all_fastq_files) == 1) { + # this is a single end SRA file. build url to download fastq + url = paste0(url_base, '/', all_fastq_files) + bdss_command = paste0('/tool_deps/_conda/bin/bdss transfer -u ', url) + run_bdss = system(bdss_command, intern = TRUE) + print(run_bdss) + } else { + print(paste0('Two fastq files found for this SRA accession: ', id)) + print('Downloading skipped for ', id) + # url_1 = paste0(url_base, '/', all_fastq_files[1]) + # bdss_command_1 = paste0('/tool_deps/_conda/bin/bdss transfer -u ', url_1) + # run_bdss_1 = system(bdss_command, intern = TRUE) + # url_2 = paste0(url_base, '/', all_fastq_files[1]) + # bdss_command_2 = paste0('/tool_deps/_conda/bin/bdss transfer -u ', url_2) + # run_bdss_2 = system(bdss_command, intern = TRUE) + } } + } # remove all .sra file diff -r 979136fc0650 -r c707a4178832 bdss_client_sra_se.xml --- a/bdss_client_sra_se.xml Tue Oct 10 20:22:55 2017 -0400 +++ b/bdss_client_sra_se.xml Wed Oct 11 11:23:04 2017 -0400 @@ -6,6 +6,7 @@ r-htmltools r-dplyr parallel-fastq-dump + r-rcurl Download and extract single end reads in fastq or fasta format from NCBI SRA. The output is a list of datasets @@ -16,7 +17,7 @@ + description="Check the warnings_and_errors.txt file for more details."/> - - + + + - + - + - + \ No newline at end of file diff -r 979136fc0650 -r c707a4178832 bdss_client_sra_se_render.R --- a/bdss_client_sra_se_render.R Tue Oct 10 20:22:55 2017 -0400 +++ b/bdss_client_sra_se_render.R Wed Oct 11 11:23:04 2017 -0400 @@ -37,6 +37,7 @@ spec_list=list() spec_list$SRA_ACCESSION = c('sra_accession', 'i', '1', 'character') spec_list$FORMAT = c('format', 'f', '1', 'character') +spec_list$SRA_BOOLEAN = c('sra_boolean', 'S', '1', 'character') spec_list$ECHO = c('echo', 'e', '1', 'character') ##--------2. output report and outputs -------------- spec_list$REPORT_HTML = c('report_html', 'r', '1', 'character') @@ -71,6 +72,9 @@ gsub('FORMAT', opt$format, x) }) %>% (function(x) { + gsub('SRA_BOOLEAN', opt$sra_boolean, x) + }) %>% + (function(x) { gsub('ECHO', opt$echo, x) }) %>% (function(x) {