changeset 3:c707a4178832 draft

planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_bdss_client commit ea6df97960569d247d64be1549aa90768c9786f4-dirty
author mingchen0919
date Wed, 11 Oct 2017 11:23:04 -0400
parents 979136fc0650
children 206c59e498e7
files bdss_client_sra_pe.Rmd bdss_client_sra_pe.xml bdss_client_sra_pe_render.R bdss_client_sra_se.Rmd bdss_client_sra_se.xml bdss_client_sra_se_render.R
diffstat 6 files changed, 111 insertions(+), 33 deletions(-) [+]
line wrap: on
line diff
--- a/bdss_client_sra_pe.Rmd	Tue Oct 10 20:22:55 2017 -0400
+++ b/bdss_client_sra_pe.Rmd	Wed Oct 11 11:23:04 2017 -0400
@@ -30,20 +30,50 @@
 sra_accessions = sra_accessions[sra_accessions != '']
 # loop through SRA accessions to download and extract reads.
 for(id in sra_accessions) {
-  # build URL from SRA accession
-  url = paste0('ftp://ftp.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/SRR/',
-               substr(id, 1, 6), '/', id, '/', id, '.sra')
-  # download sra file with bdss
-  bdss_command = paste0('/tool_deps/_conda/bin/bdss transfer -u ', url)
-  system(bdss_command, intern = TRUE)
-  # convert .sra to .fastq/.fasta
-  if('FORMAT' == 'fasta') {
-    command = paste0('fastq-dump --fasta --split-files ', '-O read_files_directory ', id)
-  } else {
-    command = paste0('fastq-dump --split-files ', '-O read_files_directory ', id)
+  if (SRA_BOOLEAN) {
+    # build URL from SRA accession
+    url = paste0('ftp://ftp.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/SRR/',
+                 substr(id, 1, 6), '/', id, '/', id, '.sra')
+    # download sra file with bdss
+    bdss_command = paste0('/tool_deps/_conda/bin/bdss transfer -u ', url)
+    system(bdss_command, intern = TRUE)
+    # convert .sra to .fastq/.fasta
+    if('FORMAT' == 'fasta') {
+      command = paste0('fastq-dump --fasta --split-files ', '-O read_files_directory ', id)
+    } else {
+      command = paste0('fastq-dump --split-files ', '-O read_files_directory ', id)
+    }
+    # command line stdout
+    system(command = command, intern = TRUE)
+  } else  {
+    # if SRA_BOOLEAN is FALSE, download fastq directly
+    # build URL for downloading fastq
+    url_base = paste0('ftp://ftp.sra.ebi.ac.uk/vol1/fastq/',
+                      substr(id, 1, 6), '/', id)
+    # list all fastq files
+    all_fastq_files = strsplit(getURL(url_base, dirlistonly = TRUE), split = '\n')[[1]]
+    if (length(all_fastq_files) == 1) {
+      # a single fastq file means single end reads; this paired end tool skips it
+      print(paste0('only one fastq file found for this SRA accession: ', id))
+      print(paste0('Downloading skipped for ', id))
+      # url = paste0(url_base, '/', all_fastq_files)
+      # bdss_command = paste0('/tool_deps/_conda/bin/bdss transfer -u ', url)
+      # run_bdss = system(bdss_command, intern = TRUE)
+      # print(run_bdss)
+    } else {
+      print(paste0('Two fastq files found for this SRA accession: ', id))
+      print(paste0('Downloading both fastq files for ', id))
+      url_1 = paste0(url_base, '/', all_fastq_files[1])
+      bdss_command_1 = paste0('/tool_deps/_conda/bin/bdss transfer -u ', url_1)
+      run_bdss_1 = system(bdss_command_1, intern = TRUE)
+      print(run_bdss_1)
+      url_2 = paste0(url_base, '/', all_fastq_files[2])
+      bdss_command_2 = paste0('/tool_deps/_conda/bin/bdss transfer -u ', url_2)
+      run_bdss_2 = system(bdss_command_2, intern = TRUE)
+      print(run_bdss_2)
+    }
   }
-  # command line stdout
-  system(command = command, intern = TRUE)
+
 }
 # remove all .sra file
 system('rm *.sra', intern = TRUE)
--- a/bdss_client_sra_pe.xml	Tue Oct 10 20:22:55 2017 -0400
+++ b/bdss_client_sra_pe.xml	Wed Oct 11 11:23:04 2017 -0400
@@ -6,9 +6,11 @@
         <requirement type="package" version="0.3.5">r-htmltools</requirement>
         <requirement type="package" version="0.5.0">r-dplyr</requirement>
         <requirement type="package" version="0.5.4">parallel-fastq-dump</requirement>
+        <requirement type="package" version="1.95_4.8">r-rcurl</requirement>
     </requirements>
     <description>
-        Download and extract paired end reads in fastq or fasta format from NCBI SRA. The output is a list of paired datasets
+        Download and extract paired end reads in fastq or fasta format from NCBI SRA. The output is a list of paired
+        datasets
         collection.
     </description>
     <stdio>
@@ -24,6 +26,7 @@
                 -i '$sra_accession'
                 -e $echo
                 -f $format
+                -S $sra
 
                 -r $report
                 -d $report.files_path
@@ -37,14 +40,18 @@
                help="A list of SRR/DRR/ERR accessions separated by comma or space. e.g. SRR6077558,ERR343809"/>
         <param type="boolean" name="format" truevalue="fastq" falsevalue="fasta" checked="true"
                label="output files in fastq (Yes) or fasta (No)?"/>
+        <param type="boolean" name="sra" truevalue="TRUE" falsevalue="FALSE" checked="false"
+               label="Download SRA or Fastq"/>
         <param type="boolean" name="echo" truevalue="TRUE" falsevalue="FALSE" checked="false"
                label="Display analysis code in report?"/>
     </inputs>
     <outputs>
-        <data format="html" name="report" label="BDSS report" />
+        <data format="html" name="report" label="BDSS report"/>
         <collection type="list:paired" name="list_collection" label="BDSS fastq/fasta (paired end reads)">
-            <discover_datasets pattern="(?P&lt;identifier_0&gt;[^_]+)_(?P&lt;identifier_1&gt;[^_]+)\.(?P&lt;ext&gt;[^\._]+)?" directory="read_files_directory"/>
+            <discover_datasets
+                    pattern="(?P&lt;identifier_0&gt;[^_]+)_(?P&lt;identifier_1&gt;[^_]+)\.(?P&lt;ext&gt;[^\._]+)?"
+                    directory="read_files_directory"/>
         </collection>
-        <data format="txt" name="sink_message" label="Warnings and Errors" from_work_dir="warnings_and_errors.txt" />
+        <data format="txt" name="sink_message" label="Warnings and Errors" from_work_dir="warnings_and_errors.txt"/>
     </outputs>
 </tool>
\ No newline at end of file
--- a/bdss_client_sra_pe_render.R	Tue Oct 10 20:22:55 2017 -0400
+++ b/bdss_client_sra_pe_render.R	Wed Oct 11 11:23:04 2017 -0400
@@ -37,6 +37,7 @@
 spec_list=list()
 spec_list$SRA_ACCESSION = c('sra_accession', 'i', '1', 'character')
 spec_list$FORMAT = c('format', 'f', '1', 'character')
+spec_list$SRA_BOOLEAN = c('sra_boolean', 'S', '1', 'character')
 spec_list$ECHO = c('echo', 'e', '1', 'character')
 ##--------2. output report and outputs --------------
 spec_list$REPORT_HTML = c('report_html', 'r', '1', 'character')
@@ -71,6 +72,9 @@
     gsub('FORMAT', opt$format, x)
   }) %>%
   (function(x) {
+    gsub('SRA_BOOLEAN', opt$sra_boolean, x)
+  }) %>%
+  (function(x) {
     gsub('ECHO', opt$echo, x)
   }) %>%
   (function(x) {
--- a/bdss_client_sra_se.Rmd	Tue Oct 10 20:22:55 2017 -0400
+++ b/bdss_client_sra_se.Rmd	Wed Oct 11 11:23:04 2017 -0400
@@ -39,18 +39,45 @@
 sra_accessions = sra_accessions[sra_accessions != '']
 # loop through SRA accessions to download and extract reads.
 for(id in sra_accessions) {
-  # build URL from SRA accession
-  url = paste0('ftp://ftp.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/SRR/',
-               substr(id, 1, 6), '/', id, '/', id, '.sra')
-  # download sra file with bdss
-  bdss_command = paste0('/tool_deps/_conda/bin/bdss transfer -u ', url)
-  system(bdss_command, intern = TRUE)
-  # convert .sra to .fastq/.fasta
-  if('FORMAT' == 'fasta') {
-    command = paste0('fastq-dump --fasta ', '-O read_files_directory ', id)
+  ## if SRA_BOOLEAN is TRUE, download .sra file and then convert it to fastq/fasta
+  if (SRA_BOOLEAN) {
+    # build URL from SRA accession
+    url = paste0('ftp://ftp.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/SRR/',
+                 substr(id, 1, 6), '/', id, '/', id, '.sra')
+    # download sra file with bdss
+    bdss_command = paste0('/tool_deps/_conda/bin/bdss transfer -u ', url)
+    system(bdss_command, intern = TRUE)
+    # convert .sra to .fastq/.fasta
+    fasta_flag = if ('FORMAT' == 'fasta') '--fasta ' else ''
+    command = paste0('fastq-dump ', fasta_flag, '-O read_files_directory ', id)
+    # run the conversion; building the command alone does not extract any reads
+    # NOTE(review): assumes no later code in this file already runs `command` - confirm
+    system(command = command, intern = TRUE)
   } else {
-    command = paste0('fastq-dump ', '-O read_files_directory ', id)
+    # if SRA_BOOLEAN is FALSE, download fastq directly
+    # build URL for downloading fastq
+    url_base = paste0('ftp://ftp.sra.ebi.ac.uk/vol1/fastq/',
+                      substr(id, 1, 6), '/', id)
+    # list all fastq files
+    all_fastq_files = strsplit(getURL(url_base, dirlistonly = TRUE), split = '\n')[[1]]
+    if (length(all_fastq_files) == 1) {
+      # this is a single end SRA file. build url to download fastq
+      url = paste0(url_base, '/', all_fastq_files)
+      bdss_command = paste0('/tool_deps/_conda/bin/bdss transfer -u ', url)
+      run_bdss = system(bdss_command, intern = TRUE)
+      print(run_bdss)
+    } else {
+      print(paste0('Two fastq files found for this SRA accession: ', id))
+      print(paste0('Downloading skipped for ', id))
+      # url_1 = paste0(url_base, '/', all_fastq_files[1])
+      # bdss_command_1 = paste0('/tool_deps/_conda/bin/bdss transfer -u ', url_1)
+      # run_bdss_1 = system(bdss_command_1, intern = TRUE)
+      # url_2 = paste0(url_base, '/', all_fastq_files[2])
+      # bdss_command_2 = paste0('/tool_deps/_conda/bin/bdss transfer -u ', url_2)
+      # run_bdss_2 = system(bdss_command_2, intern = TRUE)
+    }
   }
+  
 }
 
 # remove all .sra file
--- a/bdss_client_sra_se.xml	Tue Oct 10 20:22:55 2017 -0400
+++ b/bdss_client_sra_se.xml	Wed Oct 11 11:23:04 2017 -0400
@@ -6,6 +6,7 @@
         <requirement type="package" version="0.3.5">r-htmltools</requirement>
         <requirement type="package" version="0.5.0">r-dplyr</requirement>
         <requirement type="package" version="0.5.4">parallel-fastq-dump</requirement>
+        <requirement type="package" version="1.95_4.8">r-rcurl</requirement>
     </requirements>
     <description>
         Download and extract single end reads in fastq or fasta format from NCBI SRA. The output is a list of datasets
@@ -16,7 +17,7 @@
         <regex match="XXX"
                source="stderr"
                level="warning"
-               description="Check the warnings_and_errors.txt file for more details." />
+               description="Check the warnings_and_errors.txt file for more details."/>
     </stdio>
     <command>
         <![CDATA[
@@ -24,6 +25,7 @@
                 -i '$sra_accession'
                 -e $echo
                 -f $format
+                -S $sra
 
                 -r $report
                 -d $report.files_path
@@ -35,14 +37,18 @@
     <inputs>
         <param type="text" name="sra_accession" label="SRR/DRR/ERR accessions"
                help="A list of SRR/DRR/ERR accessions separated by comma or space. e.g. SRR6077558,ERR343809"/>
-        <param type="boolean" name="format" truevalue="fastq" falsevalue="fasta" checked="true" label="output files in fastq (Yes) or fasta (No)?"/>
-        <param type="boolean" name="echo" truevalue="TRUE" falsevalue="FALSE" checked="false" label="Display analysis code in report?" />
+        <param type="boolean" name="format" truevalue="fastq" falsevalue="fasta" checked="true"
+               label="output files in fastq (Yes) or fasta (No)?"/>
+        <param type="boolean" name="sra" truevalue="TRUE" falsevalue="FALSE" checked="false"
+               label="Download SRA or Fastq"/>
+        <param type="boolean" name="echo" truevalue="TRUE" falsevalue="FALSE" checked="false"
+               label="Display analysis code in report?"/>
     </inputs>
     <outputs>
-        <data format="html" name="report" label="BDSS report" />
+        <data format="html" name="report" label="BDSS report"/>
         <collection type="list" name="list_collection" label="BDSS fastq/fasta (single end reads)">
-            <discover_datasets pattern="__name_and_ext__" directory="read_files_directory" />
+            <discover_datasets pattern="__name_and_ext__" directory="read_files_directory"/>
         </collection>
-        <data format="txt" name="sink_message" label="Warnings and Errors" from_work_dir="warnings_and_errors.txt" />
+        <data format="txt" name="sink_message" label="Warnings and Errors" from_work_dir="warnings_and_errors.txt"/>
     </outputs>
 </tool>
\ No newline at end of file
--- a/bdss_client_sra_se_render.R	Tue Oct 10 20:22:55 2017 -0400
+++ b/bdss_client_sra_se_render.R	Wed Oct 11 11:23:04 2017 -0400
@@ -37,6 +37,7 @@
 spec_list=list()
 spec_list$SRA_ACCESSION = c('sra_accession', 'i', '1', 'character')
 spec_list$FORMAT = c('format', 'f', '1', 'character')
+spec_list$SRA_BOOLEAN = c('sra_boolean', 'S', '1', 'character')
 spec_list$ECHO = c('echo', 'e', '1', 'character')
 ##--------2. output report and outputs --------------
 spec_list$REPORT_HTML = c('report_html', 'r', '1', 'character')
@@ -71,6 +72,9 @@
     gsub('FORMAT', opt$format, x)
   }) %>%
   (function(x) {
+    gsub('SRA_BOOLEAN', opt$sra_boolean, x)
+  }) %>%
+  (function(x) {
     gsub('ECHO', opt$echo, x)
   }) %>%
   (function(x) {