view bdss_client_sra_se.Rmd @ 7:efb1938c3020 draft

planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_bdss_client commit ea6df97960569d247d64be1549aa90768c9786f4-dirty
author mingchen0919
date Wed, 11 Oct 2017 13:21:54 -0400
parents c707a4178832
children fe5f30584308
line wrap: on
line source

---
title: 'Fastq-dump: download and extract single end reads into FASTQ/FASTA file'
output:
    html_document:
      number_sections: true
      toc: true
      theme: cosmo
      highlight: tango
---

```{r setup, include=FALSE, warning=FALSE, message=FALSE}
# Apply one echo setting to every chunk in this report. ECHO is a placeholder
# token (presumably replaced with TRUE or FALSE before rendering -- confirm
# against the wrapper that fills in this template).
knitr::opts_chunk$set(echo = ECHO)
```

# Command line arguments

```{r 'command line arguments'}
# Show the structure of `opt` in the report. `opt` is not defined in this
# document; presumably it holds the parsed command line options -- confirm.
utils::str(opt)
```

# BDSS configuration file

First, we create a BDSS configuration file, `bdss.cfg`, in the current directory.

```{r}
# Write the two-line BDSS configuration file directly from R instead of
# shelling out to `echo` twice. The file content is unchanged; the exit
# statuses of the shell calls are no longer printed into the report.
writeLines(
  c('[metadata_repository]',
    'url=http://bdss.bioinfo.wsu.edu/'),
  con = 'bdss.cfg'
)
```

# Download and extract reads

```{r 'download and extract reads'}
# Download reads for every requested SRA accession with the bdss client.
#
# SRA_ACCESSION, SRA_BOOLEAN and FORMAT are placeholder tokens; presumably they
# are replaced with real values before this document is rendered -- confirm
# against the wrapper that fills in this template.
#
# NOTE(review): `command` is overwritten on every iteration and is executed by
# a later chunk, so only the last accession is converted by fastq-dump, and the
# .sra cleanup at the end of this chunk runs before that conversion. Confirm
# whether this is intended.

# create a directory to store read files (silent if it already exists)
dir.create('read_files_directory', showWarnings = FALSE)

# split the accession list on commas and/or spaces and drop empty fields
sra_accessions = strsplit(gsub(',', ' ', 'SRA_ACCESSION'), ' ')[[1]]
sra_accessions = sra_accessions[sra_accessions != '']

# loop through SRA accessions to download and extract reads.
for (id in sra_accessions) {
  if (SRA_BOOLEAN) {
    ## SRA_BOOLEAN is TRUE: download the .sra file, then prepare the
    ## fastq-dump command that converts it to fastq/fasta
    url = paste0('ftp://ftp.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/',
                 substr(id, 1, 3), '/',
                 substr(id, 1, 6), '/', id, '/', id, '.sra')
    # shQuote() keeps a malformed accession from being interpreted by the shell
    bdss_command = paste0('/tool_deps/_conda/bin/bdss transfer -u ', shQuote(url))
    run_bdss = system(bdss_command, intern = TRUE)
    print(run_bdss)
    # build the .sra -> .fastq/.fasta conversion command
    if ('FORMAT' == 'fasta') {
      command = paste0('fastq-dump --fasta ', '-O read_files_directory ', shQuote(id))
    } else {
      command = paste0('fastq-dump ', '-O read_files_directory ', shQuote(id))
    }
  } else {
    ## SRA_BOOLEAN is FALSE: download the fastq file directly
    url_base = paste0('ftp://ftp.sra.ebi.ac.uk/vol1/fastq/',
                      substr(id, 1, 6), '/', id)
    # list all fastq files; the trailing slash marks the URL as a directory,
    # and the listing may use either "\n" or "\r\n" line endings
    listing = getURL(paste0(url_base, '/'), dirlistonly = TRUE)
    all_fastq_files = strsplit(listing, split = '\r*\n')[[1]]
    all_fastq_files = all_fastq_files[all_fastq_files != '']
    if (length(all_fastq_files) == 1) {
      # exactly one file: a single end run, download it
      url = paste0(url_base, '/', all_fastq_files)
      bdss_command = paste0('/tool_deps/_conda/bin/bdss transfer -u ', shQuote(url))
      run_bdss = system(bdss_command, intern = TRUE)
      print(run_bdss)
    } else {
      # zero or several files: this report only handles single end reads,
      # so the accession is skipped
      print(paste0(length(all_fastq_files),
                   ' fastq files found for this SRA accession: ', id))
      print(paste0('Downloading skipped for ', id))
    }
  }
}

# remove all downloaded .sra files; unlink() does nothing when no file
# matches, whereas `rm *.sra` fails in that case
unlink(Sys.glob('*.sra'))
```

* `fastq-dump` command
```{r}
# `command` is only assigned when reads are downloaded as .sra files, so guard
# against it being undefined instead of aborting the report.
if (exists('command', mode = 'character')) {
  print(command)
} else {
  print('No fastq-dump command was generated.')
}
```

* Command line stdout

```{r}
# Run the fastq-dump command and show its stdout. `command` is only assigned
# when reads are downloaded as .sra files, so skip the call when it is missing.
if (exists('command', mode = 'character')) {
  system(command = command, intern = TRUE)
} else {
  print('fastq-dump was not run.')
}
```