view bdss_client_sra_pe.Rmd @ 5:04100e773314 draft

planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_bdss_client commit ea6df97960569d247d64be1549aa90768c9786f4-dirty
author mingchen0919
date Wed, 11 Oct 2017 11:43:39 -0400
parents c707a4178832
children 415ebc9016da
line wrap: on
line source

---
title: 'Fastq-dump: download and extract paired-end reads into FASTQ/FASTA files'
output:
    html_document:
      number_sections: true
      toc: true
      theme: cosmo
      highlight: tango
---

```{r setup, include=FALSE, warning=FALSE, message=FALSE}
# Apply one global code-echo setting to every chunk rendered after this one.
knitr::opts_chunk$set(echo = ECHO)
```

# Command line arguments

```{r 'command line arguments'}
# Show the structure of the option list this report was rendered with.
utils::str(opt)
```

# Download and extract reads

```{r 'download and extract reads'}
# Download the reads for every requested accession with bdss and, for .sra
# downloads, extract them into read_files_directory with fastq-dump.
# NOTE(review): the upper-case tokens used as values in this chunk look like
# template placeholders that are filled in before knitting -- confirm against
# the rendering script.

# create a directory to store read files
dir.create('read_files_directory')

# Run `bdss transfer` for a single URL and return the lines it prints.
# The URL is shell-quoted because it is assembled from the accession string.
bdss_transfer <- function(url) {
  system(paste0('/tool_deps/_conda/bin/bdss transfer -u ', shQuote(url)),
         intern = TRUE)
}

# split the comma/space separated accession list and drop empty entries
sra_accessions <- strsplit(gsub(',', ' ', 'SRA_ACCESSION'), ' ')[[1]]
sra_accessions <- sra_accessions[sra_accessions != '']

# extra fastq-dump switch, only set when fasta output was requested
fasta_flag <- if ('FORMAT' == 'fasta') '--fasta ' else ''

# loop through SRA accessions to download and extract reads.
for (id in sra_accessions) {
  if (SRA_BOOLEAN) {
    # build URL from SRA accession
    url <- paste0('ftp://ftp.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/SRR/',
                  substr(id, 1, 6), '/', id, '/', id, '.sra')
    # download sra file with bdss
    bdss_transfer(url)
    # convert .sra to .fastq/.fasta, one file per mate
    command <- paste0('fastq-dump ', fasta_flag, '--split-files ',
                      '-O read_files_directory ', shQuote(id))
    system(command = command, intern = TRUE)
  } else {
    # download fastq directly instead of going through an .sra file
    # build URL for downloading fastq
    url_base <- paste0('ftp://ftp.sra.ebi.ac.uk/vol1/fastq/',
                       substr(id, 1, 6), '/', id)
    # list all fastq files
    all_fastq_files <- strsplit(getURL(url_base, dirlistonly = TRUE), split = '\n')[[1]]
    if (length(all_fastq_files) == 0) {
      # an empty listing would otherwise be turned into NA download URLs
      print(paste0('No fastq file found for this SRA accession: ', id))
      print(paste0('Downloading skipped for ', id))
    } else if (length(all_fastq_files) == 1) {
      # this is a single end SRA file; downloading it is deliberately disabled
      print(paste0('only one fastq file found for this SRA accession: ', id))
      print(paste0('Downloading skipped for ', id))
      # url = paste0(url_base, '/', all_fastq_files)
      # bdss_command = paste0('/tool_deps/_conda/bin/bdss transfer -u ', url)
      # run_bdss = system(bdss_command, intern = TRUE)
      # print(run_bdss)
    } else {
      print(paste0('Two fastq files found for this SRA accession: ', id))
      # transfer the first and the second listed file, each with its own URL
      for (fastq_file in all_fastq_files[1:2]) {
        print(bdss_transfer(paste0(url_base, '/', fastq_file)))
      }
    }
  }
}

# remove all .sra files from the working directory; unlike a shell glob this
# is a silent no-op when there is nothing to delete
unlink(list.files(pattern = '\\.sra$'))
```


# Rename files

```{r}
# Rename the extracted read files so that the mate suffixes _1 / _2 become
# _forward / _reverse.

# Map read-file paths to their renamed paths. Only the file name is rewritten,
# never the directory part, and an empty input gives an empty result.
forward_reverse_names <- function(paths) {
  renamed <- gsub('_1', '_forward', basename(paths), fixed = TRUE)
  renamed <- gsub('_2', '_reverse', renamed, fixed = TRUE)
  file.path(dirname(paths), renamed)
}

# full.names = TRUE yields character(0) for an empty or missing directory;
# pasting the directory prefix onto an empty listing would instead produce the
# bare directory path and try to rename the directory itself.
old_files <- list.files('./read_files_directory', full.names = TRUE)
new_files <- forward_reverse_names(old_files)
file.rename(old_files, new_files)
```