Mercurial > repos > mingchen0919 > rmarkdown_bdss_client
view bdss_client_sra.Rmd @ 21:a709a705ce09 draft
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_bdss_client commit 813dcaa22f297814dd6d6a8c4c5ff01664942aa6-dirty
author | mingchen0919 |
---|---|
date | Sat, 14 Oct 2017 19:54:57 -0400 |
parents | |
children | 89cc5b026494 |
line wrap: on
line source
---
title: 'Download and extract single end and paired end fastq/fasta data with BDSS client from SRA accessions'
output:
    html_document:
      number_sections: true
      toc: true
      theme: cosmo
      highlight: tango
---

```{r setup, include=FALSE, warning=FALSE, message=FALSE}
# Chunk defaults for the whole report. error=TRUE lets knitting continue and
# shows the error in the report when a chunk expression fails.
knitr::opts_chunk$set(
  echo = ECHO,
  error=TRUE
)
```

# Command line arguments

```{r 'command line arguments'}
# `opt` is not defined in this document; it is expected to exist in the
# environment that renders it.
str(opt)
```

# BDSS configuration file

First, we create a bdss configuration file `bdss.cfg` in the current directory.

```{r}
# Write the two-line configuration file directly from R instead of shelling
# out to `echo`; the resulting file content is the same.
writeLines(
  c('[metadata_repository]', 'url=http://bdss.bioinfo.wsu.edu/'),
  'bdss.cfg'
)
```

# Download and extract reads

```{r 'download and extract reads'}
# Create two directories, one for single end and the other for paired end
# SRA reads.
dir.create('se_read_files_directory')
dir.create('pe_read_files_directory')

# Split a comma- and/or space-separated string of accessions into a character
# vector and drop the empty entries left by repeated separators.
parse_sra_ids <- function(ids_string) {
  ids <- strsplit(gsub(',', ' ', ids_string), ' ')[[1]]
  ids[ids != '']
}

# Download every accession in `ids` with the BDSS client, then convert the
# downloaded run with fastq-dump.
#   ids         character vector of SRA run accessions (may be empty)
#   output_dir  directory handed to fastq-dump through -O
#   dump_flags  extra fastq-dump flags placed before -O
fetch_reads <- function(ids, output_dir, dump_flags = character(0)) {
  for (id in ids) {
    # build URL from SRA id
    url <- paste0(
      'ftp://ftp.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/',
      substr(id, 1, 3), '/', substr(id, 1, 6), '/', id, '/', id, '.sra'
    )

    # download sra file with bdss; the accession comes from user input, so
    # the URL is shell-quoted before it reaches the command line
    bdss_command <- paste(
      '/tool_deps/_conda/bin/bdss transfer -u', shQuote(url)
    )
    system(bdss_command, intern = TRUE)

    # convert .sra to .fastq/.fasta
    # NOTE(review): fastq-dump is given the bare accession, as in the original
    # command string; confirm it picks up the file fetched by bdss rather
    # than downloading the run again.
    dump_command <- paste(
      c('fastq-dump', dump_flags, '-O', output_dir, shQuote(id)),
      collapse = ' '
    )
    system(dump_command, intern = TRUE)
  }
}

# Requested output type: add --fasta only when fasta output was selected.
fasta_flag <- if ('FORMAT' == 'fasta') '--fasta' else character(0)

# download and extract reads (single end)
sra_ids_se <- parse_sra_ids('SRA_IDS_SE')
fetch_reads(sra_ids_se, 'se_read_files_directory', fasta_flag)

# download and extract reads (paired end); --split-files makes fastq-dump
# write the two mates to separate `_1` / `_2` files
sra_ids_pe <- parse_sra_ids('SRA_IDS_PE')
fetch_reads(
  sra_ids_pe,
  'pe_read_files_directory',
  c(fasta_flag, '--split-files')
)

# rename files for paired end reads: only the mate suffix directly in front
# of the file extension is replaced, so other parts of the path are untouched
old_files <- list.files('pe_read_files_directory', full.names = TRUE)
new_files <- sub('_1(\\.[^.]+)$', '_forward\\1', old_files)
new_files <- sub('_2(\\.[^.]+)$', '_reverse\\1', new_files)
file.rename(old_files, new_files)
```