Mercurial > repos > mingchen0919 > rmarkdown_bdss_client
view bdss_client_sra_se.Rmd @ 7:efb1938c3020 draft
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_bdss_client commit ea6df97960569d247d64be1549aa90768c9786f4-dirty
author | mingchen0919 |
---|---|
date | Wed, 11 Oct 2017 13:21:54 -0400 |
parents | c707a4178832 |
children | fe5f30584308 |
line wrap: on
line source
---
title: 'Fastq-dump: download and extract single end reads into FASTQ/FASTA file'
output:
    html_document:
      number_sections: true
      toc: true
      theme: cosmo
      highlight: tango
---

```{r setup, include=FALSE, warning=FALSE, message=FALSE}
# NOTE(review): the upper-case tokens used in this document (the chunk echo
# setting, the accession list, the download-mode flag and the output format)
# look like template placeholders that are substituted before rendering --
# confirm against the tool wrapper. They are kept verbatim here.
knitr::opts_chunk$set(
  echo = ECHO
)
```

# Command line arguments

```{r 'command line arguments'}
# `opt` is not defined in this document; presumably it is created by the
# script that renders it -- verify against the caller.
str(opt)
```

# BDSS configuration file

First, we create a bdss configuration file `bdss.cfg` in the current directory.

```{r}
# Write both configuration lines in one call instead of shelling out to `echo`.
writeLines(
  c('[metadata_repository]', 'url=http://bdss.bioinfo.wsu.edu/'),
  'bdss.cfg'
)
```

# Download and extract reads

```{r 'download and extract reads'}
# create a directory to store read files
dir.create('read_files_directory', showWarnings = FALSE)

# Run `bdss transfer` for a single URL and print the output captured from it.
# The URL is shell-quoted because it is built from user-supplied accessions.
run_bdss_transfer <- function(url) {
  bdss_command <- paste0(
    '/tool_deps/_conda/bin/bdss transfer -u ',
    shQuote(url)
  )
  print(system(bdss_command, intern = TRUE))
}

# split the accession list on commas/spaces and drop empty entries
sra_accessions <- strsplit(gsub(',', ' ', 'SRA_ACCESSION'), ' ')[[1]]
sra_accessions <- sra_accessions[sra_accessions != '']

# the fastq-dump invocation only differs by the --fasta switch, so build the
# common prefix once, outside the loop
fastq_dump_prefix <- if ('FORMAT' == 'fasta') {
  'fastq-dump --fasta -O read_files_directory '
} else {
  'fastq-dump -O read_files_directory '
}

# loop through SRA accessions to download reads
for (id in sra_accessions) {
  if (SRA_BOOLEAN) {
    # download the .sra archive; it is converted to fastq/fasta further below
    url <- paste0(
      'ftp://ftp.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/',
      substr(id, 1, 3), '/', substr(id, 1, 6), '/', id, '/', id, '.sra'
    )
    run_bdss_transfer(url)
  } else {
    # download the fastq file directly
    url_base <- paste0(
      'ftp://ftp.sra.ebi.ac.uk/vol1/fastq/',
      substr(id, 1, 6), '/', id
    )
    # list all fastq files available for this accession
    all_fastq_files <- strsplit(
      getURL(url_base, dirlistonly = TRUE),
      split = '\n'
    )[[1]]
    if (length(all_fastq_files) == 1) {
      # this is a single end SRA file.
      # build url to download fastq
      run_bdss_transfer(paste0(url_base, '/', all_fastq_files))
    } else {
      # Anything other than exactly one listed file is skipped; downloading
      # paired files is not implemented in this single-end document.
      print(paste0('Two fastq files found for this SRA accession: ', id))
      print(paste0('Downloading skipped for ', id))
    }
  }
}

# One fastq-dump command per accession. This stays empty when fastq files were
# downloaded directly, so the chunks below have nothing to run instead of
# failing on an undefined variable. vapply() keeps the result a zero-length
# character vector when there are no accessions.
commands <- if (SRA_BOOLEAN) {
  vapply(
    sra_accessions,
    function(id) paste0(fastq_dump_prefix, shQuote(id)),
    character(1),
    USE.NAMES = FALSE
  )
} else {
  character(0)
}
```

* `fastq-dump` command

```{r}
print(commands)
```

* `command line stdout`

```{r}
# run the conversion for every accession, not only the last one
for (command in commands) {
  print(system(command = command, intern = TRUE))
}

# remove all .sra files
# NOTE(review): cleanup now happens after the conversion so the downloaded
# archives are still present while fastq-dump runs. fastq-dump is given the
# bare accession, so whether it reads the local file depends on how sra-tools
# resolves accessions -- TODO confirm. unlink() is a no-op when nothing matches.
unlink(Sys.glob('*.sra'))
```