comparison bdss_client_sra.Rmd @ 21:a709a705ce09 draft

planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_bdss_client commit 813dcaa22f297814dd6d6a8c4c5ff01664942aa6-dirty
author mingchen0919
date Sat, 14 Oct 2017 19:54:57 -0400
parents
children 89cc5b026494
comparison
equal deleted inserted replaced
20:531c00a2acbf 21:a709a705ce09
1 ---
2 title: 'Download and extract single end and paired end fastq/fasta data with BDSS client from SRA accessions'
3 output:
4 html_document:
5 number_sections: true
6 toc: true
7 theme: cosmo
8 highlight: tango
9 ---
10
11 ```{r setup, include=FALSE, warning=FALSE, message=FALSE}
# Default options applied to every later chunk. The value given to `echo` is a
# template token, presumably substituted by the tool wrapper before the
# document is rendered -- confirm. `error = TRUE` is set for all chunks.
knitr::opts_chunk$set(echo = ECHO, error = TRUE)
16 ```
17
18 # Command line arguments
19
20 ```{r 'command line arguments'}
# Print a compact summary of the structure of `opt`, which is defined outside
# this document (presumably the parsed command line options -- confirm).
str(object = opt)
22 ```
23
24 # BDSS configuration file
25
26 First, we create a BDSS configuration file `bdss.cfg` in the current directory.
27
28 ```{r}
# Write the BDSS client configuration file in the current directory.
# The file is written directly from R rather than by shelling out to `echo`
# with redirection: the resulting content is the same two lines, but this does
# not depend on the shell's quoting rules and fails loudly if the file cannot
# be written.
writeLines(
  c('[metadata_repository]', 'url=http://bdss.bioinfo.wsu.edu/'),
  con = 'bdss.cfg'
)
31 ```
32
33 # Download and extract reads
34
35 ```{r 'download and extract reads'}
# Download SRA archives with the BDSS client and convert them to read files.
#
# Single end and paired end accessions go through the same helper; they differ
# only in the output directory and in whether fastq-dump writes the two mates
# to separate files.

# Output directories, one per library layout.
se_dir <- 'se_read_files_directory'
pe_dir <- 'pe_read_files_directory'
dir.create(se_dir, showWarnings = FALSE)
dir.create(pe_dir, showWarnings = FALSE)

# Requested output type. The left-hand literal is a template token, presumably
# substituted by the tool wrapper before rendering -- confirm.
as_fasta <- 'FORMAT' == 'fasta'

# Split a comma and/or space separated accession string into a character
# vector, dropping the empty pieces left behind by repeated separators.
#
# id_string: a single character string of accessions.
# Returns a character vector (possibly of length zero).
parse_sra_ids <- function(id_string) {
  ids <- strsplit(gsub(',', ' ', id_string), ' ')[[1]]
  ids[ids != '']
}

# Download each accession with bdss and convert it with fastq-dump.
#
# ids:         character vector of SRA run accessions.
# out_dir:     directory that receives the fastq-dump output.
# fasta:       TRUE to pass --fasta to fastq-dump.
# split_files: TRUE to pass --split-files (used for paired end runs).
# Returns `ids` invisibly; the work is done through side effects.
fetch_sra_reads <- function(ids, out_dir, fasta = FALSE, split_files = FALSE) {
  for (id in ids) {
    # Build the archive URL from the accession: the first three and the first
    # six characters of the accession are used as parent directories.
    url <- paste0('ftp://ftp.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/',
                  substr(id, 1, 3), '/',
                  substr(id, 1, 6), '/', id, '/', id, '.sra')

    # Accessions come from user input, so every value interpolated into a
    # shell command line is quoted.
    bdss_command <- paste('/tool_deps/_conda/bin/bdss transfer -u',
                          shQuote(url))
    system(bdss_command, intern = TRUE)

    # Convert .sra to .fastq/.fasta. Previously this command was only
    # assembled and never executed.
    # NOTE(review): the accession itself (not the path of the downloaded .sra
    # file) is handed to fastq-dump, as in the earlier version -- confirm that
    # fastq-dump resolves it to the file fetched above.
    dump_args <- c(
      if (fasta) '--fasta',
      if (split_files) '--split-files',
      '-O', shQuote(out_dir),
      shQuote(id)
    )
    dump_command <- paste('fastq-dump', paste(dump_args, collapse = ' '))
    system(dump_command, intern = TRUE)
  }
  invisible(ids)
}

# download and extract reads (single end)
sra_ids_se <- parse_sra_ids('SRA_IDS_SE')
fetch_sra_reads(sra_ids_se, se_dir, fasta = as_fasta, split_files = FALSE)

# download and extract reads (paired end)
sra_ids_pe <- parse_sra_ids('SRA_IDS_PE')
fetch_sra_reads(sra_ids_pe, pe_dir, fasta = as_fasta, split_files = TRUE)

# Rename the paired end files: a `_1` / `_2` suffix directly before the file
# extension becomes `_forward` / `_reverse`. The patterns are anchored to the
# end of the file name so that nothing else in the name is rewritten.
old_files <- list.files(pe_dir, full.names = TRUE)
new_names <- sub('_1(\\.[^.]+)$', '_forward\\1', basename(old_files))
new_names <- sub('_2(\\.[^.]+)$', '_reverse\\1', new_names)
needs_rename <- new_names != basename(old_files)
file.rename(old_files[needs_rename],
            file.path(pe_dir, new_names[needs_rename]))
84 ```
85
86