Mercurial > repos > mingchen0919 > rmarkdown_bdss_client
comparison bdss_client_sra.Rmd @ 21:a709a705ce09 draft
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_bdss_client commit 813dcaa22f297814dd6d6a8c4c5ff01664942aa6-dirty
author | mingchen0919 |
---|---|
date | Sat, 14 Oct 2017 19:54:57 -0400 |
parents | |
children | 89cc5b026494 |
comparison
equal
deleted
inserted
replaced
20:531c00a2acbf | 21:a709a705ce09 |
---|---|
1 --- | |
2 title: 'Download and extract single-end and paired-end fastq/fasta data with BDSS client from SRA accessions' | |
3 output: | |
4 html_document: | |
5 number_sections: true | |
6 toc: true | |
7 theme: cosmo | |
8 highlight: tango | |
9 --- | |
10 | |
11 ```{r setup, include=FALSE, warning=FALSE, message=FALSE} | |
# Report-wide chunk defaults: whether code is echoed, and keep knitting when a
# chunk raises an error (the error is shown in the report instead).
# NOTE(review): the bare upper-case token below is presumably a placeholder the
# tool wrapper replaces with TRUE/FALSE before knitting -- confirm.
chunk_defaults <- list(echo = ECHO, error = TRUE)
do.call(knitr::opts_chunk$set, chunk_defaults)
16 ``` | |
17 | |
18 # Command line arguments | |
19 | |
20 ```{r 'command line arguments'} | |
# Show the structure of the option list this report was rendered with.
# NOTE(review): `opt` is not defined in this document; presumably the calling
# script creates it before knitting -- confirm.
utils::str(object = opt)
22 ``` | |
23 | |
24 # BDSS configuration file | |
25 | |
26 First, we create a BDSS configuration file `bdss.cfg` in the current directory. | |
27 | |
28 ```{r} | |
# Write the BDSS client configuration into the current directory.
# writeLines() produces the same two lines the shell `echo` redirections did,
# without depending on a shell being available.
writeLines(
  c('[metadata_repository]',
    'url=http://bdss.bioinfo.wsu.edu/'),
  con = 'bdss.cfg'
)
31 ``` | |
32 | |
33 # Download and extract reads | |
34 | |
35 ```{r 'download and extract reads'} | |
# Download SRA runs with the BDSS client and convert them to fastq/fasta,
# keeping single-end and paired-end reads in separate directories.
#
# NOTE(review): the quoted upper-case tokens further down look like template
# placeholders that the tool wrapper substitutes before knitting -- confirm.
# They are kept verbatim and intentionally not repeated in comments.

# Split a comma- and/or whitespace-separated string of accessions into a
# character vector, dropping empty entries.
parse_sra_ids <- function(ids_string) {
  ids <- strsplit(ids_string, '[,[:space:]]+')[[1]]
  ids[ids != '']
}

# Build the NCBI FTP URL of the .sra file for one run accession.
sra_url <- function(id) {
  paste0('ftp://ftp.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/',
         substr(id, 1, 3), '/',
         substr(id, 1, 6), '/', id, '/', id, '.sra')
}

# Build the shell command that downloads one accession with the BDSS client.
# The URL is shell-quoted because the accession comes from user input.
bdss_transfer_command <- function(id) {
  paste('/tool_deps/_conda/bin/bdss transfer -u', shQuote(sra_url(id)))
}

# Build the shell command that converts one accession with fastq-dump.
fastq_dump_command <- function(id, out_dir, use_fasta, split_files = FALSE) {
  flags <- c(if (use_fasta) '--fasta', if (split_files) '--split-files')
  paste(c('fastq-dump', flags, '-O', shQuote(out_dir), shQuote(id)),
        collapse = ' ')
}

# Rename mate files: a trailing `_1`/`_2` directly before the .fastq/.fasta
# extension becomes `_forward`/`_reverse`. The pattern is anchored so nothing
# else in the file name is rewritten.
label_mate_files <- function(files) {
  files <- sub('_1(\\.fast[aq])$', '_forward\\1', files)
  sub('_2(\\.fast[aq])$', '_reverse\\1', files)
}

# Download every accession in `ids` and extract its reads into `out_dir`.
# A failing conversion raises a warning and the loop moves on to the next
# accession, so one bad id does not stop the remaining ones.
fetch_sra_reads <- function(ids, out_dir, use_fasta, split_files = FALSE) {
  for (id in ids) {
    # download sra file with bdss; R itself warns on a non-zero exit status
    system(bdss_transfer_command(id), intern = TRUE)
    # convert .sra to .fastq/.fasta
    status <- system(fastq_dump_command(id, out_dir, use_fasta, split_files))
    if (status != 0) {
      warning('fastq-dump failed for ', id, ' (exit status ', status, ')',
              call. = FALSE)
    }
  }
  invisible(ids)
}

# create two directories, one for single end and the other for paired end
# SRA reads.
# NOTE(review): single-end reads now go to the single-end directory created
# here; confirm the tool wrapper collects its outputs from these two
# directories.
se_dir <- 'se_read_files_directory'
pe_dir <- 'pe_read_files_directory'
dir.create(se_dir, showWarnings = FALSE)
dir.create(pe_dir, showWarnings = FALSE)

use_fasta <- 'FORMAT' == 'fasta'

# download and extract reads (single end)
sra_ids_se <- parse_sra_ids('SRA_IDS_SE')
fetch_sra_reads(sra_ids_se, se_dir, use_fasta)

# download and extract reads (paired end)
sra_ids_pe <- parse_sra_ids('SRA_IDS_PE')
fetch_sra_reads(sra_ids_pe, pe_dir, use_fasta, split_files = TRUE)

# rename files for paired end reads
old_files <- list.files(pe_dir, full.names = TRUE)
new_files <- label_mate_files(old_files)
needs_rename <- old_files != new_files
file.rename(old_files[needs_rename], new_files[needs_rename])
84 ``` | |
85 | |
86 |