comparison bdss_client_sra.Rmd @ 22:89cc5b026494 draft

planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_bdss_client commit 8d95d985955944734cd00ac94346e1197a4feb20-dirty
author mingchen0919
date Sat, 14 Oct 2017 22:59:06 -0400
parents a709a705ce09
children fd15cf620d5d
comparison
equal deleted inserted replaced
21:a709a705ce09 22:89cc5b026494
36 # create two directories, one for single end and the other for paired end SRA reads. 36 # create two directories, one for single end and the other for paired end SRA reads.
37 dir.create('se_read_files_directory') 37 dir.create('se_read_files_directory')
38 dir.create('pe_read_files_directory') 38 dir.create('pe_read_files_directory')
39 # download and extract reads (single end) 39 # download and extract reads (single end)
40 sra_ids_se = strsplit(gsub(',', ' ', 'SRA_IDS_SE'), ' ')[[1]] 40 sra_ids_se = strsplit(gsub(',', ' ', 'SRA_IDS_SE'), ' ')[[1]]
41 sra_ids_se = sra_ids[sra_ids != ''] 41 sra_ids_se = sra_ids_se[sra_ids_se != '']
42 # loop through SRA accessions to download and extract reads. 42 # loop through SRA accessions to download and extract reads.
43 for(id in sra_ids) { 43 for(id in sra_ids_se) {
44 # build URL from SRA id 44 # build URL from SRA id
45 url = paste0('ftp://ftp.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/', 45 url = paste0('ftp://ftp.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/',
46 substr(id, 1, 3), '/', 46 substr(id, 1, 3), '/',
47 substr(id, 1, 6), '/', id, '/', id, '.sra') 47 substr(id, 1, 6), '/', id, '/', id, '.sra')
48 # download sra file with bdss 48 # download sra file with bdss
49 bdss_command = paste0('/tool_deps/_conda/bin/bdss transfer -u ', url) 49 bdss_command = paste0('/tool_deps/_conda/bin/bdss transfer -u ', url)
50 system(bdss_command, intern = TRUE) 50 system(bdss_command, intern = TRUE)
51 # convert .sra to .fastq/.fasta 51 # convert .sra to .fastq/.fasta
52 if('FORMAT' == 'fasta') { 52 if('FORMAT' == 'fasta') {
53 command = paste0('fastq-dump --fasta -O read_files_directory ', id) 53 command = paste0('fastq-dump --fasta -O se_read_files_directory ', id, '.sra')
54 } else { 54 } else {
55 command = paste0('fastq-dump -O read_files_directory ', id) 55 command = paste0('fastq-dump -O se_read_files_directory ', id, '.sra')
56 } 56 }
57 cat('----convert SRA to fastq/fasta------\n')
58 print(system(command, intern = TRUE))
57 } 59 }
58 60
59 # download and extract reads (paired end) 61 # download and extract reads (paired end)
60 sra_ids_pe = strsplit(gsub(',', ' ', 'SRA_IDS_PE'), ' ')[[1]] 62 sra_ids_pe = strsplit(gsub(',', ' ', 'SRA_IDS_PE'), ' ')[[1]]
61 sra_ids_pe = sra_ids[sra_ids != ''] 63 sra_ids_pe = sra_ids_pe[sra_ids_pe != '']
62 # loop through SRA accessions to download and extract reads. 64 # loop through SRA accessions to download and extract reads.
63 for(id in sra_ids) { 65 for(id in sra_ids_pe) {
64 # build URL from SRA id 66 # build URL from SRA id
65 url = paste0('ftp://ftp.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/', 67 url = paste0('ftp://ftp.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/',
66 substr(id, 1, 3), '/', 68 substr(id, 1, 3), '/',
67 substr(id, 1, 6), '/', id, '/', id, '.sra') 69 substr(id, 1, 6), '/', id, '/', id, '.sra')
68 # download sra file with bdss 70 # download sra file with bdss
69 bdss_command = paste0('/tool_deps/_conda/bin/bdss transfer -u ', url) 71 bdss_command = paste0('/tool_deps/_conda/bin/bdss transfer -u ', url)
70 system(bdss_command, intern = TRUE) 72 system(bdss_command, intern = TRUE)
71 # convert .sra to .fastq/.fasta 73 # convert .sra to .fastq/.fasta
72 if('FORMAT' == 'fasta') { 74 if('FORMAT' == 'fasta') {
73 command = paste0('fastq-dump --fasta --split-files -O pe_read_files_directory ', id) 75 command = paste0('fastq-dump --fasta --split-files -O pe_read_files_directory ', id, '.sra')
74 } else { 76 } else {
75 command = paste0('fastq-dump --split-files -O pe_read_files_directory ', id) 77 command = paste0('fastq-dump --split-files -O pe_read_files_directory ', id, '.sra')
76 } 78 }
79 cat('----convert SRA to fastq/fasta------\n')
80 command_stdout = system(command, intern = TRUE)
81 print(command_stdout)
82 if(length(command_stdout) < 3) {
83 # this is not a paired end SRA file. The corresponding file will be deleted.
84 cat(paste0(id, 'is not paired end SRA, the corresponding fastq/fasta file will deleted.'))
85 system(paste0('rm pe_read_files_directory/', id, '_1.*'), intern = TRUE)
86 }
87
77 } 88 }
78 89
90 cat('-----single end files----\n')
91 list.files('./se_read_files_directory')
92 cat('-----paired end files----\n')
93 list.files('./pe_read_files_directory')
94
95 cat('-----Renaming files------\n')
79 # rename files for paired end reads 96 # rename files for paired end reads
80 old_files = paste0('./read_files_directory/', list.files('./read_files_directory')) 97 old_files = paste0('./pe_read_files_directory/', list.files('./pe_read_files_directory'))
81 new_files = gsub('_1', '_forward', old_files) 98 new_files = gsub('_1', '_forward', old_files)
82 new_files = gsub('_2', '_reverse', new_files) 99 new_files = gsub('_2', '_reverse', new_files)
83 file.rename(old_files, new_files) 100 file.rename(old_files, new_files)
84 ``` 101 ```
85 102