Mercurial > repos > mingchen0919 > rmarkdown_bdss_client
comparison bdss_client_sra.Rmd @ 22:89cc5b026494 draft
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_bdss_client commit 8d95d985955944734cd00ac94346e1197a4feb20-dirty
author | mingchen0919 |
---|---|
date | Sat, 14 Oct 2017 22:59:06 -0400 |
parents | a709a705ce09 |
children | fd15cf620d5d |
comparison
legend: equal, deleted, inserted, replaced
21:a709a705ce09 | 22:89cc5b026494 |
---|---|
36 # create two directories, one for single end and the other for paired end SRA reads. | 36 # create two directories, one for single end and the other for paired end SRA reads. |
37 dir.create('se_read_files_directory') | 37 dir.create('se_read_files_directory') |
38 dir.create('pe_read_files_directory') | 38 dir.create('pe_read_files_directory') |
39 # download and extract reads (single end) | 39 # download and extract reads (single end) |
40 sra_ids_se = strsplit(gsub(',', ' ', 'SRA_IDS_SE'), ' ')[[1]] | 40 sra_ids_se = strsplit(gsub(',', ' ', 'SRA_IDS_SE'), ' ')[[1]] |
41 sra_ids_se = sra_ids[sra_ids != ''] | 41 sra_ids_se = sra_ids_se[sra_ids_se != ''] |
42 # loop through SRA accessions to download and extract reads. | 42 # loop through SRA accessions to download and extract reads. |
43 for(id in sra_ids) { | 43 for(id in sra_ids_se) { |
44 # build URL from SRA id | 44 # build URL from SRA id |
45 url = paste0('ftp://ftp.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/', | 45 url = paste0('ftp://ftp.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/', |
46 substr(id, 1, 3), '/', | 46 substr(id, 1, 3), '/', |
47 substr(id, 1, 6), '/', id, '/', id, '.sra') | 47 substr(id, 1, 6), '/', id, '/', id, '.sra') |
48 # download sra file with bdss | 48 # download sra file with bdss |
49 bdss_command = paste0('/tool_deps/_conda/bin/bdss transfer -u ', url) | 49 bdss_command = paste0('/tool_deps/_conda/bin/bdss transfer -u ', url) |
50 system(bdss_command, intern = TRUE) | 50 system(bdss_command, intern = TRUE) |
51 # convert .sra to .fastq/.fasta | 51 # convert .sra to .fastq/.fasta |
52 if('FORMAT' == 'fasta') { | 52 if('FORMAT' == 'fasta') { |
53 command = paste0('fastq-dump --fasta -O read_files_directory ', id) | 53 command = paste0('fastq-dump --fasta -O se_read_files_directory ', id, '.sra') |
54 } else { | 54 } else { |
55 command = paste0('fastq-dump -O read_files_directory ', id) | 55 command = paste0('fastq-dump -O se_read_files_directory ', id, '.sra') |
56 } | 56 } |
57 cat('----convert SRA to fastq/fasta------\n') | |
58 print(system(command, intern = TRUE)) | |
57 } | 59 } |
58 | 60 |
59 # download and extract reads (paired end) | 61 # download and extract reads (paired end) |
60 sra_ids_pe = strsplit(gsub(',', ' ', 'SRA_IDS_PE'), ' ')[[1]] | 62 sra_ids_pe = strsplit(gsub(',', ' ', 'SRA_IDS_PE'), ' ')[[1]] |
61 sra_ids_pe = sra_ids[sra_ids != ''] | 63 sra_ids_pe = sra_ids_pe[sra_ids_pe != ''] |
62 # loop through SRA accessions to download and extract reads. | 64 # loop through SRA accessions to download and extract reads. |
63 for(id in sra_ids) { | 65 for(id in sra_ids_pe) { |
64 # build URL from SRA id | 66 # build URL from SRA id |
65 url = paste0('ftp://ftp.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/', | 67 url = paste0('ftp://ftp.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/', |
66 substr(id, 1, 3), '/', | 68 substr(id, 1, 3), '/', |
67 substr(id, 1, 6), '/', id, '/', id, '.sra') | 69 substr(id, 1, 6), '/', id, '/', id, '.sra') |
68 # download sra file with bdss | 70 # download sra file with bdss |
69 bdss_command = paste0('/tool_deps/_conda/bin/bdss transfer -u ', url) | 71 bdss_command = paste0('/tool_deps/_conda/bin/bdss transfer -u ', url) |
70 system(bdss_command, intern = TRUE) | 72 system(bdss_command, intern = TRUE) |
71 # convert .sra to .fastq/.fasta | 73 # convert .sra to .fastq/.fasta |
72 if('FORMAT' == 'fasta') { | 74 if('FORMAT' == 'fasta') { |
73 command = paste0('fastq-dump --fasta --split-files -O pe_read_files_directory ', id) | 75 command = paste0('fastq-dump --fasta --split-files -O pe_read_files_directory ', id, '.sra') |
74 } else { | 76 } else { |
75 command = paste0('fastq-dump --split-files -O pe_read_files_directory ', id) | 77 command = paste0('fastq-dump --split-files -O pe_read_files_directory ', id, '.sra') |
76 } | 78 } |
79 cat('----convert SRA to fastq/fasta------\n') | |
80 command_stdout = system(command, intern = TRUE) | |
81 print(command_stdout) | |
82 if(length(command_stdout) < 3) { | |
83 # this is not a paired end SRA file. The corresponding file will be deleted. | |
84 cat(paste0(id, 'is not paired end SRA, the corresponding fastq/fasta file will deleted.')) | |
85 system(paste0('rm pe_read_files_directory/', id, '_1.*'), intern = TRUE) | |
86 } | |
87 | |
77 } | 88 } |
78 | 89 |
90 cat('-----single end files----\n') | |
91 list.files('./se_read_files_directory') | |
92 cat('-----paired end files----\n') | |
93 list.files('./pe_read_files_directory') | |
94 | |
95 cat('-----Renaming files------\n') | |
79 # rename files for paired end reads | 96 # rename files for paired end reads |
80 old_files = paste0('./read_files_directory/', list.files('./read_files_directory')) | 97 old_files = paste0('./pe_read_files_directory/', list.files('./pe_read_files_directory')) |
81 new_files = gsub('_1', '_forward', old_files) | 98 new_files = gsub('_1', '_forward', old_files) |
82 new_files = gsub('_2', '_reverse', new_files) | 99 new_files = gsub('_2', '_reverse', new_files) |
83 file.rename(old_files, new_files) | 100 file.rename(old_files, new_files) |
84 ``` | 101 ``` |
85 | 102 |