rmarkdown_bdss_client: bdss_client

comparison bdss_client_sra.Rmd @ 22:89cc5b026494 draft

planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_bdss_client commit 8d95d985955944734cd00ac94346e1197a4feb20-dirty

author	mingchen0919
date	Sat, 14 Oct 2017 22:59:06 -0400
parents	a709a705ce09
children	fd15cf620d5d

comparison

equal deleted inserted replaced

-:a709a705ce09
+:89cc5b026494
 # create two directories, one for single end and the other for paired end SRA reads.
 dir.create('se_read_files_directory')
 dir.create('pe_read_files_directory')
 # download and extract reads (single end)
 sra_ids_se = strsplit(gsub(',', ' ', 'SRA_IDS_SE'), ' ')[[1]]
-sra_ids_se = sra_ids[sra_ids != '']
+sra_ids_se = sra_ids_se[sra_ids_se != '']
 # loop through SRA accessions to download and extract reads.
-for(id in sra_ids) {
+for(id in sra_ids_se) {
 # build URL from SRA id
 url = paste0('ftp://ftp.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/',
 substr(id, 1, 3), '/',
 substr(id, 1, 6), '/', id, '/', id, '.sra')
 # download sra file with bdss
 bdss_command = paste0('/tool_deps/_conda/bin/bdss transfer -u ', url)
 system(bdss_command, intern = TRUE)
 # convert .sra to .fastq/.fasta
 if('FORMAT' == 'fasta') {
-command = paste0('fastq-dump --fasta -O read_files_directory ', id)
+command = paste0('fastq-dump --fasta -O se_read_files_directory ', id, '.sra')
 } else {
-command = paste0('fastq-dump -O read_files_directory ', id)
+command = paste0('fastq-dump -O se_read_files_directory ', id, '.sra')
 }
+cat('----convert SRA to fastq/fasta------\n')
+print(system(command, intern = TRUE))
 }
 # download and extract reads (paired end)
 sra_ids_pe = strsplit(gsub(',', ' ', 'SRA_IDS_PE'), ' ')[[1]]
-sra_ids_pe = sra_ids[sra_ids != '']
+sra_ids_pe = sra_ids_pe[sra_ids_pe != '']
 # loop through SRA accessions to download and extract reads.
-for(id in sra_ids) {
+for(id in sra_ids_pe) {
 # build URL from SRA id
 url = paste0('ftp://ftp.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/',
 substr(id, 1, 3), '/',
 substr(id, 1, 6), '/', id, '/', id, '.sra')
 # download sra file with bdss
 bdss_command = paste0('/tool_deps/_conda/bin/bdss transfer -u ', url)
 system(bdss_command, intern = TRUE)
 # convert .sra to .fastq/.fasta
 if('FORMAT' == 'fasta') {
-command = paste0('fastq-dump --fasta --split-files -O pe_read_files_directory ', id)
+command = paste0('fastq-dump --fasta --split-files -O pe_read_files_directory ', id, '.sra')
 } else {
-command = paste0('fastq-dump --split-files -O pe_read_files_directory ', id)
+command = paste0('fastq-dump --split-files -O pe_read_files_directory ', id, '.sra')
 }
+cat('----convert SRA to fastq/fasta------\n')
+command_stdout = system(command, intern = TRUE)
+print(command_stdout)
+if(length(command_stdout) < 3) {
+# this is not a paired end SRA file. The corresponding file will be deleted.
+cat(paste0(id, 'is not paired end SRA, the corresponding fastq/fasta file will deleted.'))
+system(paste0('rm pe_read_files_directory/', id, '_1.*'), intern = TRUE)
+}
 }
+cat('-----single end files----\n')
+list.files('./se_read_files_directory')
+cat('-----paired end files----\n')
+list.files('./pe_read_files_directory')
+cat('-----Renaming files------\n')
 # rename files for paired end reads
-old_files = paste0('./read_files_directory/', list.files('./read_files_directory'))
+old_files = paste0('./pe_read_files_directory/', list.files('./pe_read_files_directory'))
 new_files = gsub('_1', '_forward', old_files)
 new_files = gsub('_2', '_reverse', new_files)
 file.rename(old_files, new_files)
 ```

Mercurial > repos > mingchen0919 > rmarkdown_bdss_client

comparison bdss_client_sra.Rmd @ 22:89cc5b026494 draft