Mercurial > repos > mingchen0919 > rmarkdown_bdss_client
diff bdss_client_sra.Rmd @ 22:89cc5b026494 draft
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_bdss_client commit 8d95d985955944734cd00ac94346e1197a4feb20-dirty
author | mingchen0919 |
---|---|
date | Sat, 14 Oct 2017 22:59:06 -0400 |
parents | a709a705ce09 |
children | fd15cf620d5d |
line wrap: on
line diff
```diff
--- a/bdss_client_sra.Rmd Sat Oct 14 19:54:57 2017 -0400
+++ b/bdss_client_sra.Rmd Sat Oct 14 22:59:06 2017 -0400
@@ -38,9 +38,9 @@
 dir.create('pe_read_files_directory')
 # download and extract reads (single end)
 sra_ids_se = strsplit(gsub(',', ' ', 'SRA_IDS_SE'), ' ')[[1]]
-sra_ids_se = sra_ids[sra_ids != '']
+sra_ids_se = sra_ids_se[sra_ids_se != '']
 # loop through SRA accessions to download and extract reads.
-for(id in sra_ids) {
+for(id in sra_ids_se) {
   # build URL from SRA id
   url = paste0('ftp://ftp.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/',
                substr(id, 1, 3), '/',
@@ -50,17 +50,19 @@
   system(bdss_command, intern = TRUE)
   # convert .sra to .fastq/.fasta
   if('FORMAT' == 'fasta') {
-    command = paste0('fastq-dump --fasta -O read_files_directory ', id)
+    command = paste0('fastq-dump --fasta -O se_read_files_directory ', id, '.sra')
   } else {
-    command = paste0('fastq-dump -O read_files_directory ', id)
+    command = paste0('fastq-dump -O se_read_files_directory ', id, '.sra')
   }
+  cat('----convert SRA to fastq/fasta------\n')
+  print(system(command, intern = TRUE))
 }
 
 # download and extract reads (paired end)
 sra_ids_pe = strsplit(gsub(',', ' ', 'SRA_IDS_PE'), ' ')[[1]]
-sra_ids_pe = sra_ids[sra_ids != '']
+sra_ids_pe = sra_ids_pe[sra_ids_pe != '']
 # loop through SRA accessions to download and extract reads.
-for(id in sra_ids) {
+for(id in sra_ids_pe) {
   # build URL from SRA id
   url = paste0('ftp://ftp.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/',
                substr(id, 1, 3), '/',
@@ -70,14 +72,29 @@
   system(bdss_command, intern = TRUE)
   # convert .sra to .fastq/.fasta
   if('FORMAT' == 'fasta') {
-    command = paste0('fastq-dump --fasta --split-files -O pe_read_files_directory ', id)
+    command = paste0('fastq-dump --fasta --split-files -O pe_read_files_directory ', id, '.sra')
   } else {
-    command = paste0('fastq-dump --split-files -O pe_read_files_directory ', id)
+    command = paste0('fastq-dump --split-files -O pe_read_files_directory ', id, '.sra')
   }
+  cat('----convert SRA to fastq/fasta------\n')
+  command_stdout = system(command, intern = TRUE)
+  print(command_stdout)
+  if(length(command_stdout) < 3) {
+    # this is not a paired end SRA file. The corresponding file will be deleted.
+    cat(paste0(id, 'is not paired end SRA, the corresponding fastq/fasta file will deleted.'))
+    system(paste0('rm pe_read_files_directory/', id, '_1.*'), intern = TRUE)
+  }
+
 }
 
+cat('-----single end files----\n')
+list.files('./se_read_files_directory')
+cat('-----paired end files----\n')
+list.files('./pe_read_files_directory')
+
+cat('-----Renaming files------\n')
 # rename files for paired end reads
-old_files = paste0('./read_files_directory/', list.files('./read_files_directory'))
+old_files = paste0('./pe_read_files_directory/', list.files('./pe_read_files_directory'))
 new_files = gsub('_1', '_forward', old_files)
 new_files = gsub('_2', '_reverse', new_files)
 file.rename(old_files, new_files)
```