Mercurial > repos > mingchen0919 > rmarkdown_bdss_client
changeset 22:89cc5b026494 draft
planemo upload for repository https://github.com/statonlab/docker-GRReport/tree/master/my_tools/rmarkdown_bdss_client commit 8d95d985955944734cd00ac94346e1197a4feb20-dirty
author | mingchen0919 |
---|---|
date | Sat, 14 Oct 2017 22:59:06 -0400 |
parents | a709a705ce09 |
children | fd15cf620d5d |
files | bdss_client_sra.Rmd bdss_client_sra.xml bdss_client_sra_render.R |
diffstat | 3 files changed, 33 insertions(+), 14 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/bdss_client_sra.Rmd Sat Oct 14 19:54:57 2017 -0400
+++ b/bdss_client_sra.Rmd Sat Oct 14 22:59:06 2017 -0400
@@ -38,9 +38,9 @@
 dir.create('pe_read_files_directory')
 # download and extract reads (single end)
 sra_ids_se = strsplit(gsub(',', ' ', 'SRA_IDS_SE'), ' ')[[1]]
-sra_ids_se = sra_ids[sra_ids != '']
+sra_ids_se = sra_ids_se[sra_ids_se != '']
 # loop through SRA accessions to download and extract reads.
-for(id in sra_ids) {
+for(id in sra_ids_se) {
   # build URL from SRA id
   url = paste0('ftp://ftp.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/',
                substr(id, 1, 3), '/',
@@ -50,17 +50,19 @@
   system(bdss_command, intern = TRUE)
   # convert .sra to .fastq/.fasta
   if('FORMAT' == 'fasta') {
-    command = paste0('fastq-dump --fasta -O read_files_directory ', id)
+    command = paste0('fastq-dump --fasta -O se_read_files_directory ', id, '.sra')
   } else {
-    command = paste0('fastq-dump -O read_files_directory ', id)
+    command = paste0('fastq-dump -O se_read_files_directory ', id, '.sra')
   }
+  cat('----convert SRA to fastq/fasta------\n')
+  print(system(command, intern = TRUE))
 }
 
 # download and extract reads (paired end)
 sra_ids_pe = strsplit(gsub(',', ' ', 'SRA_IDS_PE'), ' ')[[1]]
-sra_ids_pe = sra_ids[sra_ids != '']
+sra_ids_pe = sra_ids_pe[sra_ids_pe != '']
 # loop through SRA accessions to download and extract reads.
-for(id in sra_ids) {
+for(id in sra_ids_pe) {
   # build URL from SRA id
   url = paste0('ftp://ftp.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/',
                substr(id, 1, 3), '/',
@@ -70,14 +72,29 @@
   system(bdss_command, intern = TRUE)
   # convert .sra to .fastq/.fasta
   if('FORMAT' == 'fasta') {
-    command = paste0('fastq-dump --fasta --split-files -O pe_read_files_directory ', id)
+    command = paste0('fastq-dump --fasta --split-files -O pe_read_files_directory ', id, '.sra')
   } else {
-    command = paste0('fastq-dump --split-files -O pe_read_files_directory ', id)
+    command = paste0('fastq-dump --split-files -O pe_read_files_directory ', id, '.sra')
   }
+  cat('----convert SRA to fastq/fasta------\n')
+  command_stdout = system(command, intern = TRUE)
+  print(command_stdout)
+  if(length(command_stdout) < 3) {
+    # this is not a paired end SRA file. The corresponding file will be deleted.
+    cat(paste0(id, 'is not paired end SRA, the corresponding fastq/fasta file will deleted.'))
+    system(paste0('rm pe_read_files_directory/', id, '_1.*'), intern = TRUE)
+  }
+
 }
 
+cat('-----single end files----\n')
+list.files('./se_read_files_directory')
+cat('-----paired end files----\n')
+list.files('./pe_read_files_directory')
+
+cat('-----Renaming files------\n')
 # rename files for paired end reads
-old_files = paste0('./read_files_directory/', list.files('./read_files_directory'))
+old_files = paste0('./pe_read_files_directory/', list.files('./pe_read_files_directory'))
 new_files = gsub('_1', '_forward', old_files)
 new_files = gsub('_2', '_reverse', new_files)
 file.rename(old_files, new_files)
```
```diff
--- a/bdss_client_sra.xml Sat Oct 14 19:54:57 2017 -0400
+++ b/bdss_client_sra.xml Sat Oct 14 22:59:06 2017 -0400
@@ -24,6 +24,7 @@
         Rscript '${__tool_directory__}/bdss_client_sra_render.R'
             -i '$sra_ids_se'
             -p '$sra_ids_pe'
+            -f $format
             -e $echo
             -r $report
 
@@ -46,12 +47,12 @@
     <outputs>
         <data format="html" name="report" label="BDSS client report"/>
         <!--list dataset collection for single end SRA data-->
-        <collection type="list" name="list_collection" label="Fastq-dump (single end reads)">
-            <discover_datasets pattern="__name_and_ext__" directory="read_files_directory" />
+        <collection type="list" name="list_collection" label="BDSS download data (single end reads)">
+            <discover_datasets pattern="__name_and_ext__" directory="se_read_files_directory" />
         </collection>
         <!--list:paired dataset collection for paired end SRA data-->
-        <collection type="list:paired" name="list_collection" label="Fastq-dump (paired end reads)">
-            <discover_datasets pattern="(?P<identifier_0>[^_]+)_(?P<identifier_1>[^_]+)\.(?P<ext>[^\._]+)?" directory="read_files_directory"/>
+        <collection type="list:paired" name="list:paired_collection" label="BDSS download data (paired end reads)">
+            <discover_datasets pattern="(?P<identifier_0>[^_]+)_(?P<identifier_1>[^_]+)\.(?P<ext>[^\._]+)?" directory="pe_read_files_directory" />
         </collection>
         <data format="txt" name="sink_message" label="Warnings and Errors" from_work_dir="warnings_and_errors.txt"/>
     </outputs>
```
```diff
--- a/bdss_client_sra_render.R Sat Oct 14 19:54:57 2017 -0400
+++ b/bdss_client_sra_render.R Sat Oct 14 22:59:06 2017 -0400
@@ -34,7 +34,8 @@
 ##------- 1. input data ---------------------
 args_list=list()
 args_list$SRA_IDS_SE = c('sra_ids_se', 'i', '1', 'character')
-args_list$SRA_IDS_PE = c('sra_ids_pe', 'i', '1', 'character')
+args_list$SRA_IDS_PE = c('sra_ids_pe', 'p', '1', 'character')
+args_list$FORMAT = c('format', 'f', '1', 'character')
 args_list$ECHO = c('echo', 'e', '1', 'character')
 ##--------2. output report and outputs --------------
 args_list$REPORT_HTML = c('report_html', 'r', '1', 'character')
```