# HG changeset patch # User mingchen0919 # Date 1509978984 18000 # Node ID 7905d420f63eb1f9202137abda05cfd842588a59 # Parent 1b64a808c7636e07d3a8a74fff225c65b3cda126 Deleted selected files diff -r 1b64a808c763 -r 7905d420f63e bdss_client.Rmd --- a/bdss_client.Rmd Tue Oct 17 00:08:00 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,52 +0,0 @@ ---- -title: 'Download with BDSS client' -output: - html_document: - number_sections: true - toc: true - theme: cosmo - highlight: tango ---- - -```{r setup, include=FALSE, warning=FALSE, message=FALSE} -knitr::opts_chunk$set( - echo = ECHO, - error=TRUE -) -``` - -# Command line arguments - -```{r 'command line arguments'} -str(opt) -``` - -# BDSS configuration file - -First, we create a bdss configuration file `bdss.cfg` in the current directory. - -```{r} -system('echo "[metadata_repository]" > bdss.cfg') -system('echo url=http://bdss.bioinfo.wsu.edu/ >> bdss.cfg') -``` - -# Download data - -```{r 'download and extract reads'} -# create a directory to store read files -dir.create('read_files_directory') -# download and extract reads -urls = strsplit(gsub(',', ' ', 'URLS'), ' ')[[1]] -urls = urls[urls != ''] -# loop through SRA accessions to download and extract reads. -for(url in urls) { - print(url) - bdss_command = paste0('/main/sites/galaxy/galaxy/tools/_conda/bin/bdss transfer --destination read_files_directory -u ', url) - print(bdss_command) - print(system(bdss_command, intern = TRUE)) -} -# all files that need to be saved should be moved to REPORT_DIR directory -# print(system('mv read_files_directory REPORT_DIR', intern = TRUE)) -``` - - diff -r 1b64a808c763 -r 7905d420f63e bdss_client.xml --- a/bdss_client.xml Tue Oct 17 00:08:00 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,46 +0,0 @@ - - - pandoc - r-getopt - r-rmarkdown - r-htmltools - r-dplyr - parallel-fastq-dump - r-rcurl - - - Download data with BDSS client. - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff -r 1b64a808c763 -r 7905d420f63e bdss_client_render.R --- a/bdss_client_render.R Tue Oct 17 00:08:00 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,82 +0,0 @@ -library(getopt) -library(rmarkdown) -library(htmltools) -library(dplyr) -library(RCurl) - - -##============ Sink warnings and errors to a file ============== -## use the sink() function to wrap all code within it. -##============================================================== -zz = file('warnings_and_errors.txt') -sink(zz) -sink(zz, type = 'message') -##---------below is the code for rendering .Rmd templates----- - - ##=============STEP 1: handle command line arguments========== - ## - ##============================================================ - # column 1: the long flag name - # column 2: the short flag alias. A SINGLE character string - # column 3: argument mask - # 0: no argument - # 1: argument required - # 2: argument is optional - # column 4: date type to which the flag's argument shall be cast. - # possible values: logical, integer, double, complex, character. - #------------------------------------------------------------- - #++++++++++++++++++++ Best practice ++++++++++++++++++++++++++ - # 1. short flag alias should match the flag in the command section in the XML file. - # 2. long flag name can be any legal R variable names - # 3. two names in args_list can have common string but one name should not be a part of another name. - # for example, one name is "ECHO", if another name is "ECHO_XXX", it will cause problems. - #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ - ##------- 1. input data --------------------- - args_list=list() - args_list$URLS = c('urls', 'i', '1', 'character') - args_list$ECHO = c('echo', 'e', '1', 'character') - ##--------2. output report and outputs -------------- - args_list$REPORT_HTML = c('report_html', 'r', '1', 'character') - args_list$REPORT_DIR = c('report_dir', 'd', '1', 'character') - args_list$SINK_OUTPUT = c('sink_message', 's', '1', 'character') - ##--------3. Rmd templates in the tool directory ---------- - args_list$BDSS_CLIENT_RMD = c('bdss_client_rmd', 't', '1', 'character') - - opt = getopt(t(as.data.frame(args_list))) - - - ##=======STEP 2: create report directory (optional)========== - ## - ##=========================================================== - dir.create(opt$report_dir) - - ##=STEP 3: replace placeholders in .Rmd with argument values= - ## - ##=========================================================== - #++ need to replace placeholders with args values one by one+ - #----- 01 bdss_client.Rmd ----------------------- - readLines(opt$bdss_client_rmd) %>% - (function(x) { - gsub('URLS', opt$urls, x) - }) %>% - (function(x) { - gsub('ECHO', opt$echo, x) - }) %>% - (function(x) { - gsub('REPORT_DIR', opt$report_dir, x) - }) %>% - (function(x) { - fileConn = file('bdss_client.Rmd') - writeLines(x, con=fileConn) - close(fileConn) - }) - - ##=============STEP 4: render .Rmd templates================= - ## - ##=========================================================== - render('bdss_client.Rmd', output_file = opt$report_html) - - -##--------end of code rendering .Rmd templates---------------- -sink() -##=========== End of sinking output============================= \ No newline at end of file diff -r 1b64a808c763 -r 7905d420f63e bdss_client_sra.Rmd --- a/bdss_client_sra.Rmd Tue Oct 17 00:08:00 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,105 +0,0 @@ ---- -title: 'Download and extract single end fastq/fasta data with BDSS client from SRA accessions' -output: - html_document: - number_sections: true - toc: true - theme: cosmo - highlight: tango ---- - -```{r setup, include=FALSE, warning=FALSE, message=FALSE} -knitr::opts_chunk$set( - echo = ECHO, - error=TRUE -) -``` - -# Command line arguments - -```{r 'command line arguments'} -str(opt) -``` - -# BDSS configuration file - -First, we create a bdss configuration file `bdss.cfg` in the current directory. - -```{r} -system('echo "[metadata_repository]" > bdss.cfg') -system('echo url=http://bdss.bioinfo.wsu.edu/ >> bdss.cfg') -``` - -# Download and extract reads - -```{r 'download and extract reads'} -# create two directories, one for single end and the other for paired end SRA reads. -dir.create('se_read_files_directory') -dir.create('pe_read_files_directory') -# download and extract reads (single end) -sra_ids_se = strsplit(gsub(',', ' ', 'SRA_IDS_SE'), ' ')[[1]] -sra_ids_se = sra_ids_se[sra_ids_se != ''] -# loop through SRA accessions to download and extract reads. -for(id in sra_ids_se) { - # build URL from SRA id - url = paste0('ftp://ftp.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/', - substr(id, 1, 3), '/', - substr(id, 1, 6), '/', id, '/', id, '.sra') - # download sra file with bdss - bdss_command = paste0('/main/sites/galaxy/galaxy/tools/_conda/bin/bdss transfer -u ', url) - system(bdss_command, intern = TRUE) - # convert .sra to .fastq/.fasta - if('FORMAT' == 'fasta') { - command = paste0('fastq-dump --fasta -O se_read_files_directory ', id, '.sra') - } else { - command = paste0('fastq-dump -O se_read_files_directory ', id, '.sra') - } - cat('----convert SRA to fastq/fasta------\n') - print(system(command, intern = TRUE)) -} - -# download and extract reads (paired end) -sra_ids_pe = strsplit(gsub(',', ' ', 'SRA_IDS_PE'), ' ')[[1]] -sra_ids_pe = sra_ids_pe[sra_ids_pe != ''] -# loop through SRA accessions to download and extract reads. -for(id in sra_ids_pe) { - # build URL from SRA id - url = paste0('ftp://ftp.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/', - substr(id, 1, 3), '/', - substr(id, 1, 6), '/', id, '/', id, '.sra') - # download sra file with bdss - bdss_command = paste0('/main/sites/galaxy/galaxy/tools/_conda/bin/bdss transfer -u ', url) - system(bdss_command, intern = TRUE) - # convert .sra to .fastq/.fasta - if('FORMAT' == 'fasta') { - command = paste0('fastq-dump --fasta --split-files -O pe_read_files_directory ', id, '.sra') - } else { - command = paste0('fastq-dump --split-files -O pe_read_files_directory ', id, '.sra') - } - cat('----convert SRA to fastq/fasta------\n') - command_stdout = system(command, intern = TRUE) - print(command_stdout) - if(!(paste0(id, '_2.FORMAT') %in% list.files('pe_read_files_directory'))) { - # this is not a paired end SRA file. The corresponding file will be deleted. - cat(paste0(id, ' is not paired end SRA, the corresponding fastq/fasta file will deleted.')) - system(paste0('rm pe_read_files_directory/', id, '_1.*'), intern = TRUE) - } - -} - -cat('-----single end files----\n') -list.files('./se_read_files_directory') -cat('-----paired end files----\n') -list.files('./pe_read_files_directory') - -cat('-----Renaming files------\n') -# rename files for paired end reads -old_files = paste0('./pe_read_files_directory/', list.files('./pe_read_files_directory')) -print(old_files) -new_files = gsub('_1', '_forward', old_files) -new_files = gsub('_2', '_reverse', new_files) -print(new_files) -file.rename(old_files, new_files) -``` - - diff -r 1b64a808c763 -r 7905d420f63e bdss_client_sra.xml --- a/bdss_client_sra.xml Tue Oct 17 00:08:00 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,63 +0,0 @@ - - - pandoc - r-getopt - r-rmarkdown - r-htmltools - r-dplyr - parallel-fastq-dump - r-rcurl - - - Download data with BDSS client and generate list (single end SRA data) and list:paired dataset collection - (paired end SRA data). - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff -r 1b64a808c763 -r 7905d420f63e bdss_client_sra_render.R --- a/bdss_client_sra_render.R Tue Oct 17 00:08:00 2017 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,90 +0,0 @@ -library(getopt) -library(rmarkdown) -library(htmltools) -library(dplyr) -library(RCurl) - - -##============ Sink warnings and errors to a file ============== -## use the sink() function to wrap all code within it. -##============================================================== -zz = file('warnings_and_errors.txt') -sink(zz) -sink(zz, type = 'message') -##---------below is the code for rendering .Rmd templates----- - -##=============STEP 1: handle command line arguments========== -## -##============================================================ -# column 1: the long flag name -# column 2: the short flag alias. A SINGLE character string -# column 3: argument mask -# 0: no argument -# 1: argument required -# 2: argument is optional -# column 4: date type to which the flag's argument shall be cast. -# possible values: logical, integer, double, complex, character. -#------------------------------------------------------------- -#++++++++++++++++++++ Best practice ++++++++++++++++++++++++++ -# 1. short flag alias should match the flag in the command section in the XML file. -# 2. long flag name can be any legal R variable names -# 3. two names in args_list can have common string but one name should not be a part of another name. -# for example, one name is "ECHO", if another name is "ECHO_XXX", it will cause problems. -#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ -##------- 1. input data --------------------- -args_list=list() -args_list$SRA_IDS_SE = c('sra_ids_se', 'i', '1', 'character') -args_list$SRA_IDS_PE = c('sra_ids_pe', 'p', '1', 'character') -args_list$FORMAT = c('format', 'f', '1', 'character') -args_list$ECHO = c('echo', 'e', '1', 'character') -##--------2. output report and outputs -------------- -args_list$REPORT_HTML = c('report_html', 'r', '1', 'character') -args_list$REPORT_DIR = c('report_dir', 'd', '1', 'character') -args_list$SINK_OUTPUT = c('sink_message', 's', '1', 'character') -##--------3. Rmd templates in the tool directory ---------- -args_list$BDSS_CLIENT_RMD = c('bdss_client_rmd', 't', '1', 'character') - -opt = getopt(t(as.data.frame(args_list))) - - -##=======STEP 2: create report directory (optional)========== -## -##=========================================================== -dir.create(opt$report_dir) - -##=STEP 3: replace placeholders in .Rmd with argument values= -## -##=========================================================== -#++ need to replace placeholders with args values one by one+ -#----- 01 bdss_client.Rmd ----------------------- -readLines(opt$bdss_client_rmd) %>% - (function(x) { - gsub('SRA_IDS_SE', opt$sra_ids_se, x) - }) %>% - (function(x) { - gsub('SRA_IDS_PE', opt$sra_ids_pe, x) - }) %>% - (function(x) { - gsub('FORMAT', opt$format, x) - }) %>% - (function(x) { - gsub('ECHO', opt$echo, x) - }) %>% - (function(x) { - gsub('REPORT_DIR', opt$report_dir, x) - }) %>% - (function(x) { - fileConn = file('bdss_client.Rmd') - writeLines(x, con=fileConn) - close(fileConn) - }) - -##=============STEP 4: render .Rmd templates================= -## -##=========================================================== -render('bdss_client.Rmd', output_file = opt$report_html) - - -##--------end of code rendering .Rmd templates---------------- -sink() -##=========== End of sinking output============================= \ No newline at end of file