Mercurial > repos > mingchen0919 > aurora_bdss_sra

diff rmarkdown_report.Rmd @ 0:759232961286 draft
planemo upload for repository https://github.com/feltus/BDSS
author: mingchen0919
date: Tue, 12 Jun 2018 09:24:06 -0400
children: bd236d81187b
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rmarkdown_report.Rmd	Tue Jun 12 09:24:06 2018 -0400
@@ -0,0 +1,126 @@
+---
+title: 'Aurora Tool Report'
+output:
+    html_document:
+      highlight: pygments
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+# Global chunk defaults for the report: error = TRUE lets knitting continue
+# when a chunk fails (the error is shown in the output), echo = FALSE hides
+# the source code of every chunk.
+knitr::opts_chunk$set(error = TRUE, echo = FALSE)
+```
+
+```{css echo=FALSE}
+/* Make code chunks scroll horizontally instead of wrapping.
+   Note: CSS only supports block comments; a '#'-prefixed line is parsed as
+   part of the selector list and invalidates the whole rule below. */
+pre code, pre, code {
+  white-space: pre !important;
+  overflow-x: scroll !important;
+  word-break: keep-all !important;
+  word-wrap: initial !important;
+}
+```
+
+
+```{r, echo=FALSE}
+# Per the original author's note: for the jstree CSS theme to take effect, the
+# <link> element must be emitted from a code chunk through the htmltools tag
+# helpers; it cannot be written as raw HTML the way the <script> tags further
+# down are.
+css_link = tags$link(
+  rel = "stylesheet",
+  href = "vakata-jstree-3.3.5/dist/themes/default/style.min.css"
+)
+css_link
+```
+
+```{r, eval=FALSE, echo=FALSE}
+# this code chunk is purely for adding comments
+# below is to add jQuery and jstree javascripts
+```
+<script src="https://code.jquery.com/jquery-3.3.1.min.js"></script>
+<script src="vakata-jstree-3.3.5/dist/jstree.min.js"></script>
+
+---
+# javascript code below is to build the file tree interface
+# see this for how to implement opening hyperlink: https://stackoverflow.com/questions/18611317/how-to-get-i-get-leaf-nodes-in-jstree-to-open-their-hyperlink-when-clicked-when
+---
+<script>
+  $(function () {
+    // create an instance when the DOM is ready
+    $('#jstree').jstree().bind("select_node.jstree", function (e, data) {
+     window.open( data.node.a_attr.href, data.node.a_attr.target )
+    });
+  });
+</script>
+
+---
+# ADD YOUR DATA ANALYSIS CODE AND MARKUP TEXT BELOW TO EXTEND THIS R MARKDOWN FILE
+---
+
+# Download and extract reads
+
+```{r echo=FALSE}
+# paths, under the job working directory, of the two folders intended for the
+# downloaded single end and paired end read files (only the path strings are
+# built here; nothing is created on disk by this chunk)
+se_dir = file.path(Sys.getenv('JOB_WORKING_DIR'), 'se_read_files_dir')
+pe_dir = file.path(Sys.getenv('JOB_WORKING_DIR'), 'pe_read_files_dir')
+```
+
+```{r 'download and extract reads'}
+# Download every requested SRA run with BDSS and convert it to fastq/fasta
+# with fastq-dump. The output format is read from `opt`, which must be defined
+# before this chunk runs (it is not created in this file).
+
+# Split a comma and/or whitespace separated accession string into a character
+# vector, dropping empty entries.
+parse_sra_ids = function(ids) {
+  ids = strsplit(ids, '[,[:space:]]+')[[1]]
+  ids[ids != '']
+}
+
+# Append command output to a log file inside the report files directory.
+append_log = function(lines, log_name) {
+  write(lines, file = file.path(Sys.getenv('REPORT_FILES_PATH'), log_name), append = TRUE)
+}
+
+# Download one run and convert it into `out_dir`.
+#   id      : SRA run accession
+#   out_dir : directory handed to fastq-dump through -O
+#   paired  : when TRUE, fastq-dump is called with --split-files
+# Arguments derived from the accession are quoted with shQuote() before they
+# reach the shell.
+fetch_sra_run = function(id, out_dir, paired = FALSE) {
+  # build URL from SRA id
+  url = paste0('ftp://ftp.ncbi.nlm.nih.gov/sra/sra-instant/reads/ByRun/sra/',
+               substr(id, 1, 3), '/',
+               substr(id, 1, 6), '/', id, '/', id, '.sra')
+  # download sra file with bdss
+  append_log(system(paste('bdss transfer -u', shQuote(url)), intern = TRUE),
+             'bdss-download.log.txt')
+  # convert .sra to .fastq/.fasta
+  flags = c(if (opt$X_f == 'fasta') '--fasta', if (paired) '--split-files')
+  command = paste(c('fastq-dump', flags, '-O', out_dir, shQuote(paste0(id, '.sra'))),
+                  collapse = ' ')
+  cat('----convert SRA to fastq/fasta------\n')
+  append_log(system(command, intern = TRUE), 'fastq-dump.log.txt')
+}
+
+# download and extract reads (single end)
+sra_ids_se = parse_sra_ids('SRA_IDS_SE')
+for (id in sra_ids_se) {
+  fetch_sra_run(id, 'se_read_files_dir')
+}
+
+# download and extract reads (paired end)
+sra_ids_pe = parse_sra_ids('SRA_IDS_PE')
+for (id in sra_ids_pe) {
+  fetch_sra_run(id, 'pe_read_files_dir', paired = TRUE)
+  if (!(paste0(id, '_2.', opt$X_f) %in% list.files('pe_read_files_dir'))) {
+    # no mate file was produced, so this is not a paired end run: drop its reads
+    cat(paste0(id, ' is not paired end SRA, the corresponding fastq/fasta file will deleted.'))
+    # Sys.glob() + file.remove() instead of a shell `rm`: no shell is involved
+    # and an empty match is not an error
+    file.remove(Sys.glob(file.path('pe_read_files_dir', paste0(id, '_1.*'))))
+  }
+}
+
+
+cat('-----Renaming files------\n')
+# rename files for paired end reads: *_1.<ext> -> *_forward.<ext> and
+# *_2.<ext> -> *_reverse.<ext>.
+# full.names = TRUE returns an empty vector for an empty or missing directory;
+# pasting the directory name onto an empty listing would instead produce the
+# directory path itself and try to rename the directory.
+old_files = list.files('./pe_read_files_dir', full.names = TRUE)
+# patterns are anchored to the suffix right before the file extension
+new_files = sub('_1(\\.[^/]*)$', '_forward\\1', old_files)
+new_files = sub('_2(\\.[^/]*)$', '_reverse\\1', new_files)
+file.rename(old_files, new_files)
+```
author	mingchen0919
date	Tue, 12 Jun 2018 09:24:06 -0400
parents
children	bd236d81187b