changeset 0:a1448d1f2ee4 draft

planemo upload
author mingchen0919
date Wed, 20 Jun 2018 10:24:45 -0400
parents
children 44632fc0fc15
files 1-index-genome.sh 2-mapping.sh 3-sam2bam.sh 4-flagstat.sh htseq_count.xml run-star-jobs.sh star.Rmd star.xml star_galaxy_outputs.sh star_getopt.csv star_render.R vakata-jstree-3.3.5.zip
diffstat 12 files changed, 445 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/1-index-genome.sh	Wed Jun 20 10:24:45 2018 -0400
@@ -0,0 +1,11 @@
+# create genome directory for genome indices.
+mkdir -p ${X_d}/genomeDir
+
+STAR \
+  --runMode genomeGenerate \
+  --genomeDir ${X_d}/genomeDir \
+  --genomeFastaFiles $( echo ${X_A} | sed 's/,/ /g' ) \
+  --sjdbGTFfile ${X_B} \
+  --sjdbOverhang ${X_C} \
+  > genome-indexing.log.txt 2>&1
+  
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/2-mapping.sh	Wed Jun 20 10:24:45 2018 -0400
@@ -0,0 +1,6 @@
+STAR \
+  --genomeDir ${X_d}/genomeDir \
+  --readFilesIn \
+  ${X_F} \
+  ${X_R} \
+  > mapping.log.txt 2>&1  # stdout and stderr of the mapping run are captured in mapping.log.txt
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/3-sam2bam.sh	Wed Jun 20 10:24:45 2018 -0400
@@ -0,0 +1,1 @@
+samtools sort -o Aligned.out.sorted.bam Aligned.out.sam  # sort Aligned.out.sam and write the result to Aligned.out.sorted.bam
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/4-flagstat.sh	Wed Jun 20 10:24:45 2018 -0400
@@ -0,0 +1,1 @@
+samtools flagstat Aligned.out.sorted.bam > flagstat.txt  # flagstat output for the sorted BAM is saved to flagstat.txt
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/htseq_count.xml	Wed Jun 20 10:24:45 2018 -0400
@@ -0,0 +1,59 @@
+<tool name="aurora_tool" id="tool_1" version="1.0.0">
+  <description>ultrafast universal RNA-Seq aligner&#xD;
+    </description>
+  <requirements>
+        <requirement type="package" version="1.15.0.6-0">pandoc</requirement><requirement type="package" version="1.20.0">r-getopt</requirement><requirement type="package" version="1.6">r-rmarkdown</requirement></requirements>
+  <command><![CDATA[
+
+        ######### each aurora tool generates an html file and has a files path directory associated with it.
+        mkdir -p $report.files_path &&
+
+        ######### three important paths:
+        #########   1. path to tool installation directory
+        #########   2. path to report html
+        #########   3. path to files_path directory associated with the report output.
+        export TOOL_INSTALL_DIR='${__tool_directory__}' &&
+        export REPORT='$report' &&
+        export REPORT_FILES_PATH='$report.files_path' &&
+
+        ############ create a hidden file to store r markdown rendering log
+        touch $report.files_path/.r_rendering.log.txt &&
+
+        ############ finally run the render.R script
+        Rscript '${__tool_directory__}/rmarkdown_report_render.R'
+        
+            -o $report
+            -d $report.files_path
+
+
+    ]]></command>
+  <inputs>
+    </inputs>
+  <outputs>
+        <data format="html" name="report" label="${tool.name} report on ${on_string}"/></outputs>
+  <citations>
+        <citation type="bibtex"><![CDATA[
+            @article{allaire2016rmarkdown,
+            title={rmarkdown: Dynamic Documents for R, 2016},
+            author={Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff
+            and Wickham, Hadley and Atkins, Aron and Hyndman, Rob},
+            journal={R package version 0.9},
+            volume={6},
+            year={2016}
+            }
+        ]]></citation><citation type="bibtex"><![CDATA[
+            @book{xie2015dynamic,
+            title={Dynamic Documents with R and knitr},
+            author={Xie, Yihui},
+            volume={29},
+            year={2015},
+            publisher={CRC Press}
+            }
+        ]]></citation><citation type="bibtex"><![CDATA[
+            @online{jstree,
+            author={Bozhanov, Ivan},
+            year = 2018,
+            url = {https://www.jstree.com/}
+            }
+        ]]></citation></citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/run-star-jobs.sh	Wed Jun 20 10:24:45 2018 -0400
@@ -0,0 +1,15 @@
+cd ${REPORT_FILES_PATH}
+
+# index genome
+grep -v None ${TOOL_INSTALL_DIR}/1-index-genome.sh > 1-index-genome.sh
+sh 1-index-genome.sh
+
+# mapping
+grep -v None ${TOOL_INSTALL_DIR}/1-mapping.sh > 1-mapping.sh
+sh 1-mapping.sh
+
+# convert sam to bam
+sh 3-sam2bam.sh
+
+# mapping evaluation
+sh 4-flagstat.sh
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/star.Rmd	Wed Jun 20 10:24:45 2018 -0400
@@ -0,0 +1,108 @@
+---
+title: 'Aurora Tool Report'
+output:
+    html_document:
+      highlight: pygments
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+knitr::opts_chunk$set(error = TRUE, echo = FALSE)
+```
+
+```{css echo=FALSE}
+/* code chunks scrollable (a leading '#' is not a CSS comment and invalidated this rule) */
+pre code, pre, code {
+  white-space: pre !important;
+  overflow-x: scroll !important;
+  word-break: keep-all !important;
+  word-wrap: initial !important;
+}
+```
+
+
+```{r, echo=FALSE}
+# to make the css theme to work, <link></link> tags cannot be added directly 
+# as <script></script> tags as below.
+# it has to be added using a code chunk with the htmltool functions!!!
+css_link = tags$link()
+css_link$attribs = list(rel="stylesheet", href="vakata-jstree-3.3.5/dist/themes/default/style.min.css")
+css_link
+```
+
+```{r, eval=FALSE, echo=FALSE}
+# this code chunk is purely for adding comments
+# below is to add jQuery and jstree javascripts
+```
+<script src="https://code.jquery.com/jquery-3.3.1.min.js"></script>
+<script src="vakata-jstree-3.3.5/dist/jstree.min.js"></script>
+
+---
+# javascript code below is to build the file tree interface
+# see this for how to implement opening hyperlink: https://stackoverflow.com/questions/18611317/how-to-get-i-get-leaf-nodes-in-jstree-to-open-their-hyperlink-when-clicked-when
+---
+<script>
+  $(function () {
+    // create an instance when the DOM is ready
+    $('#jstree').jstree().bind("select_node.jstree", function (e, data) {
+     window.open( data.node.a_attr.href, data.node.a_attr.target )
+    });
+  });
+</script>
+
+---
+# ADD YOUR DATA ANALYSIS CODE AND MARKUP TEXT BELOW TO EXTEND THIS R MARKDOWN FILE
+---
+
+
+
+## Job scripts
+
+```{bash, echo=FALSE}
+sh ${TOOL_INSTALL_DIR}/run-star-jobs.sh
+```
+
+### Index genome
+
+```{r echo=FALSE,results='asis'}
+# display content of the job-script.sh file.
+cat('```bash\n')
+cat(readLines(paste0(Sys.getenv('REPORT_FILES_PATH'), '/1-index-genome.sh')), sep = '\n')
+cat('\n```')
+```
+
+### Mapping
+
+```{r echo=FALSE,results='asis'}
+# display content of the job-script.sh file.
+cat('```bash\n')
+cat(readLines(paste0(Sys.getenv('REPORT_FILES_PATH'), '/2-mapping.sh')), sep = '\n')
+cat('\n```')
+```
+
+### SAM to sorted BAM
+
+```{r echo=FALSE,warning=FALSE,results='asis'}
+# display content of the job-script.sh file.
+cat('```bash\n')
+cat(readLines(paste0(Sys.getenv('REPORT_FILES_PATH'), '/3-sam2bam.sh')), sep = '\n')
+cat('\n```')
+```
+
+### Mapping evaluation
+
+```{r echo=FALSE,warning=FALSE,results='asis'}
+# display content of the job-script.sh file.
+cat('```bash\n')
+cat(readLines(paste0(Sys.getenv('REPORT_FILES_PATH'), '/4-flagstat.sh')), sep = '\n')
+cat('\n```')
+```
+
+
+### Mapping evaluation results
+
+```{r echo=FALSE,warning=FALSE,results='asis'}
+# display the flagstat report, which 4-flagstat.sh writes to flagstat.txt.
+cat('```bash\n')
+cat(readLines(paste0(Sys.getenv('REPORT_FILES_PATH'), '/flagstat.txt')), sep = '\n')
+cat('\n```')
+```
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/star.xml	Wed Jun 20 10:24:45 2018 -0400
@@ -0,0 +1,61 @@
+<tool id="aurora_star_demo" name="aurora_star_demo" version="1.0.0">
+  <description>ultrafast universal RNA-Seq aligner&#xD;
+    </description>
+  <requirements>
+        <requirement type="package" version="1.15.0.6-0">pandoc</requirement><requirement type="package" version="1.20.0">r-getopt</requirement><requirement type="package" version="1.6">r-rmarkdown</requirement><requirement type="package" version="2.5.4a">star</requirement><requirement type="package" version="1.7">samtools</requirement></requirements>
+  <command><![CDATA[        
+        ######### each aurora tool generates an html file and has a files path directory associated with it.
+        mkdir -p $report.files_path &&
+        
+        ######### three important paths:
+        #########   1. path to tool installation directory
+        #########   2. path to report html
+        #########   3. path to files_path directory associated with the report output.
+        export TOOL_INSTALL_DIR='${__tool_directory__}' &&
+        export REPORT='$report' &&
+        export REPORT_FILES_PATH='$report.files_path' &&
+        
+        ############ create a hidden file to store r markdown rendering log
+        touch $report.files_path/.r_rendering.log.txt &&
+        
+        ############ finally run star_render.R, the render script shipped with this tool
+        Rscript '${__tool_directory__}/star_render.R'
+        
+            -o $report
+            -d $report.files_path
+            -A '$genomeFastaFiles'
+            -B '$sjdbGTFfile'
+            -C '$sjdbOverhang'
+            -F '$first_reads'
+            -R '$second_reads'
+            -S '$sorted_bam']]></command>
+  <inputs>
+    <param type="data" name="first_reads" label="First reads" optional="False" format="fastq,fastqsanger"/><param type="data" name="second_reads" label="Second reads" optional="True" format="fastq,fastqsanger"/><param type="data" name="genomeFastaFiles" argument="--genomeFastaFiles" label="Genome fasta files" optional="False" format="fasta,fa"/><param type="data" name="sjdbGTFfile" argument="--sjdbGTFfile" label="Annotated transcripts" help="the file with annotated transcripts in the standard GTF format. STAR will extract splice junctions from this file and use them to greatly improve accuracy of the mapping. While this is optional, and STAR can be run without annotations, using annotations is highly recommended whenever they are available." optional="True" format="gtf"/><param type="integer" name="sjdbOverhang" argument="--sjdbOverhang" label="sjdbOverhang" help="the length of the genomic sequence around the annotated junction to be used in constructing the splice junctions database. Ideally, this length should be equal to the ReadLength-1, where ReadLength is the length of the reads. For instance, for Illumina 2x100b paired-end reads, the ideal value is 100-1=99. In case of reads of varying length, the ideal value is max(ReadLength)-1. In most cases, a generic value of 100 will work as well as the ideal value." optional="False" value="100" min="1"/></inputs>
+  <outputs>
+        <data name="report" format="html" label="${tool.name} report on ${on_string}" hidden="false"/><data name="sorted_bam" format="bam" label="${tool.name} sorted bam on ${on_string}" hidden="false"/></outputs>
+  <citations>
+        <citation type="bibtex"><![CDATA[
+            @article{allaire2016rmarkdown,
+            title={rmarkdown: Dynamic Documents for R, 2016},
+            author={Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff
+            and Wickham, Hadley and Atkins, Aron and Hyndman, Rob},
+            journal={R package version 0.9},
+            volume={6},
+            year={2016}
+            }
+        ]]></citation><citation type="bibtex"><![CDATA[
+            @book{xie2015dynamic,
+            title={Dynamic Documents with R and knitr},
+            author={Xie, Yihui},
+            volume={29},
+            year={2015},
+            publisher={CRC Press}
+            }
+        ]]></citation><citation type="bibtex"><![CDATA[
+            @online{jstree,
+            author={Bozhanov, Ivan},
+            year = 2018,
+            url = {https://www.jstree.com/}
+            }
+        ]]></citation></citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/star_galaxy_outputs.sh	Wed Jun 20 10:24:45 2018 -0400
@@ -0,0 +1,15 @@
+# change directory to tool outputs directory
+cd ${REPORT_FILES_PATH}
+
+# copy outputs from tool outputs directory to corresponding galaxy output path
+if [ -e "rmarkdown_report.html" ]; then
+  cp rmarkdown_report.html ${REPORT}
+fi
+
+if [ -e "index.html" ]; then
+  cp index.html ${REPORT}
+fi
+
+if [ -e "Aligned.out.sorted.bam" ]; then
+  cp index.html ${REPORT}
+fi
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/star_getopt.csv	Wed Jun 20 10:24:45 2018 -0400
@@ -0,0 +1,9 @@
+short flag,argument mask,data type,variable name
+o,1,character,report
+d,1,character,report.files_path
+A,1,character,genomeFastaFiles
+B,1,character,sjdbGTFfile
+C,1,character,sjdbOverhang
+F,1,character,first_reads
+R,1,character,second_reads
+S,1,character,sorted_bam
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/star_render.R	Wed Jun 20 10:24:45 2018 -0400
@@ -0,0 +1,159 @@
+##============ Sink warnings and errors to a file ==============
+## use the sink() function to wrap all code within it.
+##==============================================================
+zz = file(paste0(Sys.getenv('REPORT_FILES_PATH'), '/rmarkdown_rendering.log.txt'))
+sink(zz)
+sink(zz, type = 'message')
+
+#============== preparation ====================================
+# import libraries
+#------------------------------------------------------------------
+# ADD MORE LIBRARIES HERE IF YOUR TOOL DEPENDS ON OTHER R LIBRARIES
+#------------------------------------------------------------------
+library('getopt')
+library('rmarkdown')
+library('htmltools')
+#------------------------------------------------------------------
+options(stringsAsFactors = FALSE)
+
+
+# define two helper functions
+#-----: helper function 1
+#' \code{getopt_specification_matrix} builds the specification matrix passed to \code{getopt}.
+#'
+#' @param specification_file name of a csv file inside \code{tool_dir}. Column one holds the short
+#' flags, column two the argument masks and column three the data types. Further columns (such as
+#' the variable names used in the tool XML) are read but not used by this function.
+#' @param gtg_name the name of a running GTG; not used in the function body.
+#' @param tool_dir directory containing the csv file, by default the TOOL_INSTALL_DIR variable.
+getopt_specification_matrix = function(specification_file,
+                                       gtg_name = 'gtg',
+                                       tool_dir = Sys.getenv('TOOL_INSTALL_DIR')) {
+  spec = read.csv(
+    paste0(tool_dir, '/', specification_file),
+    header = TRUE,
+    stringsAsFactors = FALSE
+  )
+  flags = spec[, 1]
+
+  # refuse specifications that list the same short flag more than once
+  if (anyDuplicated(flags) > 0) {
+    cat('----Duplicated short flags found ----\n')
+    cat('short flags: ', flags[duplicated(flags)], '\n')
+    stop('Duplicated short flags are not allowed.')
+  }
+
+  # assemble the matrix column by column: the long flag is the short flag
+  # prefixed with 'X_', followed by the short flag, argument mask and data type
+  spec_table = data.frame(
+    long_flags = paste0('X_', flags),
+    short_flags = flags,
+    argument_mask = spec[, 2],
+    data_type = spec[, 3]
+  )
+
+  as.matrix(spec_table)
+}
+
+#-----: helper function 2
+#' \code{file_tree} generates the file tree of a directory in the format of HTML lists.
+#'
+#' @param dir the path to the directory for generating the file tree.
+#' @return a \code{ul} tag; file links are made relative to the REPORT_FILES_PATH folder
+#' (named like dataset_NUMBER_files) and sub-directories are expanded by recursive calls.
+file_tree = function(dir = '.') {
+  # get the OUTPUT_DIR folder data: dataset_NUMBER_files
+  report_files_path = Sys.getenv('REPORT_FILES_PATH')
+  output_dir = tail(strsplit(report_files_path, '/')[[1]], 1)
+  
+  files = list.files(path = dir,
+                     recursive = FALSE,
+                     full.names = TRUE)
+  # files also include directories, need to remove directories
+  files = files[!dir.exists(files)]
+  dirs = list.dirs(path = dir,
+                   recursive = FALSE,
+                   full.names = TRUE)
+  tags$ul({
+    if (length(files) > 0) {
+      lapply(files, function(x) {
+        path_end = tail(strsplit(x, '/')[[1]], 1)
+        # fixed = TRUE: the folder name is matched literally, not as a regular expression
+        href_path = strsplit(x, paste0(output_dir, '/'), fixed = TRUE)[[1]][2]
+        li_item = tags$li(tags$a(path_end, href = href_path))
+        li_item$attribs = list('data-jstree' = '{"icon":"jstree-file"}')
+        li_item
+      })
+    }
+  },
+  {
+    if (length(dirs) > 0) {
+      lapply(dirs, function(x) {
+        path_end = tail(strsplit(x, '/')[[1]], 1)
+        # hide the jstree library, rmarkdown_report_files and site_libs folders
+        if (!(path_end %in% c('vakata-jstree-3.3.5', 'rmarkdown_report_files', 'site_libs'))) {
+          li_item = tags$li(path_end, file_tree(x))
+          li_item$attribs = list('data-jstree' = '{"icon":"jstree-folder"}')
+          li_item
+        }
+      })
+    }
+  })
+}
+#----------------- end of help functions -------------------------
+
+
+# import getopt specification matrix from a csv file
+opt = getopt(getopt_specification_matrix('star_getopt.csv',
+                                         tool_dir = Sys.getenv('TOOL_INSTALL_DIR')))
+# define environment variables for all input values. this is useful when we
+# want to use input values by other programming language in r markdown
+do.call(Sys.setenv, opt[-1])
+# add current job working directory
+Sys.setenv(JOB_WORKING_DIR=getwd())
+#===============================================================
+
+
+#======================== render Rmd files =========================
+# copy jstree javascript library to tool output directory
+file.copy(
+  from = paste0(Sys.getenv('TOOL_INSTALL_DIR'), '/vakata-jstree-3.3.5'),
+  to = Sys.getenv('REPORT_FILES_PATH'),
+  recursive = TRUE
+)
+
+# if '_site.yml' file exists, this tool is assumed to render a website.
+# otherwise, it renders a single html.
+if (file.exists(paste0(Sys.getenv('TOOL_INSTALL_DIR'), '/_site.yml'))) {
+  # render a website
+  system(command = 'cp -r ${TOOL_INSTALL_DIR}/*.Rmd ${REPORT_FILES_PATH}')
+  system(command = 'cp -r ${TOOL_INSTALL_DIR}/_site.yml ${REPORT_FILES_PATH}')
+  render_site(input = Sys.getenv('REPORT_FILES_PATH'))
+} else {
+  # render a single html
+  system(command = 'cp -r ${TOOL_INSTALL_DIR}/star.Rmd ${REPORT_FILES_PATH}')
+  # add a few lines to 'star.Rmd' to generate file tree outputs
+  jstree_lines = '
+## Outputs
+
+```{r, echo=FALSE}
+tags$div(id="jstree", file_tree(Sys.getenv(\'REPORT_FILES_PATH\')))
+```'
+  write(
+    x = jstree_lines,
+    append = TRUE,
+    file = paste0(Sys.getenv('REPORT_FILES_PATH'), '/star.Rmd')
+  )
+  render(input = paste0(Sys.getenv('REPORT_FILES_PATH'), '/star.Rmd'))
+}
+#===============================================================
+
+
+#============== expose outputs to galaxy history ===============
+system(command = 'sh ${TOOL_INSTALL_DIR}/star_galaxy_outputs.sh')
+#===============================================================
+
+
+##--------end of code rendering .Rmd templates----------------
+sink(type = 'message'); sink(); close(zz)
+##=========== End of sinking output (message sink restored first, then the log is closed) ======
Binary file vakata-jstree-3.3.5.zip has changed