Mercurial > repos > mingchen0919 > aurora_fastqc

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/build-and-run-job-scripts.sh	Mon Apr 30 16:40:21 2018 -0400
@@ -0,0 +1,33 @@
+# run SHELL_SCRIPT within tool outputs directory
+cd ${REPORT_FILES_PATH}
+
+# build job-script.sh
+cat >temp.sh <<EOF
+
+cp ${X_r} read_1.fq
+cp ${X_R} read_2.fq
+
+fastqc \\
+  -q \\
+  -c ${X_c} \\
+  -l ${X_l} \\
+  ${X_d}/read_1.fq > /dev/null 2>&1
+
+fastqc \\
+  -q \\
+  -c ${X_c} \\
+  -l ${X_l} \\
+  ${X_d}/read_2.fq > /dev/null 2>&1
+
+EOF
+
+grep -v None temp.sh > job-1-script.sh
+rm temp.sh
+
+# run SHELL_SCRIPT
+sh job-1-script.sh
+
+
+# unzip outputs
+unzip -q read_1_fastqc.zip
+unzip -q read_2_fastqc.zip
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/command-line-arguments.csv	Mon Apr 30 16:40:21 2018 -0400
@@ -0,0 +1,9 @@
+short flag,argument mask,data type,variable name
+o,1,character,report
+d,1,character,report.files_path
+r,1,character,reads_1
+n,1,character,reads_1.name
+R,1,character,reads_2
+N,1,character,reads_2.name
+c,1,character,contaminants
+l,1,character,limits
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/expose-outputs-to-galaxy-history.sh	Mon Apr 30 16:40:21 2018 -0400
@@ -0,0 +1,5 @@
+# change directory to tool outputs directory; abort if it is unavailable
+cd "${REPORT_FILES_PATH}" || exit 1
+
+# copy the rendered report from the outputs directory to the galaxy output path
+cp fastqc.html "${REPORT}"
\ No newline at end of file
--- a/expose-outputs.sh	Wed Apr 25 15:55:23 2018 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,5 +0,0 @@
-# change directory to tool outputs directory
-cd ${REPORT_FILES_PATH}
-
-# copy outputs from tool outputs directory to corresponding galaxy output path
-cp fastqc.html ${REPORT}
\ No newline at end of file
--- a/fastqc.Rmd	Wed Apr 25 15:55:23 2018 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,416 +0,0 @@
----
-title: 'Short reads evaluation with [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)'
-output:
-    html_document:
-      highlight: pygments
----
-
-```{r setup, include=FALSE, warning=FALSE, message=FALSE}
-knitr::opts_knit$set(progress = FALSE)
-knitr::opts_chunk$set(error = TRUE, echo = FALSE)
-```
-
-```{r, echo=FALSE}
-# to make the css theme to work, <link></link> tags cannot be added directly
-# as <script></script> tags as below.
-# it has to be added using a code chunk with the htmltool functions!!!
-css_link = tags$link()
-css_link$attribs = list(rel="stylesheet", href="vakata-jstree-3.3.5/dist/themes/default/style.min.css")
-css_link
-```
-
-```{r, eval=FALSE, echo=FALSE}
-# this code chunk is purely for adding comments
-# below is to add jQuery and jstree javascripts
-```
-<script src="https://code.jquery.com/jquery-3.3.1.min.js"></script>
-<script src="vakata-jstree-3.3.5/dist/jstree.min.js"></script>
-
-```{r, eval=FALSE, echo=FALSE}
-# this code chunk is purely for adding comments
-# javascript code below is to build the file tree interface
-# see this for how to implement opening hyperlink: https://stackoverflow.com/questions/18611317/how-to-get-i-get-leaf-nodes-in-jstree-to-open-their-hyperlink-when-clicked-when
-```
-<script>
-  $(function () {
-    // create an instance when the DOM is ready
-    $('#jstree').jstree().bind("select_node.jstree", function (e, data) {
-     window.open( data.node.a_attr.href, data.node.a_attr.target )
-    });
-  });
-</script>
-
-```{css}
-pre code, pre, code {
-  white-space: pre !important;
-  overflow-x: scroll !important;
-  word-break: keep-all !important;
-  word-wrap: initial !important;
-}
-```
-----------------------
-
-
-
-
-
-# Run FastQC
-
-```{bash}
-sh ${TOOL_INSTALL_DIR}/shell-script-template.sh
-```
-
-```{r echo=FALSE,results='asis'}
-# display fastqc job script
-cat('```bash\n')
-cat(readLines(paste0(Sys.getenv('REPORT_FILES_PATH'), '/fastqc.sh')), sep = '\n')
-cat('\n```')
-```
-
-# Fastqc Output Visualization
-
-## Overview
-
-```{r eval=TRUE}
-read_1_summary = read.csv(paste0(opt$X_d, '/read_1_fastqc/summary.txt'),
-                          stringsAsFactors = FALSE,
-                          header = FALSE, sep = '\t')[, 2:1]
-read_2_summary = read.csv(paste0(opt$X_d, '/read_2_fastqc/summary.txt'),
-                          stringsAsFactors = FALSE,
-                          header = FALSE, sep = '\t')[, 1]
-combined_summary = data.frame(read_1_summary, read_2_summary, stringsAsFactors = FALSE)
-names(combined_summary) = c('MODULE', 'Pre-trimming', 'Post-trimming')
-combined_summary[combined_summary == 'FAIL'] = 'FAIL (X)'
-combined_summary[combined_summary == 'WARN'] = 'WARN (!)'
-knitr::kable(combined_summary)
-```
-
-```{r 'function definition', echo=FALSE}
-extract_data_module = function(fastqc_data, module_name, header = TRUE, comment.char = "") {
-  f = readLines(fastqc_data)
-  start_line = grep(module_name, f)
-  end_module_lines = grep('END_MODULE', f)
-  end_line = end_module_lines[which(end_module_lines > start_line)[1]]
-  module_data = f[(start_line+1):(end_line-1)]
-  writeLines(module_data, '/tmp/temp.txt')
-  read.csv('/tmp/temp.txt', sep = '\t', header = header, comment.char = comment.char)
-}
-```
-
-
-
-### Per base sequence quality
-
-```{r 'per base sequence quality', fig.width=10}
-## reads 1
-pbsq_1 = extract_data_module(paste0(opt$X_d, '/read_1_fastqc/fastqc_data.txt'), 'Per base sequence quality')
-pbsq_1$id = 1:length(pbsq_1$X.Base)
-pbsq_1$trim = 'before'
-
-## reads 2
-pbsq_2 = extract_data_module(paste0(opt$X_d, '/read_2_fastqc/fastqc_data.txt'), 'Per base sequence quality')
-pbsq_2$id = 1:length(pbsq_2$X.Base)
-pbsq_2$trim = 'after'
-
-comb_pbsq = rbind(pbsq_1, pbsq_2)
-comb_pbsq$trim = factor(levels = c('before', 'after'), comb_pbsq$trim)
-
-p = ggplot(data = comb_pbsq) +
-  geom_boxplot(mapping = aes(x = id,
-                             lower = Lower.Quartile,
-                             upper = Upper.Quartile,
-                             middle = Median,
-                             ymin = X10th.Percentile,
-                             ymax = X90th.Percentile,
-                             fill = "yellow"),
-               stat = 'identity') +
-  geom_line(mapping = aes(x = id, y = Mean, color = "red")) +
-  scale_x_continuous(name = 'Position in read (bp)', breaks = pbsq_2$id, labels = pbsq_2$X.Base) +
-  scale_y_continuous(limits = c(0, max(comb_pbsq$Upper.Quartile) + 5)) +
-  scale_fill_identity() +
-  scale_color_identity() +
-  facet_grid(. ~ trim) +
-  theme(axis.text.x = element_text(size = 5),
-        panel.background = element_rect(fill = NA),
-        panel.grid.major.y = element_line(color = 'blue', size = 0.1))
-p
-```
-
-
-### Per tile sequence quality
-
-```{r 'per tile sequence quality', fig.width=10}
-## check if 'per tile sequence quality' module exits or not
-check_ptsq = grep('Per tile sequence quality', readLines(paste0(opt$X_d, '/read_1_fastqc/fastqc_data.txt')))
-if (length(check_ptsq) > 0) {
-    ## reads 1
-  ptsq_1 = extract_data_module(paste0(opt$X_d, '/read_1_fastqc/fastqc_data.txt'), 'Per tile sequence quality')
-  ptsq_1$trim = 'before'
-
-  ## reads 2
-  ptsq_2 = extract_data_module(paste0(opt$X_d, '/read_2_fastqc/fastqc_data.txt'), 'Per tile sequence quality')
-  ptsq_2$trim = 'after'
-
-  comb_ptsq = rbind(ptsq_1, ptsq_2)
-  comb_ptsq$trim = factor(levels = c('before', 'after'), comb_ptsq$trim)
-  comb_ptsq$Base = factor(levels = unique(comb_ptsq$Base), comb_ptsq$Base)
-
-  # convert integers to charaters
-  # comb_ptsq$Tile = as.character(comb_ptsq$X.Tile)
-
-  p = ggplot(data = comb_ptsq) +
-    geom_raster(mapping = aes(x = Base, y = X.Tile, fill = Mean)) +
-    facet_grid(. ~ trim) +
-    scale_x_discrete(name = "Position in read (bp)") +
-    scale_y_continuous(name = "") +
-    scale_fill_gradient(low = "blue", high = "red") +
-    theme(axis.text.x = element_text(size = 5, angle = 90),
-          axis.text.y = element_text(size = 5),
-        panel.background = element_rect(fill = NA))
-  ggplotly(p)
-} else {
-  print('No "per tile sequence quality" data')
-}
-```
-
-### Per sequence quality score
-
-```{r 'Per sequence quality score', fig.width=10}
-## reads 1
-psqs_1 = extract_data_module(paste0(opt$X_d, '/read_1_fastqc/fastqc_data.txt'), 'Per sequence quality scores')
-psqs_1$trim = 'before'
-
-## reads 2
-psqs_2 = extract_data_module(paste0(opt$X_d, '/read_2_fastqc/fastqc_data.txt'), 'Per sequence quality scores')
-psqs_2$trim = 'after'
-
-comb_psqs = rbind(psqs_1, psqs_2)
-comb_psqs$trim = factor(levels = c('before', 'after'), comb_psqs$trim)
-
-p = ggplot(data = comb_psqs) +
-  geom_line(mapping = aes(x = X.Quality, y = Count), color = 'red') +
-  facet_grid(. ~ trim) +
-  scale_x_continuous(name = 'Mean Sequence Qaulity (Phred Score)',
-                     limits = c(min(comb_psqs$X.Quality), max(comb_psqs$X.Quality))) +
-  scale_y_continuous(name = '') +
-  theme(panel.background = element_rect(fill = NA),
-        axis.line = element_line(),
-        panel.grid.major.y = element_line(color = 'blue', size = 0.1))
-p
-```
-
-### Per base sequence content
-
-```{r 'Per base sequence content', fig.width=10}
-## reads 1
-pbsc_1 = extract_data_module(paste0(opt$X_d, '/read_1_fastqc/fastqc_data.txt'), 'Per base sequence content')
-pbsc_1$id = 1:length(pbsc_1$X.Base)
-
-melt_pbsc_1 = melt(pbsc_1, id=c('X.Base', 'id'))
-melt_pbsc_1$trim = 'before'
-
-
-## reads 2
-pbsc_2 = extract_data_module(paste0(opt$X_d, '/read_2_fastqc/fastqc_data.txt'), 'Per base sequence content')
-pbsc_2$id = 1:length(pbsc_2$X.Base)
-
-melt_pbsc_2 = melt(pbsc_2, id=c('X.Base', 'id'))
-melt_pbsc_2$trim = 'after'
-
-comb_pbsc = rbind(melt_pbsc_1, melt_pbsc_2)
-comb_pbsc$trim = factor(levels = c('before', 'after'), comb_pbsc$trim)
-
-p = ggplot(data = comb_pbsc) +
-  geom_line(mapping = aes(x = id, y = value, color = variable)) +
-  facet_grid(. ~ trim) +
-  xlim(min(comb_pbsc$id), max(comb_pbsc$id)) +
-  ylim(0, 100) +
-  xlab('Position in read (bp)') +
-  ylab('') +
-  scale_color_discrete(name = '') +
-  theme_classic()
-ggplotly(p)
-```
-
-### Per sequence GC content
-
-```{r 'Per sequence GC content', fig.width=10}
-## reads 1
-psGCc_1 = extract_data_module(paste0(opt$X_d, '/read_1_fastqc/fastqc_data.txt'), 'Per sequence GC content')
-psGCc_1$trim = 'before'
-
-## reads 2
-psGCc_2 = extract_data_module(paste0(opt$X_d, '/read_2_fastqc/fastqc_data.txt'), 'Per sequence GC content')
-psGCc_2$trim = 'after'
-
-comb_psGCc = rbind(psGCc_1, psGCc_2)
-comb_psGCc$trim = factor(levels = c('before', 'after'), comb_psGCc$trim)
-
-p = ggplot(data = comb_psGCc, aes(x = X.GC.Content, y = Count)) +
-  geom_line(color = 'red') +
-  facet_grid(. ~ trim) +
-  xlab('Mean Sequence Qaulity (Phred Score)') +
-  ylab('') +
-  scale_color_discrete(name = '') +
-  theme_classic()
-ggplotly(p)
-```
-
-
-### Per base N content
-
-```{r 'Per base N content', fig.width=10}
-## reads 1
-pbNc_1 = extract_data_module(paste0(opt$X_d, '/read_1_fastqc/fastqc_data.txt'), 'Per base N content')
-pbNc_1$id = 1:length(pbNc_1$X.Base)
-pbNc_1$trim = 'before'
-
-## reads 2
-pbNc_2 = extract_data_module(paste0(opt$X_d, '/read_2_fastqc/fastqc_data.txt'), 'Per base N content')
-pbNc_2$id = 1:length(pbNc_2$X.Base)
-pbNc_2$trim = 'after'
-
-comb_pbNc = rbind(pbNc_1, pbNc_2)
-comb_pbNc$trim = factor(levels = c('before', 'after'), comb_pbNc$trim)
-
-p = ggplot(data = comb_pbNc, aes(x = id, y = N.Count)) +
-  geom_line(color = 'red') +
-  scale_x_continuous(breaks = pbNc_2$id, labels = pbNc_2$X.Base) +
-  facet_grid(. ~ trim) +
-  ylim(0, 1) +
-  xlab('N-Count') +
-  ylab('') +
-  theme(axis.text.x = element_text(size = 5),
-        axis.line = element_line(),
-        panel.background = element_rect(fill = NA))
-ggplotly(p)
-```
-
-
-### Sequence Length Distribution
-
-```{r 'Sequence Length Distribution', fig.width=10}
-## reads 1
-sld_1 = extract_data_module(paste0(opt$X_d, '/read_1_fastqc/fastqc_data.txt'), 'Sequence Length Distribution')
-sld_1$id = 1:length(sld_1$X.Length)
-sld_1$trim = 'before'
-
-## reads 2
-sld_2 = extract_data_module(paste0(opt$X_d, '/read_2_fastqc/fastqc_data.txt'), 'Sequence Length Distribution')
-sld_2$id = 1:length(sld_2$X.Length)
-sld_2$trim = 'after'
-
-comb_sld = rbind(sld_1, sld_2)
-comb_sld$trim = factor(levels = c('before', 'after'), comb_sld$trim)
-
-p = ggplot(data = comb_sld, aes(x = id, y = Count)) +
-  geom_line(color = 'red') +
-  scale_x_continuous(breaks = sld_2$id, labels = sld_2$X.Length) +
-  facet_grid(. ~ trim) +
-  xlab('Sequence Length (bp)') +
-  ylab('') +
-  theme(axis.text.x = element_text(size = 5),
-        panel.background = element_rect(fill = NA),
-        axis.line = element_line(),
-        plot.margin = margin(2,2,2,10) )
-ggplotly(p)
-```
-
-### Sequence Duplication Levels
-
-```{r 'Sequence Duplication Levels', fig.width=10}
-## reads 1
-sdl_1 = extract_data_module(paste0(opt$X_d, '/read_1_fastqc/fastqc_data.txt'), 'Sequence Duplication Levels', header = FALSE, comment.char = '#')
-names(sdl_1) = c('Duplication_Level', 'Percentage_of_deduplicated', 'Percentage_of_total')
-sdl_1$id = 1:length(sdl_1$Duplication_Level)
-
-melt_sdl_1 = melt(sdl_1, id=c('Duplication_Level', 'id'))
-melt_sdl_1$trim = 'before'
-
-
-## reads 2
-sdl_2 = extract_data_module(paste0(opt$X_d, '/read_2_fastqc/fastqc_data.txt'), 'Sequence Duplication Levels', header = FALSE, comment.char = '#')
-names(sdl_2) = c('Duplication_Level', 'Percentage_of_deduplicated', 'Percentage_of_total')
-sdl_2$id = 1:length(sdl_2$Duplication_Level)
-
-melt_sdl_2 = melt(sdl_2, id=c('Duplication_Level', 'id'))
-melt_sdl_2$trim = 'after'
-
-comb_sdl = rbind(melt_sdl_1, melt_sdl_2)
-comb_sdl$trim = factor(levels = c('before', 'after'), comb_sdl$trim)
-
-p = ggplot(data = comb_sdl) +
-  geom_line(mapping = aes(x = id, y = value, color = variable)) +
-  scale_x_continuous(breaks = sdl_2$id, labels = sdl_2$Duplication_Level) +
-  facet_grid(. ~ trim) +
-  xlab('Sequence Duplication Level') +
-  ylab('') +
-  scale_color_discrete(name = '') +
-  theme(axis.text.x = element_text(size = 5),
-        panel.background = element_rect(fill = NA),
-        axis.line = element_line())
-p
-```
-
-### Adapter Content
-
-```{r 'Adapter Content', fig.width=10}
-## reads 1
-ac_1 = extract_data_module(paste0(opt$X_d, '/read_1_fastqc/fastqc_data.txt'), 'Adapter Content')
-ac_1$id = 1:length(ac_1$X.Position)
-
-melt_ac_1 = melt(ac_1, id=c('X.Position', 'id'))
-melt_ac_1$trim = 'before'
-
-## reads 2
-ac_2 = extract_data_module(paste0(opt$X_d, '/read_2_fastqc/fastqc_data.txt'), 'Adapter Content')
-ac_2$id = 1:length(ac_2$X.Position)
-
-melt_ac_2 = melt(ac_2, id=c('X.Position', 'id'))
-melt_ac_2$trim = 'after'
-
-comb_ac = rbind(melt_ac_1, melt_ac_2)
-comb_ac$trim = factor(levels = c('before', 'after'), comb_ac$trim)
-
-p = ggplot(data = comb_ac, aes(x = id, y = value, color = variable)) +
-  geom_line() +
-  facet_grid(. ~ trim) +
-  xlim(min(comb_ac$id), max(comb_ac$id)) +
-  ylim(0, 1) +
-  xlab('Position in read (bp)') +
-  ylab('') +
-  scale_color_discrete(name = '') +
-  theme(axis.text.x = element_text(size = 5),
-        panel.background = element_rect(fill = NA),
-        axis.line = element_line())
-ggplotly(p)
-```
-
-### Kmer Content {.tabset}
-
-#### Before
-
-```{r 'Kmer Content (before)', fig.width=10}
-kc_1 = extract_data_module(paste0(opt$X_d, '/read_1_fastqc/fastqc_data.txt'), 'Kmer Content')
-knitr::kable(kc_1)
-```
-
-#### After
-```{r 'Kmer Content (after)', fig.width=10}
-kc_2 = extract_data_module(paste0(opt$X_d, '/read_2_fastqc/fastqc_data.txt'), 'Kmer Content')
-knitr::kable(kc_2)
-```
-
-
-
-
------------------------------------------
-## Output
-
-```{r, echo=FALSE}
-# create a div container to store the file tree interface
-tags$div(
-  id="jstree",
-  file_tree(Sys.getenv('REPORT_FILES_PATH'))
-)
-```
\ No newline at end of file
--- a/fastqc.xml	Wed Apr 25 15:55:23 2018 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,124 +0,0 @@
-<tool name="aurora_fastqc" id='aurora_fastqc' version="2.1.1">
-    <description>
-        Evaluate short reads with FastQC software on a single or a paired of untrimmed and trimmed reads
-        files.
-    </description>
-    <requirements>
-        <requirement type="package" version="1.15.0.6-0">pandoc</requirement>
-        <requirement type="package" version="1.20.0">r-getopt</requirement>
-        <requirement type="package" version="1.3">r-rmarkdown</requirement>
-        <requirement type="package" version="1.8.4">r-plyr</requirement>
-        <requirement type="package" version="1.1.0">r-stringr</requirement>
-        <requirement type="package" version="0.2">r-dt</requirement>
-        <requirement type="package" version="1.4.2">r-reshape2</requirement>
-        <requirement type="package" version="4.5.6">r-plotly</requirement>
-        <requirement type="package" version="0.3.5">r-htmltools</requirement>
-        <requirement type="package" version="0.11.5">fastqc</requirement>
-        <requirement type="package" version="0.9.10">xorg-libxrender</requirement>
-        <requirement type="package" version="1.2.2">xorg-libsm</requirement>
-        <requirement type="package" version="6.0">unzip</requirement>
-    </requirements>
-    <stdio>
-        <regex match="XXX" source="stderr" level="warning"
-               description="Check the warnings_and_errors.txt file for more details."/>
-    </stdio>
-    <command><![CDATA[
-
-        ######### each aurora tool generates a html file and have an files path directory associated with it.
-        mkdir -p $report.files_path &&
-
-        ######### three important paths:
-        #########   1. path to tool installation directory
-        #########   2. path to report html
-        #########   3. path to files_path directory associated with the report output.
-        export TOOL_INSTALL_DIR='${__tool_directory__}' &&
-        export REPORT='$report' &&
-        export REPORT_FILES_PATH='$report.files_path' &&
-
-        ############ copy jstree directory cp -r ${__tool_directory__}/vakata-jstree-3.3.5 ./ &&
-
-        ############ create a hidden file to store r markdown rendering log
-        touch $report.files_path/.r_rendering.log.txt &&
-
-        Rscript '${__tool_directory__}/fastqc_render.R'
-
-		    -o $report
-		    -d $report.files_path
-
-		    -r $reads_1
-            -n '$reads_1.name'
-            -R $reads_2
-            -N '$reads_2.name'
-            -c $contaminants
-            -l $limits
-
-    ]]></command>
-    <inputs>
-        <param format="fastq,fastq.gz,fastq.bz2,bam,sam" name="reads_1" type="data" optional="false"
-               label="Short reads before trimming"
-               help="Short reads data from history. This could be reads before trimming."/>
-        <param format="fastq,fastq.gz,fastq.bz2,bam,sam" name="reads_2" type="data" label="Short reads after trimming"
-               help="Short reads data from history. This could be reads after trimming."/>
-        <param name="contaminants" type="data" format="tabular" optional="true" label="Contaminant list"
-               help="Specifies a non-default file which contains the list of adapter sequences which will be explicitly
-               searched against the library. The file must contain sets of named adapters in the form name[tab]sequence.
-               Lines prefixed with a hash will be ignored."/>
-        <param name="limits" type="data" format="txt" optional="true" label="Submodule and Limit specifing file"
-               help="Specifies a non-default file which contains a set of criteria which will be used to determine the
-               warn/error limits for the various modules. This file can also be used to selectively remove some modules
-               from the output all together. The format needs to mirror the default limits.txt file found in the
-               Configuration folder."/>
-
-    </inputs>
-    <outputs>
-        <data format="html" name="report" label="${tool.name} on ${on_string}"/>
-    </outputs>
-    <citations>
-        <citation type="bibtex"><![CDATA[
-            @misc{bioinformatics2014fastqc,
-            title={FastQC},
-            author={Bioinformatics, Babraham},
-            year={2014}
-            }
-        ]]></citation>
-        <citation type="bibtex"><![CDATA[
-            @article{allaire2016rmarkdown,
-            title={rmarkdown: Dynamic Documents for R, 2016},
-            author={Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff
-            and Wickham, Hadley and Atkins, Aron and Hyndman, Rob},
-            journal={R package version 0.9},
-            volume={6},
-            year={2016}
-            }
-        ]]></citation>
-        <citation type="bibtex"><![CDATA[
-            @book{xie2015dynamic,
-            title={Dynamic Documents with R and knitr},
-            author={Xie, Yihui},
-            volume={29},
-            year={2015},
-            publisher={CRC Press}
-            }
-        ]]></citation>
-        <citation type="bibtex"><![CDATA[
-            @misc{plotly2017,
-            title = {plotly: Create Interactive Web Graphics via 'plotly.js'},
-            author = {Carson Sievert and Chris Parmer and Toby Hocking and Scott Chamberlain and Karthik Ram and
-            Marianne Corvellec and Pedro Despouy},
-            year = {2017},
-            note = {R package version 4.6.0},
-            url = {https://CRAN.R-project.org/package=plotly},
-            }
-        ]]></citation>
-        <citation type="bibtex"><![CDATA[
-            @Book{ggplot22016,
-            author = {Hadley Wickham},
-            title = {ggplot2: Elegant Graphics for Data Analysis},
-            publisher = {Springer-Verlag New York},
-            year = {2009},
-            isbn = {978-0-387-98140-6},
-            url = {http://ggplot2.org},
-            }
-        ]]></citation>
-    </citations>
-</tool>
--- a/fastqc_render.R	Wed Apr 25 15:55:23 2018 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,57 +0,0 @@
-##============ Sink warnings and errors to a file ==============
-## use the sink() function to wrap all code within it.
-##==============================================================
-zz = file(paste0(Sys.getenv('REPORT_FILES_PATH'), '/.r_rendering.log.txt'))
-sink(zz)
-sink(zz, type = 'message')
-
-#============== preparation ====================================
-options(stringsAsFactors = FALSE)
-# import libraries
-#------------------------------------------------------------------
-# ADD MORE LIBRARIES HERE IF YOUR TOOL DEPENDS ON OTHER R LIBRARIES
-#------------------------------------------------------------------
-library('getopt')
-library('rmarkdown')
-library('htmltools')
-library(plyr)
-library(dplyr)
-library(stringr)
-library(DT)
-library(reshape2)
-library(plotly)
-
-# load helper functions
-source(paste0(Sys.getenv('TOOL_INSTALL_DIR'), '/helper.R'))
-# import getopt specification matrix from a csv file
-opt = getopt(getopt_specification_matrix('getopt_specification.csv',
-                                         tool_dir=Sys.getenv('TOOL_INSTALL_DIR')))
-# define environment variables for all input values. this is useful when we
-# want to use input values by other programming language in r markdown
-do.call(Sys.setenv, opt[-1])
-#===============================================================
-
-
-#======================== render Rmd files =========================
-# NOTICE:
-#       we should copy all rmarkdown files from tool install directory to REPORT_FILES_PATH directory.
-#       and render rmarkdown files in the REPORT_FILES_PATH directory.
-file.copy(from = paste0(Sys.getenv('TOOL_INSTALL_DIR'), '/vakata-jstree-3.3.5'),
-          to = Sys.getenv('REPORT_FILES_PATH'), recursive = TRUE)
-system(command = 'cp -r ${TOOL_INSTALL_DIR}/*.Rmd ${REPORT_FILES_PATH}')
-
-#----------------BELOW IS WHERE YOU NEED TO CUSTOMIZE ---------------------
-render(input = paste0(Sys.getenv('REPORT_FILES_PATH'), '/fastqc.Rmd'))
-# add more lines below if there are more Rmd files to be rendered
-
-#===============================================================
-
-
-#============== expose outputs to galaxy history ===============
-system(command = 'sh ${TOOL_INSTALL_DIR}/expose-outputs.sh')
-#===============================================================
-
-
-##--------end of code rendering .Rmd templates----------------
-sink()
-##=========== End of sinking output=============================
\ No newline at end of file
--- a/getopt_specification.csv	Wed Apr 25 15:55:23 2018 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,9 +0,0 @@
-short flag,argument mask,data type,variable name
-o,1,character,report
-d,1,character,report.files_path
-r,1,character,reads_1
-n,1,character,reads_1.name
-R,1,character,reads_2
-N,1,character,reads_2.name
-c,1,character,contaminants
-l,1,character,limits
\ No newline at end of file
--- a/helper.R	Wed Apr 25 15:55:23 2018 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,82 +0,0 @@
-#' \code{getopt_specification_matrix} returns a getopt specification matrix.
-#'
-#' @param specification_file a cvs file within the \code{galaxy_tool_directory} which stores getopt specification matrix data.
-#' The first column are short flags, the second column are argument masks, the third column
-#' is data types. The fourth column are variable names used in the tool XML. These three columns are required.
-#' @param gtg_name the name of a running GTG.
-getopt_specification_matrix = function(specification_file,
-                                       gtg_name = 'gtg',
-                                       tool_dir = Sys.getenv('TOOL_DIR')) {
-  df = read.csv(
-    paste0(tool_dir, '/', specification_file),
-    header = TRUE,
-    stringsAsFactors = FALSE
-  )
-  # check if there are duplicated short flags
-  short_flags = df[, 1]
-  if (length(unique(short_flags)) < length(short_flags)) {
-    cat('----Duplicated short flags found ----\n')
-    cat('short flags: ', df[, 1][duplicated(df[, 1])], '\n')
-    stop('Duplicated short flags are not allowed.')
-  }
-
-  # use short flags to generate long flags
-  long_flags = paste0('X_', df[, 1])
-
-  # specification matrix
-  df2 = data.frame(
-    long_flags = long_flags,
-    short_flags = df[, 1],
-    argument_mask = df[, 2],
-    data_type = df[, 3]
-  )
-
-  as.matrix(df2)
-}
-
-
-
-#' \code{file_tree} generate file tree of a directory in the format of HTML lists.
-#'
-#' @param dir the path to the directory for generating the file tree.
-#' @param output_dir the REPORT_FILES_PATH folder name, which has the name style: dataset_NUMBER_files.
-# define a recursive function to build html string of the file tree
-file_tree = function(dir = '.') {
-  # get the OUTPUT_DIR folder data: dataset_NUMBER_files
-  report_files_path = Sys.getenv('REPORT_FILES_PATH')
-  output_dir = tail(strsplit(report_files_path, '/')[[1]], 1)
-
-  files = list.files(path = dir,
-                     recursive = FALSE,
-                     full.names = TRUE)
-  # files also include directorys, need to remove directorys
-  files = files[!dir.exists(files)]
-  dirs = list.dirs(path = dir,
-                   recursive = FALSE,
-                   full.names = TRUE)
-  tags$ul({
-    if (length(files) > 0) {
-      lapply(files, function(x) {
-        path_end = tail(strsplit(x, '/')[[1]], 1)
-        href_path = strsplit(x, paste0(output_dir, '/'))[[1]][2]
-        li_item = tags$li(tags$a(path_end, href = href_path))
-        li_item$attribs = list('data-jstree' = '{"icon":"jstree-file"}')
-        li_item
-      })
-    }
-  },
-  {
-    if (length(dirs) > 0) {
-      lapply(dirs, function(x) {
-        path_end = tail(strsplit(x, '/')[[1]], 1)
-        # hide vakata-jstree-3.3.5 folder
-        if (path_end != 'vakata-jstree-3.3.5') {
-          # x_path = strsplit(x, paste0(output_dir, '/'))[[1]][2]
-          li_item = tags$li(path_end, file_tree(x))
-          li_item$attribs = list('data-jstree' = '{"icon":"jstree-folder"}')
-          li_item
-        }
-      })
-    }
-  })
-}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rmarkdown_report.Rmd	Mon Apr 30 16:40:21 2018 -0400
@@ -0,0 +1,404 @@
+---
+title: 'Short reads evaluation with [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)'
+output:
+    html_document:
+      highlight: pygments
+---
+
+```{css}
+pre code, pre, code {
+  white-space: pre !important;
+  overflow-x: scroll !important;
+  word-break: keep-all !important;
+  word-wrap: initial !important;
+}
+```
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+knitr::opts_knit$set(progress = FALSE)
+knitr::opts_chunk$set(error = TRUE, echo = FALSE)
+```
+
+```{r, echo=FALSE}
+# to make the css theme to work, <link></link> tags cannot be added directly
+# as <script></script> tags as below.
+# it has to be added using a code chunk with the htmltool functions!!!
+css_link = tags$link()
+css_link$attribs = list(rel="stylesheet", href="vakata-jstree-3.3.5/dist/themes/default/style.min.css")
+css_link
+```
+
+```{r, eval=FALSE, echo=FALSE}
+# this code chunk is purely for adding comments
+# below is to add jQuery and jstree javascripts
+```
+<script src="https://code.jquery.com/jquery-3.3.1.min.js"></script>
+<script src="vakata-jstree-3.3.5/dist/jstree.min.js"></script>
+
+```{r, eval=FALSE, echo=FALSE}
+# this code chunk is purely for adding comments
+# javascript code below is to build the file tree interface
+# see this for how to implement opening hyperlink: https://stackoverflow.com/questions/18611317/how-to-get-i-get-leaf-nodes-in-jstree-to-open-their-hyperlink-when-clicked-when
+```
+<script>
+  $(function () {
+    // create an instance when the DOM is ready
+    $('#jstree').jstree().bind("select_node.jstree", function (e, data) {
+     window.open( data.node.a_attr.href, data.node.a_attr.target )
+    });
+  });
+</script>
+
+
+---
+# ADD YOUR DATA ANALYSIS CODE AND MARKUP TEXT BELOW TO EXTEND THIS R MARKDOWN FILE
+---
+
+
+# Run FastQC
+
+```{bash}
+sh ${TOOL_INSTALL_DIR}/build-and-run-job-scripts.sh
+```
+
+```{r echo=FALSE,results='asis'}
+# display fastqc job script
+cat('```bash\n')
+cat(readLines(paste0(Sys.getenv('REPORT_FILES_PATH'), '/job-1-script.sh')), sep = '\n')
+cat('\n```')
+```
+
+# Fastqc Output Visualization
+
+## Overview
+
+```{r eval=TRUE}
+read_1_summary = read.csv(paste0(opt$X_d, '/read_1_fastqc/summary.txt'),
+                          stringsAsFactors = FALSE,
+                          header = FALSE, sep = '\t')[, 2:1]
+read_2_summary = read.csv(paste0(opt$X_d, '/read_2_fastqc/summary.txt'),
+                          stringsAsFactors = FALSE,
+                          header = FALSE, sep = '\t')[, 1]
+combined_summary = data.frame(read_1_summary, read_2_summary, stringsAsFactors = FALSE)
+names(combined_summary) = c('MODULE', 'Pre-trimming', 'Post-trimming')
+combined_summary[combined_summary == 'FAIL'] = 'FAIL (X)'
+combined_summary[combined_summary == 'WARN'] = 'WARN (!)'
+knitr::kable(combined_summary)
+```
+
+```{r 'function definition', echo=FALSE}
+# Extract the table of one module from a FastQC `fastqc_data.txt` report.
+#
+# A module is the run of lines between the first line matching `module_name`
+# and the next line containing 'END_MODULE'.
+#
+# @param fastqc_data  path to a fastqc_data.txt file.
+# @param module_name  pattern (as understood by grep) identifying the module header line.
+# @param header       passed to read.csv: whether the first module line holds column names.
+# @param comment.char passed to read.csv: e.g. '#' to skip '#'-prefixed lines.
+# @return a data.frame with the module rows; an empty data.frame when there
+#         is no line between the module header and its END_MODULE marker.
+extract_data_module = function(fastqc_data, module_name, header = TRUE, comment.char = "") {
+  f = readLines(fastqc_data)
+  start_line = grep(module_name, f)
+  if (length(start_line) == 0) {
+    stop('Module "', module_name, '" not found in ', fastqc_data, call. = FALSE)
+  }
+  # the pattern may match more than one line; the module starts at the first match
+  start_line = start_line[1]
+  end_module_lines = grep('END_MODULE', f)
+  end_line = end_module_lines[end_module_lines > start_line][1]
+  if (is.na(end_line)) {
+    stop('No END_MODULE marker after module "', module_name, '" in ', fastqc_data, call. = FALSE)
+  }
+  # header immediately followed by END_MODULE: (start+1):(end-1) would count
+  # downwards and pick up the marker lines themselves
+  if (end_line - start_line < 2) {
+    return(data.frame())
+  }
+  module_data = f[(start_line + 1):(end_line - 1)]
+  # parse in memory rather than through a fixed, shared file under /tmp
+  read.csv(text = module_data, sep = '\t', header = header, comment.char = comment.char)
+}
+```
+
+
+
+### Per base sequence quality
+
+```{r 'per base sequence quality', fig.width=10}
+## reads 1
+pbsq_1 = extract_data_module(paste0(opt$X_d, '/read_1_fastqc/fastqc_data.txt'), 'Per base sequence quality')
+pbsq_1$id = 1:length(pbsq_1$X.Base)
+pbsq_1$trim = 'before'
+
+## reads 2
+pbsq_2 = extract_data_module(paste0(opt$X_d, '/read_2_fastqc/fastqc_data.txt'), 'Per base sequence quality')
+pbsq_2$id = 1:length(pbsq_2$X.Base)
+pbsq_2$trim = 'after'
+
+comb_pbsq = rbind(pbsq_1, pbsq_2)
+comb_pbsq$trim = factor(levels = c('before', 'after'), comb_pbsq$trim)
+
+p = ggplot(data = comb_pbsq) +
+  geom_boxplot(mapping = aes(x = id,
+                             lower = Lower.Quartile,
+                             upper = Upper.Quartile,
+                             middle = Median,
+                             ymin = X10th.Percentile,
+                             ymax = X90th.Percentile,
+                             fill = "yellow"),
+               stat = 'identity') +
+  geom_line(mapping = aes(x = id, y = Mean, color = "red")) +
+  scale_x_continuous(name = 'Position in read (bp)', breaks = pbsq_2$id, labels = pbsq_2$X.Base) +
+  scale_y_continuous(limits = c(0, max(comb_pbsq$Upper.Quartile) + 5)) +
+  scale_fill_identity() +
+  scale_color_identity() +
+  facet_grid(. ~ trim) +
+  theme(axis.text.x = element_text(size = 5),
+        panel.background = element_rect(fill = NA),
+        panel.grid.major.y = element_line(color = 'blue', size = 0.1))
+p
+```
+
+
+### Per tile sequence quality
+
+```{r 'per tile sequence quality', fig.width=10}
+## check whether the 'per tile sequence quality' module exists or not
+check_ptsq = grep('Per tile sequence quality', readLines(paste0(opt$X_d, '/read_1_fastqc/fastqc_data.txt')))
+if (length(check_ptsq) > 0) {
+    ## reads 1
+  ptsq_1 = extract_data_module(paste0(opt$X_d, '/read_1_fastqc/fastqc_data.txt'), 'Per tile sequence quality')
+  ptsq_1$trim = 'before'
+
+  ## reads 2
+  ptsq_2 = extract_data_module(paste0(opt$X_d, '/read_2_fastqc/fastqc_data.txt'), 'Per tile sequence quality')
+  ptsq_2$trim = 'after'
+
+  comb_ptsq = rbind(ptsq_1, ptsq_2)
+  comb_ptsq$trim = factor(levels = c('before', 'after'), comb_ptsq$trim)
+  comb_ptsq$Base = factor(levels = unique(comb_ptsq$Base), comb_ptsq$Base)
+
+  # convert integers to characters
+  # comb_ptsq$Tile = as.character(comb_ptsq$X.Tile)
+
+  p = ggplot(data = comb_ptsq) +
+    geom_raster(mapping = aes(x = Base, y = X.Tile, fill = Mean)) +
+    facet_grid(. ~ trim) +
+    scale_x_discrete(name = "Position in read (bp)") +
+    scale_y_continuous(name = "") +
+    scale_fill_gradient(low = "blue", high = "red") +
+    theme(axis.text.x = element_text(size = 5, angle = 90),
+          axis.text.y = element_text(size = 5),
+        panel.background = element_rect(fill = NA))
+  ggplotly(p)
+} else {
+  print('No "per tile sequence quality" data')
+}
+```
+
+### Per sequence quality score
+
+```{r 'Per sequence quality score', fig.width=10}
+## Distribution of per-sequence mean quality, before vs. after trimming.
+## reads 1
+psqs_1 = extract_data_module(paste0(opt$X_d, '/read_1_fastqc/fastqc_data.txt'), 'Per sequence quality scores')
+psqs_1$trim = 'before'
+
+## reads 2
+psqs_2 = extract_data_module(paste0(opt$X_d, '/read_2_fastqc/fastqc_data.txt'), 'Per sequence quality scores')
+psqs_2$trim = 'after'
+
+comb_psqs = rbind(psqs_1, psqs_2)
+# fix the facet order: 'before' on the left, 'after' on the right
+comb_psqs$trim = factor(levels = c('before', 'after'), comb_psqs$trim)
+
+p = ggplot(data = comb_psqs) +
+  geom_line(mapping = aes(x = X.Quality, y = Count), color = 'red') +
+  facet_grid(. ~ trim) +
+  scale_x_continuous(name = 'Mean Sequence Quality (Phred Score)',
+                     limits = c(min(comb_psqs$X.Quality), max(comb_psqs$X.Quality))) +
+  scale_y_continuous(name = '') +
+  theme(panel.background = element_rect(fill = NA),
+        axis.line = element_line(),
+        panel.grid.major.y = element_line(color = 'blue', size = 0.1))
+p
+```
+
+### Per base sequence content
+
+```{r 'Per base sequence content', fig.width=10}
+## reads 1
+pbsc_1 = extract_data_module(paste0(opt$X_d, '/read_1_fastqc/fastqc_data.txt'), 'Per base sequence content')
+pbsc_1$id = 1:length(pbsc_1$X.Base)
+
+melt_pbsc_1 = melt(pbsc_1, id=c('X.Base', 'id'))
+melt_pbsc_1$trim = 'before'
+
+
+## reads 2
+pbsc_2 = extract_data_module(paste0(opt$X_d, '/read_2_fastqc/fastqc_data.txt'), 'Per base sequence content')
+pbsc_2$id = 1:length(pbsc_2$X.Base)
+
+melt_pbsc_2 = melt(pbsc_2, id=c('X.Base', 'id'))
+melt_pbsc_2$trim = 'after'
+
+comb_pbsc = rbind(melt_pbsc_1, melt_pbsc_2)
+comb_pbsc$trim = factor(levels = c('before', 'after'), comb_pbsc$trim)
+
+p = ggplot(data = comb_pbsc) +
+  geom_line(mapping = aes(x = id, y = value, color = variable)) +
+  facet_grid(. ~ trim) +
+  xlim(min(comb_pbsc$id), max(comb_pbsc$id)) +
+  ylim(0, 100) +
+  xlab('Position in read (bp)') +
+  ylab('') +
+  scale_color_discrete(name = '') +
+  theme_classic()
+ggplotly(p)
+```
+
+### Per sequence GC content
+
+```{r 'Per sequence GC content', fig.width=10}
+## Distribution of per-sequence GC content, before vs. after trimming.
+## reads 1
+psGCc_1 = extract_data_module(paste0(opt$X_d, '/read_1_fastqc/fastqc_data.txt'), 'Per sequence GC content')
+psGCc_1$trim = 'before'
+
+## reads 2
+psGCc_2 = extract_data_module(paste0(opt$X_d, '/read_2_fastqc/fastqc_data.txt'), 'Per sequence GC content')
+psGCc_2$trim = 'after'
+
+comb_psGCc = rbind(psGCc_1, psGCc_2)
+# fix the facet order: 'before' on the left, 'after' on the right
+comb_psGCc$trim = factor(levels = c('before', 'after'), comb_psGCc$trim)
+
+# the x axis shows the X.GC.Content column, so it is labelled as GC content
+p = ggplot(data = comb_psGCc, aes(x = X.GC.Content, y = Count)) +
+  geom_line(color = 'red') +
+  facet_grid(. ~ trim) +
+  xlab('Mean GC content (%)') +
+  ylab('') +
+  scale_color_discrete(name = '') +
+  theme_classic()
+ggplotly(p)
+```
+
+
+### Per base N content
+
+```{r 'Per base N content', fig.width=10}
+## N content along the read, before vs. after trimming.
+## reads 1
+pbNc_1 = extract_data_module(paste0(opt$X_d, '/read_1_fastqc/fastqc_data.txt'), 'Per base N content')
+# running row index used as a continuous x position (X.Base may hold ranges such as '10-14')
+pbNc_1$id = seq_len(nrow(pbNc_1))
+pbNc_1$trim = 'before'
+
+## reads 2
+pbNc_2 = extract_data_module(paste0(opt$X_d, '/read_2_fastqc/fastqc_data.txt'), 'Per base N content')
+pbNc_2$id = seq_len(nrow(pbNc_2))
+pbNc_2$trim = 'after'
+
+comb_pbNc = rbind(pbNc_1, pbNc_2)
+# fix the facet order: 'before' on the left, 'after' on the right
+comb_pbNc$trim = factor(levels = c('before', 'after'), comb_pbNc$trim)
+
+# x is the position in the read (ticks relabelled with X.Base);
+# the plotted quantity, N-Count, labels the y axis
+p = ggplot(data = comb_pbNc, aes(x = id, y = N.Count)) +
+  geom_line(color = 'red') +
+  scale_x_continuous(breaks = pbNc_2$id, labels = pbNc_2$X.Base) +
+  facet_grid(. ~ trim) +
+  ylim(0, 1) +
+  xlab('Position in read (bp)') +
+  ylab('N-Count') +
+  theme(axis.text.x = element_text(size = 5),
+        axis.line = element_line(),
+        panel.background = element_rect(fill = NA))
+ggplotly(p)
+```
+
+
+### Sequence Length Distribution
+
+```{r 'Sequence Length Distribution', fig.width=10}
+## reads 1
+sld_1 = extract_data_module(paste0(opt$X_d, '/read_1_fastqc/fastqc_data.txt'), 'Sequence Length Distribution')
+sld_1$id = 1:length(sld_1$X.Length)
+sld_1$trim = 'before'
+
+## reads 2
+sld_2 = extract_data_module(paste0(opt$X_d, '/read_2_fastqc/fastqc_data.txt'), 'Sequence Length Distribution')
+sld_2$id = 1:length(sld_2$X.Length)
+sld_2$trim = 'after'
+
+comb_sld = rbind(sld_1, sld_2)
+comb_sld$trim = factor(levels = c('before', 'after'), comb_sld$trim)
+
+p = ggplot(data = comb_sld, aes(x = id, y = Count)) +
+  geom_line(color = 'red') +
+  scale_x_continuous(breaks = sld_2$id, labels = sld_2$X.Length) +
+  facet_grid(. ~ trim) +
+  xlab('Sequence Length (bp)') +
+  ylab('') +
+  theme(axis.text.x = element_text(size = 5),
+        panel.background = element_rect(fill = NA),
+        axis.line = element_line(),
+        plot.margin = margin(2,2,2,10) )
+ggplotly(p)
+```
+
+### Sequence Duplication Levels
+
+```{r 'Sequence Duplication Levels', fig.width=10}
+## reads 1
+sdl_1 = extract_data_module(paste0(opt$X_d, '/read_1_fastqc/fastqc_data.txt'), 'Sequence Duplication Levels', header = FALSE, comment.char = '#')
+names(sdl_1) = c('Duplication_Level', 'Percentage_of_deduplicated', 'Percentage_of_total')
+sdl_1$id = 1:length(sdl_1$Duplication_Level)
+
+melt_sdl_1 = melt(sdl_1, id=c('Duplication_Level', 'id'))
+melt_sdl_1$trim = 'before'
+
+
+## reads 2
+sdl_2 = extract_data_module(paste0(opt$X_d, '/read_2_fastqc/fastqc_data.txt'), 'Sequence Duplication Levels', header = FALSE, comment.char = '#')
+names(sdl_2) = c('Duplication_Level', 'Percentage_of_deduplicated', 'Percentage_of_total')
+sdl_2$id = 1:length(sdl_2$Duplication_Level)
+
+melt_sdl_2 = melt(sdl_2, id=c('Duplication_Level', 'id'))
+melt_sdl_2$trim = 'after'
+
+comb_sdl = rbind(melt_sdl_1, melt_sdl_2)
+comb_sdl$trim = factor(levels = c('before', 'after'), comb_sdl$trim)
+
+p = ggplot(data = comb_sdl) +
+  geom_line(mapping = aes(x = id, y = value, color = variable)) +
+  scale_x_continuous(breaks = sdl_2$id, labels = sdl_2$Duplication_Level) +
+  facet_grid(. ~ trim) +
+  xlab('Sequence Duplication Level') +
+  ylab('') +
+  scale_color_discrete(name = '') +
+  theme(axis.text.x = element_text(size = 5),
+        panel.background = element_rect(fill = NA),
+        axis.line = element_line())
+p
+```
+
+### Adapter Content
+
+```{r 'Adapter Content', fig.width=10}
+## reads 1
+ac_1 = extract_data_module(paste0(opt$X_d, '/read_1_fastqc/fastqc_data.txt'), 'Adapter Content')
+ac_1$id = 1:length(ac_1$X.Position)
+
+melt_ac_1 = melt(ac_1, id=c('X.Position', 'id'))
+melt_ac_1$trim = 'before'
+
+## reads 2
+ac_2 = extract_data_module(paste0(opt$X_d, '/read_2_fastqc/fastqc_data.txt'), 'Adapter Content')
+ac_2$id = 1:length(ac_2$X.Position)
+
+melt_ac_2 = melt(ac_2, id=c('X.Position', 'id'))
+melt_ac_2$trim = 'after'
+
+comb_ac = rbind(melt_ac_1, melt_ac_2)
+comb_ac$trim = factor(levels = c('before', 'after'), comb_ac$trim)
+
+p = ggplot(data = comb_ac, aes(x = id, y = value, color = variable)) +
+  geom_line() +
+  facet_grid(. ~ trim) +
+  xlim(min(comb_ac$id), max(comb_ac$id)) +
+  ylim(0, 1) +
+  xlab('Position in read (bp)') +
+  ylab('') +
+  scale_color_discrete(name = '') +
+  theme(axis.text.x = element_text(size = 5),
+        panel.background = element_rect(fill = NA),
+        axis.line = element_line())
+ggplotly(p)
+```
+
+### Kmer Content {.tabset}
+
+#### Before
+
+```{r 'Kmer Content (before)', fig.width=10}
+kc_1 = extract_data_module(paste0(opt$X_d, '/read_1_fastqc/fastqc_data.txt'), 'Kmer Content')
+knitr::kable(kc_1)
+```
+
+#### After
+```{r 'Kmer Content (after)', fig.width=10}
+kc_2 = extract_data_module(paste0(opt$X_d, '/read_2_fastqc/fastqc_data.txt'), 'Kmer Content')
+knitr::kable(kc_2)
+```
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rmarkdown_report.xml	Mon Apr 30 16:40:21 2018 -0400
@@ -0,0 +1,111 @@
+<tool name="aurora_fastqc" id='aurora_fastqc' version="2.1.2">
+    <description>
+        Evaluate short reads with FastQC software on a single or a pair of untrimmed and trimmed reads
+        files.
+    </description>
+    <requirements>
+        <requirement type="package" version="1.15.0.6-0">pandoc</requirement>
+        <requirement type="package" version="1.20.0">r-getopt</requirement>
+        <requirement type="package" version="1.3">r-rmarkdown</requirement>
+        <requirement type="package" version="1.8.4">r-plyr</requirement>
+        <requirement type="package" version="1.1.0">r-stringr</requirement>
+        <requirement type="package" version="0.2">r-dt</requirement>
+        <requirement type="package" version="1.4.2">r-reshape2</requirement>
+        <requirement type="package" version="4.5.6">r-plotly</requirement>
+        <requirement type="package" version="0.3.5">r-htmltools</requirement>
+        <requirement type="package" version="0.11.5">fastqc</requirement>
+        <requirement type="package" version="0.9.10">xorg-libxrender</requirement>
+        <requirement type="package" version="1.2.2">xorg-libsm</requirement>
+        <requirement type="package" version="6.0">unzip</requirement>
+    </requirements>
+    <stdio>
+        <regex match="XXX" source="stderr" level="warning"
+               description="Check the warnings_and_errors.txt file for more details."/>
+    </stdio>
+    <command><![CDATA[
+
+        ######### each aurora tool generates an html file and has a files path directory associated with it.
+        mkdir -p $report.files_path &&
+
+        ######### three important paths:
+        #########   1. path to tool installation directory
+        #########   2. path to report html
+        #########   3. path to files_path directory associated with the report output.
+        export TOOL_INSTALL_DIR='${__tool_directory__}' &&
+        export REPORT='$report' &&
+        export REPORT_FILES_PATH='$report.files_path' &&
+
+        ############ copy jstree directory cp -r ${__tool_directory__}/vakata-jstree-3.3.5 ./ &&
+
+        ############ create a hidden file to store r markdown rendering log
+        touch $report.files_path/.r_rendering.log.txt &&
+
+        Rscript '${__tool_directory__}/rmarkdown_report_render.R'
+
+		    -o $report
+		    -d $report.files_path
+
+		    -r $reads_1
+            -n '$reads_1.name'
+            -R $reads_2
+            -N '$reads_2.name'
+            -c $contaminants
+            -l $limits
+
+    ]]></command>
+    <inputs>
+        <param format="fastq,fastq.gz,fastq.bz2,bam,sam" name="reads_1" type="data" optional="false"
+               label="Short reads before trimming"
+               help="Short reads data from history. This could be reads before trimming."/>
+        <param format="fastq,fastq.gz,fastq.bz2,bam,sam" name="reads_2" type="data" label="Short reads after trimming"
+               help="Short reads data from history. This could be reads after trimming."/>
+        <param name="contaminants" type="data" format="tabular" optional="true" label="Contaminant list"
+               help="Specifies a non-default file which contains the list of adapter sequences which will be explicitly
+               searched against the library. The file must contain sets of named adapters in the form name[tab]sequence.
+               Lines prefixed with a hash will be ignored."/>
+        <param name="limits" type="data" format="txt" optional="true" label="Submodule and Limit specifying file"
+               help="Specifies a non-default file which contains a set of criteria which will be used to determine the
+               warn/error limits for the various modules. This file can also be used to selectively remove some modules
+               from the output all together. The format needs to mirror the default limits.txt file found in the
+               Configuration folder."/>
+
+    </inputs>
+    <outputs>
+        <data format="html" name="report" label="${tool.name} on ${on_string}"/>
+    </outputs>
+    <citations>
+        <citation type="bibtex"><![CDATA[
+            @misc{bioinformatics2014fastqc,
+            title={FastQC},
+            author={Bioinformatics, Babraham},
+            year={2014}
+            }
+        ]]></citation>
+        <citation type="bibtex"><![CDATA[
+            @article{allaire2016rmarkdown,
+            title={rmarkdown: Dynamic Documents for R, 2016},
+            author={Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff
+            and Wickham, Hadley and Atkins, Aron and Hyndman, Rob},
+            journal={R package version 0.9},
+            volume={6},
+            year={2016}
+            }
+        ]]></citation>
+        <citation type="bibtex"><![CDATA[
+            @book{xie2015dynamic,
+            title={Dynamic Documents with R and knitr},
+            author={Xie, Yihui},
+            volume={29},
+            year={2015},
+            publisher={CRC Press}
+            }
+        ]]></citation>
+        <citation type="bibtex"><![CDATA[
+            @online{jstree,
+            author={Bozhanov, Ivan},
+            year = 2018,
+            url = {https://www.jstree.com/}
+            }
+        ]]></citation>
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/rmarkdown_report_render.R	Mon Apr 30 16:40:21 2018 -0400
@@ -0,0 +1,161 @@
+##============ Sink warnings and errors to a file ==============
+## use the sink() function to wrap all code within it.
+##==============================================================
+# Log file inside REPORT_FILES_PATH that collects everything this script prints.
+zz = file(paste0(Sys.getenv('REPORT_FILES_PATH'), '/.r_rendering.log.txt'))
+# divert normal output to the log file ...
+sink(zz)
+# ... and messages (warnings, errors) to the same connection
+sink(zz, type = 'message')
+
+#============== preparation ====================================
+# import libraries
+#------------------------------------------------------------------
+# ADD MORE LIBRARIES HERE IF YOUR TOOL DEPENDS ON OTHER R LIBRARIES
+#------------------------------------------------------------------
+library('getopt')
+library('rmarkdown')
+library('htmltools')
+library('ggplot2')
+library('reshape2')
+library('plotly')
+#------------------------------------------------------------------
+options(stringsAsFactors = FALSE)
+
+
+# define two helper functions
+#-----: helper function 1
+#' \code{getopt_specification_matrix} builds the specification matrix handed to getopt.
+#'
+#' @param specification_file name of a csv file located in \code{tool_dir}. Its first column
+#' holds the short flags, the second column the argument masks and the third column the
+#' data types. Further columns (e.g. the variable names used in the tool XML) are not read here.
+#' @param gtg_name the name of a running GTG; not used inside this function.
+#' @param tool_dir directory that contains \code{specification_file}; defaults to the
+#' TOOL_INSTALL_DIR environment variable.
+#' @return a matrix with the columns long_flags, short_flags, argument_mask and data_type.
+#' Each long flag is the corresponding short flag prefixed with 'X_'.
+getopt_specification_matrix = function(specification_file,
+                                       gtg_name = 'gtg',
+                                       tool_dir = Sys.getenv('TOOL_INSTALL_DIR')) {
+  spec_path = paste0(tool_dir, '/', specification_file)
+  spec = read.csv(spec_path, header = TRUE, stringsAsFactors = FALSE)
+  flags = spec[, 1]
+
+  # every option must be reachable through its own short flag
+  repeated = duplicated(flags)
+  if (any(repeated)) {
+    cat('----Duplicated short flags found ----\n')
+    cat('short flags: ', flags[repeated], '\n')
+    stop('Duplicated short flags are not allowed.')
+  }
+
+  # long flags are derived from the short ones, e.g. 'r' becomes 'X_r'
+  as.matrix(data.frame(
+    long_flags = paste0('X_', flags),
+    short_flags = flags,
+    argument_mask = spec[, 2],
+    data_type = spec[, 3]
+  ))
+}
+
+#-----: helper function 2
+#' \code{file_tree} builds the file tree of a directory as nested HTML lists.
+#'
+#' Files become list items wrapping a hyperlink, sub-directories become list items
+#' that contain a recursively generated list. Every item carries a 'data-jstree'
+#' attribute that selects its icon.
+#'
+#' @param dir the path to the directory for generating the file tree.
+#' @return a \code{tags$ul} element describing the content of \code{dir}.
+# recursive function to build the html of the file tree
+file_tree = function(dir = '.') {
+  # name of the output folder, i.e. the last path component of REPORT_FILES_PATH
+  # (name style: dataset_NUMBER_files)
+  report_files_path = Sys.getenv('REPORT_FILES_PATH')
+  output_dir = tail(strsplit(report_files_path, '/')[[1]], 1)
+
+  files = list.files(path = dir,
+                     recursive = FALSE,
+                     full.names = TRUE)
+  # list.files also returns directories; drop them so only files remain
+  files = files[!dir.exists(files)]
+  dirs = list.dirs(path = dir,
+                   recursive = FALSE,
+                   full.names = TRUE)
+  tags$ul({
+    if (length(files) > 0) {
+      lapply(files, function(x) {
+        path_end = tail(strsplit(x, '/')[[1]], 1)
+        # link target: the part of the path that follows '<output_dir>/'
+        href_path = strsplit(x, paste0(output_dir, '/'))[[1]][2]
+        li_item = tags$li(tags$a(path_end, href = href_path))
+        # replaces the whole attribute list of the item with the jstree icon setting
+        li_item$attribs = list('data-jstree' = '{"icon":"jstree-file"}')
+        li_item
+      })
+    }
+  },
+  {
+    if (length(dirs) > 0) {
+      lapply(dirs, function(x) {
+        path_end = tail(strsplit(x, '/')[[1]], 1)
+        # hide vakata-jstree-3.3.5 folder
+        if (path_end != 'vakata-jstree-3.3.5') {
+          # x_path = strsplit(x, paste0(output_dir, '/'))[[1]][2]
+          # folder node: its name followed by the tree of its own content
+          li_item = tags$li(path_end, file_tree(x))
+          li_item$attribs = list('data-jstree' = '{"icon":"jstree-folder"}')
+          li_item
+        }
+      })
+    }
+  })
+}
+#----------------- end of help functions -------------------------
+
+
+# import getopt specification matrix from a csv file.
+# This script has referenced the file as 'command_line_arguments.csv' while the
+# tool directory may ship it as 'command-line-arguments.csv'; use the first name
+# that exists and keep the historical name when neither is found.
+spec_file = local({
+  candidates = c('command_line_arguments.csv', 'command-line-arguments.csv')
+  present = file.exists(file.path(Sys.getenv('TOOL_INSTALL_DIR'), candidates))
+  if (any(present)) candidates[present][1] else candidates[1]
+})
+opt = getopt(getopt_specification_matrix(spec_file,
+                                         tool_dir = Sys.getenv('TOOL_INSTALL_DIR')))
+# define environment variables for all input values. this is useful when we
+# want to use input values by other programming language in r markdown
+# (the first element of opt is dropped before the values are exported)
+do.call(Sys.setenv, opt[-1])
+#===============================================================
+
+
+#======================== render Rmd files =========================
+# copy jstree javascript library to tool output directory
+file.copy(
+  from = paste0(Sys.getenv('TOOL_INSTALL_DIR'), '/vakata-jstree-3.3.5'),
+  to = Sys.getenv('REPORT_FILES_PATH'),
+  recursive = TRUE
+)
+
+# if '_site.yml' file exists, this tool is assumed to render a website.
+# otherwise, it renders a single html.
+if (file.exists(paste0(Sys.getenv('TOOL_INSTALL_DIR'), '/_site.yml'))) {
+  # render a website: all Rmd files and the site configuration are copied to
+  # the output directory and rendered there
+  system(command = 'cp -r ${TOOL_INSTALL_DIR}/*.Rmd ${REPORT_FILES_PATH}')
+  system(command = 'cp -r ${TOOL_INSTALL_DIR}/_site.yml ${REPORT_FILES_PATH}')
+  render_site(input = Sys.getenv('REPORT_FILES_PATH'))
+} else {
+  # render a single html: work on a copy of the template inside the output directory
+  system(command = 'cp -r ${TOOL_INSTALL_DIR}/rmarkdown_report.Rmd ${REPORT_FILES_PATH}')
+  # add a few lines to 'rmarkdown_report.Rmd' to generate file tree outputs.
+  # the text below is appended verbatim to the copied Rmd, so it must stay valid R Markdown
+  jstree_lines = '
+
+  ## Outputs
+
+  ```{r, echo=FALSE}
+  # create a div container to store the file tree interface
+  library(htmltools)
+  tags$div(
+    id="jstree",
+    file_tree(Sys.getenv(\'REPORT_FILES_PATH\'))
+  )
+  ```
+  '
+  write(x = jstree_lines, append = TRUE,
+        file = paste0(Sys.getenv('REPORT_FILES_PATH'), '/rmarkdown_report.Rmd'))
+  # render the extended copy
+  render(input = paste0(Sys.getenv('REPORT_FILES_PATH'), '/rmarkdown_report.Rmd'))
+}
+#===============================================================
+
+
+#============== expose outputs to galaxy history ===============
+# The helper script has been named 'expose-outputs.sh' and
+# 'expose-outputs-to-galaxy-history.sh'; run the first one that exists in the
+# tool directory and keep the historical name when neither is found.
+expose_script = local({
+  candidates = file.path(Sys.getenv('TOOL_INSTALL_DIR'),
+                         c('expose-outputs.sh', 'expose-outputs-to-galaxy-history.sh'))
+  present = file.exists(candidates)
+  if (any(present)) candidates[present][1] else candidates[1]
+})
+system(command = paste('sh', shQuote(expose_script)))
+#===============================================================
+
+
+##--------end of code rendering .Rmd templates----------------
+# the message stream was diverted separately from normal output, so restore it
+# first, then remove the output diversion
+sink(type = 'message')
+sink()
+##=========== End of sinking output=============================
--- a/shell-script-template.sh	Wed Apr 25 15:55:23 2018 -0400
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,36 +0,0 @@
-# SHELL_SCRIPT file name
-SHELL_SCRIPT='fastqc.sh'
-
-# run SHELL_SCRIPT within tool outputs directory
-cd ${REPORT_FILES_PATH}
-
-# build job-script.sh
-cat >temp.sh <<EOF
-
-cp ${X_r} read_1.fq
-cp ${X_R} read_2.fq
-
-fastqc \\
-  -q \\
-  -c ${X_c} \\
-  -l ${X_l} \\
-  ${X_d}/read_1.fq > /dev/null 2>&1
-
-fastqc \\
-  -q \\
-  -c ${X_c} \\
-  -l ${X_l} \\
-  ${X_d}/read_2.fq > /dev/null 2>&1
-
-EOF
-
-grep -v None temp.sh > ${SHELL_SCRIPT}
-rm temp.sh
-
-# run SHELL_SCRIPT
-sh ${SHELL_SCRIPT}
-
-
-# unzip outputs
-unzip -q read_1_fastqc.zip
-unzip -q read_2_fastqc.zip
\ No newline at end of file