changeset 0:f74fdae99053 draft

planemo upload
author mingchen0919
date Tue, 27 Feb 2018 00:39:56 -0500
parents
children 645291efd2e7
files 01_evaluation_overview.Rmd 02_per_base_sequence_quality.Rmd 03_per_tile_sequence_quality.Rmd 04_per_sequence_quality_score.Rmd 05_per_base_sequence_content.Rmd 06_per_sequence_gc_content.Rmd 07_per_base_n_content.Rmd 08_sequence_length_distribution.Rmd 09_sequence_duplication_levels.Rmd 10_adapter_content.Rmd 11_kmer_content.Rmd _site.yml fastqc_report.Rmd fastqc_site.xml fastqc_site_render.R index.Rmd
diffstat 6 files changed, 714 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/01_evaluation_overview.Rmd	Tue Feb 27 00:39:56 2018 -0500
@@ -0,0 +1,81 @@
+
+
+
+# Run FastQC
+
+```{bash eval=TRUE,echo=FALSE}
+cd ${X_d}
+cp ${X_r} ${X_d}/read_1.fq
+cp ${X_R} ${X_d}/read_2.fq
+
+mkdir -p read_1 read_2
+cat >temp.sh <<EOL
+fastqc \\
+  -q \\
+  -c ${X_c} \\
+  -l ${X_l} \\
+  ${X_d}/read_1.fq > /dev/null 2>&1
+  
+fastqc \\
+  -q \\
+  -c ${X_c} \\
+  -l ${X_l} \\
+  ${X_d}/read_2.fq > /dev/null 2>&1
+EOL
+
+grep -v None temp.sh > fastqc.sh
+
+# run fastqc
+sh fastqc.sh
+
+# unzip outputs
+unzip -q read_1_fastqc.zip
+unzip -q read_2_fastqc.zip
+```
+
+```{r}
+# display fastqc job script
+fastqc_sh = paste0(opt$X_d, '/fastqc.sh')
+tags$code(tags$pre(readChar(fastqc_sh, file.info(fastqc_sh)$size )))
+```
+
+# Raw FastQC reports
+
+## Before trimming
+```{r eval=TRUE}
+ori_html = tags$a(href = 'read_1_fastqc/fastqc_report.html', opt$X_n)
+ori_fastqc_data = tags$a(href = 'read_1_fastqc/fastqc_data.txt', 'fastqc_data.txt')
+ori_summary = tags$a(href = 'read_1_fastqc/summary.txt', 'summary.txt')
+tags$ul(
+    tags$li(ori_html),
+    tags$li(ori_fastqc_data),
+    tags$li(ori_summary)
+  )
+```
+
+## After trimming
+```{r eval=TRUE}
+ori_html = tags$a(href = 'read_2_fastqc/fastqc_report.html', opt$X_n)
+ori_fastqc_data = tags$a(href = 'read_2_fastqc/fastqc_data.txt', 'fastqc_data.txt')
+ori_summary = tags$a(href = 'read_2_fastqc/summary.txt', 'summary.txt')
+tags$ul(
+    tags$li(ori_html),
+    tags$li(ori_fastqc_data),
+    tags$li(ori_summary)
+  )
+```
+
+
+# Fastqc Output Visualization
+
+## Overview
+
+```{r eval=TRUE}
+read_1_summary = read.csv(paste0(opt$X_d, '/read_1_fastqc/summary.txt'), header = FALSE, sep = '\t')[, 2:1]
+read_2_summary = read.csv(paste0(opt$X_d, '/read_2_fastqc/summary.txt'), header = FALSE, sep = '\t')[, 1]
+combined_summary = cbind(read_1_summary, read_2_summary)
+names(combined_summary) = c('MODULE', paste0(opt$X_n, '(before)'), paste0(opt$X_N, '(after)'))
+combined_summary[combined_summary == 'FAIL'] = 'FAIL (X)'
+combined_summary[combined_summary == 'WARN'] = 'WARN (!)'
+knitr::kable(combined_summary)
+```
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/_site.yml	Tue Feb 27 00:39:56 2018 -0500
@@ -0,0 +1,37 @@
+name: "FastQC Website"
+output_dir: opt$X_d
+navbar:
+    title: "FastQC"
+    type: inverse
+    left:
+        - text: "Home"
+          icon: fa-home
+          href: index.html
+        - text: "Evaluation Overview"
+          href: x01_evaluation_overview.html
+        - text: "Evaluation by data module"
+#          menu:
+#            - text: "Per Base Sequence Quality"
+#              href: x02_per_base_sequence_quality.html
+#            - text: "Per Tile Sequence Quality"
+#              href: x03_per_tile_sequence_quality.html
+#            - text: "Per Sequence Quality Score"
+#              href: x04_per_sequence_quality_score.html
+#            - text: "Per Base Sequence Content"
+#              href: x05_per_base_sequence_content.html
+#            - text: "Per Sequence GC Content"
+#              href: x06_per_sequence_gc_content.html
+#            - text: "Per Base N Content"
+#              href: x07_per_base_n_content.html
+#            - text: "Sequence Length Distribution"
+#              href: x08_sequence_length_distribution.html
+#            - text: "Sequence Duplication Levels"
+#              href: x09_sequence_duplication_levels.html
+#            - text: "Adapter Content"
+#              href: x10_adapter_content.html
+#            - text: "Kmer Content"
+#              href: x11_kmer_content.html
+output:
+  html_document:
+    theme: cosmo
+    highlight: textmate
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fastqc_report.Rmd	Tue Feb 27 00:39:56 2018 -0500
@@ -0,0 +1,419 @@
+---
+title: 'Short reads evaluation with [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)'
+output:
+    html_document:
+      number_sections: true
+      toc: true
+      theme: cosmo
+      highlight: tango
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+knitr::opts_chunk$set(
+  echo = as.logical(opt$X_e),
+  error = TRUE,
+  eval = TRUE
+)
+```
+
+
+```{bash eval=TRUE,echo=FALSE}
+#create extra file directory
+mkdir -p ${X_d}
+```
+
+# Run FastQC
+
+```{bash eval=TRUE,echo=FALSE}
+cd ${X_d}
+cp ${X_r} ${X_d}/read_1.fq
+cp ${X_R} ${X_d}/read_2.fq
+
+mkdir -p read_1 read_2
+cat >temp.sh <<EOL
+fastqc \\
+  -q \\
+  -c ${X_c} \\
+  -l ${X_l} \\
+  ${X_d}/read_1.fq > /dev/null 2>&1
+  
+fastqc \\
+  -q \\
+  -c ${X_c} \\
+  -l ${X_l} \\
+  ${X_d}/read_2.fq > /dev/null 2>&1
+EOL
+
+grep -v None temp.sh > fastqc.sh
+
+# run fastqc
+sh fastqc.sh
+
+# unzip outputs
+unzip -q read_1_fastqc.zip
+unzip -q read_2_fastqc.zip
+```
+
+```{r}
+# display fastqc job script
+fastqc_sh = paste0(opt$X_d, '/fastqc.sh')
+tags$code(tags$pre(readChar(fastqc_sh, file.info(fastqc_sh)$size )))
+```
+
+# Raw FastQC reports
+
+## Before trimming
+```{r eval=TRUE}
+ori_html = tags$a(href = 'read_1_fastqc/fastqc_report.html', opt$X_n)
+ori_fastqc_data = tags$a(href = 'read_1_fastqc/fastqc_data.txt', 'fastqc_data.txt')
+ori_summary = tags$a(href = 'read_1_fastqc/summary.txt', 'summary.txt')
+tags$ul(
+    tags$li(ori_html),
+    tags$li(ori_fastqc_data),
+    tags$li(ori_summary)
+  )
+```
+
+## After trimming
+```{r eval=TRUE}
+ori_html = tags$a(href = 'read_2_fastqc/fastqc_report.html', opt$X_n)
+ori_fastqc_data = tags$a(href = 'read_2_fastqc/fastqc_data.txt', 'fastqc_data.txt')
+ori_summary = tags$a(href = 'read_2_fastqc/summary.txt', 'summary.txt')
+tags$ul(
+    tags$li(ori_html),
+    tags$li(ori_fastqc_data),
+    tags$li(ori_summary)
+  )
+```
+
+
+# Fastqc Output Visualization
+
+## Overview
+
+```{r eval=TRUE}
+read_1_summary = read.csv(paste0(opt$X_d, '/read_1_fastqc/summary.txt'), header = FALSE, sep = '\t')[, 2:1]
+read_2_summary = read.csv(paste0(opt$X_d, '/read_2_fastqc/summary.txt'), header = FALSE, sep = '\t')[, 1]
+combined_summary = cbind(read_1_summary, read_2_summary)
+names(combined_summary) = c('MODULE', paste0(opt$X_n, '(before)'), paste0(opt$X_N, '(after)'))
+combined_summary[combined_summary == 'FAIL'] = 'FAIL (X)'
+combined_summary[combined_summary == 'WARN'] = 'WARN (!)'
+knitr::kable(combined_summary)
+```
+
+## Visualization by data module {.tabset}
+
+* Define a function to extract outputs for each module from fastqc output
+
+```{r 'function definition'}
+extract_data_module = function(fastqc_data, module_name, header = TRUE, comment.char = "") {
+  f = readLines(fastqc_data)
+  start_line = grep(module_name, f)
+  end_module_lines = grep('END_MODULE', f)
+  end_line = end_module_lines[which(end_module_lines > start_line)[1]]
+  module_data = f[(start_line+1):(end_line-1)]
+  writeLines(module_data, '/tmp/temp.txt')
+  read.csv('/tmp/temp.txt', sep = '\t', header = header, comment.char = comment.char)
+}
+```
+
+### Per base sequence quality
+
+```{r 'per base sequence quality', fig.width=10}
+## reads 1
+pbsq_1 = extract_data_module(paste0(opt$X_d, '/read_1_fastqc/fastqc_data.txt'), 'Per base sequence quality')
+pbsq_1$id = 1:length(pbsq_1$X.Base)
+pbsq_1$trim = 'before'
+
+## reads 2
+pbsq_2 = extract_data_module(paste0(opt$X_d, '/read_2_fastqc/fastqc_data.txt'), 'Per base sequence quality')
+pbsq_2$id = 1:length(pbsq_2$X.Base)
+pbsq_2$trim = 'after'
+
+comb_pbsq = rbind(pbsq_1, pbsq_2)
+comb_pbsq$trim = factor(levels = c('before', 'after'), comb_pbsq$trim)
+
+p = ggplot(data = comb_pbsq) +
+  geom_boxplot(mapping = aes(x = id, 
+                             lower = Lower.Quartile, 
+                             upper = Upper.Quartile, 
+                             middle = Median, 
+                             ymin = X10th.Percentile, 
+                             ymax = X90th.Percentile,
+                             fill = "yellow"),
+               stat = 'identity') +
+  geom_line(mapping = aes(x = id, y = Mean, color = "red")) +
+  scale_x_continuous(breaks = pbsq_2$id, labels = pbsq_2$X.Base) +
+  scale_fill_identity() +
+  scale_color_identity() + 
+  ylim(0, max(comb_pbsq$Upper.Quartile) + 5) +
+  xlab('Position in read (bp)') +
+  facet_grid(. ~ trim) +
+  theme(axis.text.x = element_text(angle=45))
+p
+
+```
+
+### Per tile sequence quality
+
+```{r 'per tile sequence quality', fig.width=10}
+## check if 'per tile sequence quality' module exits or not
+check_ptsq = grep('Per tile sequence quality', readLines(paste0(opt$X_d, '/read_1_fastqc/fastqc_data.txt')))
+if (length(check_ptsq) > 0) {
+    ## reads 1
+  ptsq_1 = extract_data_module(paste0(opt$X_d, '/read_1_fastqc/fastqc_data.txt'), 'Per tile sequence quality')
+  ptsq_1$trim = 'before'
+  
+  ## reads 2
+  ptsq_2 = extract_data_module(paste0(opt$X_d, '/read_2_fastqc/fastqc_data.txt'), 'Per tile sequence quality')
+  ptsq_2$trim = 'after'
+  
+  comb_ptsq = rbind(ptsq_1, ptsq_2)
+  comb_ptsq$trim = factor(levels = c('before', 'after'), comb_ptsq$trim)
+  comb_ptsq$Base = factor(levels = unique(comb_ptsq$Base), comb_ptsq$Base)
+  
+  # convert integers to charaters
+  comb_ptsq$Tile = as.character(comb_ptsq$X.Tile)
+  
+  p = ggplot(data = comb_ptsq, aes(x = Base, y = Tile, fill = Mean)) +
+    geom_raster() + 
+    facet_grid(. ~ trim) + 
+    xlab('Position in read (bp)') + 
+    ylab('') +
+    theme(axis.text.x = element_text(angle=45))
+  ggplotly(p)
+} else {
+  print('No "per tile sequence quality" data')
+}
+
+
+```
+
+### Per sequence quality score
+
+```{r 'Per sequence quality score', fig.width=10}
+## reads 1
+psqs_1 = extract_data_module(paste0(opt$X_d, '/read_1_fastqc/fastqc_data.txt'), 'Per sequence quality scores')
+psqs_1$trim = 'before'
+
+## reads 2
+psqs_2 = extract_data_module(paste0(opt$X_d, '/read_2_fastqc/fastqc_data.txt'), 'Per sequence quality scores')
+psqs_2$trim = 'after'
+
+comb_psqs = rbind(psqs_1, psqs_2)
+comb_psqs$trim = factor(levels = c('before', 'after'), comb_psqs$trim)
+
+p = ggplot(data = comb_psqs, aes(x = X.Quality, y = Count)) + 
+  geom_line(color = 'red') + 
+  facet_grid(. ~ trim) + 
+  xlim(min(comb_psqs$X.Quality), max(comb_psqs$X.Quality)) + 
+  xlab('Mean Sequence Qaulity (Phred Score)') + 
+  ylab('')
+ggplotly(p)
+```
+
+
+### Per base sequence content
+
+```{r 'Per base sequence content', fig.width=10}
+## reads 1
+pbsc_1 = extract_data_module(paste0(opt$X_d, '/read_1_fastqc/fastqc_data.txt'), 'Per base sequence content')
+pbsc_1$id = 1:length(pbsc_1$X.Base)
+
+melt_pbsc_1 = melt(pbsc_1, id=c('X.Base', 'id'))
+melt_pbsc_1$trim = 'before'
+
+
+## reads 2
+pbsc_2 = extract_data_module(paste0(opt$X_d, '/read_2_fastqc/fastqc_data.txt'), 'Per base sequence content')
+pbsc_2$id = 1:length(pbsc_2$X.Base)
+
+melt_pbsc_2 = melt(pbsc_2, id=c('X.Base', 'id'))
+melt_pbsc_2$trim = 'after'
+
+comb_pbsc = rbind(melt_pbsc_1, melt_pbsc_2)
+comb_pbsc$trim = factor(levels = c('before', 'after'), comb_pbsc$trim)
+
+p = ggplot(data = comb_pbsc, aes(x = id, y = value, color = variable)) +
+  geom_line() +
+  facet_grid(. ~ trim) +
+  xlim(min(comb_pbsc$id), max(comb_pbsc$id)) + 
+  ylim(0, 100) +
+  xlab('Position in read (bp)') +
+  ylab('')
+ggplotly(p)
+```
+
+### Per sequence GC content
+
+```{r 'Per sequence GC content', fig.width=10}
+## reads 1
+psGCc_1 = extract_data_module(paste0(opt$X_d, '/read_1_fastqc/fastqc_data.txt'), 'Per sequence GC content')
+psGCc_1$trim = 'before'
+
+## reads 2
+psGCc_2 = extract_data_module(paste0(opt$X_d, '/read_2_fastqc/fastqc_data.txt'), 'Per sequence GC content')
+psGCc_2$trim = 'after'
+
+comb_psGCc = rbind(psGCc_1, psGCc_2)
+comb_psGCc$trim = factor(levels = c('before', 'after'), comb_psGCc$trim)
+
+p = ggplot(data = comb_psGCc, aes(x = X.GC.Content, y = Count)) +
+  geom_line(color = 'red') +
+  facet_grid(. ~ trim) +
+  xlab('Mean Sequence Qaulity (Phred Score)') +
+  ylab('')
+ggplotly(p)
+```
+
+
+### Per base N content
+
+```{r 'Per base N content', fig.width=10}
+## reads 1
+pbNc_1 = extract_data_module(paste0(opt$X_d, '/read_1_fastqc/fastqc_data.txt'), 'Per base N content')
+pbNc_1$id = 1:length(pbNc_1$X.Base)
+pbNc_1$trim = 'before'
+
+## reads 2
+pbNc_2 = extract_data_module(paste0(opt$X_d, '/read_2_fastqc/fastqc_data.txt'), 'Per base N content')
+pbNc_2$id = 1:length(pbNc_2$X.Base)
+pbNc_2$trim = 'after'
+
+comb_pbNc = rbind(pbNc_1, pbNc_2)
+comb_pbNc$trim = factor(levels = c('before', 'after'), comb_pbNc$trim)
+
+p = ggplot(data = comb_pbNc, aes(x = id, y = N.Count)) +
+  geom_line(color = 'red') +
+  scale_x_continuous(breaks = pbNc_2$id, labels = pbNc_2$X.Base) + 
+  facet_grid(. ~ trim) +
+  ylim(0, 1) + 
+  xlab('N-Count') +
+  ylab('') + 
+  theme(axis.text.x = element_text(angle=45))
+ggplotly(p)
+```
+
+
+### Sequence Length Distribution
+
+```{r 'Sequence Length Distribution', fig.width=10}
+## reads 1
+sld_1 = extract_data_module(paste0(opt$X_d, '/read_1_fastqc/fastqc_data.txt'), 'Sequence Length Distribution')
+sld_1$id = 1:length(sld_1$X.Length)
+sld_1$trim = 'before'
+
+## reads 2
+sld_2 = extract_data_module(paste0(opt$X_d, '/read_2_fastqc/fastqc_data.txt'), 'Sequence Length Distribution')
+sld_2$id = 1:length(sld_2$X.Length)
+sld_2$trim = 'after'
+
+comb_sld = rbind(sld_1, sld_2)
+comb_sld$trim = factor(levels = c('before', 'after'), comb_sld$trim)
+
+p = ggplot(data = comb_sld, aes(x = id, y = Count)) +
+  geom_line(color = 'red') +
+  scale_x_continuous(breaks = sld_2$id, labels = sld_2$X.Length) + 
+  facet_grid(. ~ trim) +
+  xlab('Sequence Length (bp)') +
+  ylab('') + 
+  theme(axis.text.x = element_text(angle=45))
+ggplotly(p)
+```
+
+### Sequence Duplication Levels
+
+```{r 'Sequence Duplication Levels', fig.width=10}
+## reads 1
+sdl_1 = extract_data_module(paste0(opt$X_d, '/read_1_fastqc/fastqc_data.txt'), 'Sequence Duplication Levels', header = FALSE, comment.char = '#')
+names(sdl_1) = c('Duplication_Level', 'Percentage_of_deduplicated', 'Percentage_of_total')
+sdl_1$id = 1:length(sdl_1$Duplication_Level)
+
+melt_sdl_1 = melt(sdl_1, id=c('Duplication_Level', 'id'))
+melt_sdl_1$trim = 'before'
+
+
+## reads 2
+sdl_2 = extract_data_module(paste0(opt$X_d, '/read_2_fastqc/fastqc_data.txt'), 'Sequence Duplication Levels', header = FALSE, comment.char = '#')
+names(sdl_2) = c('Duplication_Level', 'Percentage_of_deduplicated', 'Percentage_of_total')
+sdl_2$id = 1:length(sdl_2$Duplication_Level)
+
+melt_sdl_2 = melt(sdl_2, id=c('Duplication_Level', 'id'))
+melt_sdl_2$trim = 'after'
+
+comb_sdl = rbind(melt_sdl_1, melt_sdl_2)
+comb_sdl$trim = factor(levels = c('before', 'after'), comb_sdl$trim)
+
+p = ggplot(data = comb_sdl, aes(x = id, y = value, color = variable)) +
+  geom_line() +
+  scale_x_continuous(breaks = sdl_2$id, labels = sdl_2$Duplication_Level) +
+  facet_grid(. ~ trim) +
+  xlab('Sequence Duplication Level') +
+  ylab('') + 
+  theme(axis.text.x = element_text(angle=45))
+ggplotly(p)
+```
+
+### Adapter Content
+
+```{r 'Adapter Content', fig.width=10}
+## reads 1
+ac_1 = extract_data_module(paste0(opt$X_d, '/read_1_fastqc/fastqc_data.txt'), 'Adapter Content')
+ac_1$id = 1:length(ac_1$X.Position)
+
+melt_ac_1 = melt(ac_1, id=c('X.Position', 'id'))
+melt_ac_1$trim = 'before'
+
+## reads 2
+ac_2 = extract_data_module(paste0(opt$X_d, '/read_2_fastqc/fastqc_data.txt'), 'Adapter Content')
+ac_2$id = 1:length(ac_2$X.Position)
+
+melt_ac_2 = melt(ac_2, id=c('X.Position', 'id'))
+melt_ac_2$trim = 'after'
+
+comb_ac = rbind(melt_ac_1, melt_ac_2)
+comb_ac$trim = factor(levels = c('before', 'after'), comb_ac$trim)
+
+p = ggplot(data = comb_ac, aes(x = id, y = value, color = variable)) +
+  geom_line() +
+  facet_grid(. ~ trim) +
+  xlim(min(comb_ac$id), max(comb_ac$id)) + 
+  ylim(0, 1) +
+  xlab('Position in read (bp)') +
+  ylab('')
+ggplotly(p)
+```
+
+### Kmer Content {.tabset}
+
+#### Before
+
+```{r 'Kmer Content (before)', fig.width=10}
+kc_1 = extract_data_module(paste0(opt$X_d, '/read_1_fastqc/fastqc_data.txt'), 'Kmer Content')
+knitr::kable(kc_1)
+```
+
+#### After
+```{r 'Kmer Content (after)', fig.width=10}
+kc_2 = extract_data_module(paste0(opt$X_d, '/read_2_fastqc/fastqc_data.txt'), 'Kmer Content')
+knitr::kable(kc_2)
+```
+
+
+# Session Info
+
+```{r 'session info'}
+sessionInfo()
+```
+
+# References
+
+* Bioinformatics, Babraham (2014). FastQC.
+
+* Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff and Wickham, Hadley and Atkins, Aron and Hyndman, Rob (2016). rmarkdown: Dynamic Documents for R, 2016. In R package version 0.9, 6.
+
+* Xie, Yihui (2015). Dynamic Documents with R and knitr, CRC Press, Vol.29.
+
+* Carson Sievert and Chris Parmer and Toby Hocking and Scott Chamberlain and Karthik Ram and Marianne Corvellec and Pedro Despouy (2017). plotly: Create Interactive Web Graphics via 'plotly.js'. R package version 4.6.0. [Link]
+
+* Wickham, H. (2016). ggplot2: elegant graphics for data analysis. Springer. Chicago
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fastqc_site.xml	Tue Feb 27 00:39:56 2018 -0500
@@ -0,0 +1,109 @@
+<tool name="Fastqc site" id='aurora_fastqc_site' version="2.1.0">
+    <description>
+        Evaluate short reads with FastQC software on a single or a paired of untrimmed and trimmed reads
+        files.
+    </description>
+    <requirements>
+        <requirement type="package" version="1.15.0.6-0">pandoc</requirement>
+        <requirement type="package" version="1.20.0">r-getopt</requirement>
+        <requirement type="package" version="1.3">r-rmarkdown</requirement>
+        <requirement type="package" version="1.8.4">r-plyr</requirement>
+        <requirement type="package" version="1.1.0">r-stringr</requirement>
+        <requirement type="package" version="0.2">r-dt</requirement>
+        <requirement type="package" version="1.4.2">r-reshape2</requirement>
+        <requirement type="package" version="4.5.6">r-plotly</requirement>
+        <requirement type="package" version="0.3.5">r-htmltools</requirement>
+        <requirement type="package" version="0.11.5">fastqc</requirement>
+        <requirement type="package" version="0.9.10">xorg-libxrender</requirement>
+        <requirement type="package" version="1.2.2">xorg-libsm</requirement>
+        <requirement type="package" version="6.0">unzip</requirement>
+    </requirements>
+    <stdio>
+        <regex match="XXX" source="stderr" level="warning"
+               description="Check the warnings_and_errors.txt file for more details."/>
+    </stdio>
+    <command><![CDATA[
+        
+
+        Rscript '${__tool_directory__}/fastqc_report_render.R'
+            -e $echo
+            -r $reads_1
+            -n '$reads_1.name'
+            -R $reads_2
+            -N '$reads_2.name'
+            -c $contaminants
+            -l $limits
+
+		    -o $report
+		    -d $report.files_path
+		    -s $sink_message
+
+		    -p '${__tool_directory__}/fastqc_site.Rmd'
+        
+    ]]></command>
+    <inputs>
+        <param format="fastq,fastq.gz,fastq.bz2,bam,sam" name="reads_1" type="data" optional="false"
+               label="Short reads before trimming"
+               help="Short reads data from history. This could be reads before trimming."/>
+        <param format="fastq,fastq.gz,fastq.bz2,bam,sam" name="reads_2" type="data" label="Short reads after trimming"
+               help="Short reads data from history. This could be reads after trimming."/>
+        <param name="contaminants" type="data" format="tabular" optional="true" label="Contaminant list"
+               help="Specifies a non-default file which contains the list of adapter sequences which will be explicitly                      searched against the library. The file must contain sets of named adapters                      in the form name[tab]sequence.  Lines prefixed with a hash will be ignored."/>
+        <param name="limits" type="data" format="txt" optional="true" label="Submodule and Limit specifing file"
+               help="Specifies a non-default file which contains a set of criteria                     which will be used to determine the warn/error limits for the                     various modules.  This file can also be used to selectively                     remove some modules from the output all together.  The format                     needs to mirror the default limits.txt file found in the                     Configuration folder."/>
+        <param type="boolean" name="echo" truevalue="TRUE" falsevalue="FALSE" checked="false"
+               label="Display analysis code in report?"/>
+    </inputs>
+    <outputs>
+        <data format="html" name="report" label="fastqc site"/>
+        <data format="txt" name="sink_message" label="Warnings and Errors" from_work_dir="warnings_and_errors.txt"/>
+    </outputs>
+    <citations>
+        <citation type="bibtex"><![CDATA[
+            @misc{bioinformatics2014fastqc,
+            title={FastQC},
+            author={Bioinformatics, Babraham},
+            year={2014}
+            }
+        ]]></citation>
+        <citation type="bibtex"><![CDATA[
+            @article{allaire2016rmarkdown,
+            title={rmarkdown: Dynamic Documents for R, 2016},
+            author={Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff
+            and Wickham, Hadley and Atkins, Aron and Hyndman, Rob},
+            journal={R package version 0.9},
+            volume={6},
+            year={2016}
+            }
+        ]]></citation>
+        <citation type="bibtex"><![CDATA[
+            @book{xie2015dynamic,
+            title={Dynamic Documents with R and knitr},
+            author={Xie, Yihui},
+            volume={29},
+            year={2015},
+            publisher={CRC Press}
+            }
+        ]]></citation>
+        <citation type="bibtex"><![CDATA[
+            @misc{plotly2017,
+            title = {plotly: Create Interactive Web Graphics via 'plotly.js'},
+            author = {Carson Sievert and Chris Parmer and Toby Hocking and Scott Chamberlain and Karthik Ram and
+            Marianne Corvellec and Pedro Despouy},
+            year = {2017},
+            note = {R package version 4.6.0},
+            url = {https://CRAN.R-project.org/package=plotly},
+            }
+        ]]></citation>
+        <citation type="bibtex"><![CDATA[
+            @Book{ggplot22016,
+            author = {Hadley Wickham},
+            title = {ggplot2: Elegant Graphics for Data Analysis},
+            publisher = {Springer-Verlag New York},
+            year = {2009},
+            isbn = {978-0-387-98140-6},
+            url = {http://ggplot2.org},
+            }
+        ]]></citation>
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fastqc_site_render.R	Tue Feb 27 00:39:56 2018 -0500
@@ -0,0 +1,46 @@
+##============ Sink warnings and errors to a file ==============
+## use the sink() function to wrap all code within it.
+##==============================================================
+zz = file('warnings_and_errors.txt')
+sink(zz)
+sink(zz, type = 'message')
+  
+library(getopt)
+library(rmarkdown)
+library(htmltools)
+library(plyr)
+library(dplyr)
+library(stringr)
+library(DT)
+library(reshape2)
+library(plotly)
+options(stringsAsFactors = FALSE)
+
+# getopt_specification_matrix(extract_short_flags('fastqc_report.xml')) %>%
+#   write.table(file = 'spec.txt', sep = ',', row.names = FALSE, col.names = TRUE, quote = FALSE)
+
+spec_matrix = as.matrix(
+  data.frame(stringsAsFactors=FALSE,
+              long_flags = c("X_e", "X_r", "X_n", "X_R", "X_N", "X_c", "X_l",
+                             "X_o", "X_d", "X_s", "X_p"),
+             short_flags = c("e", "r", "n", "R", "N", "c", "l", "o", "d", "s",
+                             "p"),
+     argument_mask_flags = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L),
+         data_type_flags = c("character", "character", "character", "character",
+                             "character", "character", "character",
+                             "character", "character", "character", "character")
+  )
+)
+# get arguments into R
+opt = getopt(spec_matrix)
+# define system environment variables
+do.call(Sys.setenv, opt[-1])
+
+# render website
+# render(input = opt$X_p, output_file = opt$X_o)
+render_site()
+file.copy(paste0(opt$X_d, '/index.html'), opt$X_o, recursive = TRUE)
+
+##--------end of code rendering .Rmd templates----------------
+sink()
+##=========== End of sinking output=============================
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/index.Rmd	Tue Feb 27 00:39:56 2018 -0500
@@ -0,0 +1,22 @@
+---
+title: "FastQC Report"
+output: html_document
+---
+
+```{r setup, include=FALSE, warning=FALSE, message=FALSE}
+knitr::opts_chunk$set(echo = TRUE, error = TRUE)
+```
+
+
+
+## References
+
+* Bioinformatics, Babraham (2014). FastQC.
+
+* Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff and Wickham, Hadley and Atkins, Aron and Hyndman, Rob (2016). rmarkdown: Dynamic Documents for R, 2016. In R package version 0.9, 6.
+
+* Xie, Yihui (2015). Dynamic Documents with R and knitr, CRC Press, Vol.29.
+
+* Carson Sievert and Chris Parmer and Toby Hocking and Scott Chamberlain and Karthik Ram and Marianne Corvellec and Pedro Despouy (2017). plotly: Create Interactive Web Graphics via 'plotly.js'. R package version 4.6.0. [Link]
+
+* Wickham, H. (2016). ggplot2: elegant graphics for data analysis. Springer. Chicago