Mercurial > repos > mingchen0919 > aurora_fastqc
changeset 3:85351cdd4f19 draft
planemo upload commit 61448862dc9016969191967aeefb7f7acce1e1f9
author | mingchen0919 |
---|---|
date | Mon, 30 Apr 2018 16:40:21 -0400 |
parents | 5f00aef42904 |
children | df6f86eb057e |
files | build-and-run-job-scripts.sh command-line-arguments.csv expose-outputs-to-galaxy-history.sh expose-outputs.sh fastqc.Rmd fastqc.xml fastqc_render.R getopt_specification.csv helper.R rmarkdown_report.Rmd rmarkdown_report.xml rmarkdown_report_render.R shell-script-template.sh |
diffstat | 13 files changed, 723 insertions(+), 729 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/build-and-run-job-scripts.sh Mon Apr 30 16:40:21 2018 -0400 @@ -0,0 +1,33 @@ +# run SHELL_SCRIPT within tool outputs directory +cd ${REPORT_FILES_PATH} + +# build job-script.sh +cat >temp.sh <<EOF + +cp ${X_r} read_1.fq +cp ${X_R} read_2.fq + +fastqc \\ + -q \\ + -c ${X_c} \\ + -l ${X_l} \\ + ${X_d}/read_1.fq > /dev/null 2>&1 + +fastqc \\ + -q \\ + -c ${X_c} \\ + -l ${X_l} \\ + ${X_d}/read_2.fq > /dev/null 2>&1 + +EOF + +grep -v None temp.sh > job-1-script.sh +rm temp.sh + +# run SHELL_SCRIPT +sh job-1-script.sh + + +# unzip outputs +unzip -q read_1_fastqc.zip +unzip -q read_2_fastqc.zip \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/command-line-arguments.csv Mon Apr 30 16:40:21 2018 -0400 @@ -0,0 +1,9 @@ +short flag,argument mask,data type,variable name +o,1,character,report +d,1,character,report.files_path +r,1,character,reads_1 +n,1,character,reads_1.name +R,1,character,reads_2 +N,1,character,reads_2.name +c,1,character,contaminants +l,1,character,limits \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/expose-outputs-to-galaxy-history.sh Mon Apr 30 16:40:21 2018 -0400 @@ -0,0 +1,5 @@ +# change directory to tool outputs directory +cd ${REPORT_FILES_PATH} + +# copy outputs from tool outputs directory to corresponding galaxy output path +cp fastqc.html ${REPORT} \ No newline at end of file
--- a/expose-outputs.sh Wed Apr 25 15:55:23 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,5 +0,0 @@ -# change directory to tool outputs directory -cd ${REPORT_FILES_PATH} - -# copy outputs from tool outputs directory to corresponding galaxy output path -cp fastqc.html ${REPORT} \ No newline at end of file
--- a/fastqc.Rmd Wed Apr 25 15:55:23 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,416 +0,0 @@ ---- -title: 'Short reads evaluation with [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)' -output: - html_document: - highlight: pygments ---- - -```{r setup, include=FALSE, warning=FALSE, message=FALSE} -knitr::opts_knit$set(progress = FALSE) -knitr::opts_chunk$set(error = TRUE, echo = FALSE) -``` - -```{r, echo=FALSE} -# to make the css theme to work, <link></link> tags cannot be added directly -# as <script></script> tags as below. -# it has to be added using a code chunk with the htmltool functions!!! -css_link = tags$link() -css_link$attribs = list(rel="stylesheet", href="vakata-jstree-3.3.5/dist/themes/default/style.min.css") -css_link -``` - -```{r, eval=FALSE, echo=FALSE} -# this code chunk is purely for adding comments -# below is to add jQuery and jstree javascripts -``` -<script src="https://code.jquery.com/jquery-3.3.1.min.js"></script> -<script src="vakata-jstree-3.3.5/dist/jstree.min.js"></script> - -```{r, eval=FALSE, echo=FALSE} -# this code chunk is purely for adding comments -# javascript code below is to build the file tree interface -# see this for how to implement opening hyperlink: https://stackoverflow.com/questions/18611317/how-to-get-i-get-leaf-nodes-in-jstree-to-open-their-hyperlink-when-clicked-when -``` -<script> - $(function () { - // create an instance when the DOM is ready - $('#jstree').jstree().bind("select_node.jstree", function (e, data) { - window.open( data.node.a_attr.href, data.node.a_attr.target ) - }); - }); -</script> - -```{css} -pre code, pre, code { - white-space: pre !important; - overflow-x: scroll !important; - word-break: keep-all !important; - word-wrap: initial !important; -} -``` ----------------------- - - - - - -# Run FastQC - -```{bash} -sh ${TOOL_INSTALL_DIR}/shell-script-template.sh -``` - -```{r echo=FALSE,results='asis'} -# display fastqc job script -cat('```bash\n') 
-cat(readLines(paste0(Sys.getenv('REPORT_FILES_PATH'), '/fastqc.sh')), sep = '\n') -cat('\n```') -``` - -# Fastqc Output Visualization - -## Overview - -```{r eval=TRUE} -read_1_summary = read.csv(paste0(opt$X_d, '/read_1_fastqc/summary.txt'), - stringsAsFactors = FALSE, - header = FALSE, sep = '\t')[, 2:1] -read_2_summary = read.csv(paste0(opt$X_d, '/read_2_fastqc/summary.txt'), - stringsAsFactors = FALSE, - header = FALSE, sep = '\t')[, 1] -combined_summary = data.frame(read_1_summary, read_2_summary, stringsAsFactors = FALSE) -names(combined_summary) = c('MODULE', 'Pre-trimming', 'Post-trimming') -combined_summary[combined_summary == 'FAIL'] = 'FAIL (X)' -combined_summary[combined_summary == 'WARN'] = 'WARN (!)' -knitr::kable(combined_summary) -``` - -```{r 'function definition', echo=FALSE} -extract_data_module = function(fastqc_data, module_name, header = TRUE, comment.char = "") { - f = readLines(fastqc_data) - start_line = grep(module_name, f) - end_module_lines = grep('END_MODULE', f) - end_line = end_module_lines[which(end_module_lines > start_line)[1]] - module_data = f[(start_line+1):(end_line-1)] - writeLines(module_data, '/tmp/temp.txt') - read.csv('/tmp/temp.txt', sep = '\t', header = header, comment.char = comment.char) -} -``` - - - -### Per base sequence quality - -```{r 'per base sequence quality', fig.width=10} -## reads 1 -pbsq_1 = extract_data_module(paste0(opt$X_d, '/read_1_fastqc/fastqc_data.txt'), 'Per base sequence quality') -pbsq_1$id = 1:length(pbsq_1$X.Base) -pbsq_1$trim = 'before' - -## reads 2 -pbsq_2 = extract_data_module(paste0(opt$X_d, '/read_2_fastqc/fastqc_data.txt'), 'Per base sequence quality') -pbsq_2$id = 1:length(pbsq_2$X.Base) -pbsq_2$trim = 'after' - -comb_pbsq = rbind(pbsq_1, pbsq_2) -comb_pbsq$trim = factor(levels = c('before', 'after'), comb_pbsq$trim) - -p = ggplot(data = comb_pbsq) + - geom_boxplot(mapping = aes(x = id, - lower = Lower.Quartile, - upper = Upper.Quartile, - middle = Median, - ymin = X10th.Percentile, - 
ymax = X90th.Percentile, - fill = "yellow"), - stat = 'identity') + - geom_line(mapping = aes(x = id, y = Mean, color = "red")) + - scale_x_continuous(name = 'Position in read (bp)', breaks = pbsq_2$id, labels = pbsq_2$X.Base) + - scale_y_continuous(limits = c(0, max(comb_pbsq$Upper.Quartile) + 5)) + - scale_fill_identity() + - scale_color_identity() + - facet_grid(. ~ trim) + - theme(axis.text.x = element_text(size = 5), - panel.background = element_rect(fill = NA), - panel.grid.major.y = element_line(color = 'blue', size = 0.1)) -p -``` - - -### Per tile sequence quality - -```{r 'per tile sequence quality', fig.width=10} -## check if 'per tile sequence quality' module exits or not -check_ptsq = grep('Per tile sequence quality', readLines(paste0(opt$X_d, '/read_1_fastqc/fastqc_data.txt'))) -if (length(check_ptsq) > 0) { - ## reads 1 - ptsq_1 = extract_data_module(paste0(opt$X_d, '/read_1_fastqc/fastqc_data.txt'), 'Per tile sequence quality') - ptsq_1$trim = 'before' - - ## reads 2 - ptsq_2 = extract_data_module(paste0(opt$X_d, '/read_2_fastqc/fastqc_data.txt'), 'Per tile sequence quality') - ptsq_2$trim = 'after' - - comb_ptsq = rbind(ptsq_1, ptsq_2) - comb_ptsq$trim = factor(levels = c('before', 'after'), comb_ptsq$trim) - comb_ptsq$Base = factor(levels = unique(comb_ptsq$Base), comb_ptsq$Base) - - # convert integers to charaters - # comb_ptsq$Tile = as.character(comb_ptsq$X.Tile) - - p = ggplot(data = comb_ptsq) + - geom_raster(mapping = aes(x = Base, y = X.Tile, fill = Mean)) + - facet_grid(. 
~ trim) + - scale_x_discrete(name = "Position in read (bp)") + - scale_y_continuous(name = "") + - scale_fill_gradient(low = "blue", high = "red") + - theme(axis.text.x = element_text(size = 5, angle = 90), - axis.text.y = element_text(size = 5), - panel.background = element_rect(fill = NA)) - ggplotly(p) -} else { - print('No "per tile sequence quality" data') -} -``` - -### Per sequence quality score - -```{r 'Per sequence quality score', fig.width=10} -## reads 1 -psqs_1 = extract_data_module(paste0(opt$X_d, '/read_1_fastqc/fastqc_data.txt'), 'Per sequence quality scores') -psqs_1$trim = 'before' - -## reads 2 -psqs_2 = extract_data_module(paste0(opt$X_d, '/read_2_fastqc/fastqc_data.txt'), 'Per sequence quality scores') -psqs_2$trim = 'after' - -comb_psqs = rbind(psqs_1, psqs_2) -comb_psqs$trim = factor(levels = c('before', 'after'), comb_psqs$trim) - -p = ggplot(data = comb_psqs) + - geom_line(mapping = aes(x = X.Quality, y = Count), color = 'red') + - facet_grid(. ~ trim) + - scale_x_continuous(name = 'Mean Sequence Qaulity (Phred Score)', - limits = c(min(comb_psqs$X.Quality), max(comb_psqs$X.Quality))) + - scale_y_continuous(name = '') + - theme(panel.background = element_rect(fill = NA), - axis.line = element_line(), - panel.grid.major.y = element_line(color = 'blue', size = 0.1)) -p -``` - -### Per base sequence content - -```{r 'Per base sequence content', fig.width=10} -## reads 1 -pbsc_1 = extract_data_module(paste0(opt$X_d, '/read_1_fastqc/fastqc_data.txt'), 'Per base sequence content') -pbsc_1$id = 1:length(pbsc_1$X.Base) - -melt_pbsc_1 = melt(pbsc_1, id=c('X.Base', 'id')) -melt_pbsc_1$trim = 'before' - - -## reads 2 -pbsc_2 = extract_data_module(paste0(opt$X_d, '/read_2_fastqc/fastqc_data.txt'), 'Per base sequence content') -pbsc_2$id = 1:length(pbsc_2$X.Base) - -melt_pbsc_2 = melt(pbsc_2, id=c('X.Base', 'id')) -melt_pbsc_2$trim = 'after' - -comb_pbsc = rbind(melt_pbsc_1, melt_pbsc_2) -comb_pbsc$trim = factor(levels = c('before', 'after'), 
comb_pbsc$trim) - -p = ggplot(data = comb_pbsc) + - geom_line(mapping = aes(x = id, y = value, color = variable)) + - facet_grid(. ~ trim) + - xlim(min(comb_pbsc$id), max(comb_pbsc$id)) + - ylim(0, 100) + - xlab('Position in read (bp)') + - ylab('') + - scale_color_discrete(name = '') + - theme_classic() -ggplotly(p) -``` - -### Per sequence GC content - -```{r 'Per sequence GC content', fig.width=10} -## reads 1 -psGCc_1 = extract_data_module(paste0(opt$X_d, '/read_1_fastqc/fastqc_data.txt'), 'Per sequence GC content') -psGCc_1$trim = 'before' - -## reads 2 -psGCc_2 = extract_data_module(paste0(opt$X_d, '/read_2_fastqc/fastqc_data.txt'), 'Per sequence GC content') -psGCc_2$trim = 'after' - -comb_psGCc = rbind(psGCc_1, psGCc_2) -comb_psGCc$trim = factor(levels = c('before', 'after'), comb_psGCc$trim) - -p = ggplot(data = comb_psGCc, aes(x = X.GC.Content, y = Count)) + - geom_line(color = 'red') + - facet_grid(. ~ trim) + - xlab('Mean Sequence Qaulity (Phred Score)') + - ylab('') + - scale_color_discrete(name = '') + - theme_classic() -ggplotly(p) -``` - - -### Per base N content - -```{r 'Per base N content', fig.width=10} -## reads 1 -pbNc_1 = extract_data_module(paste0(opt$X_d, '/read_1_fastqc/fastqc_data.txt'), 'Per base N content') -pbNc_1$id = 1:length(pbNc_1$X.Base) -pbNc_1$trim = 'before' - -## reads 2 -pbNc_2 = extract_data_module(paste0(opt$X_d, '/read_2_fastqc/fastqc_data.txt'), 'Per base N content') -pbNc_2$id = 1:length(pbNc_2$X.Base) -pbNc_2$trim = 'after' - -comb_pbNc = rbind(pbNc_1, pbNc_2) -comb_pbNc$trim = factor(levels = c('before', 'after'), comb_pbNc$trim) - -p = ggplot(data = comb_pbNc, aes(x = id, y = N.Count)) + - geom_line(color = 'red') + - scale_x_continuous(breaks = pbNc_2$id, labels = pbNc_2$X.Base) + - facet_grid(. 
~ trim) + - ylim(0, 1) + - xlab('N-Count') + - ylab('') + - theme(axis.text.x = element_text(size = 5), - axis.line = element_line(), - panel.background = element_rect(fill = NA)) -ggplotly(p) -``` - - -### Sequence Length Distribution - -```{r 'Sequence Length Distribution', fig.width=10} -## reads 1 -sld_1 = extract_data_module(paste0(opt$X_d, '/read_1_fastqc/fastqc_data.txt'), 'Sequence Length Distribution') -sld_1$id = 1:length(sld_1$X.Length) -sld_1$trim = 'before' - -## reads 2 -sld_2 = extract_data_module(paste0(opt$X_d, '/read_2_fastqc/fastqc_data.txt'), 'Sequence Length Distribution') -sld_2$id = 1:length(sld_2$X.Length) -sld_2$trim = 'after' - -comb_sld = rbind(sld_1, sld_2) -comb_sld$trim = factor(levels = c('before', 'after'), comb_sld$trim) - -p = ggplot(data = comb_sld, aes(x = id, y = Count)) + - geom_line(color = 'red') + - scale_x_continuous(breaks = sld_2$id, labels = sld_2$X.Length) + - facet_grid(. ~ trim) + - xlab('Sequence Length (bp)') + - ylab('') + - theme(axis.text.x = element_text(size = 5), - panel.background = element_rect(fill = NA), - axis.line = element_line(), - plot.margin = margin(2,2,2,10) ) -ggplotly(p) -``` - -### Sequence Duplication Levels - -```{r 'Sequence Duplication Levels', fig.width=10} -## reads 1 -sdl_1 = extract_data_module(paste0(opt$X_d, '/read_1_fastqc/fastqc_data.txt'), 'Sequence Duplication Levels', header = FALSE, comment.char = '#') -names(sdl_1) = c('Duplication_Level', 'Percentage_of_deduplicated', 'Percentage_of_total') -sdl_1$id = 1:length(sdl_1$Duplication_Level) - -melt_sdl_1 = melt(sdl_1, id=c('Duplication_Level', 'id')) -melt_sdl_1$trim = 'before' - - -## reads 2 -sdl_2 = extract_data_module(paste0(opt$X_d, '/read_2_fastqc/fastqc_data.txt'), 'Sequence Duplication Levels', header = FALSE, comment.char = '#') -names(sdl_2) = c('Duplication_Level', 'Percentage_of_deduplicated', 'Percentage_of_total') -sdl_2$id = 1:length(sdl_2$Duplication_Level) - -melt_sdl_2 = melt(sdl_2, id=c('Duplication_Level', 'id')) 
-melt_sdl_2$trim = 'after' - -comb_sdl = rbind(melt_sdl_1, melt_sdl_2) -comb_sdl$trim = factor(levels = c('before', 'after'), comb_sdl$trim) - -p = ggplot(data = comb_sdl) + - geom_line(mapping = aes(x = id, y = value, color = variable)) + - scale_x_continuous(breaks = sdl_2$id, labels = sdl_2$Duplication_Level) + - facet_grid(. ~ trim) + - xlab('Sequence Duplication Level') + - ylab('') + - scale_color_discrete(name = '') + - theme(axis.text.x = element_text(size = 5), - panel.background = element_rect(fill = NA), - axis.line = element_line()) -p -``` - -### Adapter Content - -```{r 'Adapter Content', fig.width=10} -## reads 1 -ac_1 = extract_data_module(paste0(opt$X_d, '/read_1_fastqc/fastqc_data.txt'), 'Adapter Content') -ac_1$id = 1:length(ac_1$X.Position) - -melt_ac_1 = melt(ac_1, id=c('X.Position', 'id')) -melt_ac_1$trim = 'before' - -## reads 2 -ac_2 = extract_data_module(paste0(opt$X_d, '/read_2_fastqc/fastqc_data.txt'), 'Adapter Content') -ac_2$id = 1:length(ac_2$X.Position) - -melt_ac_2 = melt(ac_2, id=c('X.Position', 'id')) -melt_ac_2$trim = 'after' - -comb_ac = rbind(melt_ac_1, melt_ac_2) -comb_ac$trim = factor(levels = c('before', 'after'), comb_ac$trim) - -p = ggplot(data = comb_ac, aes(x = id, y = value, color = variable)) + - geom_line() + - facet_grid(. 
~ trim) + - xlim(min(comb_ac$id), max(comb_ac$id)) + - ylim(0, 1) + - xlab('Position in read (bp)') + - ylab('') + - scale_color_discrete(name = '') + - theme(axis.text.x = element_text(size = 5), - panel.background = element_rect(fill = NA), - axis.line = element_line()) -ggplotly(p) -``` - -### Kmer Content {.tabset} - -#### Before - -```{r 'Kmer Content (before)', fig.width=10} -kc_1 = extract_data_module(paste0(opt$X_d, '/read_1_fastqc/fastqc_data.txt'), 'Kmer Content') -knitr::kable(kc_1) -``` - -#### After -```{r 'Kmer Content (after)', fig.width=10} -kc_2 = extract_data_module(paste0(opt$X_d, '/read_2_fastqc/fastqc_data.txt'), 'Kmer Content') -knitr::kable(kc_2) -``` - - - - ------------------------------------------ -## Output - -```{r, echo=FALSE} -# create a div container to store the file tree interface -tags$div( - id="jstree", - file_tree(Sys.getenv('REPORT_FILES_PATH')) -) -``` \ No newline at end of file
--- a/fastqc.xml Wed Apr 25 15:55:23 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,124 +0,0 @@ -<tool name="aurora_fastqc" id='aurora_fastqc' version="2.1.1"> - <description> - Evaluate short reads with FastQC software on a single or a paired of untrimmed and trimmed reads - files. - </description> - <requirements> - <requirement type="package" version="1.15.0.6-0">pandoc</requirement> - <requirement type="package" version="1.20.0">r-getopt</requirement> - <requirement type="package" version="1.3">r-rmarkdown</requirement> - <requirement type="package" version="1.8.4">r-plyr</requirement> - <requirement type="package" version="1.1.0">r-stringr</requirement> - <requirement type="package" version="0.2">r-dt</requirement> - <requirement type="package" version="1.4.2">r-reshape2</requirement> - <requirement type="package" version="4.5.6">r-plotly</requirement> - <requirement type="package" version="0.3.5">r-htmltools</requirement> - <requirement type="package" version="0.11.5">fastqc</requirement> - <requirement type="package" version="0.9.10">xorg-libxrender</requirement> - <requirement type="package" version="1.2.2">xorg-libsm</requirement> - <requirement type="package" version="6.0">unzip</requirement> - </requirements> - <stdio> - <regex match="XXX" source="stderr" level="warning" - description="Check the warnings_and_errors.txt file for more details."/> - </stdio> - <command><![CDATA[ - - ######### each aurora tool generates a html file and have an files path directory associated with it. - mkdir -p $report.files_path && - - ######### three important paths: - ######### 1. path to tool installation directory - ######### 2. path to report html - ######### 3. path to files_path directory associated with the report output. 
- export TOOL_INSTALL_DIR='${__tool_directory__}' && - export REPORT='$report' && - export REPORT_FILES_PATH='$report.files_path' && - - ############ copy jstree directory cp -r ${__tool_directory__}/vakata-jstree-3.3.5 ./ && - - ############ create a hidden file to store r markdown rendering log - touch $report.files_path/.r_rendering.log.txt && - - Rscript '${__tool_directory__}/fastqc_render.R' - - -o $report - -d $report.files_path - - -r $reads_1 - -n '$reads_1.name' - -R $reads_2 - -N '$reads_2.name' - -c $contaminants - -l $limits - - ]]></command> - <inputs> - <param format="fastq,fastq.gz,fastq.bz2,bam,sam" name="reads_1" type="data" optional="false" - label="Short reads before trimming" - help="Short reads data from history. This could be reads before trimming."/> - <param format="fastq,fastq.gz,fastq.bz2,bam,sam" name="reads_2" type="data" label="Short reads after trimming" - help="Short reads data from history. This could be reads after trimming."/> - <param name="contaminants" type="data" format="tabular" optional="true" label="Contaminant list" - help="Specifies a non-default file which contains the list of adapter sequences which will be explicitly - searched against the library. The file must contain sets of named adapters in the form name[tab]sequence. - Lines prefixed with a hash will be ignored."/> - <param name="limits" type="data" format="txt" optional="true" label="Submodule and Limit specifing file" - help="Specifies a non-default file which contains a set of criteria which will be used to determine the - warn/error limits for the various modules. This file can also be used to selectively remove some modules - from the output all together. 
The format needs to mirror the default limits.txt file found in the - Configuration folder."/> - - </inputs> - <outputs> - <data format="html" name="report" label="${tool.name} on ${on_string}"/> - </outputs> - <citations> - <citation type="bibtex"><![CDATA[ - @misc{bioinformatics2014fastqc, - title={FastQC}, - author={Bioinformatics, Babraham}, - year={2014} - } - ]]></citation> - <citation type="bibtex"><![CDATA[ - @article{allaire2016rmarkdown, - title={rmarkdown: Dynamic Documents for R, 2016}, - author={Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff - and Wickham, Hadley and Atkins, Aron and Hyndman, Rob}, - journal={R package version 0.9}, - volume={6}, - year={2016} - } - ]]></citation> - <citation type="bibtex"><![CDATA[ - @book{xie2015dynamic, - title={Dynamic Documents with R and knitr}, - author={Xie, Yihui}, - volume={29}, - year={2015}, - publisher={CRC Press} - } - ]]></citation> - <citation type="bibtex"><![CDATA[ - @misc{plotly2017, - title = {plotly: Create Interactive Web Graphics via 'plotly.js'}, - author = {Carson Sievert and Chris Parmer and Toby Hocking and Scott Chamberlain and Karthik Ram and - Marianne Corvellec and Pedro Despouy}, - year = {2017}, - note = {R package version 4.6.0}, - url = {https://CRAN.R-project.org/package=plotly}, - } - ]]></citation> - <citation type="bibtex"><![CDATA[ - @Book{ggplot22016, - author = {Hadley Wickham}, - title = {ggplot2: Elegant Graphics for Data Analysis}, - publisher = {Springer-Verlag New York}, - year = {2009}, - isbn = {978-0-387-98140-6}, - url = {http://ggplot2.org}, - } - ]]></citation> - </citations> -</tool>
--- a/fastqc_render.R Wed Apr 25 15:55:23 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,57 +0,0 @@ -##============ Sink warnings and errors to a file ============== -## use the sink() function to wrap all code within it. -##============================================================== -zz = file(paste0(Sys.getenv('REPORT_FILES_PATH'), '/.r_rendering.log.txt')) -sink(zz) -sink(zz, type = 'message') - -#============== preparation ==================================== -options(stringsAsFactors = FALSE) -# import libraries -#------------------------------------------------------------------ -# ADD MORE LIBRARIES HERE IF YOUR TOOL DEPENDS ON OTHER R LIBRARIES -#------------------------------------------------------------------ -library('getopt') -library('rmarkdown') -library('htmltools') -library(plyr) -library(dplyr) -library(stringr) -library(DT) -library(reshape2) -library(plotly) - -# load helper functions -source(paste0(Sys.getenv('TOOL_INSTALL_DIR'), '/helper.R')) -# import getopt specification matrix from a csv file -opt = getopt(getopt_specification_matrix('getopt_specification.csv', - tool_dir=Sys.getenv('TOOL_INSTALL_DIR'))) -# define environment variables for all input values. this is useful when we -# want to use input values by other programming language in r markdown -do.call(Sys.setenv, opt[-1]) -#=============================================================== - - -#======================== render Rmd files ========================= -# NOTICE: -# we should copy all rmarkdown files from tool install directory to REPORT_FILES_PATH directory. -# and render rmarkdown files in the REPORT_FILES_PATH directory. 
-file.copy(from = paste0(Sys.getenv('TOOL_INSTALL_DIR'), '/vakata-jstree-3.3.5'), - to = Sys.getenv('REPORT_FILES_PATH'), recursive = TRUE) -system(command = 'cp -r ${TOOL_INSTALL_DIR}/*.Rmd ${REPORT_FILES_PATH}') - -#----------------BELOW IS WHERE YOU NEED TO CUSTOMIZE --------------------- -render(input = paste0(Sys.getenv('REPORT_FILES_PATH'), '/fastqc.Rmd')) -# add more lines below if there are more Rmd files to be rendered - -#=============================================================== - - -#============== expose outputs to galaxy history =============== -system(command = 'sh ${TOOL_INSTALL_DIR}/expose-outputs.sh') -#=============================================================== - - -##--------end of code rendering .Rmd templates---------------- -sink() -##=========== End of sinking output============================= \ No newline at end of file
--- a/getopt_specification.csv Wed Apr 25 15:55:23 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,9 +0,0 @@ -short flag,argument mask,data type,variable name -o,1,character,report -d,1,character,report.files_path -r,1,character,reads_1 -n,1,character,reads_1.name -R,1,character,reads_2 -N,1,character,reads_2.name -c,1,character,contaminants -l,1,character,limits \ No newline at end of file
--- a/helper.R Wed Apr 25 15:55:23 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,82 +0,0 @@ -#' \code{getopt_specification_matrix} returns a getopt specification matrix. -#' -#' @param specification_file a cvs file within the \code{galaxy_tool_directory} which stores getopt specification matrix data. -#' The first column are short flags, the second column are argument masks, the third column -#' is data types. The fourth column are variable names used in the tool XML. These three columns are required. -#' @param gtg_name the name of a running GTG. -getopt_specification_matrix = function(specification_file, - gtg_name = 'gtg', - tool_dir = Sys.getenv('TOOL_DIR')) { - df = read.csv( - paste0(tool_dir, '/', specification_file), - header = TRUE, - stringsAsFactors = FALSE - ) - # check if there are duplicated short flags - short_flags = df[, 1] - if (length(unique(short_flags)) < length(short_flags)) { - cat('----Duplicated short flags found ----\n') - cat('short flags: ', df[, 1][duplicated(df[, 1])], '\n') - stop('Duplicated short flags are not allowed.') - } - - # use short flags to generate long flags - long_flags = paste0('X_', df[, 1]) - - # specification matrix - df2 = data.frame( - long_flags = long_flags, - short_flags = df[, 1], - argument_mask = df[, 2], - data_type = df[, 3] - ) - - as.matrix(df2) -} - - - -#' \code{file_tree} generate file tree of a directory in the format of HTML lists. -#' -#' @param dir the path to the directory for generating the file tree. -#' @param output_dir the REPORT_FILES_PATH folder name, which has the name style: dataset_NUMBER_files. 
-# define a recursive function to build html string of the file tree -file_tree = function(dir = '.') { - # get the OUTPUT_DIR folder data: dataset_NUMBER_files - report_files_path = Sys.getenv('REPORT_FILES_PATH') - output_dir = tail(strsplit(report_files_path, '/')[[1]], 1) - - files = list.files(path = dir, - recursive = FALSE, - full.names = TRUE) - # files also include directorys, need to remove directorys - files = files[!dir.exists(files)] - dirs = list.dirs(path = dir, - recursive = FALSE, - full.names = TRUE) - tags$ul({ - if (length(files) > 0) { - lapply(files, function(x) { - path_end = tail(strsplit(x, '/')[[1]], 1) - href_path = strsplit(x, paste0(output_dir, '/'))[[1]][2] - li_item = tags$li(tags$a(path_end, href = href_path)) - li_item$attribs = list('data-jstree' = '{"icon":"jstree-file"}') - li_item - }) - } - }, - { - if (length(dirs) > 0) { - lapply(dirs, function(x) { - path_end = tail(strsplit(x, '/')[[1]], 1) - # hide vakata-jstree-3.3.5 folder - if (path_end != 'vakata-jstree-3.3.5') { - # x_path = strsplit(x, paste0(output_dir, '/'))[[1]][2] - li_item = tags$li(path_end, file_tree(x)) - li_item$attribs = list('data-jstree' = '{"icon":"jstree-folder"}') - li_item - } - }) - } - }) -}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rmarkdown_report.Rmd Mon Apr 30 16:40:21 2018 -0400 @@ -0,0 +1,404 @@ +--- +title: 'Short reads evaluation with [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/)' +output: + html_document: + highlight: pygments +--- + +```{css} +pre code, pre, code { + white-space: pre !important; + overflow-x: scroll !important; + word-break: keep-all !important; + word-wrap: initial !important; +} +``` + +```{r setup, include=FALSE, warning=FALSE, message=FALSE} +knitr::opts_knit$set(progress = FALSE) +knitr::opts_chunk$set(error = TRUE, echo = FALSE) +``` + +```{r, echo=FALSE} +# to make the css theme to work, <link></link> tags cannot be added directly +# as <script></script> tags as below. +# it has to be added using a code chunk with the htmltool functions!!! +css_link = tags$link() +css_link$attribs = list(rel="stylesheet", href="vakata-jstree-3.3.5/dist/themes/default/style.min.css") +css_link +``` + +```{r, eval=FALSE, echo=FALSE} +# this code chunk is purely for adding comments +# below is to add jQuery and jstree javascripts +``` +<script src="https://code.jquery.com/jquery-3.3.1.min.js"></script> +<script src="vakata-jstree-3.3.5/dist/jstree.min.js"></script> + +```{r, eval=FALSE, echo=FALSE} +# this code chunk is purely for adding comments +# javascript code below is to build the file tree interface +# see this for how to implement opening hyperlink: https://stackoverflow.com/questions/18611317/how-to-get-i-get-leaf-nodes-in-jstree-to-open-their-hyperlink-when-clicked-when +``` +<script> + $(function () { + // create an instance when the DOM is ready + $('#jstree').jstree().bind("select_node.jstree", function (e, data) { + window.open( data.node.a_attr.href, data.node.a_attr.target ) + }); + }); +</script> + + +--- +# ADD YOUR DATA ANALYSIS CODE AND MARKUP TEXT BELOW TO EXTEND THIS R MARKDOWN FILE +--- + + +# Run FastQC + +```{bash} +sh ${TOOL_INSTALL_DIR}/build-and-run-job-scripts.sh +``` + +```{r 
echo=FALSE,results='asis'}
# display fastqc job script
cat('```bash\n')
cat(readLines(paste0(Sys.getenv('REPORT_FILES_PATH'), '/job-1-script.sh')), sep = '\n')
cat('\n```')
```

# Fastqc Output Visualization

## Overview

```{r eval=TRUE}
# summary.txt is read without a header: column 1 is the status and column 2 the
# module name (see the column names assigned below). Read 1 contributes the
# module + status columns, read 2 only its status column.
read_1_summary = read.csv(paste0(opt$X_d, '/read_1_fastqc/summary.txt'),
                          stringsAsFactors = FALSE,
                          header = FALSE, sep = '\t')[, 2:1]
read_2_summary = read.csv(paste0(opt$X_d, '/read_2_fastqc/summary.txt'),
                          stringsAsFactors = FALSE,
                          header = FALSE, sep = '\t')[, 1]
combined_summary = data.frame(read_1_summary, read_2_summary, stringsAsFactors = FALSE)
names(combined_summary) = c('MODULE', 'Pre-trimming', 'Post-trimming')
# make failures and warnings stand out in the rendered table
combined_summary[combined_summary == 'FAIL'] = 'FAIL (X)'
combined_summary[combined_summary == 'WARN'] = 'WARN (!)'
knitr::kable(combined_summary)
```

```{r 'function definition', echo=FALSE}
#' Path of the fastqc_data.txt file produced for read set 1 or 2.
#'
#' @param read_id 1 (reads before trimming) or 2 (reads after trimming).
#' @return path below the report files directory (\code{opt$X_d}).
fastqc_data_file = function(read_id) {
  paste0(opt$X_d, '/read_', read_id, '_fastqc/fastqc_data.txt')
}

#' Check whether a module is present in a fastqc_data.txt file.
#'
#' @param fastqc_data path to a fastqc_data.txt file.
#' @param module_name pattern identifying the module header line.
#' @return TRUE when at least one line matches \code{module_name}.
has_data_module = function(fastqc_data, module_name) {
  length(grep(module_name, readLines(fastqc_data))) > 0
}

#' Extract the data table of one module from a fastqc_data.txt file.
#'
#' The table consists of the lines between the first line matching
#' \code{module_name} and the next 'END_MODULE' line. It is parsed in memory;
#' no temporary file is written, so concurrent jobs cannot clobber each other.
#'
#' @param fastqc_data path to a fastqc_data.txt file.
#' @param module_name pattern identifying the module header line.
#' @param header passed to \code{read.csv}: is the first table line a header?
#' @param comment.char passed to \code{read.csv}; use '#' to skip '#' lines.
#' @return a data.frame with the module data (empty when the module has no
#'   data lines). Stops with an error when the module or its end marker is
#'   missing.
extract_data_module = function(fastqc_data, module_name, header = TRUE, comment.char = "") {
  f = readLines(fastqc_data)
  start_line = grep(module_name, f)
  if (length(start_line) == 0) {
    stop('Module "', module_name, '" not found in ', fastqc_data, call. = FALSE)
  }
  # only the first match is the module header
  start_line = start_line[1]
  end_module_lines = grep('END_MODULE', f)
  end_line = end_module_lines[end_module_lines > start_line][1]
  if (is.na(end_line)) {
    stop('No END_MODULE line after module "', module_name, '" in ', fastqc_data, call. = FALSE)
  }
  # header line immediately followed by END_MODULE: nothing to parse
  # (guards against the descending sequence (start + 1):(end - 1))
  if (end_line - start_line < 2) {
    return(data.frame())
  }
  module_data = f[(start_line + 1):(end_line - 1)]
  read.csv(text = module_data, sep = '\t', header = header, comment.char = comment.char)
}
```



### Per base sequence quality

```{r 'per base sequence quality', fig.width=10}
## reads 1
pbsq_1 = extract_data_module(fastqc_data_file(1), 'Per base sequence quality')
pbsq_1$id = seq_len(nrow(pbsq_1))
pbsq_1$trim = 'before'

## reads 2
pbsq_2 = extract_data_module(fastqc_data_file(2), 'Per base sequence quality')
pbsq_2$id = seq_len(nrow(pbsq_2))
pbsq_2$trim = 'after'

comb_pbsq = rbind(pbsq_1, pbsq_2)
comb_pbsq$trim = factor(comb_pbsq$trim, levels = c('before', 'after'))

# box statistics are precomputed by FastQC, hence stat = 'identity';
# the literal colours are resolved by the identity scales below
p = ggplot(data = comb_pbsq) +
  geom_boxplot(mapping = aes(x = id,
                             lower = Lower.Quartile,
                             upper = Upper.Quartile,
                             middle = Median,
                             ymin = X10th.Percentile,
                             ymax = X90th.Percentile,
                             fill = "yellow"),
               stat = 'identity') +
  geom_line(mapping = aes(x = id, y = Mean, color = "red")) +
  scale_x_continuous(name = 'Position in read (bp)', breaks = pbsq_2$id, labels = pbsq_2$X.Base) +
  scale_y_continuous(limits = c(0, max(comb_pbsq$Upper.Quartile) + 5)) +
  scale_fill_identity() +
  scale_color_identity() +
  facet_grid(. ~ trim) +
  theme(axis.text.x = element_text(size = 5),
        panel.background = element_rect(fill = NA),
        panel.grid.major.y = element_line(color = 'blue', size = 0.1))
p
```


### Per tile sequence quality

```{r 'per tile sequence quality', fig.width=10}
## check if 'per tile sequence quality' module exists or not (in both files,
## since both are read below)
if (has_data_module(fastqc_data_file(1), 'Per tile sequence quality') &&
    has_data_module(fastqc_data_file(2), 'Per tile sequence quality')) {
  ## reads 1
  ptsq_1 = extract_data_module(fastqc_data_file(1), 'Per tile sequence quality')
  ptsq_1$trim = 'before'

  ## reads 2
  ptsq_2 = extract_data_module(fastqc_data_file(2), 'Per tile sequence quality')
  ptsq_2$trim = 'after'

  comb_ptsq = rbind(ptsq_1, ptsq_2)
  comb_ptsq$trim = factor(comb_ptsq$trim, levels = c('before', 'after'))
  # keep the positions in file order instead of alphabetical order
  comb_ptsq$Base = factor(comb_ptsq$Base, levels = unique(comb_ptsq$Base))

  # convert integers to characters
  # comb_ptsq$Tile = as.character(comb_ptsq$X.Tile)

  p = ggplot(data = comb_ptsq) +
    geom_raster(mapping = aes(x = Base, y = X.Tile, fill = Mean)) +
    facet_grid(. ~ trim) +
    scale_x_discrete(name = "Position in read (bp)") +
    scale_y_continuous(name = "") +
    scale_fill_gradient(low = "blue", high = "red") +
    theme(axis.text.x = element_text(size = 5, angle = 90),
          axis.text.y = element_text(size = 5),
          panel.background = element_rect(fill = NA))
  ggplotly(p)
} else {
  print('No "per tile sequence quality" data')
}
```

### Per sequence quality score

```{r 'Per sequence quality score', fig.width=10}
## reads 1
psqs_1 = extract_data_module(fastqc_data_file(1), 'Per sequence quality scores')
psqs_1$trim = 'before'

## reads 2
psqs_2 = extract_data_module(fastqc_data_file(2), 'Per sequence quality scores')
psqs_2$trim = 'after'

comb_psqs = rbind(psqs_1, psqs_2)
comb_psqs$trim = factor(comb_psqs$trim, levels = c('before', 'after'))

p = ggplot(data = comb_psqs) +
  geom_line(mapping = aes(x = X.Quality, y = Count), color = 'red') +
  facet_grid(. ~ trim) +
  scale_x_continuous(name = 'Mean Sequence Quality (Phred Score)',
                     limits = c(min(comb_psqs$X.Quality), max(comb_psqs$X.Quality))) +
  scale_y_continuous(name = '') +
  theme(panel.background = element_rect(fill = NA),
        axis.line = element_line(),
        panel.grid.major.y = element_line(color = 'blue', size = 0.1))
p
```

### Per base sequence content

```{r 'Per base sequence content', fig.width=10}
## reads 1
pbsc_1 = extract_data_module(fastqc_data_file(1), 'Per base sequence content')
pbsc_1$id = seq_len(nrow(pbsc_1))

# long format: one row per position and remaining column
melt_pbsc_1 = melt(pbsc_1, id = c('X.Base', 'id'))
melt_pbsc_1$trim = 'before'


## reads 2
pbsc_2 = extract_data_module(fastqc_data_file(2), 'Per base sequence content')
pbsc_2$id = seq_len(nrow(pbsc_2))

melt_pbsc_2 = melt(pbsc_2, id = c('X.Base', 'id'))
melt_pbsc_2$trim = 'after'

comb_pbsc = rbind(melt_pbsc_1, melt_pbsc_2)
comb_pbsc$trim = factor(comb_pbsc$trim, levels = c('before', 'after'))

p = ggplot(data = comb_pbsc) +
  geom_line(mapping = aes(x = id, y = value, color = variable)) +
  facet_grid(. ~ trim) +
  xlim(min(comb_pbsc$id), max(comb_pbsc$id)) +
  ylim(0, 100) +
  xlab('Position in read (bp)') +
  ylab('') +
  scale_color_discrete(name = '') +
  theme_classic()
ggplotly(p)
```

### Per sequence GC content

```{r 'Per sequence GC content', fig.width=10}
## reads 1
psGCc_1 = extract_data_module(fastqc_data_file(1), 'Per sequence GC content')
psGCc_1$trim = 'before'

## reads 2
psGCc_2 = extract_data_module(fastqc_data_file(2), 'Per sequence GC content')
psGCc_2$trim = 'after'

comb_psGCc = rbind(psGCc_1, psGCc_2)
comb_psGCc$trim = factor(comb_psGCc$trim, levels = c('before', 'after'))

# the x axis shows the GC content column, not a quality score
p = ggplot(data = comb_psGCc, aes(x = X.GC.Content, y = Count)) +
  geom_line(color = 'red') +
  facet_grid(. ~ trim) +
  xlab('Mean GC content (%)') +
  ylab('') +
  theme_classic()
ggplotly(p)
```


### Per base N content

```{r 'Per base N content', fig.width=10}
## reads 1
pbNc_1 = extract_data_module(fastqc_data_file(1), 'Per base N content')
pbNc_1$id = seq_len(nrow(pbNc_1))
pbNc_1$trim = 'before'

## reads 2
pbNc_2 = extract_data_module(fastqc_data_file(2), 'Per base N content')
pbNc_2$id = seq_len(nrow(pbNc_2))
pbNc_2$trim = 'after'

comb_pbNc = rbind(pbNc_1, pbNc_2)
comb_pbNc$trim = factor(comb_pbNc$trim, levels = c('before', 'after'))

# x is the position in the read, y the N-Count column.
# NOTE(review): ylim(0, 1) drops every point with N.Count above 1 - confirm
# that this range is intended for the values FastQC reports.
p = ggplot(data = comb_pbNc, aes(x = id, y = N.Count)) +
  geom_line(color = 'red') +
  scale_x_continuous(breaks = pbNc_2$id, labels = pbNc_2$X.Base) +
  facet_grid(. ~ trim) +
  ylim(0, 1) +
  xlab('Position in read (bp)') +
  ylab('N-Count') +
  theme(axis.text.x = element_text(size = 5),
        axis.line = element_line(),
        panel.background = element_rect(fill = NA))
ggplotly(p)
```


### Sequence Length Distribution

```{r 'Sequence Length Distribution', fig.width=10}
## reads 1
sld_1 = extract_data_module(fastqc_data_file(1), 'Sequence Length Distribution')
sld_1$id = seq_len(nrow(sld_1))
sld_1$trim = 'before'

## reads 2
sld_2 = extract_data_module(fastqc_data_file(2), 'Sequence Length Distribution')
sld_2$id = seq_len(nrow(sld_2))
sld_2$trim = 'after'

comb_sld = rbind(sld_1, sld_2)
comb_sld$trim = factor(comb_sld$trim, levels = c('before', 'after'))

p = ggplot(data = comb_sld, aes(x = id, y = Count)) +
  geom_line(color = 'red') +
  scale_x_continuous(breaks = sld_2$id, labels = sld_2$X.Length) +
  facet_grid(. ~ trim) +
  xlab('Sequence Length (bp)') +
  ylab('') +
  theme(axis.text.x = element_text(size = 5),
        panel.background = element_rect(fill = NA),
        axis.line = element_line(),
        plot.margin = margin(2, 2, 2, 10))
ggplotly(p)
```

### Sequence Duplication Levels

```{r 'Sequence Duplication Levels', fig.width=10}
# lines starting with '#' are skipped in this module (comment.char = '#'),
# so the column names are assigned by hand
sdl_columns = c('Duplication_Level', 'Percentage_of_deduplicated', 'Percentage_of_total')

## reads 1
sdl_1 = extract_data_module(fastqc_data_file(1), 'Sequence Duplication Levels',
                            header = FALSE, comment.char = '#')
names(sdl_1) = sdl_columns
sdl_1$id = seq_len(nrow(sdl_1))

melt_sdl_1 = melt(sdl_1, id = c('Duplication_Level', 'id'))
melt_sdl_1$trim = 'before'


## reads 2
sdl_2 = extract_data_module(fastqc_data_file(2), 'Sequence Duplication Levels',
                            header = FALSE, comment.char = '#')
names(sdl_2) = sdl_columns
sdl_2$id = seq_len(nrow(sdl_2))

melt_sdl_2 = melt(sdl_2, id = c('Duplication_Level', 'id'))
melt_sdl_2$trim = 'after'

comb_sdl = rbind(melt_sdl_1, melt_sdl_2)
comb_sdl$trim = factor(comb_sdl$trim, levels = c('before', 'after'))

p = ggplot(data = comb_sdl) +
  geom_line(mapping = aes(x = id, y = value, color = variable)) +
  scale_x_continuous(breaks = sdl_2$id, labels = sdl_2$Duplication_Level) +
  facet_grid(. ~ trim) +
  xlab('Sequence Duplication Level') +
  ylab('') +
  scale_color_discrete(name = '') +
  theme(axis.text.x = element_text(size = 5),
        panel.background = element_rect(fill = NA),
        axis.line = element_line())
p
```

### Adapter Content

```{r 'Adapter Content', fig.width=10}
## reads 1
ac_1 = extract_data_module(fastqc_data_file(1), 'Adapter Content')
ac_1$id = seq_len(nrow(ac_1))

melt_ac_1 = melt(ac_1, id = c('X.Position', 'id'))
melt_ac_1$trim = 'before'

## reads 2
ac_2 = extract_data_module(fastqc_data_file(2), 'Adapter Content')
ac_2$id = seq_len(nrow(ac_2))

melt_ac_2 = melt(ac_2, id = c('X.Position', 'id'))
melt_ac_2$trim = 'after'

comb_ac = rbind(melt_ac_1, melt_ac_2)
comb_ac$trim = factor(comb_ac$trim, levels = c('before', 'after'))

# NOTE(review): ylim(0, 1) drops every point with a value above 1 - confirm
# that this range is intended for the values FastQC reports.
p = ggplot(data = comb_ac, aes(x = id, y = value, color = variable)) +
  geom_line() +
  facet_grid(. ~ trim) +
  xlim(min(comb_ac$id), max(comb_ac$id)) +
  ylim(0, 1) +
  xlab('Position in read (bp)') +
  ylab('') +
  scale_color_discrete(name = '') +
  theme(axis.text.x = element_text(size = 5),
        panel.background = element_rect(fill = NA),
        axis.line = element_line())
ggplotly(p)
```

### Kmer Content {.tabset}

#### Before

```{r 'Kmer Content (before)', fig.width=10}
# the module may be missing from the FastQC output; report that instead of failing
if (has_data_module(fastqc_data_file(1), 'Kmer Content')) {
  kc_1 = extract_data_module(fastqc_data_file(1), 'Kmer Content')
  knitr::kable(kc_1)
} else {
  print('No "Kmer Content" data')
}
```

#### After
```{r 'Kmer Content (after)', fig.width=10}
if (has_data_module(fastqc_data_file(2), 'Kmer Content')) {
  kc_2 = extract_data_module(fastqc_data_file(2), 'Kmer Content')
  knitr::kable(kc_2)
} else {
  print('No "Kmer Content" data')
}
```
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rmarkdown_report.xml Mon Apr 30 16:40:21 2018 -0400 @@ -0,0 +1,111 @@
<tool name="aurora_fastqc" id='aurora_fastqc' version="2.1.2">
    <description>
        Evaluate short reads with FastQC software on a single or a paired of untrimmed and trimmed reads
        files.
    </description>
    <!-- Packages needed to run FastQC and to render the R Markdown report. -->
    <requirements>
        <requirement type="package" version="1.15.0.6-0">pandoc</requirement>
        <requirement type="package" version="1.20.0">r-getopt</requirement>
        <requirement type="package" version="1.3">r-rmarkdown</requirement>
        <requirement type="package" version="1.8.4">r-plyr</requirement>
        <requirement type="package" version="1.1.0">r-stringr</requirement>
        <requirement type="package" version="0.2">r-dt</requirement>
        <requirement type="package" version="1.4.2">r-reshape2</requirement>
        <requirement type="package" version="4.5.6">r-plotly</requirement>
        <requirement type="package" version="0.3.5">r-htmltools</requirement>
        <requirement type="package" version="0.11.5">fastqc</requirement>
        <requirement type="package" version="0.9.10">xorg-libxrender</requirement>
        <requirement type="package" version="1.2.2">xorg-libsm</requirement>
        <requirement type="package" version="6.0">unzip</requirement>
    </requirements>
    <stdio>
        <regex match="XXX" source="stderr" level="warning"
               description="Check the warnings_and_errors.txt file for more details."/>
    </stdio>
    <!--
        Every dataset path is wrapped in single quotes so that the shell passes it on as
        one argument even when it contains spaces or other special characters.
        The short flags given to the render script correspond to the rows of
        command-line-arguments.csv (o, d, r, n, R, N, c, l).
    -->
    <command><![CDATA[

        ######### each aurora tool generates a html file and have an files path directory associated with it.
        mkdir -p '$report.files_path' &&

        ######### three important paths:
        #########   1. path to tool installation directory
        #########   2. path to report html
        #########   3. path to files_path directory associated with the report output.
        export TOOL_INSTALL_DIR='${__tool_directory__}' &&
        export REPORT='$report' &&
        export REPORT_FILES_PATH='$report.files_path' &&

        ############ copy jstree directory cp -r ${__tool_directory__}/vakata-jstree-3.3.5 ./ &&

        ############ create a hidden file to store r markdown rendering log
        touch '$report.files_path/.r_rendering.log.txt' &&

        Rscript '${__tool_directory__}/rmarkdown_report_render.R'

            -o '$report'
            -d '$report.files_path'

            -r '$reads_1'
            -n '$reads_1.name'
            -R '$reads_2'
            -N '$reads_2.name'
            -c '$contaminants'
            -l '$limits'

    ]]></command>
    <inputs>
        <param format="fastq,fastq.gz,fastq.bz2,bam,sam" name="reads_1" type="data" optional="false"
               label="Short reads before trimming"
               help="Short reads data from history. This could be reads before trimming."/>
        <param format="fastq,fastq.gz,fastq.bz2,bam,sam" name="reads_2" type="data" label="Short reads after trimming"
               help="Short reads data from history. This could be reads after trimming."/>
        <param name="contaminants" type="data" format="tabular" optional="true" label="Contaminant list"
               help="Specifies a non-default file which contains the list of adapter sequences which will be explicitly
            searched against the library. The file must contain sets of named adapters in the form name[tab]sequence.
            Lines prefixed with a hash will be ignored."/>
        <param name="limits" type="data" format="txt" optional="true" label="Submodule and Limit specifing file"
               help="Specifies a non-default file which contains a set of criteria which will be used to determine the
            warn/error limits for the various modules. This file can also be used to selectively remove some modules
            from the output all together. The format needs to mirror the default limits.txt file found in the
            Configuration folder."/>

    </inputs>
    <outputs>
        <data format="html" name="report" label="${tool.name} on ${on_string}"/>
    </outputs>
    <citations>
        <citation type="bibtex"><![CDATA[
            @misc{bioinformatics2014fastqc,
            title={FastQC},
            author={Bioinformatics, Babraham},
            year={2014}
            }
        ]]></citation>
        <citation type="bibtex"><![CDATA[
            @article{allaire2016rmarkdown,
            title={rmarkdown: Dynamic Documents for R, 2016},
            author={Allaire, J and Cheng, Joe and Xie, Yihui and McPherson, Jonathan and Chang, Winston and Allen, Jeff
            and Wickham, Hadley and Atkins, Aron and Hyndman, Rob},
            journal={R package version 0.9},
            volume={6},
            year={2016}
            }
        ]]></citation>
        <citation type="bibtex"><![CDATA[
            @book{xie2015dynamic,
            title={Dynamic Documents with R and knitr},
            author={Xie, Yihui},
            volume={29},
            year={2015},
            publisher={CRC Press}
            }
        ]]></citation>
        <citation type="bibtex"><![CDATA[
            @online{jstree,
            author={Bozhanov, Ivan},
            year = 2018,
            url = {https://www.jstree.com/}
            }
        ]]></citation>
    </citations>
</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rmarkdown_report_render.R Mon Apr 30 16:40:21 2018 -0400 @@ -0,0 +1,161 @@ +##============ Sink warnings and errors to a file ============== +## use the sink() function to wrap all code within it. +## Both regular output and the message stream (warnings, errors) are diverted +## to the same connection: the hidden log file inside REPORT_FILES_PATH. +##============================================================== +zz = file(paste0(Sys.getenv('REPORT_FILES_PATH'), '/.r_rendering.log.txt')) +sink(zz) +sink(zz, type = 'message') + +#============== preparation ==================================== +# import libraries +#------------------------------------------------------------------ +# ADD MORE LIBRARIES HERE IF YOUR TOOL DEPENDS ON OTHER R LIBRARIES +#------------------------------------------------------------------ +library('getopt') +library('rmarkdown') +library('htmltools') +#------------------------------------------------------------------ +options(stringsAsFactors = FALSE) + + +# define two helper functions +#-----: helper function 1 +#' \code{getopt_specification_matrix} returns a getopt specification matrix. +#' +#' @param specification_file a csv file within \code{tool_dir} which stores getopt specification matrix data. +#' The first column holds short flags, the second column argument masks, the third column +#' data types. The fourth column holds variable names used in the tool XML. Only the first three columns are read. +#' @param gtg_name the name of a running GTG (not used in the function body). +#' @param tool_dir directory containing \code{specification_file}; defaults to the TOOL_INSTALL_DIR environment variable. 
getopt_specification_matrix = function(specification_file,
                                       gtg_name = 'gtg',
                                       tool_dir = Sys.getenv('TOOL_INSTALL_DIR')) {
  # the specification csv is looked up inside the tool directory
  spec_path = paste0(tool_dir, '/', specification_file)
  spec = read.csv(spec_path, header = TRUE, stringsAsFactors = FALSE)

  # every short flag may appear only once; list the offenders before failing
  flags = spec[, 1]
  repeated = duplicated(flags)
  if (any(repeated)) {
    cat('----Duplicated short flags found ----\n')
    cat('short flags: ', flags[repeated], '\n')
    stop('Duplicated short flags are not allowed.')
  }

  # long flags are derived from the short flags ('o' becomes 'X_o');
  # the result is returned as a matrix with one row per flag
  as.matrix(
    data.frame(
      long_flags = paste0('X_', flags),
      short_flags = flags,
      argument_mask = spec[, 2],
      data_type = spec[, 3]
    )
  )
}

#-----: helper function 2
#' \code{file_tree} generates the file tree of a directory in the format of HTML lists.
#'
#' The name of the REPORT_FILES_PATH folder (name style: dataset_NUMBER_files) is
#' read from the environment inside the function and used to build relative links.
#'
#' @param dir the path to the directory for generating the file tree.
# define a recursive function to build html string of the file tree
#
# Returns an htmltools <ul> tag: one <li> with a link per file in `dir`,
# followed by one <li> per sub-directory holding the nested tree of that
# directory. The 'vakata-jstree-3.3.5' folder is left out of the tree.
file_tree = function(dir = '.') {
  # get the OUTPUT_DIR folder data: dataset_NUMBER_files
  report_files_path = Sys.getenv('REPORT_FILES_PATH')
  output_dir = tail(strsplit(report_files_path, '/')[[1]], 1)

  files = list.files(path = dir,
                     recursive = FALSE,
                     full.names = TRUE)
  # files also include directories, need to remove directories
  files = files[!dir.exists(files)]
  dirs = list.dirs(path = dir,
                   recursive = FALSE,
                   full.names = TRUE)
  tags$ul({
    if (length(files) > 0) {
      lapply(files, function(x) {
        path_end = tail(strsplit(x, '/')[[1]], 1)
        # link target = part of the path after '<output_dir>/'; the folder name
        # is matched literally (fixed = TRUE) so that characters such as '.' or
        # '+' in it are not interpreted as a regular expression
        href_path = strsplit(x, paste0(output_dir, '/'), fixed = TRUE)[[1]][2]
        li_item = tags$li(tags$a(path_end, href = href_path))
        li_item$attribs = list('data-jstree' = '{"icon":"jstree-file"}')
        li_item
      })
    }
  },
  {
    if (length(dirs) > 0) {
      lapply(dirs, function(x) {
        path_end = tail(strsplit(x, '/')[[1]], 1)
        # hide vakata-jstree-3.3.5 folder
        if (path_end != 'vakata-jstree-3.3.5') {
          # x_path = strsplit(x, paste0(output_dir, '/'))[[1]][2]
          li_item = tags$li(path_end, file_tree(x))
          li_item$attribs = list('data-jstree' = '{"icon":"jstree-folder"}')
          li_item
        }
      })
    }
  })
}
#----------------- end of help functions -------------------------


# import getopt specification matrix from a csv file
# NOTE(review): the changeset file list names 'command-line-arguments.csv' and
# 'getopt_specification.csv', not 'command_line_arguments.csv' - confirm that
# the file requested here exists in the tool directory.
opt = getopt(getopt_specification_matrix('command_line_arguments.csv',
                                         tool_dir = Sys.getenv('TOOL_INSTALL_DIR')))
# define environment variables for all input values.
# this is useful when we want to use input values by other programming
# language in r markdown. The first element of `opt` is dropped; the remaining
# elements become environment variables named after their long flags.
do.call(Sys.setenv, opt[-1])
#===============================================================


#======================== render Rmd files =========================
# copy jstree javascript library to tool output directory
file.copy(
  from = paste0(Sys.getenv('TOOL_INSTALL_DIR'), '/vakata-jstree-3.3.5'),
  to = Sys.getenv('REPORT_FILES_PATH'),
  recursive = TRUE
)

# if '_site.yml' file exists, this tool is assumed to render a website.
# otherwise, it renders a single html.
# The environment variables inside the shell commands are double-quoted so
# that directories containing spaces are passed as a single argument.
if (file.exists(paste0(Sys.getenv('TOOL_INSTALL_DIR'), '/_site.yml'))) {
  # render a website
  system(command = 'cp -r "${TOOL_INSTALL_DIR}"/*.Rmd "${REPORT_FILES_PATH}"')
  system(command = 'cp -r "${TOOL_INSTALL_DIR}/_site.yml" "${REPORT_FILES_PATH}"')
  render_site(input = Sys.getenv('REPORT_FILES_PATH'))
} else {
  # render a single html
  system(command = 'cp -r "${TOOL_INSTALL_DIR}/rmarkdown_report.Rmd" "${REPORT_FILES_PATH}"')
  # add a few lines to 'rmarkdown_report.Rmd' to generate file tree outputs
  # NOTE(review): the exact leading whitespace inside this string could not be
  # recovered from the flattened source; two spaces are assumed - confirm.
  jstree_lines = '

  ## Outputs

  ```{r, echo=FALSE}
  # create a div container to store the file tree interface
  library(htmltools)
  tags$div(
    id="jstree",
    file_tree(Sys.getenv(\'REPORT_FILES_PATH\'))
  )
  ```
  '
  write(x = jstree_lines, append = TRUE,
        file = paste0(Sys.getenv('REPORT_FILES_PATH'), '/rmarkdown_report.Rmd'))
  render(input = paste0(Sys.getenv('REPORT_FILES_PATH'), '/rmarkdown_report.Rmd'))
}
#===============================================================


#============== expose outputs to galaxy history ===============
system(command = 'sh "${TOOL_INSTALL_DIR}/expose-outputs.sh"')
#===============================================================


##--------end of code rendering .Rmd templates----------------
# restore the message stream first: removing the output diversion afterwards
# closes the log connection, which the message stream must no longer point at
sink(type = 'message')
sink()
##=========== End of sinking output=============================
--- a/shell-script-template.sh Wed Apr 25 15:55:23 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,36 +0,0 @@ -# SHELL_SCRIPT file name -SHELL_SCRIPT='fastqc.sh' - -# run SHELL_SCRIPT within tool outputs directory -cd ${REPORT_FILES_PATH} - -# build job-script.sh -cat >temp.sh <<EOF - -cp ${X_r} read_1.fq -cp ${X_R} read_2.fq - -fastqc \\ - -q \\ - -c ${X_c} \\ - -l ${X_l} \\ - ${X_d}/read_1.fq > /dev/null 2>&1 - -fastqc \\ - -q \\ - -c ${X_c} \\ - -l ${X_l} \\ - ${X_d}/read_2.fq > /dev/null 2>&1 - -EOF - -grep -v None temp.sh > ${SHELL_SCRIPT} -rm temp.sh - -# run SHELL_SCRIPT -sh ${SHELL_SCRIPT} - - -# unzip outputs -unzip -q read_1_fastqc.zip -unzip -q read_2_fastqc.zip \ No newline at end of file