# HG changeset patch # User mingchen0919 # Date 1525181570 14400 # Node ID ce76fb2791e529af1d561a8fa97064fe8fad55f1 # Parent ec906e72784d81161a8aa6d28ce18500c4e26e70 planemo upload commit d28f1764ef50ca42f996cada585cfcea9ec04e5a diff -r ec906e72784d -r ce76fb2791e5 build-and-run-job-scripts.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/build-and-run-job-scripts.sh Tue May 01 09:32:50 2018 -0400 @@ -0,0 +1,23 @@ +# run job scripts within the tool outputs directory +cd ${REPORT_FILES_PATH} + +#========== build and run job 1 script ============ +cat >htseq-count.sh < htseq-counts-raw.txt + + grep -v '__no_feature\|__ambiguous\|__too_low_aQual\|__not_aligned\|__alignment_not_unique' htseq-counts-raw.txt > counts.txt + +EOF + +# run job 1 script +sh htseq-count.sh + diff -r ec906e72784d -r ce76fb2791e5 command-line-arguments.csv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/command-line-arguments.csv Tue May 01 09:32:50 2018 -0400 @@ -0,0 +1,14 @@ +short flag,argument mask,data type,variable name +o,1,character,report +d,1,character,report.files_path +A,1,character,alignment_files +B,1,character,sample_names +G,1,character,gff +f,1,character,format +r,1,character,order +S,1,character,stranded +a,1,character,minaqual +T,1,character,feature_type +i,1,character,idattr +m,1,character,mode +c,1,character,count diff -r ec906e72784d -r ce76fb2791e5 expose-outputs-to-galaxy-history.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/expose-outputs-to-galaxy-history.sh Tue May 01 09:32:50 2018 -0400 @@ -0,0 +1,13 @@ +# change directory to tool outputs directory +cd ${REPORT_FILES_PATH} + +# copy outputs from tool outputs directory to corresponding galaxy output path +if [ -e "rmarkdown_report.html" ]; then + cp rmarkdown_report.html ${REPORT} +fi + +if [ -e "index.html" ]; then + cp index.html ${REPORT} +fi + +cp count_data.csv ${X_c} \ No newline at end of file diff -r ec906e72784d -r ce76fb2791e5 expose-outputs.sh --- a/expose-outputs.sh Thu Apr 26 12:24:33 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,6 +0,0 @@ -# change directory to tool outputs directory -cd ${REPORT_FILES_PATH} - -# copy files to corresponding tool output paths. -cp htseq_count.html ${REPORT} -cp count_data.csv ${X_c} \ No newline at end of file diff -r ec906e72784d -r ce76fb2791e5 getopt_specification.csv --- a/getopt_specification.csv Thu Apr 26 12:24:33 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,14 +0,0 @@ -short flag,argument mask,data type,variable name -o,1,character,report -d,1,character,report.files_path -A,1,character,alignment_files -B,1,character,sample_names -G,1,character,gff -f,1,character,format -r,1,character,order -S,1,character,stranded -a,1,character,minaqual -T,1,character,feature_type -i,1,character,idattr -m,1,character,mode -c,1,character,count diff -r ec906e72784d -r ce76fb2791e5 helper.R --- a/helper.R Thu Apr 26 12:24:33 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,82 +0,0 @@ -#' \code{getopt_specification_matrix} returns a getopt specification matrix. -#' -#' @param specification_file a cvs file within the \code{galaxy_tool_directory} which stores getopt specification matrix data. -#' The first column are short flags, the second column are argument masks, the third column -#' is data types. The fourth column are variable names used in the tool XML. These three columns are required. -#' @param gtg_name the name of a running GTG. -getopt_specification_matrix = function(specification_file, - gtg_name = 'gtg', - tool_dir = Sys.getenv('TOOL_DIR')) { - df = read.csv( - paste0(tool_dir, '/', specification_file), - header = TRUE, - stringsAsFactors = FALSE - ) - # check if there are duplicated short flags - short_flags = df[, 1] - if (length(unique(short_flags)) < length(short_flags)) { - cat('----Duplicated short flags found ----\n') - cat('short flags: ', df[, 1][duplicated(df[, 1])], '\n') - stop('Duplicated short flags are not allowed.') - } - - # use short flags to generate long flags - long_flags = paste0('X_', df[, 1]) - - # specification matrix - df2 = data.frame( - long_flags = long_flags, - short_flags = df[, 1], - argument_mask = df[, 2], - data_type = df[, 3] - ) - - as.matrix(df2) -} - - - -#' \code{file_tree} generate file tree of a directory in the format of HTML lists. -#' -#' @param dir the path to the directory for generating the file tree. -#' @param output_dir the REPORT_FILES_PATH folder name, which has the name style: dataset_NUMBER_files. -# define a recursive function to build html string of the file tree -file_tree = function(dir = '.') { - # get the OUTPUT_DIR folder data: dataset_NUMBER_files - report_files_path = Sys.getenv('REPORT_FILES_PATH') - output_dir = tail(strsplit(report_files_path, '/')[[1]], 1) - - files = list.files(path = dir, - recursive = FALSE, - full.names = TRUE) - # files also include directorys, need to remove directorys - files = files[!dir.exists(files)] - dirs = list.dirs(path = dir, - recursive = FALSE, - full.names = TRUE) - tags$ul({ - if (length(files) > 0) { - lapply(files, function(x) { - path_end = tail(strsplit(x, '/')[[1]], 1) - href_path = strsplit(x, paste0(output_dir, '/'))[[1]][2] - li_item = tags$li(tags$a(path_end, href = href_path)) - li_item$attribs = list('data-jstree' = '{"icon":"jstree-file"}') - li_item - }) - } - }, - { - if (length(dirs) > 0) { - lapply(dirs, function(x) { - path_end = tail(strsplit(x, '/')[[1]], 1) - # hide vakata-jstree-3.3.5 folder - if (path_end != 'vakata-jstree-3.3.5') { - # x_path = strsplit(x, paste0(output_dir, '/'))[[1]][2] - li_item = tags$li(path_end, file_tree(x)) - li_item$attribs = list('data-jstree' = '{"icon":"jstree-folder"}') - li_item - } - }) - } - }) -} diff -r ec906e72784d -r ce76fb2791e5 htseq_count.Rmd --- a/htseq_count.Rmd Thu Apr 26 12:24:33 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,94 +0,0 @@ ---- -title: 'htseq-count analysis report' -output: - html_document: - highlight: pygments ---- - -```{css echo=FALSE} -# code chunks scrollable -pre code, pre, code { - white-space: pre !important; - overflow-x: scroll !important; - word-break: keep-all !important; - word-wrap: initial !important; -} -``` - -```{r setup, include=FALSE, warning=FALSE, message=FALSE} -knitr::opts_knit$set(progress = FALSE) -knitr::opts_chunk$set(error = TRUE, echo = FALSE) -``` - -```{r, echo=FALSE} -# to make the css theme to work, tags cannot be added directly -# as tags as below. -# it has to be added using a code chunk with the htmltool functions!!! -css_link = tags$link() -css_link$attribs = list(rel="stylesheet", href="vakata-jstree-3.3.5/dist/themes/default/style.min.css") -css_link -``` - -```{r, eval=FALSE, echo=FALSE} -# this code chunk is purely for adding comments -# below is to add jQuery and jstree javascripts -``` - - - -```{r, eval=FALSE, echo=FALSE} -# this code chunk is purely for adding comments -# javascript code below is to build the file tree interface -# see this for how to implement opening hyperlink: https://stackoverflow.com/questions/18611317/how-to-get-i-get-leaf-nodes-in-jstree-to-open-their-hyperlink-when-clicked-when -``` - ------------------------------------------ - - -## Job script - -```{bash, echo=FALSE} -sh ${TOOL_INSTALL_DIR}/shell-script-template.sh -``` - -```{r echo=FALSE, comment='', results='asis'} -cat('```bash\n') -cat(readLines(paste0(Sys.getenv('REPORT_FILES_PATH'), '/htseq-count.sh')), sep = '\n') -cat('\n```') -``` - -## Counts - -Only the first few rows are displayed. - -```{r, echo=FALSE} -count_data = read.table(paste0(opt$X_d, '/counts.txt'), row.names = 1) -sample_names = trimws(strsplit(opt$X_B, ',')[[1]]) -colnames(count_data) = rep(sample_names, length = ncol(count_data)) -kable(head(count_data)) - -# modify column names -count_data = data.frame(feature_id = rownames(count_data), count_data) -write.csv(count_data, - file = paste0(Sys.getenv('REPORT_FILES_PATH'), '/count_data.csv'), - quote = FALSE, row.names = FALSE) -``` - - ------------------------------------------ -## Output - -```{r, echo=FALSE} -# create a div container to store the file tree interface -tags$div( - id="jstree", - file_tree(Sys.getenv('REPORT_FILES_PATH')) -) -``` diff -r ec906e72784d -r ce76fb2791e5 htseq_count.xml --- a/htseq_count.xml Thu Apr 26 12:24:33 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,121 +0,0 @@ - - Counting reads in features. - - pandoc - r-getopt - r-rmarkdown - htseq - - - - - $report.files_path/.r_rendering.log.txt && - - Rscript '${__tool_directory__}/htseq_count_render.R' - - -o $report - -d $report.files_path - - -A '$alignment_files' - -B '$sample_names' - -G $gff - -f $format - -r $order - -S $stranded - -a $minaqual - -T $feature_type - -i $idattr - -m $mode - -c $count - - ]]> - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff -r ec906e72784d -r ce76fb2791e5 htseq_count_render.R --- a/htseq_count_render.R Thu Apr 26 12:24:33 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,48 +0,0 @@ -##============ Sink warnings and errors to a file ============== -## use the sink() function to wrap all code within it. -##============================================================== -zz = file(paste0(Sys.getenv('REPORT_FILES_PATH'), '/.r_rendering.log.txt')) -sink(zz) -sink(zz, type = 'message') - -#-------------------preparation ----------------- -options(stringsAsFactors = FALSE) -# import libraries -library(getopt) -library(rmarkdown) -library(htmltools) -library(magrittr) -library(knitr) -# load helper functions -source(paste0(Sys.getenv('TOOL_INSTALL_DIR'), '/helper.R')) -# import getopt specification matrix from a csv file -opt = getopt(getopt_specification_matrix('getopt_specification.csv', - tool_dir=Sys.getenv('TOOL_INSTALL_DIR'))) -# define a unix variable versions for all input values. this is useful when we -# want to use input values by other programming language in r markdown -do.call(Sys.setenv, opt[-1]) -#------------------------------------------------ - - -#-----------------render Rmd files -------------- -# NOTICE: -# we should copy all rmarkdown files from tool install directory to REPORT_FILES_PATH directory. -# and render rmarkdown files in the REPORT_FILES_PATH directory. -file.copy(from = paste0(Sys.getenv('TOOL_INSTALL_DIR'), '/vakata-jstree-3.3.5'), - to = Sys.getenv('REPORT_FILES_PATH'), recursive = TRUE) -system(command = 'cp -r ${TOOL_INSTALL_DIR}/*.Rmd ${REPORT_FILES_PATH}') -# render Rmd files in order -render(input = paste0(Sys.getenv('REPORT_FILES_PATH'), '/htseq_count.Rmd')) -# add more lines below if there are more Rmd files to be rendered - -#------------------------------------------ - - -#--------- expose outputs to galaxy history ---- -system(command = 'sh ${TOOL_INSTALL_DIR}/expose-outputs.sh') -#==============the end============== - - -##--------end of code rendering .Rmd templates---------------- -sink() -##=========== End of sinking output============================= \ No newline at end of file diff -r ec906e72784d -r ce76fb2791e5 rmarkdown_report.Rmd --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rmarkdown_report.Rmd Tue May 01 09:32:50 2018 -0400 @@ -0,0 +1,84 @@ +--- +title: 'HTSeq-count Report' +output: + html_document: + highlight: pygments +--- + +```{r setup, include=FALSE, warning=FALSE, message=FALSE} +knitr::opts_chunk$set(error = TRUE, echo = FALSE) +``` + +```{css echo=FALSE} +# code chunks scrollable +pre code, pre, code { + white-space: pre !important; + overflow-x: scroll !important; + word-break: keep-all !important; + word-wrap: initial !important; +} +``` + + +```{r, echo=FALSE} +# to make the css theme to work, tags cannot be added directly +# as tags as below. +# it has to be added using a code chunk with the htmltool functions!!! +css_link = tags$link() +css_link$attribs = list(rel="stylesheet", href="vakata-jstree-3.3.5/dist/themes/default/style.min.css") +css_link +``` + +```{r, eval=FALSE, echo=FALSE} +# this code chunk is purely for adding comments +# below is to add jQuery and jstree javascripts +``` + + + +--- +# javascript code below is to build the file tree interface +# see this for how to implement opening hyperlink: https://stackoverflow.com/questions/18611317/how-to-get-i-get-leaf-nodes-in-jstree-to-open-their-hyperlink-when-clicked-when +--- + + +--- +# ADD YOUR DATA ANALYSIS CODE AND MARKUP TEXT BELOW TO EXTEND THIS R MARKDOWN FILE +--- + +## Job script + +```{bash, echo=FALSE} +sh ${TOOL_INSTALL_DIR}/build-and-run-job-scripts.sh +``` + +```{r echo=FALSE, comment='', results='asis'} +cat('```bash\n') +cat(readLines(paste0(Sys.getenv('REPORT_FILES_PATH'), '/htseq-count.sh')), sep = '\n') +cat('\n```') +``` + +## Counts + +Only the first few rows are displayed. + +```{r, echo=FALSE} +count_data = read.table(paste0(opt$X_d, '/counts.txt'), row.names = 1) +sample_names = trimws(strsplit(opt$X_B, ',')[[1]]) +colnames(count_data) = rep(sample_names, length = ncol(count_data)) +kable(head(count_data)) + +# modify column names +count_data = data.frame(feature_id = rownames(count_data), count_data) +write.csv(count_data, + file = paste0(Sys.getenv('REPORT_FILES_PATH'), '/count_data.csv'), + quote = FALSE, row.names = FALSE) +``` + diff -r ec906e72784d -r ce76fb2791e5 rmarkdown_report.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rmarkdown_report.xml Tue May 01 09:32:50 2018 -0400 @@ -0,0 +1,130 @@ + + + some description + + + pandoc + r-getopt + r-rmarkdown + htseq + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff -r ec906e72784d -r ce76fb2791e5 rmarkdown_report_render.R --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/rmarkdown_report_render.R Tue May 01 09:32:50 2018 -0400 @@ -0,0 +1,157 @@ +##============ Sink warnings and errors to a file ============== +## use the sink() function to wrap all code within it. +##============================================================== +zz = file(paste0(Sys.getenv('REPORT_FILES_PATH'), '/.r_rendering.log.txt')) +sink(zz) +sink(zz, type = 'message') + +#============== preparation ==================================== +# import libraries +#------------------------------------------------------------------ +# ADD MORE LIBRARIES HERE IF YOUR TOOL DEPENDS ON OTHER R LIBRARIES +#------------------------------------------------------------------ +library('getopt') +library('rmarkdown') +library('htmltools') +#------------------------------------------------------------------ +options(stringsAsFactors = FALSE) + + +# define two helper functions +#-----: helper function 1 +#' \code{getopt_specification_matrix} returns a getopt specification matrix. +#' +#' @param specification_file a cvs file within the \code{galaxy_tool_directory} which stores getopt specification matrix data. +#' The first column are short flags, the second column are argument masks, the third column +#' is data types. The fourth column are variable names used in the tool XML. These three columns are required. +#' @param gtg_name the name of a running GTG. +getopt_specification_matrix = function(specification_file, + gtg_name = 'gtg', + tool_dir = Sys.getenv('TOOL_INSTALL_DIR')) { + df = read.csv( + paste0(tool_dir, '/', specification_file), + header = TRUE, + stringsAsFactors = FALSE + ) + # check if there are duplicated short flags + short_flags = df[, 1] + if (length(unique(short_flags)) < length(short_flags)) { + cat('----Duplicated short flags found ----\n') + cat('short flags: ', df[, 1][duplicated(df[, 1])], '\n') + stop('Duplicated short flags are not allowed.') + } + + # use short flags to generate long flags + long_flags = paste0('X_', df[, 1]) + + # specification matrix + df2 = data.frame( + long_flags = long_flags, + short_flags = df[, 1], + argument_mask = df[, 2], + data_type = df[, 3] + ) + + as.matrix(df2) +} + +#-----: helper function 2 +#' \code{file_tree} generate file tree of a directory in the format of HTML lists. +#' +#' @param dir the path to the directory for generating the file tree. +#' @param output_dir the REPORT_FILES_PATH folder name, which has the name style: dataset_NUMBER_files. +# define a recursive function to build html string of the file tree +file_tree = function(dir = '.') { + # get the OUTPUT_DIR folder data: dataset_NUMBER_files + report_files_path = Sys.getenv('REPORT_FILES_PATH') + output_dir = tail(strsplit(report_files_path, '/')[[1]], 1) + + files = list.files(path = dir, + recursive = FALSE, + full.names = TRUE) + # files also include directorys, need to remove directorys + files = files[!dir.exists(files)] + dirs = list.dirs(path = dir, + recursive = FALSE, + full.names = TRUE) + tags$ul({ + if (length(files) > 0) { + lapply(files, function(x) { + path_end = tail(strsplit(x, '/')[[1]], 1) + href_path = strsplit(x, paste0(output_dir, '/'))[[1]][2] + li_item = tags$li(tags$a(path_end, href = href_path)) + li_item$attribs = list('data-jstree' = '{"icon":"jstree-file"}') + li_item + }) + } + }, + { + if (length(dirs) > 0) { + lapply(dirs, function(x) { + path_end = tail(strsplit(x, '/')[[1]], 1) + # hide vakata-jstree-3.3.5 folder + if (path_end != 'vakata-jstree-3.3.5') { + # x_path = strsplit(x, paste0(output_dir, '/'))[[1]][2] + li_item = tags$li(path_end, file_tree(x)) + li_item$attribs = list('data-jstree' = '{"icon":"jstree-folder"}') + li_item + } + }) + } + }) +} +#----------------- end of help functions ------------------------- + + +# import getopt specification matrix from a csv file +opt = getopt(getopt_specification_matrix('command-line-arguments.csv', + tool_dir = Sys.getenv('TOOL_INSTALL_DIR'))) +# define environment variables for all input values. this is useful when we +# want to use input values by other programming language in r markdown +do.call(Sys.setenv, opt[-1]) +#=============================================================== + + +#======================== render Rmd files ========================= +# copy jstree javascript library to tool output directory +file.copy( + from = paste0(Sys.getenv('TOOL_INSTALL_DIR'), '/vakata-jstree-3.3.5'), + to = Sys.getenv('REPORT_FILES_PATH'), + recursive = TRUE +) + +# if '_site.yml' file exists, this tool is assumed to render a website. +# otherwise, it renders a single html. +if (file.exists(paste0(Sys.getenv('TOOL_INSTALL_DIR'), '/_site.yml'))) { + # render a website + system(command = 'cp -r ${TOOL_INSTALL_DIR}/*.Rmd ${REPORT_FILES_PATH}') + system(command = 'cp -r ${TOOL_INSTALL_DIR}/_site.yml ${REPORT_FILES_PATH}') + render_site(input = Sys.getenv('REPORT_FILES_PATH')) +} else { + # render a single html + system(command = 'cp -r ${TOOL_INSTALL_DIR}/rmarkdown_report.Rmd ${REPORT_FILES_PATH}') + # add a few lines to 'rmarkdown_report.Rmd' to generate file tree outputs + jstree_lines = ' +## Outputs + +```{r, echo=FALSE} +tags$div(id="jstree", file_tree(Sys.getenv(\'REPORT_FILES_PATH\'))) +```' + write( + x = jstree_lines, + append = TRUE, + file = paste0(Sys.getenv('REPORT_FILES_PATH'), '/rmarkdown_report.Rmd') + ) + render(input = paste0(Sys.getenv('REPORT_FILES_PATH'), '/rmarkdown_report.Rmd')) +} +#=============================================================== + + +#============== expose outputs to galaxy history =============== +system(command = 'sh ${TOOL_INSTALL_DIR}/expose-outputs-to-galaxy-history.sh') +#=============================================================== + + +##--------end of code rendering .Rmd templates---------------- +sink() +##=========== End of sinking output============================= diff -r ec906e72784d -r ce76fb2791e5 shell-script-template.sh --- a/shell-script-template.sh Thu Apr 26 12:24:33 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,26 +0,0 @@ -# SHELL_SCRIPT file name -SHELL_SCRIPT='htseq-count.sh' - -# run SHELL_SCRIPT within tool outputs directory -cd ${REPORT_FILES_PATH} - -# build job-script.sh -cat >${SHELL_SCRIPT} < htseq-counts-raw.txt - - grep -v '__no_feature\|__ambiguous\|__too_low_aQual\|__not_aligned\|__alignment_not_unique' htseq-counts-raw.txt > counts.txt - -EOF - -# run SHELL_SCRIPT -sh ${SHELL_SCRIPT} \ No newline at end of file